Skip to content

Commit

Permalink
Add generated code samples. (#9153)
Browse files Browse the repository at this point in the history
  • Loading branch information
Rebecca Taylor authored and telpirion committed Mar 13, 2023
1 parent 7ecd999 commit e6294c2
Show file tree
Hide file tree
Showing 47 changed files with 2,202 additions and 0 deletions.
Binary file added speech/resources/brooklyn_bridge.flac
Binary file not shown.
Binary file added speech/resources/brooklyn_bridge.mp3
Binary file not shown.
Binary file added speech/resources/brooklyn_bridge.raw
Binary file not shown.
Binary file added speech/resources/brooklyn_bridge.wav
Binary file not shown.
Binary file added speech/resources/commercial_mono.wav
Binary file not shown.
Binary file added speech/resources/hello.raw
Binary file not shown.
Binary file added speech/resources/hello.wav
Binary file not shown.
Binary file added speech/resources/multi.flac
Binary file not shown.
Binary file added speech/resources/multi.wav
Binary file not shown.
90 changes: 90 additions & 0 deletions speech/v1/speech_transcribe_async.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
#
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "speech_transcribe_async")

# To install the latest published package dependency, execute the following:
# pip install google-cloud-speech

# sample-metadata
# title: Transcribe Audio File using Long Running Operation (Local File) (LRO)
# description: Transcribe a long audio file using asynchronous speech recognition
# usage: python3 samples/v1/speech_transcribe_async.py [--local_file_path "resources/brooklyn_bridge.raw"]

# [START speech_transcribe_async]
from google.cloud import speech_v1
from google.cloud.speech_v1 import enums
import io


def sample_long_running_recognize(local_file_path):
    """Transcribe a long local audio file using asynchronous recognition.

    Reads the whole file into memory, submits it as a long-running
    recognition operation, blocks until the operation finishes, and prints
    the top transcript of every returned result.

    Args:
        local_file_path: Path to local audio file, e.g. /path/audio.wav
    """
    # local_file_path = 'resources/brooklyn_bridge.raw'

    speech_client = speech_v1.SpeechClient()

    recognition_config = {
        # The language of the supplied audio
        "language_code": "en-US",
        # Sample rate in Hertz of the audio data sent
        "sample_rate_hertz": 16000,
        # Encoding of audio data sent. This sample sets this explicitly.
        # This field is optional for FLAC and WAV audio formats.
        "encoding": enums.RecognitionConfig.AudioEncoding.LINEAR16,
    }

    # The audio bytes are sent inline with the request.
    with io.open(local_file_path, "rb") as audio_file:
        recognition_audio = {"content": audio_file.read()}

    operation = speech_client.long_running_recognize(
        recognition_config, recognition_audio
    )

    print(u"Waiting for operation to complete...")
    response = operation.result()

    for result in response.results:
        # First alternative is the most probable result
        best_guess = result.alternatives[0]
        print(u"Transcript: {}".format(best_guess.transcript))


# [END speech_transcribe_async]


def main():
    """Parse --local_file_path from the command line and run the sample."""
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--local_file_path",
        type=str,
        default="resources/brooklyn_bridge.raw",
    )
    cli_args = arg_parser.parse_args()

    sample_long_running_recognize(cli_args.local_file_path)


# Run the sample only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
91 changes: 91 additions & 0 deletions speech/v1/speech_transcribe_async_gcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
#
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "speech_transcribe_async_gcs")

# To install the latest published package dependency, execute the following:
# pip install google-cloud-speech

# sample-metadata
# title: Transcribe Audio File using Long Running Operation (Cloud Storage) (LRO)
# description: Transcribe long audio file from Cloud Storage using asynchronous speech
# recognition
# usage: python3 samples/v1/speech_transcribe_async_gcs.py [--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.raw"]

# [START speech_transcribe_async_gcs]
from google.cloud import speech_v1
from google.cloud.speech_v1 import enums


def sample_long_running_recognize(storage_uri):
    """Transcribe a long audio file stored in Cloud Storage asynchronously.

    Submits the file's URI as a long-running recognition operation, blocks
    until the operation finishes, and prints the top transcript of every
    returned result.

    Args:
        storage_uri: URI for audio file in Cloud Storage,
            e.g. gs://[BUCKET]/[FILE]
    """
    # storage_uri = 'gs://cloud-samples-data/speech/brooklyn_bridge.raw'

    speech_client = speech_v1.SpeechClient()

    recognition_config = {
        # Sample rate in Hertz of the audio data sent
        "sample_rate_hertz": 16000,
        # The language of the supplied audio
        "language_code": "en-US",
        # Encoding of audio data sent. This sample sets this explicitly.
        # This field is optional for FLAC and WAV audio formats.
        "encoding": enums.RecognitionConfig.AudioEncoding.LINEAR16,
    }
    # The audio is referenced by URI rather than uploaded inline.
    recognition_audio = {"uri": storage_uri}

    operation = speech_client.long_running_recognize(
        recognition_config, recognition_audio
    )

    print(u"Waiting for operation to complete...")
    response = operation.result()

    for result in response.results:
        # First alternative is the most probable result
        best_guess = result.alternatives[0]
        print(u"Transcript: {}".format(best_guess.transcript))


# [END speech_transcribe_async_gcs]


def main():
    """Parse --storage_uri from the command line and run the sample."""
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--storage_uri",
        type=str,
        default="gs://cloud-samples-data/speech/brooklyn_bridge.raw",
    )
    cli_args = arg_parser.parse_args()

    sample_long_running_recognize(cli_args.storage_uri)


# Run the sample only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
98 changes: 98 additions & 0 deletions speech/v1/speech_transcribe_async_word_time_offsets_gcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
#
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "speech_transcribe_async_word_time_offsets_gcs")

# To install the latest published package dependency, execute the following:
# pip install google-cloud-speech

# sample-metadata
# title: Getting word timestamps (Cloud Storage) (LRO)
# description: Print start and end time of each word spoken in audio file from Cloud Storage
# usage: python3 samples/v1/speech_transcribe_async_word_time_offsets_gcs.py [--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.flac"]

# [START speech_transcribe_async_word_time_offsets_gcs]
from google.cloud import speech_v1


def sample_long_running_recognize(storage_uri):
    """Print start and end time of each word spoken in audio file from Cloud Storage.

    Submits the file's URI as a long-running recognition operation with word
    time offsets enabled, blocks until the operation finishes, then prints
    the transcript of the first result followed by the start and end time of
    each of its words. If the response contains no results, a short notice
    is printed instead.

    Args:
        storage_uri: URI for audio file in Cloud Storage,
            e.g. gs://[BUCKET]/[FILE]
    """

    client = speech_v1.SpeechClient()

    # storage_uri = 'gs://cloud-samples-data/speech/brooklyn_bridge.flac'

    # When enabled, the first result returned by the API will include a list
    # of words and the start and end time offsets (timestamps) for those words.
    enable_word_time_offsets = True

    # The language of the supplied audio
    language_code = "en-US"
    config = {
        "enable_word_time_offsets": enable_word_time_offsets,
        "language_code": language_code,
    }
    audio = {"uri": storage_uri}

    operation = client.long_running_recognize(config, audio)

    print(u"Waiting for operation to complete...")
    response = operation.result()

    # Guard against an empty result list: indexing [0] below would otherwise
    # raise IndexError (presumably possible when no speech is recognized).
    if not response.results:
        print(u"No transcription results were returned.")
        return

    # The first result includes start and end time word offsets
    result = response.results[0]
    # First alternative is the most probable result
    alternative = result.alternatives[0]
    print(u"Transcript: {}".format(alternative.transcript))
    # Print the start and end time of each word
    for word in alternative.words:
        print(u"Word: {}".format(word.word))
        print(
            u"Start time: {} seconds {} nanos".format(
                word.start_time.seconds, word.start_time.nanos
            )
        )
        print(
            u"End time: {} seconds {} nanos".format(
                word.end_time.seconds, word.end_time.nanos
            )
        )


# [END speech_transcribe_async_word_time_offsets_gcs]


def main():
    """Parse --storage_uri from the command line and run the sample."""
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--storage_uri",
        type=str,
        default="gs://cloud-samples-data/speech/brooklyn_bridge.flac",
    )
    cli_args = arg_parser.parse_args()

    sample_long_running_recognize(cli_args.storage_uri)


# Run the sample only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
85 changes: 85 additions & 0 deletions speech/v1/speech_transcribe_enhanced_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
#
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_enhanced_model")

# To install the latest published package dependency, execute the following:
# pip install google-cloud-speech

# sample-metadata
# title: Using Enhanced Models (Local File)
# description: Transcribe a short audio file using an enhanced model
# usage: python3 samples/v1/speech_transcribe_enhanced_model.py [--local_file_path "resources/hello.wav"]

# [START speech_transcribe_enhanced_model]
from google.cloud import speech_v1
import io


def sample_recognize(local_file_path):
    """Transcribe a short audio file using an enhanced model.

    Reads the whole file into memory, sends it in a single synchronous
    recognize request, and prints the top transcript of every returned
    result.

    Args:
        local_file_path: Path to local audio file, e.g. /path/audio.wav
    """

    client = speech_v1.SpeechClient()

    # local_file_path = 'resources/hello.wav'

    # The enhanced model to use, e.g. phone_call
    # Currently phone_call is the only model available as an enhanced model.
    model = "phone_call"

    # Use an enhanced model for speech recognition (when set to true).
    # Project must be eligible for requesting enhanced models.
    # Enhanced speech models require that you opt-in to data logging.
    use_enhanced = True

    # The language of the supplied audio
    language_code = "en-US"
    config = {
        "model": model,
        "use_enhanced": use_enhanced,
        "language_code": language_code,
    }
    with io.open(local_file_path, "rb") as f:
        content = f.read()
    audio = {"content": content}

    # Pass config/audio by keyword: newer client releases take a request
    # object as the first positional parameter and make these keyword-only,
    # while older releases accept the same names as keywords.
    response = client.recognize(config=config, audio=audio)
    for result in response.results:
        # First alternative is the most probable result
        alternative = result.alternatives[0]
        print(u"Transcript: {}".format(alternative.transcript))


# [END speech_transcribe_enhanced_model]


def main():
    """Parse --local_file_path from the command line and run the sample."""
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--local_file_path",
        type=str,
        default="resources/hello.wav",
    )
    cli_args = arg_parser.parse_args()

    sample_recognize(cli_args.local_file_path)


# Run the sample only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Loading

0 comments on commit e6294c2

Please sign in to comment.