Add generated code samples. (#9153)

GoogleCloudPlatform · Mar 13, 2023 · e6294c2 · e6294c2
1 parent 7ecd999
commit e6294c2
Show file tree

Hide file tree

Showing 47 changed files with 2,202 additions and 0 deletions.
diff --git a/speech/resources/brooklyn_bridge.flac b/speech/resources/brooklyn_bridge.flac
diff --git a/speech/resources/brooklyn_bridge.mp3 b/speech/resources/brooklyn_bridge.mp3
diff --git a/speech/resources/brooklyn_bridge.raw b/speech/resources/brooklyn_bridge.raw
diff --git a/speech/resources/brooklyn_bridge.wav b/speech/resources/brooklyn_bridge.wav
diff --git a/speech/resources/commercial_mono.wav b/speech/resources/commercial_mono.wav
diff --git a/speech/resources/hello.raw b/speech/resources/hello.raw
diff --git a/speech/resources/hello.wav b/speech/resources/hello.wav
diff --git a/speech/resources/multi.flac b/speech/resources/multi.flac
diff --git a/speech/resources/multi.wav b/speech/resources/multi.wav
diff --git a/speech/v1/speech_transcribe_async.py b/speech/v1/speech_transcribe_async.py
@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# DO NOT EDIT! This is a generated sample ("LongRunningPromise",  "speech_transcribe_async")
+
+# To install the latest published package dependency, execute the following:
+#   pip install google-cloud-speech
+
+# sample-metadata
+#   title: Transcribe Audio File using Long Running Operation (Local File) (LRO)
+#   description: Transcribe a long audio file using asynchronous speech recognition
+#   usage: python3 samples/v1/speech_transcribe_async.py [--local_file_path "resources/brooklyn_bridge.raw"]
+
+# [START speech_transcribe_async]
+from google.cloud import speech_v1
+from google.cloud.speech_v1 import enums
+import io
+
+
+def sample_long_running_recognize(local_file_path):
+    """
+    Transcribe a local audio file using asynchronous (long-running) recognition.
+
+    The file is read fully into memory and submitted as inline content; the
+    call then blocks on the returned operation and prints the first
+    alternative's transcript for every result in the response.
+
+    Args:
+      local_file_path: Path to local audio file,
+        e.g. 'resources/brooklyn_bridge.raw'
+    """
+
+    speech_client = speech_v1.SpeechClient()
+
+    recognition_config = {
+        # The language of the supplied audio
+        "language_code": "en-US",
+        # Sample rate in Hertz of the audio data sent
+        "sample_rate_hertz": 16000,
+        # Encoding of audio data sent. This sample sets this explicitly.
+        # This field is optional for FLAC and WAV audio formats.
+        "encoding": enums.RecognitionConfig.AudioEncoding.LINEAR16,
+    }
+
+    # Inline the raw bytes of the audio file into the request payload
+    with io.open(local_file_path, "rb") as audio_file:
+        recognition_audio = {"content": audio_file.read()}
+
+    pending = speech_client.long_running_recognize(
+        recognition_config, recognition_audio
+    )
+    print(u"Waiting for operation to complete...")
+
+    # result() blocks until the long-running operation has finished
+    for item in pending.result().results:
+        # First alternative is the most probable result
+        best = item.alternatives[0]
+        print(u"Transcript: {}".format(best.transcript))
+
+
+# [END speech_transcribe_async]
+
+
+def main():
+    """Read --local_file_path from the command line and run the sample."""
+    import argparse
+
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        "--local_file_path", type=str, default="resources/brooklyn_bridge.raw"
+    )
+    options = cli.parse_args()
+    sample_long_running_recognize(options.local_file_path)
+
+
+# Run the sample only when executed as a script, not when imported
+if __name__ == "__main__":
+    main()
diff --git a/speech/v1/speech_transcribe_async_gcs.py b/speech/v1/speech_transcribe_async_gcs.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# DO NOT EDIT! This is a generated sample ("LongRunningPromise",  "speech_transcribe_async_gcs")
+
+# To install the latest published package dependency, execute the following:
+#   pip install google-cloud-speech
+
+# sample-metadata
+#   title: Transcribe Audio File using Long Running Operation (Cloud Storage) (LRO)
+#   description: Transcribe long audio file from Cloud Storage using asynchronous speech
+#     recognition
+#   usage: python3 samples/v1/speech_transcribe_async_gcs.py [--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.raw"]
+
+# [START speech_transcribe_async_gcs]
+from google.cloud import speech_v1
+from google.cloud.speech_v1 import enums
+
+
+def sample_long_running_recognize(storage_uri):
+    """
+    Transcribe an audio file stored in Cloud Storage using asynchronous
+    (long-running) speech recognition.
+
+    The audio is referenced by URI rather than uploaded; the call blocks on
+    the returned operation and prints the first alternative's transcript for
+    every result in the response.
+
+    Args:
+      storage_uri: URI for audio file in Cloud Storage,
+        e.g. 'gs://cloud-samples-data/speech/brooklyn_bridge.raw'
+    """
+
+    speech_client = speech_v1.SpeechClient()
+
+    recognition_config = {
+        # Sample rate in Hertz of the audio data sent
+        "sample_rate_hertz": 16000,
+        # The language of the supplied audio
+        "language_code": "en-US",
+        # Encoding of audio data sent. This sample sets this explicitly.
+        # This field is optional for FLAC and WAV audio formats.
+        "encoding": enums.RecognitionConfig.AudioEncoding.LINEAR16,
+    }
+    recognition_audio = {"uri": storage_uri}
+
+    pending = speech_client.long_running_recognize(
+        recognition_config, recognition_audio
+    )
+    print(u"Waiting for operation to complete...")
+
+    # result() blocks until the long-running operation has finished
+    for item in pending.result().results:
+        # First alternative is the most probable result
+        best = item.alternatives[0]
+        print(u"Transcript: {}".format(best.transcript))
+
+
+# [END speech_transcribe_async_gcs]
+
+
+def main():
+    """Read --storage_uri from the command line and run the sample."""
+    import argparse
+
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        "--storage_uri",
+        type=str,
+        default="gs://cloud-samples-data/speech/brooklyn_bridge.raw",
+    )
+    options = cli.parse_args()
+    sample_long_running_recognize(options.storage_uri)
+
+
+# Run the sample only when executed as a script, not when imported
+if __name__ == "__main__":
+    main()
diff --git a/speech/v1/speech_transcribe_async_word_time_offsets_gcs.py b/speech/v1/speech_transcribe_async_word_time_offsets_gcs.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# DO NOT EDIT! This is a generated sample ("LongRunningPromise",  "speech_transcribe_async_word_time_offsets_gcs")
+
+# To install the latest published package dependency, execute the following:
+#   pip install google-cloud-speech
+
+# sample-metadata
+#   title: Getting word timestamps (Cloud Storage) (LRO)
+#   description: Print start and end time of each word spoken in audio file from Cloud Storage
+#   usage: python3 samples/v1/speech_transcribe_async_word_time_offsets_gcs.py [--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.flac"]
+
+# [START speech_transcribe_async_word_time_offsets_gcs]
+from google.cloud import speech_v1
+
+
+def sample_long_running_recognize(storage_uri):
+    """
+    Print start and end time of each word spoken in audio file from Cloud Storage
+
+    Submits an asynchronous (long-running) recognition request with word time
+    offsets enabled, blocks until the operation finishes, then prints the
+    transcript of the first result followed by the start/end offset of every
+    word in it. If the response contains no results, a short notice is
+    printed instead.
+
+    Args:
+      storage_uri: URI for audio file in Cloud Storage,
+        e.g. 'gs://cloud-samples-data/speech/brooklyn_bridge.flac'
+    """
+
+    client = speech_v1.SpeechClient()
+
+    # When enabled, the first result returned by the API will include a list
+    # of words and the start and end time offsets (timestamps) for those words.
+    enable_word_time_offsets = True
+
+    # The language of the supplied audio
+    language_code = "en-US"
+    config = {
+        "enable_word_time_offsets": enable_word_time_offsets,
+        "language_code": language_code,
+    }
+    audio = {"uri": storage_uri}
+
+    # Pass config/audio by keyword so the call does not depend on the
+    # positional parameter order of the client method.
+    operation = client.long_running_recognize(config=config, audio=audio)
+
+    print(u"Waiting for operation to complete...")
+    response = operation.result()
+
+    # An empty result list would make results[0] raise IndexError; this can
+    # presumably happen when no speech is recognized in the audio.
+    if not response.results:
+        print(u"No transcription results returned.")
+        return
+
+    # The first result includes start and end time word offsets
+    result = response.results[0]
+    # First alternative is the most probable result
+    alternative = result.alternatives[0]
+    print(u"Transcript: {}".format(alternative.transcript))
+    # Print the start and end time of each word
+    for word in alternative.words:
+        print(u"Word: {}".format(word.word))
+        print(
+            u"Start time: {} seconds {} nanos".format(
+                word.start_time.seconds, word.start_time.nanos
+            )
+        )
+        print(
+            u"End time: {} seconds {} nanos".format(
+                word.end_time.seconds, word.end_time.nanos
+            )
+        )
+
+
+# [END speech_transcribe_async_word_time_offsets_gcs]
+
+
+def main():
+    """Read --storage_uri from the command line and run the sample."""
+    import argparse
+
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        "--storage_uri",
+        type=str,
+        default="gs://cloud-samples-data/speech/brooklyn_bridge.flac",
+    )
+    options = cli.parse_args()
+    sample_long_running_recognize(options.storage_uri)
+
+
+# Run the sample only when executed as a script, not when imported
+if __name__ == "__main__":
+    main()
diff --git a/speech/v1/speech_transcribe_enhanced_model.py b/speech/v1/speech_transcribe_enhanced_model.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# DO NOT EDIT! This is a generated sample ("Request",  "speech_transcribe_enhanced_model")
+
+# To install the latest published package dependency, execute the following:
+#   pip install google-cloud-speech
+
+# sample-metadata
+#   title: Using Enhanced Models (Local File)
+#   description: Transcribe a short audio file using an enhanced model
+#   usage: python3 samples/v1/speech_transcribe_enhanced_model.py [--local_file_path "resources/hello.wav"]
+
+# [START speech_transcribe_enhanced_model]
+from google.cloud import speech_v1
+import io
+
+
+def sample_recognize(local_file_path):
+    """
+    Transcribe a short audio file using an enhanced model
+
+    Reads the file fully into memory, sends it as inline content in a single
+    recognize request, and prints the first alternative's transcript for
+    every result in the response.
+
+    Args:
+      local_file_path: Path to local audio file, e.g. 'resources/hello.wav'
+    """
+
+    client = speech_v1.SpeechClient()
+
+    # The enhanced model to use, e.g. phone_call
+    # (phone_call was the only enhanced model when this sample was generated.)
+    model = "phone_call"
+
+    # Use an enhanced model for speech recognition (when set to true).
+    # Project must be eligible for requesting enhanced models.
+    # Enhanced speech models require that you opt-in to data logging.
+    use_enhanced = True
+
+    # The language of the supplied audio
+    language_code = "en-US"
+    config = {
+        "model": model,
+        "use_enhanced": use_enhanced,
+        "language_code": language_code,
+    }
+    with io.open(local_file_path, "rb") as f:
+        content = f.read()
+    audio = {"content": content}
+
+    # Pass config/audio by keyword: newer google-cloud-speech releases no
+    # longer accept them positionally, while older ones accept both forms.
+    response = client.recognize(config=config, audio=audio)
+    for result in response.results:
+        # First alternative is the most probable result
+        alternative = result.alternatives[0]
+        print(u"Transcript: {}".format(alternative.transcript))
+
+
+# [END speech_transcribe_enhanced_model]
+
+
+def main():
+    """Read --local_file_path from the command line and run the sample."""
+    import argparse
+
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--local_file_path", type=str, default="resources/hello.wav")
+    options = cli.parse_args()
+    sample_recognize(options.local_file_path)
+
+
+# Run the sample only when executed as a script, not when imported
+if __name__ == "__main__":
+    main()