-
Notifications
You must be signed in to change notification settings - Fork 6.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
47 changed files
with
2,202 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Copyright 2019 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "speech_transcribe_async") | ||
|
||
# To install the latest published package dependency, execute the following: | ||
# pip install google-cloud-speech | ||
|
||
# sample-metadata | ||
# title: Transcribe Audio File using Long Running Operation (Local File) (LRO) | ||
# description: Transcribe a long audio file using asynchronous speech recognition | ||
# usage: python3 samples/v1/speech_transcribe_async.py [--local_file_path "resources/brooklyn_bridge.raw"] | ||
|
||
# [START speech_transcribe_async] | ||
from google.cloud import speech_v1 | ||
from google.cloud.speech_v1 import enums | ||
import io | ||
|
||
|
||
def sample_long_running_recognize(local_file_path):
    """
    Run asynchronous (long-running) speech recognition on a local audio file
    and print the transcript of every result in the response.

    Args:
      local_file_path Path to local audio file, e.g. /path/audio.wav
          (sample value: 'resources/brooklyn_bridge.raw')
    """

    speech_client = speech_v1.SpeechClient()

    recognition_config = {
        # The language of the supplied audio
        "language_code": "en-US",
        # Sample rate in Hertz of the audio data sent
        "sample_rate_hertz": 16000,
        # Encoding of audio data sent. This sample sets this explicitly.
        # This field is optional for FLAC and WAV audio formats.
        "encoding": enums.RecognitionConfig.AudioEncoding.LINEAR16,
    }

    # The whole file is read into memory and passed as the audio content.
    with io.open(local_file_path, "rb") as audio_file:
        recognition_audio = {"content": audio_file.read()}

    pending = speech_client.long_running_recognize(
        recognition_config, recognition_audio
    )

    print(u"Waiting for operation to complete...")
    finished = pending.result()

    for entry in finished.results:
        # First alternative is the most probable result
        print(u"Transcript: {}".format(entry.alternatives[0].transcript))
|
||
|
||
# [END speech_transcribe_async] | ||
|
||
|
||
def main():
    """Read --local_file_path from the command line and run the sample."""
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--local_file_path",
        type=str,
        default="resources/brooklyn_bridge.raw",
    )
    options = arg_parser.parse_args()

    sample_long_running_recognize(options.local_file_path)


# Run the sample only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Copyright 2019 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "speech_transcribe_async_gcs") | ||
|
||
# To install the latest published package dependency, execute the following: | ||
# pip install google-cloud-speech | ||
|
||
# sample-metadata | ||
#   title: Transcribe Audio File using Long Running Operation (Cloud Storage) (LRO) | ||
# description: Transcribe long audio file from Cloud Storage using asynchronous speech | ||
# recognition | ||
# usage: python3 samples/v1/speech_transcribe_async_gcs.py [--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.raw"] | ||
|
||
# [START speech_transcribe_async_gcs] | ||
from google.cloud import speech_v1 | ||
from google.cloud.speech_v1 import enums | ||
|
||
|
||
def sample_long_running_recognize(storage_uri):
    """
    Run asynchronous (long-running) speech recognition on an audio file
    referenced by a Cloud Storage URI and print the transcript of every
    result in the response.

    Args:
      storage_uri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
          (sample value: 'gs://cloud-samples-data/speech/brooklyn_bridge.raw')
    """

    speech_client = speech_v1.SpeechClient()

    recognition_config = {
        # Sample rate in Hertz of the audio data sent
        "sample_rate_hertz": 16000,
        # The language of the supplied audio
        "language_code": "en-US",
        # Encoding of audio data sent. This sample sets this explicitly.
        # This field is optional for FLAC and WAV audio formats.
        "encoding": enums.RecognitionConfig.AudioEncoding.LINEAR16,
    }
    # The audio is passed by URI rather than as inline content.
    recognition_audio = {"uri": storage_uri}

    pending = speech_client.long_running_recognize(
        recognition_config, recognition_audio
    )

    print(u"Waiting for operation to complete...")
    finished = pending.result()

    for entry in finished.results:
        # First alternative is the most probable result
        print(u"Transcript: {}".format(entry.alternatives[0].transcript))
|
||
|
||
# [END speech_transcribe_async_gcs] | ||
|
||
|
||
def main():
    """Read --storage_uri from the command line and run the sample."""
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--storage_uri",
        type=str,
        default="gs://cloud-samples-data/speech/brooklyn_bridge.raw",
    )
    options = arg_parser.parse_args()

    sample_long_running_recognize(options.storage_uri)


# Run the sample only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
98 changes: 98 additions & 0 deletions
98
speech/v1/speech_transcribe_async_word_time_offsets_gcs.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Copyright 2019 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "speech_transcribe_async_word_time_offsets_gcs") | ||
|
||
# To install the latest published package dependency, execute the following: | ||
# pip install google-cloud-speech | ||
|
||
# sample-metadata | ||
# title: Getting word timestamps (Cloud Storage) (LRO) | ||
# description: Print start and end time of each word spoken in audio file from Cloud Storage | ||
# usage: python3 samples/v1/speech_transcribe_async_word_time_offsets_gcs.py [--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.flac"] | ||
|
||
# [START speech_transcribe_async_word_time_offsets_gcs] | ||
from google.cloud import speech_v1 | ||
|
||
|
||
def sample_long_running_recognize(storage_uri):
    """
    Print start and end time of each word spoken in audio file from Cloud Storage

    If the response contains no results, nothing further is printed after the
    waiting message (instead of failing with an IndexError).

    Args:
      storage_uri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
    """

    client = speech_v1.SpeechClient()

    # storage_uri = 'gs://cloud-samples-data/speech/brooklyn_bridge.flac'

    # When enabled, the first result returned by the API will include a list
    # of words and the start and end time offsets (timestamps) for those words.
    enable_word_time_offsets = True

    # The language of the supplied audio
    language_code = "en-US"
    config = {
        "enable_word_time_offsets": enable_word_time_offsets,
        "language_code": language_code,
    }
    audio = {"uri": storage_uri}

    operation = client.long_running_recognize(config, audio)

    print(u"Waiting for operation to complete...")
    response = operation.result()

    # The results list can be empty; indexing it unconditionally would raise
    # IndexError. Printing nothing matches the sibling samples, which loop
    # over the results.
    if not response.results:
        return

    # The first result includes start and end time word offsets
    result = response.results[0]
    # First alternative is the most probable result
    alternative = result.alternatives[0]
    print(u"Transcript: {}".format(alternative.transcript))
    # Print the start and end time of each word
    for word in alternative.words:
        print(u"Word: {}".format(word.word))
        print(
            u"Start time: {} seconds {} nanos".format(
                word.start_time.seconds, word.start_time.nanos
            )
        )
        print(
            u"End time: {} seconds {} nanos".format(
                word.end_time.seconds, word.end_time.nanos
            )
        )
|
||
|
||
# [END speech_transcribe_async_word_time_offsets_gcs] | ||
|
||
|
||
def main():
    """Read --storage_uri from the command line and run the sample."""
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--storage_uri",
        type=str,
        default="gs://cloud-samples-data/speech/brooklyn_bridge.flac",
    )
    options = arg_parser.parse_args()

    sample_long_running_recognize(options.storage_uri)


# Run the sample only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Copyright 2019 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_enhanced_model") | ||
|
||
# To install the latest published package dependency, execute the following: | ||
# pip install google-cloud-speech | ||
|
||
# sample-metadata | ||
# title: Using Enhanced Models (Local File) | ||
# description: Transcribe a short audio file using an enhanced model | ||
# usage: python3 samples/v1/speech_transcribe_enhanced_model.py [--local_file_path "resources/hello.wav"] | ||
|
||
# [START speech_transcribe_enhanced_model] | ||
from google.cloud import speech_v1 | ||
import io | ||
|
||
|
||
def sample_recognize(local_file_path):
    """
    Run synchronous speech recognition with an enhanced model on a local
    audio file and print the transcript of every result in the response.

    Args:
      local_file_path Path to local audio file, e.g. /path/audio.wav
          (sample value: 'resources/hello.wav')
    """

    speech_client = speech_v1.SpeechClient()

    recognition_config = {
        # The enhanced model to use, e.g. phone_call
        # Currently phone_call is the only model available as an enhanced model.
        "model": "phone_call",
        # Use an enhanced model for speech recognition (when set to true).
        # Project must be eligible for requesting enhanced models.
        # Enhanced speech models require that you opt-in to data logging.
        "use_enhanced": True,
        # The language of the supplied audio
        "language_code": "en-US",
    }

    # The whole file is read into memory and passed as the audio content.
    with io.open(local_file_path, "rb") as audio_file:
        recognition_audio = {"content": audio_file.read()}

    reply = speech_client.recognize(recognition_config, recognition_audio)
    for entry in reply.results:
        # First alternative is the most probable result
        print(u"Transcript: {}".format(entry.alternatives[0].transcript))
|
||
|
||
# [END speech_transcribe_enhanced_model] | ||
|
||
|
||
def main():
    """Read --local_file_path from the command line and run the sample."""
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--local_file_path",
        type=str,
        default="resources/hello.wav",
    )
    options = arg_parser.parse_args()

    sample_recognize(options.local_file_path)


# Run the sample only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Oops, something went wrong.