From 4e1fbe767c796ea1b999c54ce73c2855eef38815 Mon Sep 17 00:00:00 2001 From: Franklin Nunez <69214580+b-loved-dreamer@users.noreply.github.com> Date: Tue, 15 Jun 2021 11:02:57 -0700 Subject: [PATCH] samples: adds speech export to gcs sample (#176) --- speech/microphone/noxfile.py | 33 +++++----- speech/snippets/noxfile.py | 41 ++++++------ speech/snippets/requirements.txt | 1 + .../snippets/speech_model_adaptation_beta.py | 4 +- speech/snippets/speech_to_storage_beta.py | 59 +++++++++++++++++ .../snippets/speech_to_storage_beta_test.py | 66 +++++++++++++++++++ 6 files changed, 166 insertions(+), 38 deletions(-) create mode 100644 speech/snippets/speech_to_storage_beta.py create mode 100644 speech/snippets/speech_to_storage_beta_test.py diff --git a/speech/microphone/noxfile.py b/speech/microphone/noxfile.py index 5ff9e1db5808..3a7956b79873 100644 --- a/speech/microphone/noxfile.py +++ b/speech/microphone/noxfile.py @@ -38,17 +38,15 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - 'ignored_versions': ["2.7"], - + "ignored_versions": ["2.7"], # Old samples are opted out of enforcing Python type hints # All new samples should feature them - 'enforce_type_hints': False, - + "enforce_type_hints": False, # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. You can also use your own string # to use your own Cloud project. - 'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT', + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', # If you need to use a specific version of pip, # change pip_version_override to the string representation @@ -56,13 +54,13 @@ "pip_version_override": None, # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. 
- 'envs': {}, + "envs": {}, } try: # Ensure we can import noxfile_config in the project's directory. - sys.path.append('.') + sys.path.append(".") from noxfile_config import TEST_CONFIG_OVERRIDE except ImportError as e: print("No user noxfile_config found: detail: {}".format(e)) @@ -77,12 +75,12 @@ def get_pytest_env_vars() -> Dict[str, str]: ret = {} # Override the GCLOUD_PROJECT and the alias. - env_key = TEST_CONFIG['gcloud_project_env'] + env_key = TEST_CONFIG["gcloud_project_env"] # This should error out if not set. - ret['GOOGLE_CLOUD_PROJECT'] = os.environ[env_key] + ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key] # Apply user supplied envs. - ret.update(TEST_CONFIG['envs']) + ret.update(TEST_CONFIG["envs"]) return ret @@ -91,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. -IGNORED_VERSIONS = TEST_CONFIG['ignored_versions'] +IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) @@ -140,7 +138,7 @@ def _determine_local_import_names(start_dir: str) -> List[str]: @nox.session def lint(session: nox.sessions.Session) -> None: - if not TEST_CONFIG['enforce_type_hints']: + if not TEST_CONFIG["enforce_type_hints"]: session.install("flake8", "flake8-import-order") else: session.install("flake8", "flake8-import-order", "flake8-annotations") @@ -149,9 +147,11 @@ def lint(session: nox.sessions.Session) -> None: args = FLAKE8_COMMON_ARGS + [ "--application-import-names", ",".join(local_names), - "." 
+ ".", ] session.run("flake8", *args) + + # # Black # @@ -164,6 +164,7 @@ def blacken(session: nox.sessions.Session) -> None: session.run("black", *python_files) + # # Sample Tests # @@ -212,9 +213,9 @@ def py(session: nox.sessions.Session) -> None: if session.python in TESTED_VERSIONS: _session_tests(session) else: - session.skip("SKIPPED: {} tests are disabled for this sample.".format( - session.python - )) + session.skip( + "SKIPPED: {} tests are disabled for this sample.".format(session.python) + ) # diff --git a/speech/snippets/noxfile.py b/speech/snippets/noxfile.py index 5ff9e1db5808..b3c8658a3a72 100644 --- a/speech/snippets/noxfile.py +++ b/speech/snippets/noxfile.py @@ -38,17 +38,15 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - 'ignored_versions': ["2.7"], - + "ignored_versions": ["2.7"], # Old samples are opted out of enforcing Python type hints # All new samples should feature them - 'enforce_type_hints': False, - + "enforce_type_hints": False, # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. You can also use your own string # to use your own Cloud project. - 'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT', + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', # If you need to use a specific version of pip, # change pip_version_override to the string representation @@ -56,13 +54,13 @@ "pip_version_override": None, # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. - 'envs': {}, + "envs": {}, } try: # Ensure we can import noxfile_config in the project's directory. 
- sys.path.append('.') + sys.path.append(".") from noxfile_config import TEST_CONFIG_OVERRIDE except ImportError as e: print("No user noxfile_config found: detail: {}".format(e)) @@ -77,12 +75,12 @@ def get_pytest_env_vars() -> Dict[str, str]: ret = {} # Override the GCLOUD_PROJECT and the alias. - env_key = TEST_CONFIG['gcloud_project_env'] + env_key = TEST_CONFIG["gcloud_project_env"] # This should error out if not set. - ret['GOOGLE_CLOUD_PROJECT'] = os.environ[env_key] + ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key] # Apply user supplied envs. - ret.update(TEST_CONFIG['envs']) + ret.update(TEST_CONFIG["envs"]) return ret @@ -91,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. -IGNORED_VERSIONS = TEST_CONFIG['ignored_versions'] +IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) @@ -140,7 +138,7 @@ def _determine_local_import_names(start_dir: str) -> List[str]: @nox.session def lint(session: nox.sessions.Session) -> None: - if not TEST_CONFIG['enforce_type_hints']: + if not TEST_CONFIG["enforce_type_hints"]: session.install("flake8", "flake8-import-order") else: session.install("flake8", "flake8-import-order", "flake8-annotations") @@ -149,9 +147,11 @@ def lint(session: nox.sessions.Session) -> None: args = FLAKE8_COMMON_ARGS + [ "--application-import-names", ",".join(local_names), - "." 
+ ".", ] session.run("flake8", *args) + + # # Black # @@ -164,6 +164,7 @@ def blacken(session: nox.sessions.Session) -> None: session.run("black", *python_files) + # # Sample Tests # @@ -172,7 +173,9 @@ def blacken(session: nox.sessions.Session) -> None: PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] -def _session_tests(session: nox.sessions.Session, post_install: Callable = None) -> None: +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: if TEST_CONFIG["pip_version_override"]: pip_version = TEST_CONFIG["pip_version_override"] session.install(f"pip=={pip_version}") @@ -202,7 +205,7 @@ def _session_tests(session: nox.sessions.Session, post_install: Callable = None) # on travis where slow and flaky tests are excluded. # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html success_codes=[0, 5], - env=get_pytest_env_vars() + env=get_pytest_env_vars(), ) @@ -212,9 +215,9 @@ def py(session: nox.sessions.Session) -> None: if session.python in TESTED_VERSIONS: _session_tests(session) else: - session.skip("SKIPPED: {} tests are disabled for this sample.".format( - session.python - )) + session.skip( + "SKIPPED: {} tests are disabled for this sample.".format(session.python) + ) # @@ -223,7 +226,7 @@ def py(session: nox.sessions.Session) -> None: def _get_repo_root() -> Optional[str]: - """ Returns the root folder of the project. """ + """Returns the root folder of the project.""" # Get root of this repository. Assume we don't have directories nested deeper than 10 items. 
p = Path(os.getcwd()) for i in range(10): diff --git a/speech/snippets/requirements.txt b/speech/snippets/requirements.txt index c9627288f9f0..3f7351a6750f 100644 --- a/speech/snippets/requirements.txt +++ b/speech/snippets/requirements.txt @@ -1 +1,2 @@ google-cloud-speech==2.4.0 +google-cloud-storage==1.38.0 diff --git a/speech/snippets/speech_model_adaptation_beta.py b/speech/snippets/speech_model_adaptation_beta.py index 95f03c4ace4c..49fd4ad28a92 100644 --- a/speech/snippets/speech_model_adaptation_beta.py +++ b/speech/snippets/speech_model_adaptation_beta.py @@ -64,9 +64,7 @@ def transcribe_with_model_adaptation( # class and phrase set to send a transcription request with speech adaptation # Speech adaptation configuration - speech_adaptation = speech.SpeechAdaptation( - phrase_set_references=[phrase_set_name] - ) + speech_adaptation = speech.SpeechAdaptation(phrase_set_references=[phrase_set_name]) # speech configuration object config = speech.RecognitionConfig( diff --git a/speech/snippets/speech_to_storage_beta.py b/speech/snippets/speech_to_storage_beta.py new file mode 100644 index 000000000000..d6adcc8ae51b --- /dev/null +++ b/speech/snippets/speech_to_storage_beta.py @@ -0,0 +1,59 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+
+# [START speech_transcribe_with_speech_to_storage_beta]
+
+from google.cloud import speech_v1p1beta1 as speech
+
+
+def export_transcript_to_storage_beta(
+    input_storage_uri, output_storage_uri, encoding, sample_rate_hertz, language_code
+):
+    """Transcribe audio in Cloud Storage and export the transcript to a bucket.
+
+    input_storage_uri is the audio file (gs://[BUCKET]/[FILE]); output_storage_uri
+    receives the transcription. Returns the top transcript of the first result,
+    or "" when the response contains no results.
+    """
+    audio = speech.RecognitionAudio(uri=input_storage_uri)
+
+    # Pass in the URI of the Cloud Storage bucket to hold the transcription
+    output_config = speech.TranscriptOutputConfig(gcs_uri=output_storage_uri)
+
+    # Speech configuration object
+    config = speech.RecognitionConfig(
+        encoding=encoding,
+        sample_rate_hertz=sample_rate_hertz,
+        language_code=language_code,
+    )
+
+    # Compose the long-running request
+    request = speech.LongRunningRecognizeRequest(
+        audio=audio, config=config, output_config=output_config
+    )
+
+    # Create the speech client and start the long-running operation
+    speech_client = speech.SpeechClient()
+    operation = speech_client.long_running_recognize(request=request)
+    print("Waiting for operation to complete...")
+    response = operation.result(timeout=90)
+
+    # Each result covers a consecutive portion; alternatives[0] is the likeliest.
+    for result in response.results:
+        print("Transcript: {}".format(result.alternatives[0].transcript))
+        print("Confidence: {}".format(result.alternatives[0].confidence))
+
+    # [END speech_transcribe_with_speech_to_storage_beta]
+    return response.results[0].alternatives[0].transcript if response.results else ""
diff --git a/speech/snippets/speech_to_storage_beta_test.py b/speech/snippets/speech_to_storage_beta_test.py
new file mode 100644
index 000000000000..af90719e7845
--- /dev/null
+++ b/speech/snippets/speech_to_storage_beta_test.py
@@ -0,0 +1,66 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import uuid
+
+from google.cloud import speech_v1p1beta1 as speech
+from google.cloud import storage
+import pytest
+
+import speech_to_storage_beta
+
+STORAGE_URI = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
+
+storage_client = storage.Client()
+
+BUCKET_UUID = str(uuid.uuid4())[:8]
+BUCKET_NAME = f"speech-{BUCKET_UUID}"
+BUCKET_PREFIX = "export-transcript-output-test"
+DELIMETER = None
+
+INPUT_STORAGE_URI = "gs://cloud-samples-data/speech/commercial_mono.wav"
+OUTPUT_STORAGE_URI = f"gs://{BUCKET_NAME}/{BUCKET_PREFIX}"
+encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
+sample_rate_hertz = 8000
+language_code = "en-US"
+
+
+def test_export_transcript_to_storage_beta(bucket, capsys):
+    """Exports a transcript to the temporary bucket and checks its text."""
+    transcript = speech_to_storage_beta.export_transcript_to_storage_beta(
+        INPUT_STORAGE_URI,
+        OUTPUT_STORAGE_URI,
+        encoding,
+        sample_rate_hertz,
+        language_code,
+    )
+    assert "I'm here" in transcript
+
+
+@pytest.fixture
+def bucket():
+    """Yields a bucket that is deleted after the test completes."""
+    # BUCKET_NAME is fixed for the whole module, so retrying on a name collision
+    # can never succeed (it would loop forever). Create the bucket once and let
+    # create_bucket report any conflict.
+    bucket = storage_client.bucket(BUCKET_NAME)
+    bucket.storage_class = "COLDLINE"
+    storage_client.create_bucket(bucket, location="us")
+    yield bucket
+
+    # Delete the exported transcript objects before removing the bucket itself.
+    for blob in storage_client.list_blobs(BUCKET_NAME, prefix=BUCKET_PREFIX):
+        blob.delete()
+
+    bucket.delete(force=True)