From f7205dd6bef7c2b33cb2d1daa171a141f854fdf4 Mon Sep 17 00:00:00 2001 From: Mike <45373284+munkhuushmgl@users.noreply.github.com> Date: Mon, 2 Mar 2020 15:29:23 -0800 Subject: [PATCH] Translate: migrate published v3 translate batch samples [(#2914)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2914) * Translate: migrate published b v3 tch samples * added missing requirements * extended wait time * inlined some vals and specified input and output * added link to supported file types & modified default values of input uri * fixed small nit --- .../translate_v3_batch_translate_text.py | 55 ++++++++++++++ .../translate_v3_batch_translate_text_test.py | 43 +++++++++++ ...e_v3_batch_translate_text_with_glossary.py | 74 +++++++++++++++++++ ...batch_translate_text_with_glossary_test.py | 64 ++++++++++++++++ 4 files changed, 236 insertions(+) create mode 100644 translation/samples/snippets/translate_v3_batch_translate_text.py create mode 100644 translation/samples/snippets/translate_v3_batch_translate_text_test.py create mode 100644 translation/samples/snippets/translate_v3_batch_translate_text_with_glossary.py create mode 100644 translation/samples/snippets/translate_v3_batch_translate_text_with_glossary_test.py diff --git a/translation/samples/snippets/translate_v3_batch_translate_text.py b/translation/samples/snippets/translate_v3_batch_translate_text.py new file mode 100644 index 000000000000..b4650e054b24 --- /dev/null +++ b/translation/samples/snippets/translate_v3_batch_translate_text.py @@ -0,0 +1,55 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# [START translate_v3_batch_translate_text]
from google.cloud import translate


def batch_translate_text(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID",
    timeout=90,
):
    """Translates a batch of texts on GCS and stores the result in a GCS location.

    Args:
        input_uri: gs:// URI of the file to translate.
        output_uri: gs:// URI prefix under which the results are written.
        project_id: ID of the Google Cloud project that owns the request.
        timeout: Seconds to wait for the long-running operation to finish
            before giving up. Defaults to 90.

    Returns:
        The value produced by ``operation.result()``; its
        ``total_characters`` and ``translated_characters`` fields are also
        printed.
    """

    client = translate.TranslationServiceClient()

    location = "us-central1"
    parent = client.location_path(project_id, location)

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    gcs_source = {"input_uri": input_uri}

    input_configs_element = {
        "gcs_source": gcs_source,
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    gcs_destination = {"output_uri_prefix": output_uri}
    output_config = {"gcs_destination": gcs_destination}

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    operation = client.batch_translate_text(
        parent=parent,
        source_language_code="en",
        target_language_codes=["ja"],  # Up to 10 language codes here.
        input_configs=[input_configs_element],
        output_config=output_config,
    )

    print(u"Waiting for operation to complete...")
    # Block until the batch job finishes or the caller-supplied timeout expires.
    response = operation.result(timeout=timeout)

    print(u"Total Characters: {}".format(response.total_characters))
    print(u"Translated Characters: {}".format(response.translated_characters))

    # Hand the result back so callers can inspect it instead of parsing stdout.
    return response


# [END translate_v3_batch_translate_text]
import os
import uuid

from google.cloud import storage
import pytest

import translate_v3_batch_translate_text

PROJECT_ID = os.environ["GCLOUD_PROJECT"]


@pytest.fixture(scope="function")
def bucket():
    """Yield a freshly created, uniquely named bucket and force-delete it afterwards."""
    unique_name = str(uuid.uuid1())
    gcs = storage.Client()
    temp_bucket = gcs.create_bucket(unique_name)

    yield temp_bucket

    # force=True also removes whatever objects the sample wrote into the bucket.
    temp_bucket.delete(force=True)


def test_batch_translate_text(capsys, bucket):
    """Run the batch sample against the temporary bucket and check its printed summary."""
    destination = "gs://{}/translation/BATCH_TRANSLATION_OUTPUT/".format(bucket.name)

    translate_v3_batch_translate_text.batch_translate_text(
        "gs://cloud-samples-data/translation/text.txt",
        destination,
        PROJECT_ID,
    )

    captured = capsys.readouterr()
    assert "Total Characters" in captured.out
# [START translate_v3_batch_translate_text_with_glossary]
from google.cloud import translate


def batch_translate_text_with_glossary(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID",
    glossary_id="YOUR_GLOSSARY_ID",
    timeout=120,
):
    """Translates a batch of texts on GCS and stores the result in a GCS location.
    Glossary is applied for translation.

    Args:
        input_uri: gs:// URI of the file to translate.
        output_uri: gs:// URI prefix under which the results are written.
        project_id: ID of the Google Cloud project that owns the request.
        glossary_id: ID of the glossary to apply to the translation.
        timeout: Seconds to wait for the long-running operation to finish
            before giving up. Defaults to 120.

    Returns:
        The value produced by ``operation.result()``; its
        ``total_characters`` and ``translated_characters`` fields are also
        printed.
    """

    client = translate.TranslationServiceClient()

    location = "us-central1"
    parent = client.location_path(project_id, location)

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    gcs_source = {"input_uri": input_uri}

    input_configs_element = {
        "gcs_source": gcs_source,
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    gcs_destination = {"output_uri_prefix": output_uri}
    output_config = {"gcs_destination": gcs_destination}

    # glossary is a custom dictionary Translation API uses
    # to translate the domain-specific terminology.
    # The glossary path reuses `location` so it cannot drift from the
    # location used for the request parent.
    glossary_path = client.glossary_path(project_id, location, glossary_id)

    glossary_config = translate.types.TranslateTextGlossaryConfig(
        glossary=glossary_path
    )

    glossaries = {"ja": glossary_config}  # target lang as key

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    operation = client.batch_translate_text(
        parent=parent,
        source_language_code="en",
        target_language_codes=["ja"],  # Up to 10 language codes here.
        input_configs=[input_configs_element],
        glossaries=glossaries,
        output_config=output_config,
    )

    print(u"Waiting for operation to complete...")
    # Block until the batch job finishes or the caller-supplied timeout expires.
    response = operation.result(timeout=timeout)

    print(u"Total Characters: {}".format(response.total_characters))
    print(u"Translated Characters: {}".format(response.translated_characters))

    # Hand the result back so callers can inspect it instead of parsing stdout.
    return response


# [END translate_v3_batch_translate_text_with_glossary]
import os
import uuid

from google.cloud import storage
import pytest

import translate_v3_batch_translate_text_with_glossary
import translate_v3_create_glossary
import translate_v3_delete_glossary

PROJECT_ID = os.environ["GCLOUD_PROJECT"]
GLOSSARY_INPUT_URI = "gs://cloud-samples-data/translation/glossary_ja.csv"


@pytest.fixture(scope="session")
def glossary():
    """Create a uniquely named glossary for the session, yield its ID, then delete it."""
    glossary_id = "must-start-with-letters-" + str(uuid.uuid1())
    translate_v3_create_glossary.create_glossary(
        PROJECT_ID, GLOSSARY_INPUT_URI, glossary_id
    )

    yield glossary_id

    # Cleanup stays best-effort so a failed delete never fails the session,
    # but the failure is reported so a leaked glossary can be tracked down.
    try:
        translate_v3_delete_glossary.delete_glossary(PROJECT_ID, glossary_id)
    except Exception as err:
        print(u"Could not delete glossary {}: {}".format(glossary_id, err))


@pytest.fixture(scope="function")
def bucket():
    """Create a temporary bucket to store translation output; delete it afterwards."""
    bucket_name = str(uuid.uuid1())
    storage_client = storage.Client()
    bucket = storage_client.create_bucket(bucket_name)

    yield bucket

    # force=True also removes whatever objects the sample wrote into the bucket.
    bucket.delete(force=True)


def test_batch_translate_text_with_glossary(capsys, bucket, glossary):
    """Run the glossary batch sample and check the character count it prints."""
    translate_v3_batch_translate_text_with_glossary.batch_translate_text_with_glossary(
        "gs://cloud-samples-data/translation/text_with_glossary.txt",
        "gs://{}/translation/BATCH_TRANSLATION_OUTPUT/".format(bucket.name),
        PROJECT_ID,
        glossary,
    )

    out, _ = capsys.readouterr()
    assert "Total Characters: 9" in out