diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index eaf23aba0d71..8d0e50c0eb32 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -55,6 +55,7 @@ /kms/**/** @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/python-samples-reviewers /kubernetes_engine/**/* @GoogleCloudPlatform/python-samples-reviewers /kubernetes_engine/django_tutorial/**/* @glasnt @GoogleCloudPlatform/python-samples-reviewers +/language/**/* @GoogleCloudPlatform/dee-data-ai @GoogleCloudPlatform/python-samples-reviewers /media_cdn/**/* @justin-mp @msampathkumar @GoogleCloudPlatform/python-samples-reviewers /memorystore/**/* @GoogleCloudPlatform/python-samples-reviewers /ml_engine/**/* @ivanmkc @GoogleCloudPlatform/python-samples-reviewers diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml index 6a8ef3d7f341..70830901f191 100644 --- a/.github/blunderbuss.yml +++ b/.github/blunderbuss.yml @@ -85,6 +85,10 @@ assign_issues_by: - 'api: cloudiot' to: - GoogleCloudPlatform/api-iot +- labels: + - 'api: language' + to: + - GoogleCloudPlatform/dee-data-ai - labels: - 'api: ml' to: diff --git a/.github/header-checker-lint.yml b/.github/header-checker-lint.yml index 9fba9f2ee475..0dcaff7d0df6 100644 --- a/.github/header-checker-lint.yml +++ b/.github/header-checker-lint.yml @@ -18,6 +18,8 @@ ignoreFiles: - "**/ghcnd-stations.txt" - "texttospeech/snippets/resources/example.txt" - "texttospeech/snippets/resources/hello.txt" + - "language/**/resources/*.txt" + - "language/snippets/classify_text/resources/texts/*.txt" ignoreLicenseYear: true diff --git a/language/AUTHORING_GUIDE.md b/language/AUTHORING_GUIDE.md new file mode 100644 index 000000000000..8249522ffc2d --- /dev/null +++ b/language/AUTHORING_GUIDE.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md \ No newline at end of file diff --git a/language/CONTRIBUTING.md b/language/CONTRIBUTING.md new file mode 100644 index 000000000000..f5fe2e6baf13 --- /dev/null +++ 
b/language/CONTRIBUTING.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/CONTRIBUTING.md \ No newline at end of file diff --git a/language/snippets/README.md b/language/snippets/README.md new file mode 100644 index 000000000000..5689d7c21ab3 --- /dev/null +++ b/language/snippets/README.md @@ -0,0 +1,15 @@ +# Google Cloud Natural Language API examples + +[![Open in Cloud Shell][shell_img]][shell_link] + +[shell_img]: http://gstatic.com/cloudssh/images/open-btn.png +[shell_link]: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=language/README.md + +This directory contains Python examples that use the +[Google Cloud Natural Language API](https://cloud.google.com/natural-language/). + +- [api](api) has a simple command line tool that shows off the API's features. + +- [sentiment](sentiment) contains the [Sentiment Analysis + Tutorial](https://cloud.google.com/natural-language/docs/sentiment-tutorial) +code as used within the documentation. diff --git a/language/snippets/api/README.rst b/language/snippets/api/README.rst new file mode 100644 index 000000000000..0d9d945111a5 --- /dev/null +++ b/language/snippets/api/README.rst @@ -0,0 +1,98 @@ +.. This file is automatically generated. Do not edit this file directly. + +Google Cloud Natural Language API Python Samples +=============================================================================== + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=language/api/README.rst + + +This directory contains samples for Google Cloud Natural Language API. The `Google Cloud Natural Language API`_ provides natural language understanding technologies to developers, including sentiment analysis, entity recognition, and syntax analysis. 
This API is part of the larger Cloud Machine Learning API. + + + + +.. _Google Cloud Natural Language API: https://cloud.google.com/natural-language/docs/ + + + + + +Setup +------------------------------------------------------------------------------- + + +Authentication +++++++++++++++ + +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. + +.. _Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started + +Install Dependencies +++++++++++++++++++++ + +#. Clone python-docs-samples and change directory to the sample directory you want to use. + + .. code-block:: bash + + $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + +#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ + +Samples +------------------------------------------------------------------------------- + +Analyze syntax ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=language/api/analyze.py,language/api/README.rst + + + + +To run this sample: + +.. 
code-block:: bash + + $ python analyze.py + + usage: analyze.py [-h] {entities,sentiment,syntax} text + + Analyzes text using the Google Cloud Natural Language API. + + positional arguments: + {entities,sentiment,syntax} + text + + optional arguments: + -h, --help show this help message and exit + + + + + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/language/snippets/api/README.rst.in b/language/snippets/api/README.rst.in new file mode 100644 index 000000000000..f3195edf6b42 --- /dev/null +++ b/language/snippets/api/README.rst.in @@ -0,0 +1,22 @@ +# This file is used to generate README.rst + +product: + name: Google Cloud Natural Language API + short_name: Cloud Natural Language API + url: https://cloud.google.com/natural-language/docs/ + description: > + The `Google Cloud Natural Language API`_ provides natural language + understanding technologies to developers, including sentiment analysis, + entity recognition, and syntax analysis. This API is part of the larger + Cloud Machine Learning API. + +setup: +- auth +- install_deps + +samples: +- name: Analyze syntax + file: analyze.py + show_help: true + +folder: language/api \ No newline at end of file diff --git a/language/snippets/api/analyze.py b/language/snippets/api/analyze.py new file mode 100644 index 000000000000..be8652269b96 --- /dev/null +++ b/language/snippets/api/analyze.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +# Copyright 2016 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

"""Analyzes text using the Google Cloud Natural Language API."""

import argparse
import json
import sys


def get_native_encoding_type():
    """Return the encoding type that matches Python's native strings.

    Returns:
        "UTF16" when ``sys.maxunicode`` is 65535, otherwise "UTF32".
    """
    return "UTF16" if sys.maxunicode == 65535 else "UTF32"


def _call_documents_method(method_name, text, encoding):
    """Send ``text`` to one ``documents()`` method of the language v1 API.

    Shared implementation of the public ``analyze_*`` functions, which only
    differ in the API method they invoke.

    Args:
        method_name: Name of the method on ``service.documents()``, e.g.
            "analyzeEntities".
        text: Plain-text content to analyze.
        encoding: Value sent as ``encoding_type`` in the request body.

    Returns:
        Whatever ``request.execute()`` returns for the call.
    """
    # Imported here, in the only function that talks to the API, so the
    # third-party client is needed only when a request is actually made.
    import googleapiclient.discovery

    body = {
        "document": {"type": "PLAIN_TEXT", "content": text},
        "encoding_type": encoding,
    }

    service = googleapiclient.discovery.build("language", "v1")
    request = getattr(service.documents(), method_name)(body=body)
    return request.execute()


def analyze_entities(text, encoding="UTF32"):
    """Run entity analysis on ``text`` and return the API response.

    Args:
        text: Plain-text content to analyze.
        encoding: Encoding type sent with the request (default "UTF32").
    """
    return _call_documents_method("analyzeEntities", text, encoding)


def analyze_sentiment(text, encoding="UTF32"):
    """Run sentiment analysis on ``text`` and return the API response.

    Args:
        text: Plain-text content to analyze.
        encoding: Encoding type sent with the request (default "UTF32").
    """
    return _call_documents_method("analyzeSentiment", text, encoding)


def analyze_syntax(text, encoding="UTF32"):
    """Run syntax analysis on ``text`` and return the API response.

    Args:
        text: Plain-text content to analyze.
        encoding: Encoding type sent with the request (default "UTF32").
    """
    return _call_documents_method("analyzeSyntax", text, encoding)


if __name__ == "__main__":
    # Maps each CLI command to the function implementing it; the key order is
    # also the order in which argparse lists the choices.
    commands = {
        "entities": analyze_entities,
        "sentiment": analyze_sentiment,
        "syntax": analyze_syntax,
    }

    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("command", choices=list(commands))
    parser.add_argument("text")

    args = parser.parse_args()

    result = commands[args.command](args.text, get_native_encoding_type())

    print(json.dumps(result, indent=2))
# Copyright 2016 Google LLC
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import textwrap

import analyze

# Text shared by the offset tests: characters that need 1, 2, 2, 4 and 1
# bytes in UTF-8, separated by single spaces.
_MIXED_WIDTH_TEXT = "a \u00e3 \u0201 \U0001f636 b"


def _assert_byte_offsets(tokens, encoded, codec, unit_size, expected):
    """Check token contents and that each offset locates the token's bytes.

    Args:
        tokens: The "tokens" list from an analyze_syntax response.
        encoded: The test text encoded with ``codec`` (byte order mark
            already removed where the codec emits one).
        codec: Python codec name used to decode the sliced bytes.
        unit_size: Number of bytes per offset unit; the returned
            ``beginOffset`` is multiplied by this to index into ``encoded``.
        expected: Sequence of (content, byte_length) pairs, one per token.
    """
    for position, (content, byte_length) in enumerate(expected):
        text = tokens[position]["text"]
        assert text["content"] == content
        start = unit_size * text.get("beginOffset", 0)
        assert encoded[start : start + byte_length].decode(codec) == text["content"]


def test_analyze_entities():
    response = analyze.analyze_entities(
        "Tom Sawyer is a book written by a guy known as Mark Twain."
    )

    assert response["language"] == "en"
    found = response["entities"]
    assert len(found)
    first = found[0]
    assert first["type"] == "PERSON"
    assert first["name"].startswith("Tom")


def test_analyze_sentiment(capsys):
    negative = analyze.analyze_sentiment("your face is really ugly and i hate it.")[
        "documentSentiment"
    ]
    assert negative["score"] < 0
    assert negative["magnitude"] < 1

    positive = analyze.analyze_sentiment(
        "cheerio, mate - I greatly admire the pallor of your visage, and your angle of repose leaves little room for improvement."
    )["documentSentiment"]
    assert positive["score"] > 0
    assert positive["magnitude"] < 1


def test_analyze_syntax(capsys):
    quote = textwrap.dedent(
        """\
        Keep away from people who try to belittle your ambitions. Small people
        always do that, but the really great make you feel that you, too, can
        become great.
        - Mark Twain"""
    )
    response = analyze.analyze_syntax(quote)

    assert len(response["tokens"])
    leading = response["tokens"][0]
    assert leading["text"]["content"] == "Keep"
    assert leading["partOfSpeech"]["tag"] == "VERB"
    assert len(response["sentences"]) > 1
    assert response["language"] == "en"


def test_analyze_syntax_utf8():
    """Demonstrate the interpretation of the offsets when encoding=utf8.

    UTF8 is a variable-length encoding, where each character is at least 8
    bits. The offsets we get should be the index of the first byte of the
    character.
    """
    encoded = _MIXED_WIDTH_TEXT.encode("utf8")
    tokens = analyze.analyze_syntax(_MIXED_WIDTH_TEXT, encoding="UTF8")["tokens"]

    # The last entry shows that the offset of 'b' (a single byte) accounts
    # for the variable-length characters that precede it.
    _assert_byte_offsets(
        tokens,
        encoded,
        codec="utf8",
        unit_size=1,
        expected=[
            ("a", 1),
            ("\u00e3", 2),
            ("\u0201", 2),
            ("\U0001f636", 4),
            ("b", 1),
        ],
    )


def test_analyze_syntax_utf16():
    """Demonstrate the interpretation of the offsets when encoding=utf16.

    UTF16 is a variable-length encoding, where each character is at least 16
    bits. The returned offsets will be the index of the first 2-byte character
    of the token.
    """
    # Remove the byte order marker, which the offsets don't account for.
    encoded = _MIXED_WIDTH_TEXT.encode("utf16")[2:]
    tokens = analyze.analyze_syntax(_MIXED_WIDTH_TEXT, encoding="UTF16")["tokens"]

    # Offsets count 16-bit entries while `encoded` is an 8-bit array, hence
    # unit_size=2. Low codepoints (including 'a' and 'b') occupy 2 bytes; the
    # high codepoint occupies 4 bytes.
    _assert_byte_offsets(
        tokens,
        encoded,
        codec="utf16",
        unit_size=2,
        expected=[
            ("a", 2),
            ("\u00e3", 2),
            ("\u0201", 2),
            ("\U0001f636", 4),
            ("b", 2),
        ],
    )


def test_annotate_text_utf32():
    """Demonstrate the interpretation of the offsets when encoding=utf32.

    UTF32 is a fixed-length encoding, where each character is exactly 32 bits.
    The returned offsets will be the index of the first 4-byte character
    of the token.

    Python unicode objects index by the interpreted unicode character. This
    means a given unicode character only ever takes up one slot in a unicode
    string. This is equivalent to indexing into a UTF32 string, where all
    characters are a fixed length and thus will only ever take up one slot.

    Thus, if you're indexing into a python unicode object, you can set
    encoding to UTF32 to index directly into the unicode object (as opposed to
    the byte arrays, as these examples do).

    Nonetheless, this test still demonstrates indexing into the byte array, for
    consistency. Note that you could just index into the original test string
    with the raw offset returned by the api (ie without multiplying it by 4,
    as it is below).
    """
    # Remove the byte order marker, which the offsets don't account for.
    encoded = _MIXED_WIDTH_TEXT.encode("utf32")[4:]
    tokens = analyze.analyze_syntax(_MIXED_WIDTH_TEXT, encoding="UTF32")["tokens"]

    # Offsets count 32-bit entries while `encoded` is an 8-bit array, hence
    # unit_size=4; every character is sliced out as 4 bytes.
    _assert_byte_offsets(
        tokens,
        encoded,
        codec="utf32",
        unit_size=4,
        expected=[
            ("a", 4),
            ("\u00e3", 4),
            ("\u0201", 4),
            ("\U0001f636", 4),
            ("b", 4),
        ],
    )


def test_annotate_text_utf32_directly_index_into_unicode():
    """Demonstrate using offsets directly, using encoding=utf32.

    See the explanation for test_annotate_text_utf32. Essentially, indexing
    into a utf32 array is equivalent to indexing into a python unicode object.
    """
    tokens = analyze.analyze_syntax(_MIXED_WIDTH_TEXT, encoding="UTF32")["tokens"]

    # Temporarily disabled: the same two checks for tokens[3]
    # ('\U0001f636') and tokens[4] ('b').
    for position, content in enumerate(["a", "\u00e3", "\u0201"]):
        text = tokens[position]["text"]
        assert text["content"] == content
        start = text.get("beginOffset", 0)
        assert _MIXED_WIDTH_TEXT[start] == text["content"]
b/language/snippets/api/requirements.txt @@ -0,0 +1,3 @@ +google-api-python-client==2.65.0 +google-auth==2.14.0 +google-auth-httplib2==0.1.0 diff --git a/language/snippets/classify_text/README.rst b/language/snippets/classify_text/README.rst new file mode 100644 index 000000000000..757debb0946f --- /dev/null +++ b/language/snippets/classify_text/README.rst @@ -0,0 +1,130 @@ +.. This file is automatically generated. Do not edit this file directly. + +Google Cloud Natural Language API Python Samples +=============================================================================== + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=language/classify_text/README.rst + + +This directory contains samples for Google Cloud Natural Language API. The `Google Cloud Natural Language API`_ provides natural language understanding technologies to developers. + +This tutorial demonstrates how to use the `classify_text` method to classify content category of text files, and use the result to compare texts by their similarity to each other. See the `tutorial page`_ for details about this sample. + +.. _tutorial page: https://cloud.google.com/natural-language/docs/classify-text-tutorial + + + + +.. _Google Cloud Natural Language API: https://cloud.google.com/natural-language/docs/ + + + + + +Setup +------------------------------------------------------------------------------- + + +Authentication +++++++++++++++ + +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. + +.. _Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started + +Install Dependencies +++++++++++++++++++++ + +#.
Clone python-docs-samples and change directory to the sample directory you want to use. + + .. code-block:: bash + + $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + +#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ + +Samples +------------------------------------------------------------------------------- + +Classify Text Tutorial ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=language/classify_text/classify_text_tutorial.py,language/classify_text/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python classify_text_tutorial.py + + usage: classify_text_tutorial.py [-h] + {classify,index,query,query-category} ... + + Using the classify_text method to find content categories of text files, + Then use the content category labels to compare text similarity. + + For more information, see the tutorial page at + https://cloud.google.com/natural-language/docs/classify-text-tutorial. + + positional arguments: + {classify,index,query,query-category} + classify Classify the input text into categories. + index Classify each text file in a directory and write the + results to the index_file. 
+ query Find the indexed files that are the most similar to + the query text. + query-category Find the indexed files that are the most similar to + the query label. The list of all available labels: + https://cloud.google.com/natural- + language/docs/categories + + optional arguments: + -h, --help show this help message and exit + + + + + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. _Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. _browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. _report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/language/snippets/classify_text/README.rst.in b/language/snippets/classify_text/README.rst.in new file mode 100644 index 000000000000..14ee6dc9aa45 --- /dev/null +++ b/language/snippets/classify_text/README.rst.in @@ -0,0 +1,28 @@ +# This file is used to generate README.rst + +product: + name: Google Cloud Natural Language API + short_name: Cloud Natural Language API + url: https://cloud.google.com/natural-language/docs/ + description: > + The `Google Cloud Natural Language API`_ provides natural language + understanding technologies to developers. + + + This tutorial demonstrates how to use the `classify_text` method to classify content category of text files, and use the result to compare texts by their similarity to each other. See the `tutorial page`_ for details about this sample. + + + ..
_tutorial page: https://cloud.google.com/natural-language/docs/classify-text-tutorial + +setup: +- auth +- install_deps + +samples: +- name: Classify Text Tutorial + file: classify_text_tutorial.py + show_help: true + +cloud_client_library: true + +folder: language/classify_text \ No newline at end of file diff --git a/language/snippets/classify_text/classify_text_tutorial.py b/language/snippets/classify_text/classify_text_tutorial.py new file mode 100644 index 000000000000..11adb1be4d5e --- /dev/null +++ b/language/snippets/classify_text/classify_text_tutorial.py @@ -0,0 +1,256 @@ +#!/usr/bin/env python + +# Copyright 2017 Google LLC +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Using the classify_text method to find content categories of text files, +Then use the content category labels to compare text similarity. + +For more information, see the tutorial page at +https://cloud.google.com/natural-language/docs/classify-text-tutorial. 
# [START language_classify_text_tutorial_imports]
import argparse
import io
import json
import os

import numpy

# [END language_classify_text_tutorial_imports]


# [START language_classify_text_tutorial_classify]
def classify(text, verbose=True):
    """Classify the input text into categories."""
    # NOTE: the docstring above doubles as the argparse help text for the
    # "classify" sub-command, so parameter notes live in comments instead.
    #
    # text:    the text to classify.
    # verbose: when true, print the text and each category with its
    #          confidence.
    # returns: dict of the form {category.name: category.confidence}.

    # Imported here, in the only function that calls the API, so the local
    # helpers (split_labels, similarity, query_category) can be used without
    # the client library installed.
    from google.cloud import language_v1

    language_client = language_v1.LanguageServiceClient()

    document = language_v1.Document(
        content=text, type_=language_v1.Document.Type.PLAIN_TEXT
    )
    response = language_client.classify_text(request={"document": document})
    categories = response.categories

    # Turn the categories into a dictionary of the form:
    # {category.name: category.confidence}, so that they can
    # be treated as a sparse vector.
    result = {category.name: category.confidence for category in categories}

    if verbose:
        print(text)
        for category in categories:
            print("=" * 20)
            print("{:<16}: {}".format("category", category.name))
            print("{:<16}: {}".format("confidence", category.confidence))

    return result


# [END language_classify_text_tutorial_classify]


# [START language_classify_text_tutorial_index]
def index(path, index_file):
    """Classify each text file in a directory and write
    the results to the index_file.
    """
    # path:       directory whose regular files are classified.
    # index_file: path of the JSON file to write.
    # returns:    dict of the form {filename: {category name: confidence}}.

    result = {}
    for filename in os.listdir(path):
        file_path = os.path.join(path, filename)

        if not os.path.isfile(file_path):
            continue

        try:
            with io.open(file_path, "r") as f:
                text = f.read()
                categories = classify(text, verbose=False)

                result[filename] = categories
        except Exception:
            # Deliberately best-effort: a file that cannot be read or
            # classified is reported and left out of the index.
            print("Failed to process {}".format(file_path))

    with io.open(index_file, "w", encoding="utf-8") as f:
        f.write(json.dumps(result, ensure_ascii=False))

    print("Texts indexed in file: {}".format(index_file))
    return result


# [END language_classify_text_tutorial_index]


def split_labels(categories):
    """The category labels are of the form "/a/b/c" up to three levels,
    for example "/Computers & Electronics/Software", and these labels
    are used as keys in the categories dictionary, whose values are
    confidence scores.

    The split_labels function splits the keys into individual levels
    while duplicating the confidence score, which allows a natural
    boost in how we calculate similarity when more levels are in common.

    Example:
    If we have

    x = {"/a/b/c": 0.5}
    y = {"/a/b": 0.5}
    z = {"/a": 0.5}

    Then x and y are considered more similar than y and z.

    Returns a new dict keyed by the individual level labels. When the same
    level label appears under several categories, the confidence of the last
    one iterated is kept.
    """
    _categories = {}
    for name, confidence in categories.items():
        for label in name.split("/"):
            # Skip the empty piece produced by the leading "/".
            if label:
                _categories[label] = confidence

    return _categories


def similarity(categories1, categories2):
    """Cosine similarity of the categories treated as sparse vectors.

    Both arguments are dicts of the form {category label: confidence}; they
    are passed through split_labels before comparing. Returns 0.0 when either
    one has a zero norm (for example, when it is empty).
    """
    categories1 = split_labels(categories1)
    categories2 = split_labels(categories2)

    norm1 = numpy.linalg.norm(list(categories1.values()))
    norm2 = numpy.linalg.norm(list(categories2.values()))

    # Return the smallest possible similarity if either categories is empty.
    if norm1 == 0 or norm2 == 0:
        return 0.0

    # Compute the cosine similarity; labels missing from categories2
    # contribute nothing to the dot product.
    dot = 0.0
    for label, confidence in categories1.items():
        dot += confidence * categories2.get(label, 0.0)

    return dot / (norm1 * norm2)


# [START language_classify_text_tutorial_query]
def query(index_file, text, n_top=3):
    """Find the indexed files that are the most similar to
    the query text.
    """
    # index_file: path of a JSON index as written by index().
    # text:       query text; it is classified with classify().
    # n_top:      how many of the best matches to print.
    # returns:    every (filename, similarity) pair, most similar first.

    # The index is written as UTF-8 with ensure_ascii=False, so read it back
    # as UTF-8 rather than with the platform default encoding.
    with io.open(index_file, "r", encoding="utf-8") as f:
        index = json.load(f)

    # Get the categories of the query text.
    query_categories = classify(text, verbose=False)

    similarities = [
        (filename, similarity(query_categories, categories))
        for filename, categories in index.items()
    ]

    similarities = sorted(similarities, key=lambda p: p[1], reverse=True)

    print("=" * 20)
    print("Query: {}\n".format(text))
    for category, confidence in query_categories.items():
        print("\tCategory: {}, confidence: {}".format(category, confidence))
    print("\nMost similar {} indexed texts:".format(n_top))
    for filename, sim in similarities[:n_top]:
        print("\tFilename: {}".format(filename))
        print("\tSimilarity: {}".format(sim))
        print("\n")

    return similarities


# [END language_classify_text_tutorial_query]


# [START language_classify_text_tutorial_query_category]
def query_category(index_file, category_string, n_top=3):
    """Find the indexed files that are the most similar to
    the query label.

    The list of all available labels:
    https://cloud.google.com/natural-language/docs/categories
    """
    # index_file:      path of a JSON index as written by index().
    # category_string: category label to compare against, e.g. "/a/b".
    # n_top:           how many of the best matches to print.
    # returns:         every (filename, similarity) pair, most similar first.

    # The index is written as UTF-8 with ensure_ascii=False, so read it back
    # as UTF-8 rather than with the platform default encoding.
    with io.open(index_file, "r", encoding="utf-8") as f:
        index = json.load(f)

    # Make the category_string into a dictionary so that it is
    # of the same format as what we get by calling classify.
    query_categories = {category_string: 1.0}

    similarities = [
        (filename, similarity(query_categories, categories))
        for filename, categories in index.items()
    ]

    similarities = sorted(similarities, key=lambda p: p[1], reverse=True)

    print("=" * 20)
    print("Query: {}\n".format(category_string))
    print("\nMost similar {} indexed texts:".format(n_top))
    for filename, sim in similarities[:n_top]:
        print("\tFilename: {}".format(filename))
        print("\tSimilarity: {}".format(sim))
        print("\n")

    return similarities


# [END language_classify_text_tutorial_query_category]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    subparsers = parser.add_subparsers(dest="command")
    # Sub-commands are optional by default on Python 3; without this, running
    # the script with no command would exit silently instead of showing usage.
    subparsers.required = True

    classify_parser = subparsers.add_parser("classify", help=classify.__doc__)
    classify_parser.add_argument(
        "text",
        help="The text to be classified. The text needs to have at least 20 tokens.",
    )
    index_parser = subparsers.add_parser("index", help=index.__doc__)
    index_parser.add_argument(
        "path", help="The directory that contains text files to be indexed."
    )
    index_parser.add_argument(
        "--index_file", help="Filename for the output JSON.", default="index.json"
    )
    query_parser = subparsers.add_parser("query", help=query.__doc__)
    query_parser.add_argument("index_file", help="Path to the index JSON file.")
    query_parser.add_argument("text", help="Query text.")
    query_category_parser = subparsers.add_parser(
        "query-category", help=query_category.__doc__
    )
    query_category_parser.add_argument(
        "index_file", help="Path to the index JSON file."
    )
    query_category_parser.add_argument("category", help="Query category.")

    args = parser.parse_args()

    # Exactly one sub-command is selected, so the branches are exclusive.
    if args.command == "classify":
        classify(args.text)
    elif args.command == "index":
        index(args.path, args.index_file)
    elif args.command == "query":
        query(args.index_file, args.text)
    elif args.command == "query-category":
        query_category(args.index_file, args.category)
A large number of services, both in-house and third-party, +are integrated, allowing users to listen to music, look at videos or photos, +or receive news updates entirely by voice.""" +QUERY_CATEGORY = "/Computers & Electronics/Software" + + +@pytest.fixture(scope="session") +def index_file(tmpdir_factory): + temp_file = tmpdir_factory.mktemp("tmp").join(OUTPUT) + temp_out = temp_file.strpath + classify_text_tutorial.index(os.path.join(RESOURCES, "texts"), temp_out) + return temp_file + + +def test_classify(capsys): + with open(os.path.join(RESOURCES, "query_text1.txt"), "r") as f: + text = f.read() + classify_text_tutorial.classify(text) + out, err = capsys.readouterr() + assert "category" in out + + +def test_index(capsys, tmpdir): + temp_dir = tmpdir.mkdir("tmp") + temp_out = temp_dir.join(OUTPUT).strpath + + classify_text_tutorial.index(os.path.join(RESOURCES, "texts"), temp_out) + out, err = capsys.readouterr() + + assert OUTPUT in out + assert len(temp_dir.listdir()) == 1 + + +def test_query_text(capsys, index_file): + temp_out = index_file.strpath + + classify_text_tutorial.query(temp_out, QUERY_TEXT) + out, err = capsys.readouterr() + + assert "Filename: cloud_computing.txt" in out + + +def test_query_category(capsys, index_file): + temp_out = index_file.strpath + + classify_text_tutorial.query_category(temp_out, QUERY_CATEGORY) + out, err = capsys.readouterr() + + assert "Filename: cloud_computing.txt" in out + + +def test_split_labels(): + categories = {"/a/b/c": 1.0} + split_categories = {"a": 1.0, "b": 1.0, "c": 1.0} + assert classify_text_tutorial.split_labels(categories) == split_categories + + +def test_similarity(): + empty_categories = {} + categories1 = {"/a/b/c": 1.0, "/d/e": 1.0} + categories2 = {"/a/b": 1.0} + + assert classify_text_tutorial.similarity(empty_categories, categories1) == 0.0 + assert classify_text_tutorial.similarity(categories1, categories1) > 0.99 + assert classify_text_tutorial.similarity(categories1, categories2) > 0 + assert 
classify_text_tutorial.similarity(categories1, categories2) < 1 diff --git a/language/snippets/classify_text/requirements-test.txt b/language/snippets/classify_text/requirements-test.txt new file mode 100644 index 000000000000..49780e035690 --- /dev/null +++ b/language/snippets/classify_text/requirements-test.txt @@ -0,0 +1 @@ +pytest==7.2.0 diff --git a/language/snippets/classify_text/requirements.txt b/language/snippets/classify_text/requirements.txt new file mode 100644 index 000000000000..f53284c69dd3 --- /dev/null +++ b/language/snippets/classify_text/requirements.txt @@ -0,0 +1,3 @@ +google-cloud-language==2.6.1 +numpy==1.23.4; python_version > '3.7' +numpy===1.21.4; python_version == '3.7' diff --git a/language/snippets/classify_text/resources/query_text1.txt b/language/snippets/classify_text/resources/query_text1.txt new file mode 100644 index 000000000000..304727304d1d --- /dev/null +++ b/language/snippets/classify_text/resources/query_text1.txt @@ -0,0 +1 @@ +Google Home enables users to speak voice commands to interact with services through the Home's intelligent personal assistant called Google Assistant. A large number of services, both in-house and third-party, are integrated, allowing users to listen to music, look at videos or photos, or receive news updates entirely by voice. diff --git a/language/snippets/classify_text/resources/query_text2.txt b/language/snippets/classify_text/resources/query_text2.txt new file mode 100644 index 000000000000..eef573c60077 --- /dev/null +++ b/language/snippets/classify_text/resources/query_text2.txt @@ -0,0 +1 @@ +The Hitchhiker's Guide to the Galaxy is the first of five books in the Hitchhiker's Guide to the Galaxy comedy science fiction "trilogy" by Douglas Adams (with the sixth written by Eoin Colfer). 
\ No newline at end of file diff --git a/language/snippets/classify_text/resources/query_text3.txt b/language/snippets/classify_text/resources/query_text3.txt new file mode 100644 index 000000000000..1337d3c64770 --- /dev/null +++ b/language/snippets/classify_text/resources/query_text3.txt @@ -0,0 +1 @@ +Goodnight Moon is an American children's picture book written by Margaret Wise Brown and illustrated by Clement Hurd. It was published on September 3, 1947, and is a highly acclaimed example of a bedtime story. \ No newline at end of file diff --git a/language/snippets/classify_text/resources/texts/android.txt b/language/snippets/classify_text/resources/texts/android.txt new file mode 100644 index 000000000000..29dc1449c55c --- /dev/null +++ b/language/snippets/classify_text/resources/texts/android.txt @@ -0,0 +1 @@ +Android is a mobile operating system developed by Google, based on the Linux kernel and designed primarily for touchscreen mobile devices such as smartphones and tablets. diff --git a/language/snippets/classify_text/resources/texts/cat_in_the_hat.txt b/language/snippets/classify_text/resources/texts/cat_in_the_hat.txt new file mode 100644 index 000000000000..bb5a853c694d --- /dev/null +++ b/language/snippets/classify_text/resources/texts/cat_in_the_hat.txt @@ -0,0 +1 @@ +The Cat in the Hat is a children's book written and illustrated by Theodor Geisel under the pen name Dr. Seuss and first published in 1957. The story centers on a tall anthropomorphic cat, who wears a red and white-striped hat and a red bow tie. 
\ No newline at end of file diff --git a/language/snippets/classify_text/resources/texts/cloud_computing.txt b/language/snippets/classify_text/resources/texts/cloud_computing.txt new file mode 100644 index 000000000000..88172adf1f46 --- /dev/null +++ b/language/snippets/classify_text/resources/texts/cloud_computing.txt @@ -0,0 +1 @@ +Cloud computing is a computing-infrastructure and software model for enabling ubiquitous access to shared pools of configurable resources (such as computer networks, servers, storage, applications and services), which can be rapidly provisioned with minimal management effort, often over the Internet. \ No newline at end of file diff --git a/language/snippets/classify_text/resources/texts/eclipse.txt b/language/snippets/classify_text/resources/texts/eclipse.txt new file mode 100644 index 000000000000..5d16217e520a --- /dev/null +++ b/language/snippets/classify_text/resources/texts/eclipse.txt @@ -0,0 +1 @@ +A solar eclipse (as seen from the planet Earth) is a type of eclipse that occurs when the Moon passes between the Sun and Earth, and when the Moon fully or partially blocks (occults) the Sun. diff --git a/language/snippets/classify_text/resources/texts/eclipse_of_the_sun.txt b/language/snippets/classify_text/resources/texts/eclipse_of_the_sun.txt new file mode 100644 index 000000000000..7236fc9d806a --- /dev/null +++ b/language/snippets/classify_text/resources/texts/eclipse_of_the_sun.txt @@ -0,0 +1 @@ +Eclipse of the Sun is the debut novel by English author Phil Whitaker. It won the 1997 John Llewellyn Rhys Prize a Betty Trask Award in 1998, and was shortlisted for the 1997 Whitbread First Novel Award. 
diff --git a/language/snippets/classify_text/resources/texts/email.txt b/language/snippets/classify_text/resources/texts/email.txt new file mode 100644 index 000000000000..3d430527b755 --- /dev/null +++ b/language/snippets/classify_text/resources/texts/email.txt @@ -0,0 +1 @@ +Electronic mail (email or e-mail) is a method of exchanging messages between people using electronics. Email first entered substantial use in the 1960s and by the mid-1970s had taken the form now recognized as email. \ No newline at end of file diff --git a/language/snippets/classify_text/resources/texts/gcp.txt b/language/snippets/classify_text/resources/texts/gcp.txt new file mode 100644 index 000000000000..1ed09b2c758a --- /dev/null +++ b/language/snippets/classify_text/resources/texts/gcp.txt @@ -0,0 +1 @@ +Google Cloud Platform, offered by Google, is a suite of cloud computing services that runs on the same infrastructure that Google uses internally for its end-user products, such as Google Search and YouTube. Alongside a set of management tools, it provides a series of modular cloud services including computing, data storage, data analytics and machine learning. diff --git a/language/snippets/classify_text/resources/texts/gmail.txt b/language/snippets/classify_text/resources/texts/gmail.txt new file mode 100644 index 000000000000..89c9704b117c --- /dev/null +++ b/language/snippets/classify_text/resources/texts/gmail.txt @@ -0,0 +1 @@ +Gmail is a free, advertising-supported email service developed by Google. Users can access Gmail on the web and through mobile apps for Android and iOS, as well as through third-party programs that synchronize email content through POP or IMAP protocols. 
\ No newline at end of file diff --git a/language/snippets/classify_text/resources/texts/google.txt b/language/snippets/classify_text/resources/texts/google.txt new file mode 100644 index 000000000000..06828635931e --- /dev/null +++ b/language/snippets/classify_text/resources/texts/google.txt @@ -0,0 +1 @@ +Google is an American multinational technology company that specializes in Internet-related services and products. These include online advertising technologies, search, cloud computing, software, and hardware. diff --git a/language/snippets/classify_text/resources/texts/harry_potter.txt b/language/snippets/classify_text/resources/texts/harry_potter.txt new file mode 100644 index 000000000000..339c10af05a2 --- /dev/null +++ b/language/snippets/classify_text/resources/texts/harry_potter.txt @@ -0,0 +1 @@ +Harry Potter is a series of fantasy novels written by British author J. K. Rowling. The novels chronicle the life of a young wizard, Harry Potter, and his friends Hermione Granger and Ron Weasley, all of whom are students at Hogwarts School of Witchcraft and Wizardry. \ No newline at end of file diff --git a/language/snippets/classify_text/resources/texts/matilda.txt b/language/snippets/classify_text/resources/texts/matilda.txt new file mode 100644 index 000000000000..e1539d7ee88d --- /dev/null +++ b/language/snippets/classify_text/resources/texts/matilda.txt @@ -0,0 +1 @@ +Matilda is a book by British writer Roald Dahl. Matilda won the Children's Book Award in 1999. It was published in 1988 by Jonathan Cape in London, with 232 pages and illustrations by Quentin Blake. 
\ No newline at end of file diff --git a/language/snippets/classify_text/resources/texts/mobile_phone.txt b/language/snippets/classify_text/resources/texts/mobile_phone.txt new file mode 100644 index 000000000000..725e22ef3a91 --- /dev/null +++ b/language/snippets/classify_text/resources/texts/mobile_phone.txt @@ -0,0 +1 @@ +A mobile phone is a portable device that can make and receive calls over a radio frequency link while the user is moving within a telephone service area. The radio frequency link establishes a connection to the switching systems of a mobile phone operator, which provides access to the public switched telephone network (PSTN). \ No newline at end of file diff --git a/language/snippets/classify_text/resources/texts/mr_fox.txt b/language/snippets/classify_text/resources/texts/mr_fox.txt new file mode 100644 index 000000000000..354feced2af1 --- /dev/null +++ b/language/snippets/classify_text/resources/texts/mr_fox.txt @@ -0,0 +1 @@ +Fantastic Mr Fox is a children's novel written by British author Roald Dahl. It was published in 1970, by George Allen & Unwin in the UK and Alfred A. Knopf in the U.S., with illustrations by Donald Chaffin. \ No newline at end of file diff --git a/language/snippets/classify_text/resources/texts/wireless.txt b/language/snippets/classify_text/resources/texts/wireless.txt new file mode 100644 index 000000000000..d742331c4644 --- /dev/null +++ b/language/snippets/classify_text/resources/texts/wireless.txt @@ -0,0 +1 @@ +Wireless communication, or sometimes simply wireless, is the transfer of information or power between two or more points that are not connected by an electrical conductor. The most common wireless technologies use radio waves. 
\ No newline at end of file diff --git a/language/snippets/cloud-client/.DS_Store b/language/snippets/cloud-client/.DS_Store new file mode 100644 index 000000000000..f344c851a0ee Binary files /dev/null and b/language/snippets/cloud-client/.DS_Store differ diff --git a/language/snippets/cloud-client/v1/README.rst b/language/snippets/cloud-client/v1/README.rst new file mode 100644 index 000000000000..e0d719464c57 --- /dev/null +++ b/language/snippets/cloud-client/v1/README.rst @@ -0,0 +1,99 @@ +.. This file is automatically generated. Do not edit this file directly. + +Google Cloud Natural Language API Python Samples +=============================================================================== + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=language/cloud-client/v1/README.rst + + +This directory contains samples for Google Cloud Natural Language API. The `Google Cloud Natural Language API`_ provides natural language understanding technologies to developers, including sentiment analysis, entity recognition, and syntax analysis. This API is part of the larger Cloud Machine Learning API. + +- See the `migration guide`_ for information about migrating to Python client library v0.26.1. + +.. _migration guide: https://cloud.google.com/natural-language/docs/python-client-migration + + + + +.. _Google Cloud Natural Language API: https://cloud.google.com/natural-language/docs/ + +Setup +------------------------------------------------------------------------------- + + +Authentication +++++++++++++++ + +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. + +.. 
_Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started + +Install Dependencies +++++++++++++++++++++ + +#. Clone python-docs-samples and change directory to the sample directory you want to use. + + .. code-block:: bash + + $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + +#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the sample. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ + +Sample +------------------------------------------------------------------------------- + +Quickstart ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=language/cloud-client/v1/quickstart.py,language/cloud-client/v1/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python quickstart.py + + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. _Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. 
_browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. _report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/language/snippets/cloud-client/v1/README.rst.in b/language/snippets/cloud-client/v1/README.rst.in new file mode 100644 index 000000000000..9bf38dbf9105 --- /dev/null +++ b/language/snippets/cloud-client/v1/README.rst.in @@ -0,0 +1,30 @@ +# This file is used to generate README.rst + +product: + name: Google Cloud Natural Language API + short_name: Cloud Natural Language API + url: https://cloud.google.com/natural-language/docs/ + description: > + The `Google Cloud Natural Language API`_ provides natural language + understanding technologies to developers, including sentiment analysis, + entity recognition, and syntax analysis. This API is part of the larger + Cloud Machine Learning API. + + + - See the `migration guide`_ for information about migrating to Python client library v0.26.1. + + + .. _migration guide: https://cloud.google.com/natural-language/docs/python-client-migration + +setup: +- auth +- install_deps + +samples: +- name: Quickstart + file: quickstart.py + show_help: true + +cloud_client_library: true + +folder: language/cloud-client/v1 \ No newline at end of file diff --git a/language/snippets/cloud-client/v1/quickstart.py b/language/snippets/cloud-client/v1/quickstart.py new file mode 100644 index 000000000000..b3532f2d56fc --- /dev/null +++ b/language/snippets/cloud-client/v1/quickstart.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python + +# Copyright 2016 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
def run_quickstart():
    """Send one sentiment-analysis request for a fixed text and print the result."""
    # [START language_quickstart]
    # Imports the Google Cloud client library
    # [START language_python_migration_imports]
    from google.cloud import language_v1

    # [END language_python_migration_imports]
    # Instantiates a client
    # [START language_python_migration_client]
    client = language_v1.LanguageServiceClient()
    # [END language_python_migration_client]

    # The text to analyze, wrapped in a plain-text document
    text = "Hello, world!"
    plain_text = language_v1.Document.Type.PLAIN_TEXT
    document = language_v1.Document(content=text, type_=plain_text)

    # Detects the sentiment of the text
    response = client.analyze_sentiment(request={"document": document})
    sentiment = response.document_sentiment

    print("Text: {}".format(text))
    print("Sentiment: {}, {}".format(sentiment.score, sentiment.magnitude))
    # [END language_quickstart]


if __name__ == "__main__":
    run_quickstart()
def test_quickstart(capsys):
    """The quickstart sample should print a line mentioning the sentiment."""
    quickstart.run_quickstart()

    captured = capsys.readouterr()
    assert "Sentiment" in captured.out
def set_endpoint():
    """Create a client bound to a non-default API endpoint and use it once."""
    # [START language_set_endpoint]
    # Imports the Google Cloud client library
    from google.cloud import language_v1

    client_options = {"api_endpoint": "eu-language.googleapis.com:443"}

    # Instantiates a client
    client = language_v1.LanguageServiceClient(client_options=client_options)
    # [END language_set_endpoint]

    # The text to analyze, wrapped in a plain-text document
    plain_text = language_v1.Document.Type.PLAIN_TEXT
    document = language_v1.Document(content="Hello, world!", type_=plain_text)

    # Detects the sentiment of the text
    response = client.analyze_sentiment(request={"document": document})
    sentiment = response.document_sentiment

    print("Sentiment: {}, {}".format(sentiment.score, sentiment.magnitude))


if __name__ == "__main__":
    set_endpoint()
def sample_analyze_sentiment(content):
    """Analyze the sentiment of a plain-text string and print the result.

    Args:
        content: The text to analyze. Byte strings are decoded as UTF-8
            before being sent.

    Prints the document-level sentiment score and magnitude returned by
    the service.
    """

    client = language_v1.LanguageServiceClient()

    # content = 'Your text to analyze, e.g. Hello, world!'

    if isinstance(content, six.binary_type):
        content = content.decode("utf-8")

    type_ = language_v1.Document.Type.PLAIN_TEXT
    document = {"type_": type_, "content": content}

    response = client.analyze_sentiment(request={"document": document})
    sentiment = response.document_sentiment
    print("Score: {}".format(sentiment.score))
    print("Magnitude: {}".format(sentiment.magnitude))


# [END language_sentiment_text]


def main():
    """Command-line entry point: analyze the single CONTENT argument.

    Exits with a usage message (non-zero status) unless exactly one
    argument is supplied, rather than failing with a TypeError traceback
    from the sample function.
    """
    if len(sys.argv) != 2:
        sys.exit("usage: {} CONTENT".format(sys.argv[0]))

    # The argument is already a string, which is what the sample expects.
    sample_analyze_sentiment(sys.argv[1])


if __name__ == "__main__":
    main()
in out diff --git a/language/snippets/generated-samples/v1/requirements-test.txt b/language/snippets/generated-samples/v1/requirements-test.txt new file mode 100644 index 000000000000..49780e035690 --- /dev/null +++ b/language/snippets/generated-samples/v1/requirements-test.txt @@ -0,0 +1 @@ +pytest==7.2.0 diff --git a/language/snippets/generated-samples/v1/requirements.txt b/language/snippets/generated-samples/v1/requirements.txt new file mode 100644 index 000000000000..c3458e3d62f8 --- /dev/null +++ b/language/snippets/generated-samples/v1/requirements.txt @@ -0,0 +1 @@ +google-cloud-language==2.6.1 diff --git a/language/snippets/sentiment/README.md b/language/snippets/sentiment/README.md new file mode 100644 index 000000000000..313817ef2fe1 --- /dev/null +++ b/language/snippets/sentiment/README.md @@ -0,0 +1,53 @@ +# Introduction + +[![Open in Cloud Shell][shell_img]][shell_link] + +[shell_img]: http://gstatic.com/cloudssh/images/open-btn.png +[shell_link]: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=language/sentiment/README.md + +This sample contains the code referenced in the +[Sentiment Analysis Tutorial](http://cloud.google.com/natural-language/docs/sentiment-tutorial) +within the Google Cloud Natural Language API Documentation. A full walkthrough of this sample +is located within the documentation. + +This sample is a simple illustration of how to construct a sentiment analysis +request and process a response using the API. + +## Prerequisites + +Set up your +[Cloud Natural Language API project](https://cloud.google.com/natural-language/docs/getting-started#set_up_a_project) +, which includes: + +* Enabling the Natural Language API +* Setting up a service account +* Ensuring you've properly set up your `GOOGLE_APPLICATION_CREDENTIALS` for proper + authentication to the service. 
+ +## Download the Code + +``` +$ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git +$ cd python-docs-samples/language/sentiment +``` + +## Run the Code + +Open a sample folder, create a virtualenv, install dependencies, and run the sample: + +``` +$ virtualenv env +$ source env/bin/activate +(env)$ pip install -r requirements.txt +``` + +### Usage + +This sample provides four sample movie reviews which you can +provide to the sample on the command line. (You can also +pass your own text files.) + +``` +(env)$ python sentiment_analysis.py textfile.txt +Sentiment: score of -0.1 with magnitude of 6.7 +``` diff --git a/language/snippets/sentiment/requirements-test.txt b/language/snippets/sentiment/requirements-test.txt new file mode 100644 index 000000000000..49780e035690 --- /dev/null +++ b/language/snippets/sentiment/requirements-test.txt @@ -0,0 +1 @@ +pytest==7.2.0 diff --git a/language/snippets/sentiment/requirements.txt b/language/snippets/sentiment/requirements.txt new file mode 100644 index 000000000000..c3458e3d62f8 --- /dev/null +++ b/language/snippets/sentiment/requirements.txt @@ -0,0 +1 @@ +google-cloud-language==2.6.1 diff --git a/language/snippets/sentiment/resources/mixed.txt b/language/snippets/sentiment/resources/mixed.txt new file mode 100644 index 000000000000..d4a42aa2928e --- /dev/null +++ b/language/snippets/sentiment/resources/mixed.txt @@ -0,0 +1,20 @@ +I really wanted to love 'Bladerunner' but ultimately I couldn't get +myself to appreciate it fully. However, you may like it if you're into +science fiction, especially if you're interested in the philosophical +exploration of what it means to be human or machine. Some of the gizmos +like the flying cars and the Vouight-Kampff machine (which seemed very +steampunk), were quite cool. + +I did find the plot pretty slow and but the dialogue and action sequences +were good. 
Unlike most science fiction films, this one was mostly quiet, and +not all that much happened, except during the last 15 minutes. I didn't +understand why a unicorn was in the movie. The visual effects were fantastic, +however, and the musical score and overall mood was quite interesting. +A futurist Los Angeles that was both highly polished and also falling apart +reminded me of 'Outland.' Certainly, the style of the film made up for +many of its pedantic plot holes. + +If you want your sci-fi to be lasers and spaceships, 'Bladerunner' may +disappoint you. But if you want it to make you think, this movie may +be worth the money. + diff --git a/language/snippets/sentiment/resources/neg.txt b/language/snippets/sentiment/resources/neg.txt new file mode 100644 index 000000000000..5dcbec0f8c5f --- /dev/null +++ b/language/snippets/sentiment/resources/neg.txt @@ -0,0 +1,4 @@ +What was Hollywood thinking with this movie! I hated, +hated, hated it. BORING! I went afterwards and demanded my money back. +They refused. + diff --git a/language/snippets/sentiment/resources/neutral.txt b/language/snippets/sentiment/resources/neutral.txt new file mode 100644 index 000000000000..89839ef25cf2 --- /dev/null +++ b/language/snippets/sentiment/resources/neutral.txt @@ -0,0 +1,3 @@ +I neither liked nor disliked this movie. Parts were interesting, but +overall I was left wanting more. The acting was pretty good. + diff --git a/language/snippets/sentiment/resources/pos.txt b/language/snippets/sentiment/resources/pos.txt new file mode 100644 index 000000000000..5f211496775c --- /dev/null +++ b/language/snippets/sentiment/resources/pos.txt @@ -0,0 +1,11 @@ +`Bladerunner` is often touted as one of the best science fiction films ever +made. Indeed, it satisfies many of the requisites for good sci-fi: a future +world with flying cars and humanoid robots attempting to rebel against their +creators. 
But more than anything, `Bladerunner` is a fantastic exploration +of the nature of what it means to be human. If we create robots which can +think, will they become human? And if they do, what makes us unique? Indeed, +how can we be sure we're not human in any case? `Bladerunner` explored +these issues before such movies as `The Matrix,' and did so intelligently. +The visual effects and score by Vangelis set the mood. See this movie +in a dark theatre to appreciate it fully. Highly recommended! + diff --git a/language/snippets/sentiment/sentiment_analysis.py b/language/snippets/sentiment/sentiment_analysis.py new file mode 100644 index 000000000000..ff09520f6fd1 --- /dev/null +++ b/language/snippets/sentiment/sentiment_analysis.py @@ -0,0 +1,79 @@ +# Copyright 2016 Google LLC +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# [START language_sentiment_tutorial] +"""Demonstrates how to make a simple call to the Natural Language API.""" + +# [START language_sentiment_tutorial_imports] +import argparse + +from google.cloud import language_v1 + +# [END language_sentiment_tutorial_imports] + + +# [START language_sentiment_tutorial_print_result] +def print_result(annotations): + score = annotations.document_sentiment.score + magnitude = annotations.document_sentiment.magnitude + + for index, sentence in enumerate(annotations.sentences): + sentence_sentiment = sentence.sentiment.score + print( + "Sentence {} has a sentiment score of {}".format(index, sentence_sentiment) + ) + + print( + "Overall Sentiment: score of {} with magnitude of {}".format(score, magnitude) + ) + return 0 + + +# [END language_sentiment_tutorial_print_result] + + +# [START language_sentiment_tutorial_analyze_sentiment] +def analyze(movie_review_filename): + """Run a sentiment analysis request on text within a passed filename.""" + client = language_v1.LanguageServiceClient() + + with open(movie_review_filename, "r") as review_file: + # Instantiates a plain text document. 
+ content = review_file.read() + + document = language_v1.Document( + content=content, type_=language_v1.Document.Type.PLAIN_TEXT + ) + annotations = client.analyze_sentiment(request={"document": document}) + + # Print the results + print_result(annotations) + + +# [END language_sentiment_tutorial_analyze_sentiment] + + +# [START language_sentiment_tutorial_run_application] +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "movie_review_filename", + help="The filename of the movie review you'd like to analyze.", + ) + args = parser.parse_args() + + analyze(args.movie_review_filename) +# [END language_sentiment_tutorial_run_application] +# [END language_sentiment_tutorial] diff --git a/language/snippets/sentiment/sentiment_analysis_test.py b/language/snippets/sentiment/sentiment_analysis_test.py new file mode 100644 index 000000000000..14718b20042a --- /dev/null +++ b/language/snippets/sentiment/sentiment_analysis_test.py @@ -0,0 +1,50 @@ +# Copyright 2016 Google LLC +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re + +from sentiment_analysis import analyze + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") + + +def test_pos(capsys): + analyze(os.path.join(RESOURCES, "pos.txt")) + out, err = capsys.readouterr() + score = float(re.search("score of (.+?) 
with", out).group(1)) + magnitude = float(re.search("magnitude of (.+)", out).group(1)) # greedy on purpose + assert score * magnitude > 0 + + +def test_neg(capsys): + analyze(os.path.join(RESOURCES, "neg.txt")) + out, err = capsys.readouterr() + score = float(re.search("score of (.+?) with", out).group(1)) + magnitude = float(re.search("magnitude of (.+)", out).group(1)) # greedy on purpose + assert score * magnitude < 0 + + +def test_mixed(capsys): + analyze(os.path.join(RESOURCES, "mixed.txt")) + out, err = capsys.readouterr() + score = float(re.search("score of (.+?) with", out).group(1)) + assert score <= 0.3 + assert score >= -0.3 + + +def test_neutral(capsys): + analyze(os.path.join(RESOURCES, "neutral.txt")) + out, err = capsys.readouterr() + magnitude = float(re.search("magnitude of (.+)", out).group(1)) # greedy on purpose + assert magnitude <= 2.0 diff --git a/language/v1/language_classify_gcs.py b/language/v1/language_classify_gcs.py new file mode 100644 index 000000000000..e0289696fc4b --- /dev/null +++ b/language/v1/language_classify_gcs.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT!
This is a generated sample ("Request", "language_classify_gcs") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-language + +# sample-metadata +# title: Classify Content (GCS) +# description: Classifying Content in text file stored in Cloud Storage +# usage: python3 samples/v1/language_classify_gcs.py [--gcs_content_uri "gs://cloud-samples-data/language/classify-entertainment.txt"] + +# [START language_classify_gcs] +from google.cloud import language_v1 + + +def sample_classify_text(gcs_content_uri): + """ + Classifying Content in text file stored in Cloud Storage + + Args: + gcs_content_uri Google Cloud Storage URI where the file content is located. + e.g. gs://[Your Bucket]/[Path to File] + The text file must include at least 20 words. + """ + + client = language_v1.LanguageServiceClient() + + # gcs_content_uri = 'gs://cloud-samples-data/language/classify-entertainment.txt' + + # Available types: PLAIN_TEXT, HTML + type_ = language_v1.Document.Type.PLAIN_TEXT + + # Optional. If not specified, the language is automatically detected. + # For list of supported languages: + # https://cloud.google.com/natural-language/docs/languages + language = "en" + document = { + "gcs_content_uri": gcs_content_uri, + "type_": type_, + "language": language, + } + + response = client.classify_text(request={"document": document}) + # Loop through classified categories returned from the API + for category in response.categories: + # Get the name of the category representing the document. + # See the predefined taxonomy of categories: + # https://cloud.google.com/natural-language/docs/categories + print("Category name: {}".format(category.name)) + # Get the confidence. Number representing how certain the classifier + # is that this category represents the provided text. 
+ print("Confidence: {}".format(category.confidence)) + + +# [END language_classify_gcs] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--gcs_content_uri", + type=str, + default="gs://cloud-samples-data/language/classify-entertainment.txt", + ) + args = parser.parse_args() + + sample_classify_text(args.gcs_content_uri) + + +if __name__ == "__main__": + main() diff --git a/language/v1/language_classify_text.py b/language/v1/language_classify_text.py new file mode 100644 index 000000000000..8c28342bf655 --- /dev/null +++ b/language/v1/language_classify_text.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("Request", "language_classify_text") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-language + +# sample-metadata +# title: Classify Content +# description: Classifying Content in a String +# usage: python3 samples/v1/language_classify_text.py [--text_content "That actor on TV makes movies in Hollywood and also stars in a variety of popular new TV shows."] + +# [START language_classify_text] +from google.cloud import language_v1 + + +def sample_classify_text(text_content): + """ + Classifying Content in a String + + Args: + text_content The text content to analyze. 
+ """ + + client = language_v1.LanguageServiceClient() + + # text_content = "That actor on TV makes movies in Hollywood and also stars in a variety of popular new TV shows." + + # Available types: PLAIN_TEXT, HTML + type_ = language_v1.Document.Type.PLAIN_TEXT + + # Optional. If not specified, the language is automatically detected. + # For list of supported languages: + # https://cloud.google.com/natural-language/docs/languages + language = "en" + document = {"content": text_content, "type_": type_, "language": language} + + content_categories_version = ( + language_v1.ClassificationModelOptions.V2Model.ContentCategoriesVersion.V2 + ) + response = client.classify_text( + request={ + "document": document, + "classification_model_options": { + "v2_model": {"content_categories_version": content_categories_version} + }, + } + ) + # Loop through classified categories returned from the API + for category in response.categories: + # Get the name of the category representing the document. + # See the predefined taxonomy of categories: + # https://cloud.google.com/natural-language/docs/categories + print("Category name: {}".format(category.name)) + # Get the confidence. Number representing how certain the classifier + # is that this category represents the provided text. 
+ print("Confidence: {}".format(category.confidence)) + + +# [END language_classify_text] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--text_content", + type=str, + default="That actor on TV makes movies in Hollywood and also stars in a variety of popular new TV shows.", + ) + args = parser.parse_args() + + sample_classify_text(args.text_content) + + +if __name__ == "__main__": + main() diff --git a/language/v1/language_entities_gcs.py b/language/v1/language_entities_gcs.py new file mode 100644 index 000000000000..7ae0dbbcd0ec --- /dev/null +++ b/language/v1/language_entities_gcs.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("Request", "language_entities_gcs") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-language + +# sample-metadata +# title: Analyzing Entities (GCS) +# description: Analyzing Entities in text file stored in Cloud Storage +# usage: python3 samples/v1/language_entities_gcs.py [--gcs_content_uri "gs://cloud-samples-data/language/entity.txt"] + +# [START language_entities_gcs] +from google.cloud import language_v1 + + +def sample_analyze_entities(gcs_content_uri): + """ + Analyzing Entities in text file stored in Cloud Storage + + Args: + gcs_content_uri Google Cloud Storage URI where the file content is located. 
+ e.g. gs://[Your Bucket]/[Path to File] + """ + + client = language_v1.LanguageServiceClient() + + # gcs_content_uri = 'gs://cloud-samples-data/language/entity.txt' + + # Available types: PLAIN_TEXT, HTML + type_ = language_v1.Document.Type.PLAIN_TEXT + + # Optional. If not specified, the language is automatically detected. + # For list of supported languages: + # https://cloud.google.com/natural-language/docs/languages + language = "en" + document = { + "gcs_content_uri": gcs_content_uri, + "type_": type_, + "language": language, + } + + # Available values: NONE, UTF8, UTF16, UTF32 + encoding_type = language_v1.EncodingType.UTF8 + + response = client.analyze_entities( + request={"document": document, "encoding_type": encoding_type} + ) + # Loop through entities returned from the API + for entity in response.entities: + print("Representative name for the entity: {}".format(entity.name)) + # Get entity type, e.g. PERSON, LOCATION, ADDRESS, NUMBER, et al + print("Entity type: {}".format(language_v1.Entity.Type(entity.type_).name)) + # Get the salience score associated with the entity in the [0, 1.0] range + print("Salience score: {}".format(entity.salience)) + # Loop over the metadata associated with entity. For many known entities, + # the metadata is a Wikipedia URL (wikipedia_url) and Knowledge Graph MID (mid). + # Some entity types may have additional metadata, e.g. ADDRESS entities + # may have metadata for the address street_name, postal_code, et al. + for metadata_name, metadata_value in entity.metadata.items(): + print("{}: {}".format(metadata_name, metadata_value)) + + # Loop over the mentions of this entity in the input document. + # The API currently supports proper noun mentions. + for mention in entity.mentions: + print("Mention text: {}".format(mention.text.content)) + # Get the mention type, e.g.
PROPER for proper noun + print( + "Mention type: {}".format( + language_v1.EntityMention.Type(mention.type_).name + ) + ) + + # Get the language of the text, which will be the same as + # the language specified in the request or, if not specified, + # the automatically-detected language. + print("Language of the text: {}".format(response.language)) + + +# [END language_entities_gcs] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--gcs_content_uri", + type=str, + default="gs://cloud-samples-data/language/entity.txt", + ) + args = parser.parse_args() + + sample_analyze_entities(args.gcs_content_uri) + + +if __name__ == "__main__": + main() diff --git a/language/v1/language_entities_text.py b/language/v1/language_entities_text.py new file mode 100644 index 000000000000..41624bbffad9 --- /dev/null +++ b/language/v1/language_entities_text.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! 
This is a generated sample ("Request", "language_entities_text") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-language + +# sample-metadata +# title: Analyzing Entities +# description: Analyzing Entities in a String +# usage: python3 samples/v1/language_entities_text.py [--text_content "California is a state."] + +# [START language_entities_text] +from google.cloud import language_v1 + + +def sample_analyze_entities(text_content): + """ + Analyzing Entities in a String + + Args: + text_content The text content to analyze + """ + + client = language_v1.LanguageServiceClient() + + # text_content = 'California is a state.' + + # Available types: PLAIN_TEXT, HTML + type_ = language_v1.Document.Type.PLAIN_TEXT + + # Optional. If not specified, the language is automatically detected. + # For list of supported languages: + # https://cloud.google.com/natural-language/docs/languages + language = "en" + document = {"content": text_content, "type_": type_, "language": language} + + # Available values: NONE, UTF8, UTF16, UTF32 + encoding_type = language_v1.EncodingType.UTF8 + + response = client.analyze_entities( + request={"document": document, "encoding_type": encoding_type} + ) + + # Loop through entities returned from the API + for entity in response.entities: + print("Representative name for the entity: {}".format(entity.name)) + + # Get entity type, e.g. PERSON, LOCATION, ADDRESS, NUMBER, et al + print("Entity type: {}".format(language_v1.Entity.Type(entity.type_).name)) + + # Get the salience score associated with the entity in the [0, 1.0] range + print("Salience score: {}".format(entity.salience)) + + # Loop over the metadata associated with entity. For many known entities, + # the metadata is a Wikipedia URL (wikipedia_url) and Knowledge Graph MID (mid). + # Some entity types may have additional metadata, e.g. ADDRESS entities + # may have metadata for the address street_name, postal_code, et al.
+ for metadata_name, metadata_value in entity.metadata.items(): + print("{}: {}".format(metadata_name, metadata_value)) + + # Loop over the mentions of this entity in the input document. + # The API currently supports proper noun mentions. + for mention in entity.mentions: + print("Mention text: {}".format(mention.text.content)) + + # Get the mention type, e.g. PROPER for proper noun + print( + "Mention type: {}".format( + language_v1.EntityMention.Type(mention.type_).name + ) + ) + + # Get the language of the text, which will be the same as + # the language specified in the request or, if not specified, + # the automatically-detected language. + print("Language of the text: {}".format(response.language)) + + +# [END language_entities_text] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--text_content", type=str, default="California is a state.") + args = parser.parse_args() + + sample_analyze_entities(args.text_content) + + +if __name__ == "__main__": + main() diff --git a/language/v1/language_entity_sentiment_gcs.py b/language/v1/language_entity_sentiment_gcs.py new file mode 100644 index 000000000000..df5eb2806769 --- /dev/null +++ b/language/v1/language_entity_sentiment_gcs.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! 
This is a generated sample ("Request", "language_entity_sentiment_gcs") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-language + +# sample-metadata +# title: Analyzing Entity Sentiment (GCS) +# description: Analyzing Entity Sentiment in text file stored in Cloud Storage +# usage: python3 samples/v1/language_entity_sentiment_gcs.py [--gcs_content_uri "gs://cloud-samples-data/language/entity-sentiment.txt"] + +# [START language_entity_sentiment_gcs] +from google.cloud import language_v1 + + +def sample_analyze_entity_sentiment(gcs_content_uri): + """ + Analyzing Entity Sentiment in text file stored in Cloud Storage + + Args: + gcs_content_uri Google Cloud Storage URI where the file content is located. + e.g. gs://[Your Bucket]/[Path to File] + """ + + client = language_v1.LanguageServiceClient() + + # gcs_content_uri = 'gs://cloud-samples-data/language/entity-sentiment.txt' + + # Available types: PLAIN_TEXT, HTML + type_ = language_v1.Document.Type.PLAIN_TEXT + + # Optional. If not specified, the language is automatically detected. + # For list of supported languages: + # https://cloud.google.com/natural-language/docs/languages + language = "en" + document = { + "gcs_content_uri": gcs_content_uri, + "type_": type_, + "language": language, + } + + # Available values: NONE, UTF8, UTF16, UTF32 + encoding_type = language_v1.EncodingType.UTF8 + + response = client.analyze_entity_sentiment( + request={"document": document, "encoding_type": encoding_type} + ) + # Loop through entities returned from the API + for entity in response.entities: + print("Representative name for the entity: {}".format(entity.name)) + # Get entity type, e.g.
PERSON, LOCATION, ADDRESS, NUMBER, et al + print("Entity type: {}".format(language_v1.Entity.Type(entity.type_).name)) + # Get the salience score associated with the entity in the [0, 1.0] range + print("Salience score: {}".format(entity.salience)) + # Get the aggregate sentiment expressed for this entity in the provided document. + sentiment = entity.sentiment + print("Entity sentiment score: {}".format(sentiment.score)) + print("Entity sentiment magnitude: {}".format(sentiment.magnitude)) + # Loop over the metadata associated with entity. For many known entities, + # the metadata is a Wikipedia URL (wikipedia_url) and Knowledge Graph MID (mid). + # Some entity types may have additional metadata, e.g. ADDRESS entities + # may have metadata for the address street_name, postal_code, et al. + for metadata_name, metadata_value in entity.metadata.items(): + print("{} = {}".format(metadata_name, metadata_value)) + + # Loop over the mentions of this entity in the input document. + # The API currently supports proper noun mentions. + for mention in entity.mentions: + print("Mention text: {}".format(mention.text.content)) + # Get the mention type, e.g. PROPER for proper noun + print( + "Mention type: {}".format( + language_v1.EntityMention.Type(mention.type_).name + ) + ) + + # Get the language of the text, which will be the same as + # the language specified in the request or, if not specified, + # the automatically-detected language. 
+ print("Language of the text: {}".format(response.language)) + + +# [END language_entity_sentiment_gcs] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--gcs_content_uri", + type=str, + default="gs://cloud-samples-data/language/entity-sentiment.txt", + ) + args = parser.parse_args() + + sample_analyze_entity_sentiment(args.gcs_content_uri) + + +if __name__ == "__main__": + main() diff --git a/language/v1/language_entity_sentiment_text.py b/language/v1/language_entity_sentiment_text.py new file mode 100644 index 000000000000..27e06f006336 --- /dev/null +++ b/language/v1/language_entity_sentiment_text.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("Request", "language_entity_sentiment_text") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-language + +# sample-metadata +# title: Analyzing Entity Sentiment +# description: Analyzing Entity Sentiment in a String +# usage: python3 samples/v1/language_entity_sentiment_text.py [--text_content "Grapes are good. 
Bananas are bad."] + +# [START language_entity_sentiment_text] +from google.cloud import language_v1 + + +def sample_analyze_entity_sentiment(text_content): + """ + Analyzing Entity Sentiment in a String + + Args: + text_content The text content to analyze + """ + + client = language_v1.LanguageServiceClient() + + # text_content = 'Grapes are good. Bananas are bad.' + + # Available types: PLAIN_TEXT, HTML + type_ = language_v1.types.Document.Type.PLAIN_TEXT + + # Optional. If not specified, the language is automatically detected. + # For list of supported languages: + # https://cloud.google.com/natural-language/docs/languages + language = "en" + document = {"content": text_content, "type_": type_, "language": language} + + # Available values: NONE, UTF8, UTF16, UTF32 + encoding_type = language_v1.EncodingType.UTF8 + + response = client.analyze_entity_sentiment( + request={"document": document, "encoding_type": encoding_type} + ) + # Loop through entities returned from the API + for entity in response.entities: + print("Representative name for the entity: {}".format(entity.name)) + # Get entity type, e.g. PERSON, LOCATION, ADDRESS, NUMBER, et al + print("Entity type: {}".format(language_v1.Entity.Type(entity.type_).name)) + # Get the salience score associated with the entity in the [0, 1.0] range + print("Salience score: {}".format(entity.salience)) + # Get the aggregate sentiment expressed for this entity in the provided document. + sentiment = entity.sentiment + print("Entity sentiment score: {}".format(sentiment.score)) + print("Entity sentiment magnitude: {}".format(sentiment.magnitude)) + # Loop over the metadata associated with entity. For many known entities, + # the metadata is a Wikipedia URL (wikipedia_url) and Knowledge Graph MID (mid). + # Some entity types may have additional metadata, e.g. ADDRESS entities + # may have metadata for the address street_name, postal_code, et al.
+ for metadata_name, metadata_value in entity.metadata.items(): + print("{} = {}".format(metadata_name, metadata_value)) + + # Loop over the mentions of this entity in the input document. + # The API currently supports proper noun mentions. + for mention in entity.mentions: + print("Mention text: {}".format(mention.text.content)) + # Get the mention type, e.g. PROPER for proper noun + print( + "Mention type: {}".format( + language_v1.EntityMention.Type(mention.type_).name + ) + ) + + # Get the language of the text, which will be the same as + # the language specified in the request or, if not specified, + # the automatically-detected language. + print("Language of the text: {}".format(response.language)) + + +# [END language_entity_sentiment_text] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--text_content", type=str, default="Grapes are good. Bananas are bad." + ) + args = parser.parse_args() + + sample_analyze_entity_sentiment(args.text_content) + + +if __name__ == "__main__": + main() diff --git a/language/v1/language_sentiment_gcs.py b/language/v1/language_sentiment_gcs.py new file mode 100644 index 000000000000..f297c3867426 --- /dev/null +++ b/language/v1/language_sentiment_gcs.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! 
# This is a generated sample ("Request", "language_sentiment_gcs")

# To install the latest published package dependency, execute the following:
#   pip install google-cloud-language

# sample-metadata
#   title: Analyzing Sentiment (GCS)
#   description: Analyzing Sentiment in text file stored in Cloud Storage
#   usage: python3 samples/v1/language_sentiment_gcs.py [--gcs_content_uri "gs://cloud-samples-data/language/sentiment-positive.txt"]

# [START language_sentiment_gcs]
from google.cloud import language_v1


def sample_analyze_sentiment(gcs_content_uri):
    """
    Analyze the sentiment of a text file stored in Cloud Storage.

    Prints the document-level sentiment score and magnitude, then the text,
    score and magnitude of every sentence, and finally the language reported
    in the response.

    Args:
      gcs_content_uri: Google Cloud Storage URI where the file content is
        located, e.g. gs://[Your Bucket]/[Path to File]
    """

    client = language_v1.LanguageServiceClient()

    # gcs_content_uri = 'gs://cloud-samples-data/language/sentiment-positive.txt'

    document = {
        "gcs_content_uri": gcs_content_uri,
        # Available types: PLAIN_TEXT, HTML
        "type_": language_v1.Document.Type.PLAIN_TEXT,
        # Optional. If not specified, the language is automatically detected.
        # For list of supported languages:
        # https://cloud.google.com/natural-language/docs/languages
        "language": "en",
    }
    request = {
        "document": document,
        # Available values: NONE, UTF8, UTF16, UTF32
        "encoding_type": language_v1.EncodingType.UTF8,
    }
    response = client.analyze_sentiment(request=request)

    # Overall sentiment of the input document
    overall = response.document_sentiment
    print(f"Document sentiment score: {overall.score}")
    print(f"Document sentiment magnitude: {overall.magnitude}")

    # Sentiment of each individual sentence in the document
    for sentence in response.sentences:
        sentence_sentiment = sentence.sentiment
        print(f"Sentence text: {sentence.text.content}")
        print(f"Sentence sentiment score: {sentence_sentiment.score}")
        print(f"Sentence sentiment magnitude: {sentence_sentiment.magnitude}")

    # The response language is the one given in the request or, when none
    # was given, the automatically-detected one.
    print(f"Language of the text: {response.language}")


# [END language_sentiment_gcs]


def main():
    """Parse --gcs_content_uri from the command line and run the sample."""
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--gcs_content_uri",
        type=str,
        default="gs://cloud-samples-data/language/sentiment-positive.txt",
    )
    options = cli.parse_args()

    sample_analyze_sentiment(options.gcs_content_uri)


if __name__ == "__main__":
    main()
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# DO NOT EDIT! This is a generated sample ("Request", "language_sentiment_text")

# To install the latest published package dependency, execute the following:
#   pip install google-cloud-language

# sample-metadata
#   title: Analyzing Sentiment
#   description: Analyzing Sentiment in a String
#   usage: python3 samples/v1/language_sentiment_text.py [--text_content "I am so happy and joyful."]

# [START language_sentiment_text]
from google.cloud import language_v1


def sample_analyze_sentiment(text_content):
    """
    Analyze the sentiment of a string.

    Prints the document-level sentiment score and magnitude, then the text,
    score and magnitude of every sentence, and finally the language reported
    in the response.

    Args:
      text_content: The text content to analyze
    """

    client = language_v1.LanguageServiceClient()

    # text_content = 'I am so happy and joyful.'

    document = {
        "content": text_content,
        # Available types: PLAIN_TEXT, HTML
        "type_": language_v1.Document.Type.PLAIN_TEXT,
        # Optional. If not specified, the language is automatically detected.
        # For list of supported languages:
        # https://cloud.google.com/natural-language/docs/languages
        "language": "en",
    }
    request = {
        "document": document,
        # Available values: NONE, UTF8, UTF16, UTF32
        "encoding_type": language_v1.EncodingType.UTF8,
    }
    response = client.analyze_sentiment(request=request)

    # Overall sentiment of the input document
    overall = response.document_sentiment
    print(f"Document sentiment score: {overall.score}")
    print(f"Document sentiment magnitude: {overall.magnitude}")

    # Sentiment of each individual sentence in the document
    for sentence in response.sentences:
        sentence_sentiment = sentence.sentiment
        print(f"Sentence text: {sentence.text.content}")
        print(f"Sentence sentiment score: {sentence_sentiment.score}")
        print(f"Sentence sentiment magnitude: {sentence_sentiment.magnitude}")

    # The response language is the one given in the request or, when none
    # was given, the automatically-detected one.
    print(f"Language of the text: {response.language}")


# [END language_sentiment_text]


def main():
    """Parse --text_content from the command line and run the sample."""
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--text_content", type=str, default="I am so happy and joyful.")
    options = cli.parse_args()

    sample_analyze_sentiment(options.text_content)


if __name__ == "__main__":
    main()
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# DO NOT EDIT! This is a generated sample ("Request", "language_syntax_gcs")

# To install the latest published package dependency, execute the following:
#   pip install google-cloud-language

# sample-metadata
#   title: Analyzing Syntax (GCS)
#   description: Analyzing Syntax in text file stored in Cloud Storage
#   usage: python3 samples/v1/language_syntax_gcs.py [--gcs_content_uri "gs://cloud-samples-data/language/syntax-sentence.txt"]

# [START language_syntax_gcs]
from google.cloud import language_v1


def sample_analyze_syntax(gcs_content_uri):
    """
    Analyze the syntax of a text file stored in Cloud Storage.

    For every token in the response, prints its text, offset, part-of-speech
    tag, voice, tense, lemma and dependency edge; then prints the language
    reported in the response.

    Args:
      gcs_content_uri: Google Cloud Storage URI where the file content is
        located, e.g. gs://[Your Bucket]/[Path to File]
    """

    client = language_v1.LanguageServiceClient()

    # gcs_content_uri = 'gs://cloud-samples-data/language/syntax-sentence.txt'

    document = {
        "gcs_content_uri": gcs_content_uri,
        # Available types: PLAIN_TEXT, HTML
        "type_": language_v1.Document.Type.PLAIN_TEXT,
        # Optional. If not specified, the language is automatically detected.
        # For list of supported languages:
        # https://cloud.google.com/natural-language/docs/languages
        "language": "en",
    }
    request = {
        "document": document,
        # Available values: NONE, UTF8, UTF16, UTF32
        "encoding_type": language_v1.EncodingType.UTF8,
    }
    response = client.analyze_syntax(request=request)

    # Walk the tokens returned from the API
    for token in response.tokens:
        # The token text is usually a word or a punctuation mark.
        token_text = token.text
        print(f"Token text: {token_text.content}")
        print(
            f"Location of this token in overall document: {token_text.begin_offset}"
        )

        # Part of speech is defined in:
        # http://www.lrec-conf.org/proceedings/lrec2012/pdf/274_Paper.pdf
        # See API reference for additional Part of Speech information available
        pos = token.part_of_speech
        # Tag, e.g. NOUN, ADJ for Adjective, et al.
        tag_name = language_v1.PartOfSpeech.Tag(pos.tag).name
        print(f"Part of Speech tag: {tag_name}")
        # Voice, e.g. ACTIVE or PASSIVE
        voice_name = language_v1.PartOfSpeech.Voice(pos.voice).name
        print(f"Voice: {voice_name}")
        # Tense, e.g. PAST, FUTURE, PRESENT, et al.
        tense_name = language_v1.PartOfSpeech.Tense(pos.tense).name
        print(f"Tense: {tense_name}")

        # Lemma of the token. Wikipedia lemma description:
        # https://en.wikipedia.org/wiki/Lemma_(morphology)
        print(f"Lemma: {token.lemma}")

        # Dependency tree parse information for this token.
        # For more information on dependency labels:
        # http://www.aclweb.org/anthology/P13-2017
        edge = token.dependency_edge
        print(f"Head token index: {edge.head_token_index}")
        label_name = language_v1.DependencyEdge.Label(edge.label).name
        print(f"Label: {label_name}")

    # The response language is the one given in the request or, when none
    # was given, the automatically-detected one.
    print(f"Language of the text: {response.language}")


# [END language_syntax_gcs]


def main():
    """Parse --gcs_content_uri from the command line and run the sample."""
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--gcs_content_uri",
        type=str,
        default="gs://cloud-samples-data/language/syntax-sentence.txt",
    )
    options = cli.parse_args()

    sample_analyze_syntax(options.gcs_content_uri)


if __name__ == "__main__":
    main()
# This is a generated sample ("Request", "language_syntax_text")

# To install the latest published package dependency, execute the following:
#   pip install google-cloud-language

# sample-metadata
#   title: Analyzing Syntax
#   description: Analyzing Syntax in a String
#   usage: python3 samples/v1/language_syntax_text.py [--text_content "This is a short sentence."]

# [START language_syntax_text]
from google.cloud import language_v1


def sample_analyze_syntax(text_content):
    """
    Analyze the syntax of a string.

    For every token in the response, prints its text, offset, part-of-speech
    tag, voice, tense, lemma and dependency edge; then prints the language
    reported in the response.

    Args:
      text_content: The text content to analyze
    """

    client = language_v1.LanguageServiceClient()

    # text_content = 'This is a short sentence.'

    document = {
        "content": text_content,
        # Available types: PLAIN_TEXT, HTML
        "type_": language_v1.Document.Type.PLAIN_TEXT,
        # Optional. If not specified, the language is automatically detected.
        # For list of supported languages:
        # https://cloud.google.com/natural-language/docs/languages
        "language": "en",
    }
    request = {
        "document": document,
        # Available values: NONE, UTF8, UTF16, UTF32
        "encoding_type": language_v1.EncodingType.UTF8,
    }
    response = client.analyze_syntax(request=request)

    # Walk the tokens returned from the API
    for token in response.tokens:
        # The token text is usually a word or a punctuation mark.
        token_text = token.text
        print(f"Token text: {token_text.content}")
        print(
            f"Location of this token in overall document: {token_text.begin_offset}"
        )

        # Part of speech is defined in:
        # http://www.lrec-conf.org/proceedings/lrec2012/pdf/274_Paper.pdf
        # See API reference for additional Part of Speech information available
        pos = token.part_of_speech
        # Tag, e.g. NOUN, ADJ for Adjective, et al.
        tag_name = language_v1.PartOfSpeech.Tag(pos.tag).name
        print(f"Part of Speech tag: {tag_name}")
        # Voice, e.g. ACTIVE or PASSIVE
        voice_name = language_v1.PartOfSpeech.Voice(pos.voice).name
        print(f"Voice: {voice_name}")
        # Tense, e.g. PAST, FUTURE, PRESENT, et al.
        tense_name = language_v1.PartOfSpeech.Tense(pos.tense).name
        print(f"Tense: {tense_name}")

        # Lemma of the token. Wikipedia lemma description:
        # https://en.wikipedia.org/wiki/Lemma_(morphology)
        print(f"Lemma: {token.lemma}")

        # Dependency tree parse information for this token.
        # For more information on dependency labels:
        # http://www.aclweb.org/anthology/P13-2017
        edge = token.dependency_edge
        print(f"Head token index: {edge.head_token_index}")
        label_name = language_v1.DependencyEdge.Label(edge.label).name
        print(f"Label: {label_name}")

    # The response language is the one given in the request or, when none
    # was given, the automatically-detected one.
    print(f"Language of the text: {response.language}")


# [END language_syntax_text]


def main():
    """Parse --text_content from the command line and run the sample."""
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--text_content", type=str, default="This is a short sentence.")
    options = cli.parse_args()

    sample_analyze_syntax(options.text_content)


if __name__ == "__main__":
    main()
+ - call: {sample: language_entities_text} + - assert_contains: + - {literal: "Representative name for the entity: California"} + - {literal: "Entity type: LOCATION"} + - {literal: "Salience score:"} + - {literal: "wikipedia_url: https://en.wikipedia.org/wiki/California"} + - {literal: "mid: /m/01n7q"} + - {literal: "Mention text: California"} + - {literal: "Mention type: PROPER"} + - {literal: "Mention text: state"} + - {literal: "Mention type: COMMON"} + - {literal: "Language of the text: en"} + + - name: language_entities_text - Analyzing the Entities of a text string (*custom value*) + spec: + # Custom value: "Alice is a person. She lives in California." + - call: + sample: language_entities_text + params: + text_content: {literal: "Alice is a person. She lives in California."} + - assert_contains: + - {literal: "Representative name for the entity: Alice"} + - {literal: "Entity type: PERSON"} + - {literal: "Mention text: Alice"} + - {literal: "Mention type: PROPER"} + - {literal: "Mention text: person"} + - {literal: "Mention type: COMMON"} + - {literal: "Representative name for the entity: California"} + - {literal: "Entity type: LOCATION"} + - {literal: "wikipedia_url: https://en.wikipedia.org/wiki/California"} + - {literal: "mid: /m/01n7q"} + - {literal: "Language of the text: en"} + + - name: language_entities_text - Analyzing the Entities of a text string (*metadata attributes*) + spec: + # Try out some of the metadata attributes which should be available for dates, addresses, etc. + # In case fake (555) area code numbers don't work, using United States Naval Observatory number. + # Custom value: "I called 202-762-1401 on January 31, 2019 from 1600 Amphitheatre Parkway, Mountain View, CA." + - call: + sample: language_entities_text + params: + text_content: + literal: "I called 202-762-1401 on January 31, 2019 from 1600 Amphitheatre Parkway, Mountain View, CA." 
+ # The results may change, but it's fair to say that at least one of the following types were detected: + - assert_contains_any: + - literal: "Entity type: DATE" + - literal: "Entity type: ADDRESS" + - literal: "Entity type: PHONE_NUMBER" + # Check that at least some of the supporting metadata for an entity was present in the response + - assert_contains_any: + - literal: "month: 1" + - literal: "day: 31" + - literal: "year: 2019" + - literal: "street_number: 1600" + - literal: "street_name: Amphitheatre Parkway" + - literal: "area_code: 202" + - literal: "number: 7621401" + + - name: language_entities_gcs - Analyzing the Entities of text file in GCS (default value) + spec: + # Default value: gs://cloud-samples-data/language/entity.txt + # => "California is a state." + - call: {sample: language_entities_gcs} + - assert_contains: + - {literal: "Representative name for the entity: California"} + - {literal: "Entity type: LOCATION"} + - {literal: "Salience score:"} + - {literal: "wikipedia_url: https://en.wikipedia.org/wiki/California"} + - {literal: "mid: /m/01n7q"} + - {literal: "Mention text: California"} + - {literal: "Mention type: PROPER"} + - {literal: "Mention text: state"} + - {literal: "Mention type: COMMON"} + - {literal: "Language of the text: en"} + + - name: language_entities_gcs - Analyzing the Entities of text file in GCS (*custom value*) + spec: + # Use different file: gs://cloud-samples-data/language/entity-sentiment.txt + # => "Grapes are good. Bananas are bad." 
+ - call: + sample: language_entities_gcs + params: + gcs_content_uri: + literal: "gs://cloud-samples-data/language/entity-sentiment.txt" + - assert_contains: + - {literal: "Representative name for the entity: Grapes"} + - {literal: "Mention text: Grapes"} + - {literal: "Mention type: COMMON"} + - {literal: "Representative name for the entity: Bananas"} + - {literal: "Mention text: Bananas"} + - {literal: "Language of the text: en"} diff --git a/language/v1/test/analyzing_entity_sentiment.test.yaml b/language/v1/test/analyzing_entity_sentiment.test.yaml new file mode 100644 index 000000000000..41369f978b36 --- /dev/null +++ b/language/v1/test/analyzing_entity_sentiment.test.yaml @@ -0,0 +1,76 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +type: test/samples +schema_version: 1 +test: + suites: + - name: "Analyzing Entity Sentiment [code sample tests]" + cases: + + - name: language_entity_sentiment_text - Analyzing Entity Sentiment of a text string (default value) + spec: + # Default value: "Grapes are good. Bananas are bad." 
+ - call: {sample: language_entity_sentiment_text} + - assert_contains: + - {literal: "Representative name for the entity: Grapes"} + - {literal: "Entity sentiment score: 0."} + - {literal: "Representative name for the entity: Bananas"} + - {literal: "Entity sentiment score: -0."} + - {literal: "Entity sentiment magnitude: 0."} + - {literal: "Language of the text: en"} + + - name: language_entity_sentiment_text - Analyzing Entity Sentiment of a text string (*custom value*) + spec: + # Custom value: "Grapes are actually not very good. But Bananas are great." + - call: + sample: language_entity_sentiment_text + params: + text_content: {literal: "Grapes are actually not very good. But Bananas are great."} + - assert_contains: + - {literal: "Representative name for the entity: Grapes"} + - {literal: "Entity sentiment score: -0."} + - {literal: "Representative name for the entity: Bananas"} + - {literal: "Entity sentiment score: 0."} + - {literal: "Entity sentiment magnitude: 0."} + - {literal: "Language of the text: en"} + + - name: language_entity_sentiment_gcs - Analyzing Entity Sentiment of text file in GCS (default value) + spec: + # Default value: gs://cloud-samples-data/language/entity-sentiment.txt + # => "Grapes are good. Bananas are bad." + - call: {sample: language_entity_sentiment_gcs} + - assert_contains: + - {literal: "Representative name for the entity: Grapes"} + - {literal: "Entity sentiment score: -0."} + - {literal: "Representative name for the entity: Bananas"} + - {literal: "Entity sentiment score: 0."} + - {literal: "Entity sentiment magnitude: 0."} + - {literal: "Language of the text: en"} + + - name: language_entity_sentiment_gcs - Analyzing Entity Sentiment of text file in GCS (*custom value*) + spec: + # Use different file: gs://cloud-samples-data/language/entity-sentiment-reverse.txt + # => "Grapes are actually not very good. But Bananas are great." 
+ - call: + sample: language_entity_sentiment_gcs + params: + gcs_content_uri: + literal: "gs://cloud-samples-data/language/entity-sentiment-reverse.txt" + - assert_contains: + - {literal: "Representative name for the entity: Grapes"} + - {literal: "Entity sentiment score: -0."} + - {literal: "Representative name for the entity: Bananas"} + - {literal: "Entity sentiment score: 0."} + - {literal: "Entity sentiment magnitude: 0."} + - {literal: "Language of the text: en"} diff --git a/language/v1/test/analyzing_sentiment.test.yaml b/language/v1/test/analyzing_sentiment.test.yaml new file mode 100644 index 000000000000..73b4410e422c --- /dev/null +++ b/language/v1/test/analyzing_sentiment.test.yaml @@ -0,0 +1,87 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +type: test/samples +schema_version: 1 +test: + suites: + - name: "Analyzing Sentiment [code sample tests]" + cases: + + - name: language_sentiment_text - Analyzing the sentiment of a text string (default value) + spec: + # Default value: "I am so happy and joyful." + - call: {sample: language_sentiment_text} + - assert_contains: + - {literal: "Document sentiment score: 0."} + - {literal: "Document sentiment magnitude: 0."} + - {literal: "Sentence text: I am so happy and joyful."} + - {literal: "Sentence sentiment score: 0."} + - {literal: "Sentence sentiment magnitude: 0."} + - {literal: "Language of the text: en"} + # There should be no negative sentiment scores for this value. 
+ - assert_not_contains: + - {literal: "Document sentiment score: -0."} + - {literal: "Sentence sentiment score: -0."} + + - name: language_sentiment_text - Analyzing the sentiment of a text string (*custom value*) + spec: + # Custom value: "I am very happy. I am angry and sad." + - call: + sample: language_sentiment_text + params: + text_content: {literal: "I am very happy. I am angry and sad."} + - assert_contains: + - {literal: "Sentence text: I am very happy"} + - {literal: "Sentence sentiment score: 0."} + - {literal: "Sentence text: I am angry and sad"} + - {literal: "Sentence sentiment score: -0."} + - {literal: "Language of the text: en"} + + - name: language_sentiment_gcs - Analyzing the sentiment of text file in GCS (default value) + spec: + # Default value: gs://cloud-samples-data/language/sentiment-positive.txt + # => "I am so happy and joyful." + - call: {sample: language_sentiment_gcs} + - assert_contains: + - {literal: "Document sentiment score: 0."} + - {literal: "Document sentiment magnitude: 0."} + - {literal: "Sentence text: I am so happy and joyful."} + - {literal: "Sentence sentiment score: 0."} + - {literal: "Sentence sentiment magnitude: 0."} + - {literal: "Language of the text: en"} + # There should be no negative sentiment scores for this value. + - assert_not_contains: + - {literal: "Document sentiment score: -0."} + - {literal: "Sentence sentiment score: -0."} + + - name: language_sentiment_gcs - Analyzing the sentiment of text file in GCS (*custom value*) + spec: + # Use different file: gs://cloud-samples-data/language/sentiment-negative.txt + # => "I am so sad and upset." 
+ - call: + sample: language_sentiment_gcs + params: + gcs_content_uri: + literal: "gs://cloud-samples-data/language/sentiment-negative.txt" + - assert_contains: + - {literal: "Document sentiment score: -0."} + - {literal: "Document sentiment magnitude: 0."} + - {literal: "Sentence text: I am so sad and upset."} + - {literal: "Sentence sentiment score: -0."} + - {literal: "Sentence sentiment magnitude: 0."} + - {literal: "Language of the text: en"} + # There should be no positive sentiment scores for this value. + - assert_not_contains: + - {literal: "Document sentiment score: 0."} + - {literal: "Sentence sentiment score: 0."} diff --git a/language/v1/test/analyzing_syntax.test.yaml b/language/v1/test/analyzing_syntax.test.yaml new file mode 100644 index 000000000000..88df9ca040b9 --- /dev/null +++ b/language/v1/test/analyzing_syntax.test.yaml @@ -0,0 +1,85 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +type: test/samples +schema_version: 1 +test: + suites: + - name: "Analyzing Syntax [code sample tests]" + cases: + + - name: language_syntax_text - Analyzing the syntax of a text string (default value) + spec: + # Default value: "This is a short sentence." 
+ - call: {sample: language_syntax_text} + - assert_contains: + - {literal: "Token text: is"} + - {literal: "Part of Speech tag: VERB"} + - {literal: "Tense: PRESENT"} + - {literal: "Lemma: be"} + - {literal: "Token text: short"} + - {literal: "Part of Speech tag: ADJ"} + - {literal: "Lemma: short"} + - {literal: "Language of the text: en"} + + - name: language_syntax_text - Analyzing the syntax of a text string (*custom value*) + spec: + # Custom value: "Alice runs. Bob ran." + - call: + sample: language_syntax_text + params: + text_content: {literal: "Alice runs. Bob ran."} + - assert_contains: + - {literal: "Token text: Alice"} + - {literal: "Location of this token in overall document: 0"} + - {literal: "Part of Speech tag: NOUN"} + - {literal: "Label: NSUBJ"} + - {literal: "Token text: runs"} + - {literal: "Part of Speech tag: VERB"} + - {literal: "Tense: PRESENT"} + - {literal: "Lemma: run"} + - {literal: "Token text: ran"} + - {literal: "Tense: PAST"} + - {literal: "Language of the text: en"} + + - name: language_syntax_gcs - Analyzing the syntax of text file in GCS (default value) + spec: + # Default value: gs://cloud-samples-data/language/syntax-sentence.txt + # => "This is a short sentence." + - call: {sample: language_syntax_gcs} + - assert_contains: + - {literal: "Token text: is"} + - {literal: "Part of Speech tag: VERB"} + - {literal: "Tense: PRESENT"} + - {literal: "Lemma: be"} + - {literal: "Token text: short"} + - {literal: "Part of Speech tag: ADJ"} + - {literal: "Lemma: short"} + - {literal: "Language of the text: en"} + + - name: language_syntax_gcs - Analyzing the syntax of text file in GCS (*custom value*) + spec: + # Use different file: gs://cloud-samples-data/language/hello.txt + # => "Hello, world!" 
+ - call: + sample: language_syntax_gcs + params: + gcs_content_uri: + literal: "gs://cloud-samples-data/language/hello.txt" + - assert_contains: + - {literal: "Token text: Hello"} + - {literal: "Token text: World"} + - {literal: "Part of Speech tag: NOUN"} + - {literal: "Token text: !"} + - {literal: "Part of Speech tag: PUNCT"} + - {literal: "Language of the text: en"} diff --git a/language/v1/test/classifying_content.test.yaml b/language/v1/test/classifying_content.test.yaml new file mode 100644 index 000000000000..6218462da94e --- /dev/null +++ b/language/v1/test/classifying_content.test.yaml @@ -0,0 +1,64 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +type: test/samples +schema_version: 1 +test: + suites: + - name: "Classifying Content [code sample tests]" + cases: + + - name: language_classify_text - Classifying Content of a text string (default value) + spec: + # Default value: "That actor on TV makes movies in Hollywood and also stars in a variety of popular new TV shows." + - call: {sample: language_classify_text} + - assert_contains_any: + - {literal: "TV"} + - {literal: "Movies"} + - {literal: "Entertainment"} + + - name: language_classify_text - Classifying Content of a text string (*custom value*) + spec: + # Custom value: "Dungeons and dragons and loot, oh my!" 
+ - call: + sample: language_classify_text + params: + text_content: {literal: "Dungeons and dragons and loot, oh my!"} + - assert_contains_any: + - {literal: "Games"} + - {literal: "Roleplaying"} + - {literal: "Computer"} + + - name: language_classify_gcs - Classifying Content of text file in GCS (default value) + spec: + # Default value: gs://cloud-samples-data/language/classify-entertainment.txt + # => "This is about film and movies and television and acting and movie theatres and theatre and drama and entertainment and the arts." + - call: {sample: language_classify_gcs} + - assert_contains_any: + - {literal: "TV"} + - {literal: "Movies"} + - {literal: "Entertainment"} + + - name: language_classify_gcs - Classifying Content of text file in GCS (*custom value*) + spec: + # Use different file: gs://cloud-samples-data/language/android.txt + # => "Android is a mobile operating system developed by Google, based on the Linux kernel and..." + - call: + sample: language_classify_gcs + params: + gcs_content_uri: + literal: "gs://cloud-samples-data/language/android.txt" + - assert_contains_any: + - {literal: "Mobile"} + - {literal: "Phone"} + - {literal: "Internet"} diff --git a/language/v1/test/samples.manifest.yaml b/language/v1/test/samples.manifest.yaml new file mode 100644 index 000000000000..b60100c40dab --- /dev/null +++ b/language/v1/test/samples.manifest.yaml @@ -0,0 +1,51 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+type: manifest/samples +schema_version: 3 +base: &common + env: 'python' + bin: 'python3' + chdir: '{@manifest_dir}/../..' + basepath: '.' +samples: +- <<: *common + path: '{basepath}/v1/language_classify_gcs.py' + sample: 'language_classify_gcs' +- <<: *common + path: '{basepath}/v1/language_classify_text.py' + sample: 'language_classify_text' +- <<: *common + path: '{basepath}/v1/language_entities_gcs.py' + sample: 'language_entities_gcs' +- <<: *common + path: '{basepath}/v1/language_entities_text.py' + sample: 'language_entities_text' +- <<: *common + path: '{basepath}/v1/language_entity_sentiment_gcs.py' + sample: 'language_entity_sentiment_gcs' +- <<: *common + path: '{basepath}/v1/language_entity_sentiment_text.py' + sample: 'language_entity_sentiment_text' +- <<: *common + path: '{basepath}/v1/language_sentiment_gcs.py' + sample: 'language_sentiment_gcs' +- <<: *common + path: '{basepath}/v1/language_sentiment_text.py' + sample: 'language_sentiment_text' +- <<: *common + path: '{basepath}/v1/language_syntax_gcs.py' + sample: 'language_syntax_gcs' +- <<: *common + path: '{basepath}/v1/language_syntax_text.py' + sample: 'language_syntax_text'