From 4219e3edd626757b11e56888a6fb36ab87a5bf19 Mon Sep 17 00:00:00 2001 From: Jason Dobry Date: Fri, 2 Dec 2016 13:00:32 -0800 Subject: [PATCH 1/2] Switch NL from v1beta1 to v1. Fixes #2747. --- language/google/cloud/language/connection.py | 2 +- language/google/cloud/language/document.py | 27 ++- language/google/cloud/language/entity.py | 4 +- language/google/cloud/language/sentiment.py | 16 +- language/google/cloud/language/syntax.py | 8 +- language/unit_tests/test_document.py | 167 ++++++++++++++++++- language/unit_tests/test_sentiment.py | 12 +- system_tests/language.py | 30 +++- 8 files changed, 233 insertions(+), 33 deletions(-) diff --git a/language/google/cloud/language/connection.py b/language/google/cloud/language/connection.py index a1e0269becf5..196824b9a40e 100644 --- a/language/google/cloud/language/connection.py +++ b/language/google/cloud/language/connection.py @@ -23,7 +23,7 @@ class Connection(_http.JSONConnection): API_BASE_URL = 'https://language.googleapis.com' """The base of the API call URL.""" - API_VERSION = 'v1beta1' + API_VERSION = 'v1' """The version of the API, used in building the API call's URL.""" API_URL_TEMPLATE = '{api_base_url}/{api_version}/documents:{path}' diff --git a/language/google/cloud/language/document.py b/language/google/cloud/language/document.py index 3eec41d552d2..d529e53c2eaa 100644 --- a/language/google/cloud/language/document.py +++ b/language/google/cloud/language/document.py @@ -155,7 +155,7 @@ def analyze_entities(self): other properties. .. _analyzeEntities: https://cloud.google.com/natural-language/\ - reference/rest/v1beta1/documents/analyzeEntities + reference/rest/v1/documents/analyzeEntities See `analyzeEntities`_. @@ -176,7 +176,7 @@ def analyze_sentiment(self): """Analyze the sentiment in the current document. .. _analyzeSentiment: https://cloud.google.com/natural-language/\ - reference/rest/v1beta1/documents/analyzeSentiment + reference/rest/v1/documents/analyzeSentiment See `analyzeSentiment`_. @@ -188,6 +188,27 @@ def analyze_sentiment(self): method='POST', path='analyzeSentiment', data=data) return Sentiment.from_api_repr(api_response['documentSentiment']) + def analyze_syntax(self): + """Analyze the syntax in the current document. + + .. _analyzeSyntax: https://cloud.google.com/natural-language/\ + reference/rest/v1/documents/analyzeSyntax + + See `analyzeSyntax`_. + + :rtype: list + :returns: A list of :class:`~.language.syntax.Token` returned from + the API. + """ + data = { + 'document': self._to_dict(), + 'encodingType': self.encoding, + } + api_response = self.client._connection.api_request( + method='POST', path='analyzeSyntax', data=data) + return [Token.from_api_repr(token) + for token in api_response['tokens']] + def annotate_text(self, include_syntax=True, include_entities=True, include_sentiment=True): """Advanced natural language API: document syntax and other features. @@ -205,7 +226,7 @@ def annotate_text(self, include_syntax=True, include_entities=True, learning and need in-depth text features to build upon. .. _annotateText: https://cloud.google.com/natural-language/\ - reference/rest/v1beta1/documents/annotateText + reference/rest/v1/documents/annotateText See `annotateText`_. diff --git a/language/google/cloud/language/entity.py b/language/google/cloud/language/entity.py index 25cacf21bd80..f20887dc3f2e 100644 --- a/language/google/cloud/language/entity.py +++ b/language/google/cloud/language/entity.py @@ -58,9 +58,9 @@ class Entity(object): and put in its own property. .. _Entity message: https://cloud.google.com/natural-language/\ - reference/rest/v1beta1/Entity + reference/rest/v1/Entity .. _EntityType enum: https://cloud.google.com/natural-language/\ - reference/rest/v1beta1/Entity#Type + reference/rest/v1/Entity#Type See `Entity message`_. diff --git a/language/google/cloud/language/sentiment.py b/language/google/cloud/language/sentiment.py index 91502367014e..e1e5da7edc6f 100644 --- a/language/google/cloud/language/sentiment.py +++ b/language/google/cloud/language/sentiment.py @@ -22,24 +22,24 @@ class Sentiment(object): """A Google Cloud Natural Language API sentiment object. .. _Sentiment message: https://cloud.google.com/natural-language/\ - reference/rest/v1beta1/Sentiment + reference/rest/v1/Sentiment .. _Sentiment basics: https://cloud.google.com/natural-language/\ docs/basics#sentiment-analysis-values See `Sentiment message`_ and `Sentiment basics`_. - :type polarity: float - :param polarity: Polarity of the sentiment in the ``[-1.0, 1.0]`` range. + :type score: float + :param score: Score of the sentiment in the ``[-1.0, 1.0]`` range. Larger numbers represent more positive sentiments. :type magnitude: float :param magnitude: A non-negative number in the ``[0, +inf)`` range, which represents the absolute magnitude of sentiment - regardless of polarity (positive or negative). + regardless of score (positive or negative). """ - def __init__(self, polarity, magnitude): - self.polarity = polarity + def __init__(self, score, magnitude): + self.score = score self.magnitude = magnitude @classmethod @@ -52,6 +52,6 @@ def from_api_repr(cls, payload): :rtype: :class:`Sentiment` :returns: The sentiment parsed from the API representation. """ - polarity = payload['polarity'] + score = payload['score'] magnitude = payload['magnitude'] - return cls(polarity, magnitude) + return cls(score, magnitude) diff --git a/language/google/cloud/language/syntax.py b/language/google/cloud/language/syntax.py index fdf14aa7b5e2..0c7260839e74 100644 --- a/language/google/cloud/language/syntax.py +++ b/language/google/cloud/language/syntax.py @@ -103,10 +103,10 @@ class Token(object): """A Google Cloud Natural Language API token object. .. _Token message: https://cloud.google.com/natural-language/reference\ - /rest/v1beta1/documents/annotateText#Token + /rest/v1/documents/annotateText#Token .. _Lemma: https://en.wikipedia.org/wiki/Lemma_(morphology) .. _Label enum: https://cloud.google.com/natural-language/reference/\ - rest/v1beta1/documents/annotateText#Label + rest/v1/documents/annotateText#Label See `Token message`_. @@ -148,7 +148,7 @@ def __init__(self, text_content, text_begin, part_of_speech, @classmethod def from_api_repr(cls, payload): - """Convert a token from the JSON API into a :class:`Sentiment`. + """Convert a token from the JSON API into a :class:`Token`. :param payload: dict :type payload: The value from the backend. @@ -172,7 +172,7 @@ class Sentence(object): """A Google Cloud Natural Language API sentence object. .. _Sentence message: https://cloud.google.com/natural-language/reference\ - /rest/v1beta1/documents/annotateText#Sentence + /rest/v1/documents/annotateText#Sentence See `Sentence message`_. diff --git a/language/unit_tests/test_document.py b/language/unit_tests/test_document.py index 644e4512348f..24427b6f4d33 100644 --- a/language/unit_tests/test_document.py +++ b/language/unit_tests/test_document.py @@ -17,7 +17,7 @@ ANNOTATE_NAME = 'Moon' ANNOTATE_CONTENT = 'A cow jumped over the %s.' % (ANNOTATE_NAME,) -ANNOTATE_POLARITY = 1 +ANNOTATE_SCORE = 1 ANNOTATE_MAGNITUDE = 0.2 ANNOTATE_SALIENCE = 0.11793101 ANNOTATE_WIKI_URL = 'http://en.wikipedia.org/wiki/Natural_satellite' @@ -286,20 +286,20 @@ def test_analyze_entities(self): client._connection.api_request.assert_called_once_with( path='analyzeEntities', method='POST', data=expected) - def _verify_sentiment(self, sentiment, polarity, magnitude): + def _verify_sentiment(self, sentiment, score, magnitude): from google.cloud.language.sentiment import Sentiment self.assertIsInstance(sentiment, Sentiment) - self.assertEqual(sentiment.polarity, polarity) + self.assertEqual(sentiment.score, score) self.assertEqual(sentiment.magnitude, magnitude) def test_analyze_sentiment(self): content = 'All the pretty horses.' - polarity = 1 + score = 1 magnitude = 0.6 response = { 'documentSentiment': { - 'polarity': polarity, + 'score': score, 'magnitude': magnitude, }, 'language': 'en-US', @@ -308,13 +308,164 @@ def test_analyze_sentiment(self): document = self._make_one(client, content) sentiment = document.analyze_sentiment() - self._verify_sentiment(sentiment, polarity, magnitude) + self._verify_sentiment(sentiment, score, magnitude) # Verify the request. expected = self._expected_data(content) client._connection.api_request.assert_called_once_with( path='analyzeSentiment', method='POST', data=expected) + def _verify_token(self, token, text_content, part_of_speech, lemma): + from google.cloud.language.syntax import Token + from google.cloud.language.syntax import PartOfSpeech + + self.assertIsInstance(token, Token) + self.assertEqual(token.text_content, text_content) + self.assertEqual(token.part_of_speech, part_of_speech) + self.assertEqual(token.lemma, lemma) + + def test_analyze_syntax(self): + from google.cloud.language.document import Encoding + from google.cloud.language.syntax import Token + from google.cloud.language.syntax import PartOfSpeech + + name1 = 'R-O-C-K' + name2 = 'USA' + content = name1 + ' in the ' + name2 + response = { + 'sentences': [ + { + 'text': { + 'content': 'R-O-C-K in the USA', + 'beginOffset': -1, + }, + 'sentiment': None, + } + ], + 'tokens': [ + { + 'text': { + 'content': 'R-O-C-K', + 'beginOffset': -1, + }, + 'partOfSpeech': { + 'tag': 'NOUN', + 'aspect': 'ASPECT_UNKNOWN', + 'case': 'CASE_UNKNOWN', + 'form': 'FORM_UNKNOWN', + 'gender': 'GENDER_UNKNOWN', + 'mood': 'MOOD_UNKNOWN', + 'number': 'SINGULAR', + 'person': 'PERSON_UNKNOWN', + 'proper': 'PROPER', + 'reciprocity': 'RECIPROCITY_UNKNOWN', + 'tense': 'TENSE_UNKNOWN', + 'voice': 'VOICE_UNKNOWN', + }, + 'dependencyEdge': { + 'headTokenIndex': 0, + 'label': 'ROOT', + }, + 'lemma': 'R-O-C-K', + }, + { + 'text': { + 'content': 'in', + 'beginOffset': -1, + }, + 'partOfSpeech': { + 'tag': 'ADP', + 'aspect': 'ASPECT_UNKNOWN', + 'case': 'CASE_UNKNOWN', + 'form': 'FORM_UNKNOWN', + 'gender': 'GENDER_UNKNOWN', + 'mood': 'MOOD_UNKNOWN', + 'number': 'NUMBER_UNKNOWN', + 'person': 'PERSON_UNKNOWN', + 'proper': 'PROPER_UNKNOWN', + 'reciprocity': 'RECIPROCITY_UNKNOWN', + 'tense': 'TENSE_UNKNOWN', + 'voice': 'VOICE_UNKNOWN', + }, + 'dependencyEdge': { + 'headTokenIndex': 0, + 'label': 'PREP', + }, + 'lemma': 'in', + }, + { + 'text': { + 'content': 'the', + 'beginOffset': -1, + }, + 'partOfSpeech': { + 'tag': 'DET', + 'aspect': 'ASPECT_UNKNOWN', + 'case': 'CASE_UNKNOWN', + 'form': 'FORM_UNKNOWN', + 'gender': 'GENDER_UNKNOWN', + 'mood': 'MOOD_UNKNOWN', + 'number': 'NUMBER_UNKNOWN', + 'person': 'PERSON_UNKNOWN', + 'proper': 'PROPER_UNKNOWN', + 'reciprocity': 'RECIPROCITY_UNKNOWN', + 'tense': 'TENSE_UNKNOWN', + 'voice': 'VOICE_UNKNOWN', + }, + 'dependencyEdge': { + 'headTokenIndex': 3, + 'label': 'DET', + }, + 'lemma': 'the', + }, + { + 'text': { + 'content': 'USA', + 'beginOffset': -1, + }, + 'partOfSpeech': { + 'tag': 'NOUN', + 'aspect': 'ASPECT_UNKNOWN', + 'case': 'CASE_UNKNOWN', + 'form': 'FORM_UNKNOWN', + 'gender': 'GENDER_UNKNOWN', + 'mood': 'MOOD_UNKNOWN', + 'number': 'SINGULAR', + 'person': 'PERSON_UNKNOWN', + 'proper': 'PROPER', + 'reciprocity': 'RECIPROCITY_UNKNOWN', + 'tense': 'TENSE_UNKNOWN', + 'voice': 'VOICE_UNKNOWN', + }, + 'dependencyEdge': { + 'headTokenIndex': 1, + 'label': 'POBJ', + }, + 'lemma': 'USA', + }, + ], + 'language': 'en-US', + } + client = make_mock_client(response) + document = self._make_one(client, content) + + tokens = document.analyze_syntax() + self.assertEqual(len(tokens), 4) + token1 = tokens[0] + self._verify_token(token1, name1, PartOfSpeech.NOUN, name1) + token2 = tokens[1] + self._verify_token(token2, 'in', PartOfSpeech.ADPOSITION, 'in') + token3 = tokens[2] + self._verify_token(token3, 'the', PartOfSpeech.DETERMINER, 'the') + token4 = tokens[3] + self._verify_token(token4, name2, PartOfSpeech.NOUN, name2) + + # Verify the request. + expected = self._expected_data( + content, encoding_type=Encoding.UTF8) + client._connection.api_request.assert_called_once_with( + path='analyzeSyntax', method='POST', data=expected) + def _verify_sentences(self, include_syntax, annotations): from google.cloud.language.syntax import Sentence @@ -357,7 +508,7 @@ def _annotate_text_helper(self, include_sentiment, } if include_sentiment: response['documentSentiment'] = { - 'polarity': ANNOTATE_POLARITY, + 'score': ANNOTATE_SCORE, 'magnitude': ANNOTATE_MAGNITUDE, } @@ -375,7 +526,7 @@ def _annotate_text_helper(self, include_sentiment, # Sentiment if include_sentiment: self._verify_sentiment(annotations.sentiment, - ANNOTATE_POLARITY, ANNOTATE_MAGNITUDE) + ANNOTATE_SCORE, ANNOTATE_MAGNITUDE) else: self.assertIsNone(annotations.sentiment) # Entity diff --git a/language/unit_tests/test_sentiment.py b/language/unit_tests/test_sentiment.py index de545faca848..37cc52df954d 100644 --- a/language/unit_tests/test_sentiment.py +++ b/language/unit_tests/test_sentiment.py @@ -26,20 +26,20 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_constructor(self): - polarity = 1 + score = 1 magnitude = 2.3 - sentiment = self._make_one(polarity, magnitude) - self.assertEqual(sentiment.polarity, polarity) + sentiment = self._make_one(score, magnitude) + self.assertEqual(sentiment.score, score) self.assertEqual(sentiment.magnitude, magnitude) def test_from_api_repr(self): klass = self._get_target_class() - polarity = -1 + score = -1 magnitude = 5.55 payload = { - 'polarity': polarity, + 'score': score, 'magnitude': magnitude, } sentiment = klass.from_api_repr(payload) - self.assertEqual(sentiment.polarity, polarity) + self.assertEqual(sentiment.score, score) self.assertEqual(sentiment.magnitude, magnitude) diff --git a/system_tests/language.py b/system_tests/language.py index 1e81ca12afc1..238d95d6ccbb 100644 --- a/system_tests/language.py +++ b/system_tests/language.py @@ -121,5 +121,33 @@ def test_analyze_sentiment(self): positive_msg = 'Jogging is fun' document = Config.CLIENT.document_from_text(positive_msg) sentiment = document.analyze_sentiment() - self.assertEqual(sentiment.polarity, 1) + self.assertEqual(sentiment.score, 0.5) self.assertTrue(0.0 < sentiment.magnitude < 1.5) + + def _verify_token(self, token, text_content, part_of_speech, lemma): + from google.cloud.language.syntax import Token + from google.cloud.language.syntax import PartOfSpeech + + self.assertIsInstance(token, Token) + self.assertEqual(token.text_content, text_content) + self.assertEqual(token.part_of_speech, part_of_speech) + self.assertEqual(token.lemma, lemma) + + def _check_analyze_syntax_result(self, tokens): + from google.cloud.language.syntax import Token + from google.cloud.language.syntax import PartOfSpeech + + self.assertEqual(len(tokens), 3) + token1, token2, token3 = tokens + # Verify token 1. + self._verify_token(token1, 'Jogging', PartOfSpeech.NOUN, 'Jogging') + # Verify token 2. + self._verify_token(token2, 'is', PartOfSpeech.VERB, 'be') + # Verify token 3. + self._verify_token(token3, 'fun', PartOfSpeech.ADJECTIVE, 'fun') + + def test_analyze_syntax(self): + positive_msg = 'Jogging is fun' + document = Config.CLIENT.document_from_text(positive_msg) + tokens = document.analyze_syntax() + self._check_analyze_syntax_result(tokens) From f34e0a854bbae8f98069dd347e6103d3937a4a54 Mon Sep 17 00:00:00 2001 From: Jason Dobry Date: Fri, 2 Dec 2016 14:21:08 -0800 Subject: [PATCH 2/2] Address comments. --- language/google/cloud/language/document.py | 2 +- language/unit_tests/test_document.py | 46 ---------------------- system_tests/language.py | 2 - 3 files changed, 1 insertion(+), 49 deletions(-) diff --git a/language/google/cloud/language/document.py b/language/google/cloud/language/document.py index d529e53c2eaa..9e29ccb2af28 100644 --- a/language/google/cloud/language/document.py +++ b/language/google/cloud/language/document.py @@ -207,7 +207,7 @@ def analyze_syntax(self): api_response = self.client._connection.api_request( method='POST', path='analyzeSyntax', data=data) return [Token.from_api_repr(token) - for token in api_response['tokens']] + for token in api_response.get('tokens', ())] def annotate_text(self, include_syntax=True, include_entities=True, include_sentiment=True): diff --git a/language/unit_tests/test_document.py b/language/unit_tests/test_document.py index 24427b6f4d33..48ebfd1c1188 100644 --- a/language/unit_tests/test_document.py +++ b/language/unit_tests/test_document.py @@ -317,7 +317,6 @@ def test_analyze_sentiment(self): def _verify_token(self, token, text_content, part_of_speech, lemma): from google.cloud.language.syntax import Token - from google.cloud.language.syntax import PartOfSpeech self.assertIsInstance(token, Token) self.assertEqual(token.text_content, text_content) @@ -326,7 +325,6 @@ def _verify_token(self, token, text_content, part_of_speech, lemma): def test_analyze_syntax(self): from google.cloud.language.document import Encoding - from google.cloud.language.syntax import Token from google.cloud.language.syntax import PartOfSpeech name1 = 'R-O-C-K' @@ -350,17 +348,6 @@ def test_analyze_syntax(self): }, 'partOfSpeech': { 'tag': 'NOUN', - 'aspect': 'ASPECT_UNKNOWN', - 'case': 'CASE_UNKNOWN', - 'form': 'FORM_UNKNOWN', - 'gender': 'GENDER_UNKNOWN', - 'mood': 'MOOD_UNKNOWN', - 'number': 'SINGULAR', - 'person': 'PERSON_UNKNOWN', - 'proper': 'PROPER', - 'reciprocity': 'RECIPROCITY_UNKNOWN', - 'tense': 'TENSE_UNKNOWN', - 'voice': 'VOICE_UNKNOWN', }, 'dependencyEdge': { 'headTokenIndex': 0, @@ -375,17 +362,6 @@ def test_analyze_syntax(self): }, 'partOfSpeech': { 'tag': 'ADP', - 'aspect': 'ASPECT_UNKNOWN', - 'case': 'CASE_UNKNOWN', - 'form': 'FORM_UNKNOWN', - 'gender': 'GENDER_UNKNOWN', - 'mood': 'MOOD_UNKNOWN', - 'number': 'NUMBER_UNKNOWN', - 'person': 'PERSON_UNKNOWN', - 'proper': 'PROPER_UNKNOWN', - 'reciprocity': 'RECIPROCITY_UNKNOWN', - 'tense': 'TENSE_UNKNOWN', - 'voice': 'VOICE_UNKNOWN', }, 'dependencyEdge': { 'headTokenIndex': 0, @@ -400,17 +376,6 @@ def test_analyze_syntax(self): }, 'partOfSpeech': { 'tag': 'DET', - 'aspect': 'ASPECT_UNKNOWN', - 'case': 'CASE_UNKNOWN', - 'form': 'FORM_UNKNOWN', - 'gender': 'GENDER_UNKNOWN', - 'mood': 'MOOD_UNKNOWN', - 'number': 'NUMBER_UNKNOWN', - 'person': 'PERSON_UNKNOWN', - 'proper': 'PROPER_UNKNOWN', - 'reciprocity': 'RECIPROCITY_UNKNOWN', - 'tense': 'TENSE_UNKNOWN', - 'voice': 'VOICE_UNKNOWN', }, 'dependencyEdge': { 'headTokenIndex': 3, @@ -425,17 +390,6 @@ def test_analyze_syntax(self): }, 'partOfSpeech': { 'tag': 'NOUN', - 'aspect': 'ASPECT_UNKNOWN', - 'case': 'CASE_UNKNOWN', - 'form': 'FORM_UNKNOWN', - 'gender': 'GENDER_UNKNOWN', - 'mood': 'MOOD_UNKNOWN', - 'number': 'SINGULAR', - 'person': 'PERSON_UNKNOWN', - 'proper': 'PROPER', - 'reciprocity': 'RECIPROCITY_UNKNOWN', - 'tense': 'TENSE_UNKNOWN', - 'voice': 'VOICE_UNKNOWN', }, 'dependencyEdge': { 'headTokenIndex': 1, diff --git a/system_tests/language.py b/system_tests/language.py index 238d95d6ccbb..d978cda8e13a 100644 --- a/system_tests/language.py +++ b/system_tests/language.py @@ -126,7 +126,6 @@ def test_analyze_sentiment(self): def _verify_token(self, token, text_content, part_of_speech, lemma): from google.cloud.language.syntax import Token - from google.cloud.language.syntax import PartOfSpeech self.assertIsInstance(token, Token) self.assertEqual(token.text_content, text_content) @@ -134,7 +133,6 @@ def _verify_token(self, token, text_content, part_of_speech, lemma): self.assertEqual(token.lemma, lemma) def _check_analyze_syntax_result(self, tokens): - from google.cloud.language.syntax import Token from google.cloud.language.syntax import PartOfSpeech self.assertEqual(len(tokens), 3)