From 1b4cc437208246493d49095179798c29113a33b5 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 22 Aug 2016 15:45:06 -0700 Subject: [PATCH 1/5] Adding Natural Language Entity for responses. --- docs/index.rst | 1 + docs/language-responses.rst | 9 ++++ gcloud/language/entity.py | 84 ++++++++++++++++++++++++++++++++++ gcloud/language/test_entity.py | 39 ++++++++++++++++ 4 files changed, 133 insertions(+) create mode 100644 docs/language-responses.rst create mode 100644 gcloud/language/entity.py create mode 100644 gcloud/language/test_entity.py diff --git a/docs/index.rst b/docs/index.rst index 5cee1dbb4f63..03d7a86007b2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -157,6 +157,7 @@ language-usage Client language-document + language-responses .. toctree:: :maxdepth: 0 diff --git a/docs/language-responses.rst b/docs/language-responses.rst new file mode 100644 index 000000000000..ee6b30e9f3b3 --- /dev/null +++ b/docs/language-responses.rst @@ -0,0 +1,9 @@ +Natural Language Response Classes +================================= + +Entity +~~~~~~ + +.. automodule:: gcloud.language.entity + :members: + :show-inheritance: diff --git a/gcloud/language/entity.py b/gcloud/language/entity.py new file mode 100644 index 000000000000..2c4a16648b25 --- /dev/null +++ b/gcloud/language/entity.py @@ -0,0 +1,84 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Definition for Google Cloud Natural Language API entities. 
+ +An entity is used to describe a proper name extracted from text. +""" + + +class EntityType(object): + """List of possible entity types.""" + + UNKNOWN = 'UNKNOWN' + """Unknown entity type.""" + + PERSON = 'PERSON' + """Person entity type.""" + + LOCATION = 'LOCATION' + """Location entity type.""" + + ORGANIZATION = 'ORGANIZATION' + """Organization entity type.""" + + EVENT = 'EVENT' + """Event entity type.""" + + WORK_OF_ART = 'WORK_OF_ART' + """Work of art entity type.""" + + CONSUMER_GOOD = 'CONSUMER_GOOD' + """Consumer good entity type.""" + + OTHER = 'OTHER' + """Other entity type (i.e. known but not classified).""" + + + +class Entity(object): + """A Google Cloud Natural Language API entity. + + Represents a phrase in text that is a known entity, such as a person, + an organization, or location. The API associates information, such as + salience and mentions, with entities. + + See: + https://cloud.google.com/natural-language/reference/rest/v1beta1/Entity + + :type name: str + :param name: The name / phrase identified as the entity. + + :type entity_type: str + :param entity_type: The type of the entity. See + https://cloud.google.com/natural-language/\ + reference/rest/v1beta1/Entity#Type + + :type metadata: dict + :param metadata: The metadata associated with the entity. + + :type salience: float + :param salience: The prominence of the entity / phrase within the text + containing it. + + :type mentions: list + :param mentions: List of strings that mention the entity. + """ + + def __init__(self, name, entity_type, metadata, salience, mentions): + self.name = name + self.entity_type = entity_type + self.metadata = metadata + self.salience = salience + self.mentions = mentions diff --git a/gcloud/language/test_entity.py b/gcloud/language/test_entity.py new file mode 100644 index 000000000000..3ba644669de1 --- /dev/null +++ b/gcloud/language/test_entity.py @@ -0,0 +1,39 @@ +# Copyright 2016 Google Inc. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + + +class TestEntity(unittest.TestCase): + + def _getTargetClass(self): + from gcloud.language.entity import Entity + return Entity + + def _makeOne(self, *args, **kw): + return self._getTargetClass()(*args, **kw) + + def test_constructor_defaults(self): + name = 'Italian' + entity_type = 'LOCATION' + metadata = {'wikipedia_url': 'http://en.wikipedia.org/wiki/Italy'} + salience = 0.19960518 + mentions = ['Italian'] + entity = self._makeOne(name, entity_type, metadata, + salience, mentions) + self.assertEqual(entity.name, name) + self.assertEqual(entity.entity_type, entity_type) + self.assertEqual(entity.metadata, metadata) + self.assertEqual(entity.salience, salience) + self.assertEqual(entity.mentions, mentions) From 53fefb9fce58e5977b896d954104d0dabbc0381b Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 22 Aug 2016 15:59:14 -0700 Subject: [PATCH 2/5] Adding Entity.from_api_repr() helper for language. --- gcloud/language/entity.py | 19 ++++++++++++++++++- gcloud/language/test_entity.py | 27 +++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/gcloud/language/entity.py b/gcloud/language/entity.py index 2c4a16648b25..c7254c06dc16 100644 --- a/gcloud/language/entity.py +++ b/gcloud/language/entity.py @@ -46,7 +46,6 @@ class EntityType(object): """Other entity type (i.e. 
known but not classified).""" - class Entity(object): """A Google Cloud Natural Language API entity. @@ -82,3 +81,21 @@ def __init__(self, name, entity_type, metadata, salience, mentions): self.metadata = metadata self.salience = salience self.mentions = mentions + + @classmethod + def from_api_repr(cls, payload): + """Convert an Entity from the JSON API into an :class:`Entity`. + + :type payload: dict + :param payload: The value from the backend. + + :rtype: :class:`Entity` + :returns: The entity parsed from the API representation. + """ + name = payload['name'] + entity_type = payload['type'] + metadata = payload['metadata'] + salience = payload['salience'] + mentions = [value['text']['content'] + for value in payload['mentions']] + return cls(name, entity_type, metadata, salience, mentions) diff --git a/gcloud/language/test_entity.py b/gcloud/language/test_entity.py index 3ba644669de1..7eab34a40101 100644 --- a/gcloud/language/test_entity.py +++ b/gcloud/language/test_entity.py @@ -37,3 +37,30 @@ def test_constructor_defaults(self): self.assertEqual(entity.metadata, metadata) self.assertEqual(entity.salience, salience) self.assertEqual(entity.mentions, mentions) + + def test_from_api_repr(self): + klass = self._getTargetClass() + name = 'Italy' + entity_type = 'LOCATION' + salience = 0.223 + metadata = {'wikipedia_url': 'http://en.wikipedia.org/wiki/Italy'} + mention1 = 'Italy' + mention2 = 'To Italy' + mention3 = 'From Italy' + payload = { + 'name': name, + 'type': entity_type, + 'salience': salience, + 'metadata': metadata, + 'mentions': [ + {'text': {'content': mention1}}, + {'text': {'content': mention2}}, + {'text': {'content': mention3}}, + ], + } + entity = klass.from_api_repr(payload) + self.assertEqual(entity.name, name) + self.assertEqual(entity.entity_type, entity_type) + self.assertEqual(entity.salience, salience) + self.assertEqual(entity.metadata, metadata) + self.assertEqual(entity.mentions, [mention1, mention2, mention3]) From 
84e5e96da2733f386f1dfbbd202957fb93bf6145 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 22 Aug 2016 17:03:16 -0700 Subject: [PATCH 3/5] Adding Document.analyze_entities() in language. --- gcloud/language/document.py | 43 +++++++++++ gcloud/language/test_document.py | 124 +++++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+) diff --git a/gcloud/language/document.py b/gcloud/language/document.py index f25c85a4f4c7..11b9db5e242a 100644 --- a/gcloud/language/document.py +++ b/gcloud/language/document.py @@ -17,6 +17,8 @@ A document is used to hold text to be analyzed and annotated. """ +from gcloud.language.entity import Entity + DEFAULT_LANGUAGE = 'en' """Default document language, English.""" @@ -101,3 +103,44 @@ def __init__(self, client, content=None, gcs_url=None, doc_type=PLAIN_TEXT, self.doc_type = doc_type self.language = language self.encoding = encoding + + def _to_dict(self): + """Helper to convert the current document into a dictionary. + + To be used when constructing requests. + + :rtype: dict + :returns: The Document value as a JSON dictionary. + """ + info = { + 'type': self.doc_type, + 'language': self.language, + } + if self.content is not None: + info['content'] = self.content + elif self.gcs_url is not None: + info['gcsContentUri'] = self.gcs_url + return info + + def analyze_entities(self): + """Analyze the entities in the current document. + + Finds named entities (currently finds proper names as of August 2016) + in the text, entity types, salience, mentions for each entity, and + other properties. + + See: + https://cloud.google.com/natural-language/reference/\ + rest/v1beta1/documents/analyzeEntities + + :rtype: list + :returns: A list of :class:`Entity` returned from the API. 
+ """ + data = { + 'document': self._to_dict(), + 'encodingType': self.encoding, + } + api_response = self.client.connection.api_request( + method='POST', path='analyzeEntities', data=data) + return [Entity.from_api_repr(entity) + for entity in api_response['entities']] diff --git a/gcloud/language/test_document.py b/gcloud/language/test_document.py index 2b52f13a7b31..66ee3d219079 100644 --- a/gcloud/language/test_document.py +++ b/gcloud/language/test_document.py @@ -62,3 +62,127 @@ def test_constructor_text_and_gcs(self): with self.assertRaises(ValueError): self._makeOne(None, content='abc', gcs_url='gs://some-bucket/some-obj.txt') + + def test__to_dict_with_content(self): + klass = self._getTargetClass() + content = 'Hello World' + document = self._makeOne(None, content=content) + info = document._to_dict() + self.assertEqual(info, { + 'content': content, + 'language': document.language, + 'type': klass.PLAIN_TEXT, + }) + + def test__to_dict_with_gcs(self): + klass = self._getTargetClass() + gcs_url = 'gs://some-bucket/some-obj.html' + document = self._makeOne(None, gcs_url=gcs_url) + info = document._to_dict() + self.assertEqual(info, { + 'gcsContentUri': gcs_url, + 'language': document.language, + 'type': klass.PLAIN_TEXT, + }) + + def test__to_dict_with_no_content(self): + klass = self._getTargetClass() + document = self._makeOne(None, content='') + document.content = None # Manually unset the content. 
+ info = document._to_dict() + self.assertEqual(info, { + 'language': document.language, + 'type': klass.PLAIN_TEXT, + }) + + def test_analyze_entities(self): + from gcloud.language.entity import Entity + from gcloud.language.entity import EntityType + + name1 = 'R-O-C-K' + name2 = 'USA' + content = name1 + ' in the ' + name2 + metadata1 = { + 'wikipedia_url': 'http://en.wikipedia.org/wiki/Rock_music', + } + metadata2 = { + 'wikipedia_url': 'http://en.wikipedia.org/wiki/United_States', + } + salience1 = 0.91391456 + salience2 = 0.086085409 + response = { + 'entities': [ + { + 'name': name1, + 'type': EntityType.OTHER, + 'metadata': metadata1, + 'salience': salience1, + 'mentions': [ + { + 'text': { + 'content': name1, + 'beginOffset': -1 + } + } + ] + }, + { + 'name': name2, + 'type': EntityType.LOCATION, + 'metadata': metadata2, + 'salience': salience2, + 'mentions': [ + { + 'text': { + 'content': name2, + 'beginOffset': -1, + }, + }, + ], + }, + ], + 'language': 'en', + } + connection = _Connection(response) + client = _Client(connection=connection) + document = self._makeOne(client, content) + + entities = document.analyze_entities() + self.assertEqual(len(entities), 2) + entity1 = entities[0] + self.assertIsInstance(entity1, Entity) + self.assertEqual(entity1.name, name1) + self.assertEqual(entity1.entity_type, EntityType.OTHER) + self.assertEqual(entity1.metadata, metadata1) + self.assertEqual(entity1.salience, salience1) + self.assertEqual(entity1.mentions, [name1]) + entity2 = entities[1] + self.assertIsInstance(entity2, Entity) + self.assertEqual(entity2.name, name2) + self.assertEqual(entity2.entity_type, EntityType.LOCATION) + self.assertEqual(entity2.metadata, metadata2) + self.assertEqual(entity2.salience, salience2) + self.assertEqual(entity2.mentions, [name2]) + + # Verify the request. 
+ self.assertEqual(len(connection._requested), 1) + req = connection._requested[0] + self.assertEqual(req['path'], 'analyzeEntities') + self.assertEqual(req['method'], 'POST') + + +class _Connection(object): + + def __init__(self, response): + self._response = response + self._requested = [] + + def api_request(self, **kwargs): + self._requested.append(kwargs) + return self._response + + +class _Client(object): + + def __init__(self, connection=None): + self.connection = connection From 9a47dedd221efd9dff229d19f188ba56f3ca16f1 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Mon, 22 Aug 2016 17:25:33 -0700 Subject: [PATCH 4/5] Adding system test for language Document.analyze_entities(). --- docs/language-usage.rst | 10 ++--- system_tests/attempt_system_tests.py | 1 + system_tests/language.py | 67 ++++++++++++++++++++++++++++ system_tests/run_system_test.py | 2 + 4 files changed, 75 insertions(+), 5 deletions(-) create mode 100644 system_tests/language.py diff --git a/docs/language-usage.rst b/docs/language-usage.rst index 62ecff14fc01..e8d8212b10af 100644 --- a/docs/language-usage.rst +++ b/docs/language-usage.rst @@ -179,17 +179,17 @@ metadata and other properties. 
name: Michelangelo Caravaggio type: PERSON metadata: {'wikipedia_url': 'http://en.wikipedia.org/wiki/Caravaggio'} - salience: 0.75942981 + salience: 0.7615959 ==================== name: Italian type: LOCATION metadata: {'wikipedia_url': 'http://en.wikipedia.org/wiki/Italy'} - salience: 0.20193423 + salience: 0.19960518 ==================== name: The Calling of Saint Matthew - type: WORK_OF_ART - metadata: {'wikipedia_url': 'http://en.wikipedia.org/wiki/index.html?curid=2838808'} - salience: 0.03863598 + type: EVENT + metadata: {'wikipedia_url': 'http://en.wikipedia.org/wiki/The_Calling_of_St_Matthew_(Caravaggio)'} + salience: 0.038798928 Analyze Sentiment ----------------- diff --git a/system_tests/attempt_system_tests.py b/system_tests/attempt_system_tests.py index 5c53db1c6184..ce97a97fd33c 100644 --- a/system_tests/attempt_system_tests.py +++ b/system_tests/attempt_system_tests.py @@ -35,6 +35,7 @@ 'storage', 'bigquery', 'pubsub', + 'language', 'logging', 'translate', 'monitoring', diff --git a/system_tests/language.py b/system_tests/language.py new file mode 100644 index 000000000000..56c8e373b526 --- /dev/null +++ b/system_tests/language.py @@ -0,0 +1,67 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from gcloud import language + + +class Config(object): + """Run-time configuration to be modified at set-up. + + This is a mutable stand-in to allow test set-up to modify + global state. 
+ """ + CLIENT = None + + +def setUpModule(): + Config.CLIENT = language.Client() + + +class TestLanguage(unittest.TestCase): + + def test_analyze_entities(self): + from gcloud.language.entity import EntityType + + text_content = ("Michelangelo Caravaggio, Italian painter, is " + "known for 'The Calling of Saint Matthew'.") + document = Config.CLIENT.document_from_text(text_content) + entities = document.analyze_entities() + self.assertEqual(len(entities), 3) + entity1, entity2, entity3 = entities + # Verify entity 1. + self.assertEqual(entity1.name, 'Michelangelo Caravaggio') + self.assertEqual(entity1.entity_type, EntityType.PERSON) + self.assertTrue(0.7 < entity1.salience < 0.8) + self.assertEqual(entity1.mentions, [entity1.name]) + self.assertEqual(entity1.metadata, { + 'wikipedia_url': 'http://en.wikipedia.org/wiki/Caravaggio', + }) + # Verify entity 2. + self.assertEqual(entity2.name, 'Italian') + self.assertEqual(entity2.entity_type, EntityType.LOCATION) + self.assertTrue(0.15 < entity2.salience < 0.25) + self.assertEqual(entity2.mentions, [entity2.name]) + self.assertEqual(entity2.metadata, { + 'wikipedia_url': 'http://en.wikipedia.org/wiki/Italy', + }) + # Verify entity 3. 
+ self.assertEqual(entity3.name, 'The Calling of Saint Matthew') + self.assertEqual(entity3.entity_type, EntityType.EVENT) + self.assertTrue(0 < entity3.salience < 0.1) + self.assertEqual(entity3.mentions, [entity3.name]) + wiki_url = ('http://en.wikipedia.org/wiki/' + 'The_Calling_of_St_Matthew_(Caravaggio)') + self.assertEqual(entity3.metadata, {'wikipedia_url': wiki_url}) diff --git a/system_tests/run_system_test.py b/system_tests/run_system_test.py index 2fd6ff93fc2d..d3593db8be22 100644 --- a/system_tests/run_system_test.py +++ b/system_tests/run_system_test.py @@ -19,6 +19,7 @@ import bigquery import bigtable import datastore +import language import logging_ import monitoring import pubsub @@ -33,6 +34,7 @@ 'pubsub': pubsub, 'bigquery': bigquery, 'bigtable': bigtable, + 'language': language, 'logging': logging_, 'monitoring': monitoring, 'translate': translate, From b61b82c27302e90c23e17f2e9b8dd08fc00778d4 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 23 Aug 2016 13:39:57 -0700 Subject: [PATCH 5/5] Splitting out wikipedia_url as own property in language. --- docs/language-usage.rst | 54 ++++++++++++++++++-------------- gcloud/language/entity.py | 5 +++ gcloud/language/test_document.py | 17 +++++----- gcloud/language/test_entity.py | 15 ++++++--- system_tests/language.py | 15 ++++----- 5 files changed, 60 insertions(+), 46 deletions(-) diff --git a/docs/language-usage.rst b/docs/language-usage.rst index e8d8212b10af..c61076d6df2c 100644 --- a/docs/language-usage.rst +++ b/docs/language-usage.rst @@ -171,25 +171,29 @@ metadata and other properties. >>> entities = document.analyze_entities() >>> for entity in entities: ... print('=' * 20) - ... print(' name: %s' % (entity.name,)) - ... print(' type: %s' % (entity.entity_type,)) - ... print('metadata: %s' % (entity.metadata,)) - ... print('salience: %s' % (entity.salience,)) + ... print(' name: %s' % (entity.name,)) + ... print(' type: %s' % (entity.entity_type,)) + ... 
print('wikipedia_url: %s' % (entity.wikipedia_url,)) + ... print(' metadata: %s' % (entity.metadata,)) + ... print(' salience: %s' % (entity.salience,)) ==================== - name: Michelangelo Caravaggio - type: PERSON - metadata: {'wikipedia_url': 'http://en.wikipedia.org/wiki/Caravaggio'} - salience: 0.7615959 + name: Michelangelo Caravaggio + type: PERSON + wikipedia_url: http://en.wikipedia.org/wiki/Caravaggio + metadata: {} + salience: 0.7615959 ==================== - name: Italian - type: LOCATION - metadata: {'wikipedia_url': 'http://en.wikipedia.org/wiki/Italy'} - salience: 0.19960518 + name: Italian + type: LOCATION + wikipedia_url: http://en.wikipedia.org/wiki/Italy + metadata: {} + salience: 0.19960518 ==================== - name: The Calling of Saint Matthew - type: EVENT - metadata: {'wikipedia_url': 'http://en.wikipedia.org/wiki/The_Calling_of_St_Matthew_(Caravaggio)'} - salience: 0.038798928 + name: The Calling of Saint Matthew + type: EVENT + wikipedia_url: http://en.wikipedia.org/wiki/The_Calling_of_St_Matthew_(Caravaggio) + metadata: {} + salience: 0.038798928 Analyze Sentiment ----------------- @@ -266,14 +270,16 @@ the response is :data:`None`. >>> # Entities present if include_entities=True >>> for entity in annotations.entities: ... print('=' * 20) - ... print(' name: %s' % (entity.name,)) - ... print(' type: %s' % (entity.entity_type,)) - ... print('metadata: %s' % (entity.metadata,)) - ... print('salience: %s' % (entity.salience,)) + ... print(' name: %s' % (entity.name,)) + ... print(' type: %s' % (entity.entity_type,)) + ... print('wikipedia_url: %s' % (entity.wikipedia_url,)) + ... print(' metadata: %s' % (entity.metadata,)) + ... 
print(' salience: %s' % (entity.salience,)) ==================== - name: Moon - type: LOCATION - metadata: {'wikipedia_url': 'http://en.wikipedia.org/wiki/Natural_satellite'} - salience: 0.11793101 + name: Moon + type: LOCATION + wikipedia_url: http://en.wikipedia.org/wiki/Natural_satellite + metadata: {} + salience: 0.11793101 .. _Features: https://cloud.google.com/natural-language/reference/rest/v1beta1/documents/annotateText#Features diff --git a/gcloud/language/entity.py b/gcloud/language/entity.py index c7254c06dc16..0b1c26f92da4 100644 --- a/gcloud/language/entity.py +++ b/gcloud/language/entity.py @@ -53,6 +53,10 @@ class Entity(object): an organization, or location. The API associates information, such as salience and mentions, with entities. + The only supported metadata (as of August 2016) is ``wikipedia_url``, + so this value will be removed from the passed in ``metadata`` + and put in its own property. + See: https://cloud.google.com/natural-language/reference/rest/v1beta1/Entity @@ -78,6 +82,7 @@ class Entity(object): def __init__(self, name, entity_type, metadata, salience, mentions): self.name = name self.entity_type = entity_type + self.wikipedia_url = metadata.pop('wikipedia_url', None) self.metadata = metadata self.salience = salience self.mentions = mentions diff --git a/gcloud/language/test_document.py b/gcloud/language/test_document.py index 66ee3d219079..cd8eff3cdb97 100644 --- a/gcloud/language/test_document.py +++ b/gcloud/language/test_document.py @@ -102,12 +102,7 @@ def test_analyze_entities(self): name1 = 'R-O-C-K' name2 = 'USA' content = name1 + ' in the ' + name2 - metadata1 = { - 'wikipedia_url': 'http://en.wikipedia.org/wiki/Rock_music', - } - metadata2 = { - 'wikipedia_url': 'http://en.wikipedia.org/wiki/United_States', - } + wiki2 = 'http://en.wikipedia.org/wiki/United_States' salience1 = 0.91391456 salience2 = 0.086085409 response = { @@ -115,7 +110,7 @@ def test_analyze_entities(self): { 'name': name1, 'type': EntityType.OTHER, - 
'metadata': metadata1, + 'metadata': {}, 'salience': salience1, 'mentions': [ { @@ -129,7 +124,7 @@ def test_analyze_entities(self): { 'name': name2, 'type': EntityType.LOCATION, - 'metadata': metadata2, + 'metadata': {'wikipedia_url': wiki2}, 'salience': salience2, 'mentions': [ { @@ -153,14 +148,16 @@ def test_analyze_entities(self): self.assertIsInstance(entity1, Entity) self.assertEqual(entity1.name, name1) self.assertEqual(entity1.entity_type, EntityType.OTHER) - self.assertEqual(entity1.metadata, metadata1) + self.assertEqual(entity1.wikipedia_url, None) + self.assertEqual(entity1.metadata, {}) self.assertEqual(entity1.salience, salience1) self.assertEqual(entity1.mentions, [name1]) entity2 = entities[1] self.assertIsInstance(entity2, Entity) self.assertEqual(entity2.name, name2) self.assertEqual(entity2.entity_type, EntityType.LOCATION) - self.assertEqual(entity2.metadata, metadata2) + self.assertEqual(entity2.wikipedia_url, wiki2) + self.assertEqual(entity2.metadata, {}) self.assertEqual(entity2.salience, salience2) self.assertEqual(entity2.mentions, [name2]) diff --git a/gcloud/language/test_entity.py b/gcloud/language/test_entity.py index 7eab34a40101..34dde32c0ae1 100644 --- a/gcloud/language/test_entity.py +++ b/gcloud/language/test_entity.py @@ -27,14 +27,18 @@ def _makeOne(self, *args, **kw): def test_constructor_defaults(self): name = 'Italian' entity_type = 'LOCATION' - metadata = {'wikipedia_url': 'http://en.wikipedia.org/wiki/Italy'} + wiki_url = 'http://en.wikipedia.org/wiki/Italy' + metadata = {'wikipedia_url': wiki_url} + base_metadata = {'foo': 'bar'} + metadata.update(base_metadata) salience = 0.19960518 mentions = ['Italian'] entity = self._makeOne(name, entity_type, metadata, salience, mentions) self.assertEqual(entity.name, name) self.assertEqual(entity.entity_type, entity_type) - self.assertEqual(entity.metadata, metadata) + self.assertEqual(entity.wikipedia_url, wiki_url) + self.assertEqual(entity.metadata, base_metadata) 
self.assertEqual(entity.salience, salience) self.assertEqual(entity.mentions, mentions) @@ -43,7 +47,7 @@ def test_from_api_repr(self): name = 'Italy' entity_type = 'LOCATION' salience = 0.223 - metadata = {'wikipedia_url': 'http://en.wikipedia.org/wiki/Italy'} + wiki_url = 'http://en.wikipedia.org/wiki/Italy' mention1 = 'Italy' mention2 = 'To Italy' mention3 = 'From Italy' @@ -51,7 +55,7 @@ def test_from_api_repr(self): 'name': name, 'type': entity_type, 'salience': salience, - 'metadata': metadata, + 'metadata': {'wikipedia_url': wiki_url}, 'mentions': [ {'text': {'content': mention1}}, {'text': {'content': mention2}}, @@ -62,5 +66,6 @@ def test_from_api_repr(self): self.assertEqual(entity.name, name) self.assertEqual(entity.entity_type, entity_type) self.assertEqual(entity.salience, salience) - self.assertEqual(entity.metadata, metadata) + self.assertEqual(entity.wikipedia_url, wiki_url) + self.assertEqual(entity.metadata, {}) self.assertEqual(entity.mentions, [mention1, mention2, mention3]) diff --git a/system_tests/language.py b/system_tests/language.py index 56c8e373b526..c23afc8711dd 100644 --- a/system_tests/language.py +++ b/system_tests/language.py @@ -46,17 +46,17 @@ def test_analyze_entities(self): self.assertEqual(entity1.entity_type, EntityType.PERSON) self.assertTrue(0.7 < entity1.salience < 0.8) self.assertEqual(entity1.mentions, [entity1.name]) - self.assertEqual(entity1.metadata, { - 'wikipedia_url': 'http://en.wikipedia.org/wiki/Caravaggio', - }) + self.assertEqual(entity1.wikipedia_url, + 'http://en.wikipedia.org/wiki/Caravaggio') + self.assertEqual(entity1.metadata, {}) # Verify entity 2. 
self.assertEqual(entity2.name, 'Italian') self.assertEqual(entity2.entity_type, EntityType.LOCATION) self.assertTrue(0.15 < entity2.salience < 0.25) self.assertEqual(entity2.mentions, [entity2.name]) - self.assertEqual(entity2.metadata, { - 'wikipedia_url': 'http://en.wikipedia.org/wiki/Italy', - }) + self.assertEqual(entity2.wikipedia_url, + 'http://en.wikipedia.org/wiki/Italy') + self.assertEqual(entity2.metadata, {}) # Verify entity 3. self.assertEqual(entity3.name, 'The Calling of Saint Matthew') self.assertEqual(entity3.entity_type, EntityType.EVENT) @@ -64,4 +64,5 @@ def test_analyze_entities(self): self.assertEqual(entity3.mentions, [entity3.name]) wiki_url = ('http://en.wikipedia.org/wiki/' 'The_Calling_of_St_Matthew_(Caravaggio)') - self.assertEqual(entity3.metadata, {'wikipedia_url': wiki_url}) + self.assertEqual(entity3.wikipedia_url, wiki_url) + self.assertEqual(entity3.metadata, {})