From 9267a850e68e61b97770c65b6ed7a6838aa97adf Mon Sep 17 00:00:00 2001
From: Danny Hermes
Date: Mon, 22 Aug 2016 15:11:26 -0700
Subject: [PATCH] Adding document_from_blob() factory to language client.

---
 docs/language-usage.rst        |  4 ++--
 gcloud/language/client.py      | 31 ++++++++++++++++++++++++++++++-
 gcloud/language/test_client.py | 17 +++++++++++++++++
 3 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/docs/language-usage.rst b/docs/language-usage.rst
index 013fe28cf36a5..8adc638b61ca2 100644
--- a/docs/language-usage.rst
+++ b/docs/language-usage.rst
@@ -127,8 +127,8 @@ to content stored in `Google Cloud Storage`_. We can use the
 
   .. code-block:: python
 
-     >>> document = client.document_from_blob(bucket='my-text-bucket',
-     ...                                      blob='sentiment-me.txt')
+     >>> document = client.document_from_blob('my-text-bucket',
+     ...                                      'sentiment-me.txt')
      >>> document.gcs_url
      'gs://my-text-bucket/sentiment-me.txt'
      >>> document.doc_type == language.Document.PLAIN_TEXT
diff --git a/gcloud/language/client.py b/gcloud/language/client.py
index 8552f011ed9dd..b3e80016c741d 100644
--- a/gcloud/language/client.py
+++ b/gcloud/language/client.py
@@ -101,6 +101,35 @@ def document_from_url(self, gcs_url,
                        :class:`Document` constructor.
 
         :rtype: :class:`Document`
-        :returns: A plain-text document bound to this client.
+        :returns: A document bound to this client.
         """
         return Document(self, gcs_url=gcs_url, doc_type=doc_type, **kwargs)
+
+    def document_from_blob(self, bucket_name, blob_name,
+                           doc_type=Document.PLAIN_TEXT, **kwargs):
+        """Create a Cloud Storage document bound to this client.
+
+        :type bucket_name: str
+        :param bucket_name: The name of the bucket that contains the
+                            document text.
+
+        :type blob_name: str
+        :param blob_name: The name of the blob (within the bucket) that
+                          contains document text.
+
+        :type doc_type: str
+        :param doc_type: (Optional) The type of text in the document.
+                         Defaults to plain text. Can also be specified
+                         as HTML via :attr:`~.Document.HTML`.
+
+        :type kwargs: dict
+        :param kwargs: Remaining keyword arguments to be passed along to the
+                       :class:`Document` constructor.
+
+        :rtype: :class:`Document`
+        :returns: A document bound to this client.
+        """
+        # NOTE: We assume that the bucket and blob name don't
+        #       need to be URL-encoded.
+        gcs_url = 'gs://%s/%s' % (bucket_name, blob_name)
+        return self.document_from_url(gcs_url, doc_type=doc_type, **kwargs)
diff --git a/gcloud/language/test_client.py b/gcloud/language/test_client.py
index 4e96d1f9ea9f9..ca3c47ed38d34 100644
--- a/gcloud/language/test_client.py
+++ b/gcloud/language/test_client.py
@@ -121,6 +121,23 @@ def test_document_from_url_factory_explicit(self):
         self.assertEqual(document.doc_type, Document.HTML)
         self.assertEqual(document.encoding, encoding)
 
+    def test_document_from_blob_factory(self):
+        from gcloud.language.document import Document
+
+        creds = _Credentials()
+        client = self._makeOne(project='PROJECT',
+                               credentials=creds, http=object())
+
+        bucket_name = 'my-text-bucket'
+        blob_name = 'sentiment-me.txt'
+        gcs_url = 'gs://%s/%s' % (bucket_name, blob_name)
+        document = client.document_from_blob(bucket_name, blob_name)
+        self.assertIsInstance(document, Document)
+        self.assertIs(document.client, client)
+        self.assertIsNone(document.content)
+        self.assertEqual(document.gcs_url, gcs_url)
+        self.assertEqual(document.doc_type, Document.PLAIN_TEXT)
+
 
 class _Credentials(object):