def sample_async_batch_annotate_images(
    input_image_uri="gs://cloud-samples-data/vision/label/wakeupcat.jpg",
    output_uri="gs://your-bucket/prefix/",
    batch_size=2,
    timeout=None,
):
    """Perform async batch image annotation.

    Requests label detection and image properties for a single image, asks
    the service to write the results to Cloud Storage under ``output_uri``,
    waits for the long-running operation to finish and prints the output
    prefix reported by the service.

    Args:
        input_image_uri: Cloud Storage URI of the image to annotate.
        output_uri: Cloud Storage URI used as the prefix for the output.
        batch_size: The max number of responses to output in each JSON file.
        timeout: Optional number of seconds to wait for the operation. When
            ``None`` (the default) ``operation.result()`` is called without
            a timeout, exactly as before this parameter existed. Otherwise
            the value is forwarded as ``operation.result(timeout=...)``.
    """
    client = vision_v1.ImageAnnotatorClient()

    source = {"image_uri": input_image_uri}
    image = {"source": source}
    features = [
        {"type": enums.Feature.Type.LABEL_DETECTION},
        {"type": enums.Feature.Type.IMAGE_PROPERTIES},
    ]
    # One request element per image; this sample annotates a single image.
    requests = [{"image": image, "features": features}]

    gcs_destination = {"uri": output_uri}
    output_config = {"gcs_destination": gcs_destination, "batch_size": batch_size}

    operation = client.async_batch_annotate_images(requests, output_config)

    print("Waiting for operation to complete...")
    if timeout is None:
        # Keep the original call shape so the default behaviour is unchanged.
        response = operation.result()
    else:
        response = operation.result(timeout=timeout)

    # The output is written to GCS with the provided output_uri as prefix
    gcs_output_uri = response.output_config.gcs_destination.uri
    print("Output written to GCS with prefix: {}".format(gcs_output_uri))
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
GCS_ROOT = "gs://cloud-samples-data/vision/"

BUCKET = os.environ["CLOUD_STORAGE_BUCKET"]
# A fresh prefix per test session keeps concurrent runs from seeing each
# other's output.
OUTPUT_PREFIX = "TEST_OUTPUT_{}".format(uuid.uuid4())
GCS_DESTINATION_URI = "gs://{}/{}/".format(BUCKET, OUTPUT_PREFIX)


@pytest.fixture()
def storage_client():
    """Yield a Cloud Storage client."""
    yield storage.Client()


@pytest.fixture()
def bucket(storage_client):
    """Yield the output bucket, deleting blobs under OUTPUT_PREFIX around the test.

    The clean-up before the test is best-effort: a failure there is logged
    and the test still runs. The clean-up after the test is not guarded.
    """
    # Local import so this fixture does not depend on a new file-level import.
    import logging

    output_bucket = storage_client.get_bucket(BUCKET)

    try:
        for blob in output_bucket.list_blobs(prefix=OUTPUT_PREFIX):
            blob.delete()
    except Exception:
        # Still best-effort, but no longer silent: record what went wrong.
        logging.getLogger(__name__).warning(
            "Could not pre-clean blobs under prefix %s",
            OUTPUT_PREFIX,
            exc_info=True,
        )

    yield output_bucket

    for blob in output_bucket.list_blobs(prefix=OUTPUT_PREFIX):
        blob.delete()


def test_sample_asyn_batch_annotate_images(storage_client, bucket, capsys):
    """Run the async sample and check that it printed and wrote output blobs."""
    # A gs:// URI is not a filesystem path, so join it with plain string
    # concatenation (GCS_ROOT already ends with "/") instead of os.path.join.
    input_image_uri = GCS_ROOT + "label/wakeupcat.jpg"

    vision_async_batch_annotate_images.sample_async_batch_annotate_images(
        input_image_uri=input_image_uri, output_uri=GCS_DESTINATION_URI
    )

    out, _ = capsys.readouterr()

    assert "Output written to GCS" in out
    assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) > 0
def sample_batch_annotate_files(
    file_path="path/to/your/document.pdf", mime_type="application/pdf"
):
    """Perform batch file annotation.

    Reads the file at ``file_path``, sends its bytes inline with a
    document-text-detection request for the first, second and last page,
    and prints the detected text together with block, paragraph, word and
    symbol confidences.

    Args:
        file_path: Path of the local document to annotate.
        mime_type: MIME type of the document. Supported mime_type:
            application/pdf, image/tiff, image/gif. Defaults to
            ``"application/pdf"``, the value that used to be hard-coded.
    """
    client = vision_v1.ImageAnnotatorClient()

    with io.open(file_path, "rb") as f:
        content = f.read()
    input_config = {"mime_type": mime_type, "content": content}
    features = [{"type": enums.Feature.Type.DOCUMENT_TEXT_DETECTION}]

    # The service can process up to 5 pages per document file. Here we specify
    # the first, second, and last page of the document to be processed.
    pages = [1, 2, -1]
    requests = [{"input_config": input_config, "features": features, "pages": pages}]

    response = client.batch_annotate_files(requests)
    # A single file was submitted, so only the first file response is read.
    for image_response in response.responses[0].responses:
        print(u"Full text: {}".format(image_response.full_text_annotation.text))
        for page in image_response.full_text_annotation.pages:
            for block in page.blocks:
                print(u"\nBlock confidence: {}".format(block.confidence))
                for par in block.paragraphs:
                    print(u"\tParagraph confidence: {}".format(par.confidence))
                    for word in par.words:
                        print(u"\t\tWord confidence: {}".format(word.confidence))
                        for symbol in word.symbols:
                            print(
                                u"\t\t\tSymbol: {}, (confidence: {})".format(
                                    symbol.text, symbol.confidence
                                )
                            )
def sample_batch_annotate_files(
    storage_uri="gs://cloud-samples-data/vision/document_understanding/kafka.pdf",
):
    """Annotate a PDF stored in Cloud Storage and print the detected text.

    Sends one document-text-detection request for the file at
    ``storage_uri`` and prints the full text of each returned page followed
    by the confidence of every block, paragraph, word and symbol.

    Args:
        storage_uri: Cloud Storage URI of the PDF to annotate.
    """
    annotator = vision_v1.ImageAnnotatorClient()

    # The service can process up to 5 pages per document file; this request
    # asks for the first, second, and last page.
    file_request = {
        "input_config": {
            "gcs_source": {"uri": storage_uri},
            "mime_type": "application/pdf",
        },
        "features": [{"type": enums.Feature.Type.DOCUMENT_TEXT_DETECTION}],
        "pages": [1, 2, -1],
    }

    batch_response = annotator.batch_annotate_files([file_request])

    # Only one file was submitted, so its results are the first entry.
    file_response = batch_response.responses[0]
    for page_response in file_response.responses:
        annotation = page_response.full_text_annotation
        print(u"Full text: {}".format(annotation.text))
        for page in annotation.pages:
            for block in page.blocks:
                print(u"\nBlock confidence: {}".format(block.confidence))
                for paragraph in block.paragraphs:
                    print(u"\tParagraph confidence: {}".format(paragraph.confidence))
                    for word in paragraph.words:
                        print(u"\t\tWord confidence: {}".format(word.confidence))
                        for symbol in word.symbols:
                            symbol_line = u"\t\t\tSymbol: {}, (confidence: {})".format(
                                symbol.text, symbol.confidence
                            )
                            print(symbol_line)
# --- vision_batch_annotate_files_gcs_test.py ---

GCS_ROOT = "gs://cloud-samples-data/vision/"


def test_sample_batch_annotate_files_gcs(capsys):
    """Run the GCS sample and check that text and block confidences are printed."""
    # A gs:// URI is not a filesystem path, so join it with plain string
    # concatenation (GCS_ROOT already ends with "/") instead of os.path.join.
    storage_uri = GCS_ROOT + "document_understanding/kafka.pdf"

    vision_batch_annotate_files_gcs.sample_batch_annotate_files(storage_uri=storage_uri)

    out, _ = capsys.readouterr()

    assert "Full text" in out
    assert "Block confidence" in out


# --- vision_batch_annotate_files_test.py ---

RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


def test_sample_batch_annotate_files(capsys):
    """Run the local-file sample on resources/kafka.pdf and check its output."""
    file_path = os.path.join(RESOURCES, "kafka.pdf")

    vision_batch_annotate_files.sample_batch_annotate_files(file_path=file_path)

    out, _ = capsys.readouterr()

    assert "Full text" in out
    assert "Block confidence" in out