From 705b704209429e0977ccdf0e6014e4baccdec72f Mon Sep 17 00:00:00 2001 From: Yu-Han Liu Date: Sat, 1 Jun 2019 23:02:05 -0700 Subject: [PATCH 01/12] Add generated samples for Vision API --- .../vision_async_batch_annotate_images.py | 83 +++++++++++++++++ .../detect/vision_batch_annotate_files.py | 91 +++++++++++++++++++ .../detect/vision_batch_annotate_files_gcs.py | 82 +++++++++++++++++ 3 files changed, 256 insertions(+) create mode 100644 vision/cloud-client/detect/vision_async_batch_annotate_images.py create mode 100644 vision/cloud-client/detect/vision_batch_annotate_files.py create mode 100644 vision/cloud-client/detect/vision_batch_annotate_files_gcs.py diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images.py b/vision/cloud-client/detect/vision_async_batch_annotate_images.py new file mode 100644 index 000000000000..0e3a652f7a99 --- /dev/null +++ b/vision/cloud-client/detect/vision_async_batch_annotate_images.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "vision_async_batch_annotate_images") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-vision + +import sys + +# [START vision_async_batch_annotate_images] + +from google.cloud import vision_v1 +from google.cloud.vision_v1 import enums +import six + +def sample_async_batch_annotate_images(input_image_uri, output_uri): + """Perform async batch image annotation""" + # [START vision_async_batch_annotate_images_core] + + client = vision_v1.ImageAnnotatorClient() + + # input_image_uri = 'gs://cloud-samples-data/vision/label/woman.jpg' + # output_uri = 'gs://your-bucket/prefix/' + + if isinstance(input_image_uri, six.binary_type): + input_image_uri = input_image_uri.decode('utf-8') + if isinstance(output_uri, six.binary_type): + output_uri = output_uri.decode('utf-8') + source = {'image_uri': input_image_uri} + image = {'source': source} + type_ = enums.Feature.Type.LABEL_DETECTION + features_element = {'type': type_} + type_2 = enums.Feature.Type.TEXT_DETECTION + features_element_2 = {'type': type_2} + type_3 = enums.Feature.Type.IMAGE_PROPERTIES + features_element_3 = {'type': type_3} + features = [features_element, features_element_2, features_element_3] + requests_element = {'image': image, 'features': features} + requests = [requests_element] + gcs_destination = {'uri': output_uri} + + # The max number of responses to output in each JSON file + batch_size = 2 + output_config = {'gcs_destination': gcs_destination, 'batch_size': batch_size} + + operation = client.async_batch_annotate_images(requests, output_config) + + print('Waiting for operation to complete...') + response = operation.result() + + # The output is written to GCS with the provided output_uri as prefix + gcs_output_uri = response.output_config.gcs_destination.uri + print('Output written to GCS with prefix: {}'.format(gcs_output_uri)) + + # [END vision_async_batch_annotate_images_core] +# [END vision_async_batch_annotate_images] + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument('--input_image_uri', type=str, default='gs://cloud-samples-data/vision/label/woman.jpg') + parser.add_argument('--output_uri', type=str, default='gs://your-bucket/prefix/') + args = parser.parse_args() + + sample_async_batch_annotate_images(args.input_image_uri, args.output_uri) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/vision/cloud-client/detect/vision_batch_annotate_files.py b/vision/cloud-client/detect/vision_batch_annotate_files.py new file mode 100644 index 000000000000..bb35870f3ee2 --- /dev/null +++ b/vision/cloud-client/detect/vision_batch_annotate_files.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("Request", "vision_batch_annotate_files") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-vision + +import sys + +# [START vision_batch_annotate_files] + +from google.cloud import vision_v1 +from google.cloud.vision_v1 import enums +import io +import six + +def sample_batch_annotate_files(file_path): + """ + Perform batch file annotation + + Args: + file_path Path to local pdf file, e.g. /path/document.pdf + """ + # [START vision_batch_annotate_files_core] + + client = vision_v1.ImageAnnotatorClient() + + # file_path = 'resources/kafka.pdf' + + if isinstance(file_path, six.binary_type): + file_path = file_path.decode('utf-8') + + # Supported mime_type: application/pdf, image/tiff, image/gif + mime_type = 'application/pdf' + with io.open(file_path, 'rb') as f: + content = f.read() + input_config = {'mime_type': mime_type, 'content': content} + type_ = enums.Feature.Type.DOCUMENT_TEXT_DETECTION + features_element = {'type': type_} + features = [features_element] + + # The service can process up to 5 pages per document file. Here we specify the + # first, second, and last page of the document to be processed. + pages_element = 1 + pages_element_2 = 2 + pages_element_3 = -1 + pages = [pages_element, pages_element_2, pages_element_3] + requests_element = {'input_config': input_config, 'features': features, 'pages': pages} + requests = [requests_element] + + response = client.batch_annotate_files(requests) + for image_response in response.responses[0].responses: + print('Full text: {}'.format(image_response.full_text_annotation.text)) + for page in image_response.full_text_annotation.pages: + for block in page.blocks: + print('\nBlock confidence: {}'.format(block.confidence)) + for par in block.paragraphs: + print('\tParagraph confidence: {}'.format(par.confidence)) + for word in par.words: + print('\t\tWord confidence: {}'.format(word.confidence)) + for symbol in word.symbols: + print('\t\t\tSymbol: {}, (confidence: {})'.format(symbol.text, symbol.confidence)) + + # [END vision_batch_annotate_files_core] +# [END vision_batch_annotate_files] + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument('--file_path', type=str, default='resources/kafka.pdf') + args = parser.parse_args() + + sample_batch_annotate_files(args.file_path) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py new file mode 100644 index 000000000000..96977cb18566 --- /dev/null +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("Request", "vision_batch_annotate_files_gcs") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-vision + +import sys + +# [START vision_batch_annotate_files_gcs] + +from google.cloud import vision_v1 +from google.cloud.vision_v1 import enums +import six + +def sample_batch_annotate_files(gcs_uri): + """Perform batch file annotation""" + # [START vision_batch_annotate_files_gcs_core] + + client = vision_v1.ImageAnnotatorClient() + + # gcs_uri = 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf' + + if isinstance(gcs_uri, six.binary_type): + gcs_uri = gcs_uri.decode('utf-8') + gcs_source = {'uri': gcs_uri} + input_config = {'gcs_source': gcs_source} + type_ = enums.Feature.Type.DOCUMENT_TEXT_DETECTION + features_element = {'type': type_} + features = [features_element] + + # The service can process up to 5 pages per document file. Here we specify the + # first, second, and last page of the document to be processed. + pages_element = 1 + pages_element_2 = 2 + pages_element_3 = -1 + pages = [pages_element, pages_element_2, pages_element_3] + requests_element = {'input_config': input_config, 'features': features, 'pages': pages} + requests = [requests_element] + + response = client.batch_annotate_files(requests) + for image_response in response.responses[0].responses: + print('Full text: {}'.format(image_response.full_text_annotation.text)) + for page in image_response.full_text_annotation.pages: + for block in page.blocks: + # The service also returns the bounding boxes for blocks, paragraphs, words, and symbols. + print('\nBlock confidence: {}'.format(block.confidence)) + for par in block.paragraphs: + print('\tParagraph confidence: {}'.format(par.confidence)) + for word in par.words: + print('\t\tWord confidence: {}'.format(word.confidence)) + for symbol in word.symbols: + print('\t\t\tSymbol: {}, (confidence: {})'.format(symbol.text, symbol.confidence)) + + # [END vision_batch_annotate_files_gcs_core] +# [END vision_batch_annotate_files_gcs] + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument('--gcs_uri', type=str, default='gs://cloud-samples-data/vision/document_understanding/kafka.pdf') + args = parser.parse_args() + + sample_batch_annotate_files(args.gcs_uri) + +if __name__ == '__main__': + main() \ No newline at end of file From aaca667fa64bc494c2b507b4eea261a5f851afc4 Mon Sep 17 00:00:00 2001 From: Yu-Han Liu Date: Fri, 7 Jun 2019 10:48:46 -0700 Subject: [PATCH 02/12] Add generated sample --- .../vision_async_batch_annotate_images.py | 14 ++++---- .../detect/vision_batch_annotate_files.py | 4 +++ .../detect/vision_batch_annotate_files_gcs.py | 32 ++++++++++++------- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images.py b/vision/cloud-client/detect/vision_async_batch_annotate_images.py index 0e3a652f7a99..4a6da40e243c 100644 --- a/vision/cloud-client/detect/vision_async_batch_annotate_images.py +++ b/vision/cloud-client/detect/vision_async_batch_annotate_images.py @@ -19,6 +19,10 @@ # To install the latest published package dependency, execute the following: # pip install google-cloud-vision +# sample-metadata +# title: Async Batch Image Annotation +# description: Perform async batch image annotation +# usage: python3 samples/v1/vision_async_batch_annotate_images.py [--input_image_uri "gs://cloud-samples-data/vision/label/wakeupcat.jpg"] [--output_uri "gs://your-bucket/prefix/"] import sys # [START vision_async_batch_annotate_images] @@ -33,7 +37,7 @@ def sample_async_batch_annotate_images(input_image_uri, output_uri): client = vision_v1.ImageAnnotatorClient() - # input_image_uri = 'gs://cloud-samples-data/vision/label/woman.jpg' + # input_image_uri = 'gs://cloud-samples-data/vision/label/wakeupcat.jpg' # output_uri = 'gs://your-bucket/prefix/' if isinstance(input_image_uri, six.binary_type): @@ -44,11 +48,9 @@ def sample_async_batch_annotate_images(input_image_uri, output_uri): image = {'source': source} type_ = enums.Feature.Type.LABEL_DETECTION features_element = {'type': type_} - type_2 = enums.Feature.Type.TEXT_DETECTION + type_2 = enums.Feature.Type.IMAGE_PROPERTIES features_element_2 = {'type': type_2} - type_3 = enums.Feature.Type.IMAGE_PROPERTIES - features_element_3 = {'type': type_3} - features = [features_element, features_element_2, features_element_3] + features = [features_element, features_element_2] requests_element = {'image': image, 'features': features} requests = [requests_element] gcs_destination = {'uri': output_uri} @@ -73,7 +75,7 @@ def main(): import argparse parser = argparse.ArgumentParser() - parser.add_argument('--input_image_uri', type=str, default='gs://cloud-samples-data/vision/label/woman.jpg') + parser.add_argument('--input_image_uri', type=str, default='gs://cloud-samples-data/vision/label/wakeupcat.jpg') parser.add_argument('--output_uri', type=str, default='gs://your-bucket/prefix/') args = parser.parse_args() diff --git a/vision/cloud-client/detect/vision_batch_annotate_files.py b/vision/cloud-client/detect/vision_batch_annotate_files.py index bb35870f3ee2..b43b7752deec 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files.py @@ -19,6 +19,10 @@ # To install the latest published package dependency, execute the following: # pip install google-cloud-vision +# sample-metadata +# title: +# description: Perform batch file annotation +# usage: python3 samples/v1/vision_batch_annotate_files.py [--file_path "resources/kafka.pdf"] import sys # [START vision_batch_annotate_files] diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py index 96977cb18566..52d51813fbe3 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py @@ -19,6 +19,10 @@ # To install the latest published package dependency, execute the following: # pip install google-cloud-vision +# sample-metadata +# title: +# description: Perform batch file annotation +# usage: python3 samples/v1/vision_batch_annotate_files_gcs.py [--storage_uri "gs://cloud-samples-data/vision/document_understanding/kafka.pdf"] import sys # [START vision_batch_annotate_files_gcs] @@ -27,24 +31,31 @@ from google.cloud.vision_v1 import enums import six -def sample_batch_annotate_files(gcs_uri): - """Perform batch file annotation""" +def sample_batch_annotate_files(storage_uri): + """ + Perform batch file annotation + + Args: + storage_uri Cloud Storage URI to source image in the format gs://[bucket]/ + [file] + """ # [START vision_batch_annotate_files_gcs_core] client = vision_v1.ImageAnnotatorClient() - # gcs_uri = 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf' + # storage_uri = 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf' - if isinstance(gcs_uri, six.binary_type): - gcs_uri = gcs_uri.decode('utf-8') - gcs_source = {'uri': gcs_uri} + if isinstance(storage_uri, six.binary_type): + storage_uri = storage_uri.decode('utf-8') + gcs_source = {'uri': storage_uri} input_config = {'gcs_source': gcs_source} type_ = enums.Feature.Type.DOCUMENT_TEXT_DETECTION features_element = {'type': type_} features = [features_element] - # The service can process up to 5 pages per document file. Here we specify the - # first, second, and last page of the document to be processed. + # The service can process up to 5 pages per document file. + # Here we specify the first, second, and last page of the document to be + # processed. pages_element = 1 pages_element_2 = 2 pages_element_3 = -1 @@ -57,7 +68,6 @@ def sample_batch_annotate_files(gcs_uri): print('Full text: {}'.format(image_response.full_text_annotation.text)) for page in image_response.full_text_annotation.pages: for block in page.blocks: - # The service also returns the bounding boxes for blocks, paragraphs, words, and symbols. print('\nBlock confidence: {}'.format(block.confidence)) for par in block.paragraphs: print('\tParagraph confidence: {}'.format(par.confidence)) @@ -73,10 +83,10 @@ def main(): import argparse parser = argparse.ArgumentParser() - parser.add_argument('--gcs_uri', type=str, default='gs://cloud-samples-data/vision/document_understanding/kafka.pdf') + parser.add_argument('--storage_uri', type=str, default='gs://cloud-samples-data/vision/document_understanding/kafka.pdf') args = parser.parse_args() - sample_batch_annotate_files(args.gcs_uri) + sample_batch_annotate_files(args.storage_uri) if __name__ == '__main__': main() \ No newline at end of file From 36ab341bbaf31294de6e2363e3734f00a8f05ead Mon Sep 17 00:00:00 2001 From: Michelle Casbon Date: Tue, 21 Jan 2020 13:39:14 -0800 Subject: [PATCH 03/12] vision: resolve lint issues --- .../vision_async_batch_annotate_images.py | 110 ++++++++------- .../detect/vision_batch_annotate_files.py | 119 +++++++++-------- .../detect/vision_batch_annotate_files_gcs.py | 126 +++++++++++------- 3 files changed, 204 insertions(+), 151 deletions(-) diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images.py b/vision/cloud-client/detect/vision_async_batch_annotate_images.py index 4a6da40e243c..d6cc1d4b0575 100644 --- a/vision/cloud-client/detect/vision_async_batch_annotate_images.py +++ b/vision/cloud-client/detect/vision_async_batch_annotate_images.py @@ -14,7 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "vision_async_batch_annotate_images") +# DO NOT EDIT! This is a generated sample +# ("LongRunningPromise", "vision_async_batch_annotate_images") # To install the latest published package dependency, execute the following: # pip install google-cloud-vision @@ -22,8 +23,10 @@ # sample-metadata # title: Async Batch Image Annotation # description: Perform async batch image annotation -# usage: python3 samples/v1/vision_async_batch_annotate_images.py [--input_image_uri "gs://cloud-samples-data/vision/label/wakeupcat.jpg"] [--output_uri "gs://your-bucket/prefix/"] -import sys +# usage: python3 samples/v1/vision_async_batch_annotate_images.py \ +# [--input_image_uri \ +# "gs://cloud-samples-data/vision/label/wakeupcat.jpg"] \ +# [--output_uri "gs://your-bucket/prefix/"] # [START vision_async_batch_annotate_images] @@ -31,55 +34,66 @@ from google.cloud.vision_v1 import enums import six + def sample_async_batch_annotate_images(input_image_uri, output_uri): - """Perform async batch image annotation""" - # [START vision_async_batch_annotate_images_core] - - client = vision_v1.ImageAnnotatorClient() - - # input_image_uri = 'gs://cloud-samples-data/vision/label/wakeupcat.jpg' - # output_uri = 'gs://your-bucket/prefix/' - - if isinstance(input_image_uri, six.binary_type): - input_image_uri = input_image_uri.decode('utf-8') - if isinstance(output_uri, six.binary_type): - output_uri = output_uri.decode('utf-8') - source = {'image_uri': input_image_uri} - image = {'source': source} - type_ = enums.Feature.Type.LABEL_DETECTION - features_element = {'type': type_} - type_2 = enums.Feature.Type.IMAGE_PROPERTIES - features_element_2 = {'type': type_2} - features = [features_element, features_element_2] - requests_element = {'image': image, 'features': features} - requests = [requests_element] - gcs_destination = {'uri': output_uri} - - # The max number of responses to output in each JSON file - batch_size = 2 - output_config = {'gcs_destination': gcs_destination, 'batch_size': batch_size} - - operation = client.async_batch_annotate_images(requests, output_config) - - print('Waiting for operation to complete...') - response = operation.result() - - # The output is written to GCS with the provided output_uri as prefix - gcs_output_uri = response.output_config.gcs_destination.uri - print('Output written to GCS with prefix: {}'.format(gcs_output_uri)) - - # [END vision_async_batch_annotate_images_core] + """Perform async batch image annotation""" + # [START vision_async_batch_annotate_images_core] + + client = vision_v1.ImageAnnotatorClient() + + # input_image_uri = 'gs://cloud-samples-data/vision/label/wakeupcat.jpg' + # output_uri = 'gs://your-bucket/prefix/' + + if isinstance(input_image_uri, six.binary_type): + input_image_uri = input_image_uri.decode("utf-8") + if isinstance(output_uri, six.binary_type): + output_uri = output_uri.decode("utf-8") + source = {"image_uri": input_image_uri} + image = {"source": source} + type_ = enums.Feature.Type.LABEL_DETECTION + features_element = {"type": type_} + type_2 = enums.Feature.Type.IMAGE_PROPERTIES + features_element_2 = {"type": type_2} + features = [features_element, features_element_2] + requests_element = {"image": image, "features": features} + requests = [requests_element] + gcs_destination = {"uri": output_uri} + + # The max number of responses to output in each JSON file + batch_size = 2 + output_config = { + "gcs_destination": gcs_destination, "batch_size": batch_size} + + operation = client.async_batch_annotate_images(requests, output_config) + + print("Waiting for operation to complete...") + response = operation.result() + + # The output is written to GCS with the provided output_uri as prefix + gcs_output_uri = response.output_config.gcs_destination.uri + print("Output written to GCS with prefix: {}".format(gcs_output_uri)) + + # [END vision_async_batch_annotate_images_core] + + # [END vision_async_batch_annotate_images] + def main(): - import argparse + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--input_image_uri", + type=str, + default="gs://cloud-samples-data/vision/label/wakeupcat.jpg", + ) + parser.add_argument( + "--output_uri", type=str, default="gs://your-bucket/prefix/") + args = parser.parse_args() - parser = argparse.ArgumentParser() - parser.add_argument('--input_image_uri', type=str, default='gs://cloud-samples-data/vision/label/wakeupcat.jpg') - parser.add_argument('--output_uri', type=str, default='gs://your-bucket/prefix/') - args = parser.parse_args() + sample_async_batch_annotate_images(args.input_image_uri, args.output_uri) - sample_async_batch_annotate_images(args.input_image_uri, args.output_uri) -if __name__ == '__main__': - main() \ No newline at end of file +if __name__ == "__main__": + main() diff --git a/vision/cloud-client/detect/vision_batch_annotate_files.py b/vision/cloud-client/detect/vision_batch_annotate_files.py index b43b7752deec..cfff97f84cb3 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files.py @@ -14,7 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# DO NOT EDIT! This is a generated sample ("Request", "vision_batch_annotate_files") +# DO NOT EDIT! This is a generated sample +# ("Request", "vision_batch_annotate_files") # To install the latest published package dependency, execute the following: # pip install google-cloud-vision @@ -22,8 +23,8 @@ # sample-metadata # title: # description: Perform batch file annotation -# usage: python3 samples/v1/vision_batch_annotate_files.py [--file_path "resources/kafka.pdf"] -import sys +# usage: python3 samples/v1/vision_batch_annotate_files.py \ +# [--file_path "resources/kafka.pdf"] # [START vision_batch_annotate_files] @@ -32,64 +33,78 @@ import io import six + def sample_batch_annotate_files(file_path): - """ + """ Perform batch file annotation Args: file_path Path to local pdf file, e.g. /path/document.pdf """ - # [START vision_batch_annotate_files_core] - - client = vision_v1.ImageAnnotatorClient() - - # file_path = 'resources/kafka.pdf' - - if isinstance(file_path, six.binary_type): - file_path = file_path.decode('utf-8') - - # Supported mime_type: application/pdf, image/tiff, image/gif - mime_type = 'application/pdf' - with io.open(file_path, 'rb') as f: - content = f.read() - input_config = {'mime_type': mime_type, 'content': content} - type_ = enums.Feature.Type.DOCUMENT_TEXT_DETECTION - features_element = {'type': type_} - features = [features_element] - - # The service can process up to 5 pages per document file. Here we specify the - # first, second, and last page of the document to be processed. - pages_element = 1 - pages_element_2 = 2 - pages_element_3 = -1 - pages = [pages_element, pages_element_2, pages_element_3] - requests_element = {'input_config': input_config, 'features': features, 'pages': pages} - requests = [requests_element] - - response = client.batch_annotate_files(requests) - for image_response in response.responses[0].responses: - print('Full text: {}'.format(image_response.full_text_annotation.text)) - for page in image_response.full_text_annotation.pages: - for block in page.blocks: - print('\nBlock confidence: {}'.format(block.confidence)) - for par in block.paragraphs: - print('\tParagraph confidence: {}'.format(par.confidence)) - for word in par.words: - print('\t\tWord confidence: {}'.format(word.confidence)) - for symbol in word.symbols: - print('\t\t\tSymbol: {}, (confidence: {})'.format(symbol.text, symbol.confidence)) - - # [END vision_batch_annotate_files_core] + # [START vision_batch_annotate_files_core] + + client = vision_v1.ImageAnnotatorClient() + + # file_path = 'resources/kafka.pdf' + + if isinstance(file_path, six.binary_type): + file_path = file_path.decode("utf-8") + + # Supported mime_type: application/pdf, image/tiff, image/gif + mime_type = "application/pdf" + with io.open(file_path, "rb") as f: + content = f.read() + input_config = {"mime_type": mime_type, "content": content} + type_ = enums.Feature.Type.DOCUMENT_TEXT_DETECTION + features_element = {"type": type_} + features = [features_element] + + # The service can process up to 5 pages per document file. Here we specify + # the first, second, and last page of the document to be processed. + pages_element = 1 + pages_element_2 = 2 + pages_element_3 = -1 + pages = [pages_element, pages_element_2, pages_element_3] + requests_element = { + "input_config": input_config, + "features": features, + "pages": pages, + } + requests = [requests_element] + + response = client.batch_annotate_files(requests) + for image_response in response.responses[0].responses: + print("Full text: {}".format(image_response.full_text_annotation.text)) + for page in image_response.full_text_annotation.pages: + for block in page.blocks: + print("\nBlock confidence: {}".format(block.confidence)) + for par in block.paragraphs: + print("\tParagraph confidence: {}".format(par.confidence)) + for word in par.words: + print( + "\t\tWord confidence: {}".format(word.confidence)) + for symbol in word.symbols: + print( + "\t\t\tSymbol: {}, (confidence: {})".format( + symbol.text, symbol.confidence + ) + ) + + # [END vision_batch_annotate_files_core] + + # [END vision_batch_annotate_files] + def main(): - import argparse + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--file_path", type=str, default="resources/kafka.pdf") + args = parser.parse_args() - parser = argparse.ArgumentParser() - parser.add_argument('--file_path', type=str, default='resources/kafka.pdf') - args = parser.parse_args() + sample_batch_annotate_files(args.file_path) - sample_batch_annotate_files(args.file_path) -if __name__ == '__main__': - main() \ No newline at end of file +if __name__ == "__main__": + main() diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py index 52d51813fbe3..677beb67aa38 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py @@ -14,7 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# DO NOT EDIT! This is a generated sample ("Request", "vision_batch_annotate_files_gcs") +# DO NOT EDIT! This is a generated sample +# ("Request", "vision_batch_annotate_files_gcs") # To install the latest published package dependency, execute the following: # pip install google-cloud-vision @@ -22,8 +23,9 @@ # sample-metadata # title: # description: Perform batch file annotation -# usage: python3 samples/v1/vision_batch_annotate_files_gcs.py [--storage_uri "gs://cloud-samples-data/vision/document_understanding/kafka.pdf"] -import sys +# usage: python3 samples/v1/vision_batch_annotate_files_gcs.py \ +# [--storage_uri \ +# "gs://cloud-samples-data/vision/document_understanding/kafka.pdf"] # [START vision_batch_annotate_files_gcs] @@ -31,62 +33,84 @@ from google.cloud.vision_v1 import enums import six + def sample_batch_annotate_files(storage_uri): - """ + """ Perform batch file annotation Args: - storage_uri Cloud Storage URI to source image in the format gs://[bucket]/ - [file] + storage_uri Cloud Storage URI to source image in the format + gs://[bucket]/ [file] """ - # [START vision_batch_annotate_files_gcs_core] - - client = vision_v1.ImageAnnotatorClient() - - # storage_uri = 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf' - - if isinstance(storage_uri, six.binary_type): - storage_uri = storage_uri.decode('utf-8') - gcs_source = {'uri': storage_uri} - input_config = {'gcs_source': gcs_source} - type_ = enums.Feature.Type.DOCUMENT_TEXT_DETECTION - features_element = {'type': type_} - features = [features_element] - - # The service can process up to 5 pages per document file. - # Here we specify the first, second, and last page of the document to be - # processed. - pages_element = 1 - pages_element_2 = 2 - pages_element_3 = -1 - pages = [pages_element, pages_element_2, pages_element_3] - requests_element = {'input_config': input_config, 'features': features, 'pages': pages} - requests = [requests_element] - - response = client.batch_annotate_files(requests) - for image_response in response.responses[0].responses: - print('Full text: {}'.format(image_response.full_text_annotation.text)) - for page in image_response.full_text_annotation.pages: - for block in page.blocks: - print('\nBlock confidence: {}'.format(block.confidence)) - for par in block.paragraphs: - print('\tParagraph confidence: {}'.format(par.confidence)) - for word in par.words: - print('\t\tWord confidence: {}'.format(word.confidence)) - for symbol in word.symbols: - print('\t\t\tSymbol: {}, (confidence: {})'.format(symbol.text, symbol.confidence)) - - # [END vision_batch_annotate_files_gcs_core] + # [START vision_batch_annotate_files_gcs_core] + + client = vision_v1.ImageAnnotatorClient() + + # storage_uri = ( + # 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf' + # ) + + if isinstance(storage_uri, six.binary_type): + storage_uri = storage_uri.decode("utf-8") + gcs_source = {"uri": storage_uri} + input_config = {"gcs_source": gcs_source} + type_ = enums.Feature.Type.DOCUMENT_TEXT_DETECTION + features_element = {"type": type_} + features = [features_element] + + # The service can process up to 5 pages per document file. + # Here we specify the first, second, and last page of the document to be + # processed. + pages_element = 1 + pages_element_2 = 2 + pages_element_3 = -1 + pages = [pages_element, pages_element_2, pages_element_3] + requests_element = { + "input_config": input_config, + "features": features, + "pages": pages, + } + requests = [requests_element] + + response = client.batch_annotate_files(requests) + for image_response in response.responses[0].responses: + print("Full text: {}".format(image_response.full_text_annotation.text)) + for page in image_response.full_text_annotation.pages: + for block in page.blocks: + print("\nBlock confidence: {}".format(block.confidence)) + for par in block.paragraphs: + print("\tParagraph confidence: {}".format(par.confidence)) + for word in par.words: + print( + "\t\tWord confidence: {}".format(word.confidence)) + for symbol in word.symbols: + print( + "\t\t\tSymbol: {}, (confidence: {})".format( + symbol.text, symbol.confidence + ) + ) + + # [END vision_batch_annotate_files_gcs_core] + + # [END vision_batch_annotate_files_gcs] + def main(): - import argparse + import argparse + + storage_uri_default = ( + "gs://cloud-samples-data/vision/document_understanding/kafka.pdf" + ) + + parser = argparse.ArgumentParser() + parser.add_argument( + "--storage_uri", type=str, default=storage_uri_default, + ) + args = parser.parse_args() - parser = argparse.ArgumentParser() - parser.add_argument('--storage_uri', type=str, default='gs://cloud-samples-data/vision/document_understanding/kafka.pdf') - args = parser.parse_args() + sample_batch_annotate_files(args.storage_uri) - sample_batch_annotate_files(args.storage_uri) -if __name__ == '__main__': - main() \ No newline at end of file +if __name__ == "__main__": + main() From 88b647a17c200823f57cbdd834a4ebe560f15c2b Mon Sep 17 00:00:00 2001 From: Michelle Casbon Date: Wed, 22 Jan 2020 00:05:29 +0000 Subject: [PATCH 04/12] vision: add test for async_batch_annotate_images --- ...vision_async_batch_annotate_images_test.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 vision/cloud-client/detect/vision_async_batch_annotate_images_test.py diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py b/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py new file mode 100644 index 000000000000..59cbcb4cb6d6 --- /dev/null +++ b/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py @@ -0,0 +1,53 @@ +# Copyright 2019 Google +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import uuid + +from google.cloud import storage + +import vision_async_batch_annotate_images + +RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') +GCS_ROOT = 'gs://cloud-samples-data/vision/' + +BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] +OUTPUT_PREFIX = 'TEST_OUTPUT_{}'.format(uuid.uuid4()) +GCS_DESTINATION_URI = 'gs://{}/{}/'.format(BUCKET, OUTPUT_PREFIX) + + +def test_sample_asyn_batch_annotate_images(capsys): + storage_client = storage.Client() + bucket = storage_client.get_bucket(BUCKET) + if len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) > 0: + for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): + blob.delete() + + assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0 + + input_image_uri = os.path.join(GCS_ROOT, 'label/wakeupcat.jpg') + + vision_async_batch_annotate_images.sample_async_batch_annotate_images( + input_image_uri=input_image_uri, output_uri=GCS_DESTINATION_URI) + + out, _ = capsys.readouterr() + + assert 'Output written to GCS' in out + assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) > 0 + + for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): + blob.delete() + + assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0 + From 2b5a3fe2e495506e726243ce42cf7872f74382a5 Mon Sep 17 00:00:00 2001 From: Michelle Casbon Date: Wed, 22 Jan 2020 00:35:18 +0000 Subject: [PATCH 05/12] vision: add test for batch_annotate_files_gcs fix: add required attribute mime_type fix: lint errors in async_batch_annotate_images_test --- ...vision_async_batch_annotate_images_test.py | 42 +++++++++---------- .../detect/vision_batch_annotate_files_gcs.py | 4 +- .../vision_batch_annotate_files_gcs_test.py | 38 +++++++++++++++++ 3 files changed, 62 insertions(+), 22 deletions(-) create mode 100644 vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py b/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py index 59cbcb4cb6d6..48ed661542d5 100644 --- a/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py +++ b/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py @@ -19,35 +19,35 @@ import vision_async_batch_annotate_images -RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') -GCS_ROOT = 'gs://cloud-samples-data/vision/' +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") +GCS_ROOT = "gs://cloud-samples-data/vision/" -BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] -OUTPUT_PREFIX = 'TEST_OUTPUT_{}'.format(uuid.uuid4()) -GCS_DESTINATION_URI = 'gs://{}/{}/'.format(BUCKET, OUTPUT_PREFIX) +BUCKET = os.environ["CLOUD_STORAGE_BUCKET"] +OUTPUT_PREFIX = "TEST_OUTPUT_{}".format(uuid.uuid4()) +GCS_DESTINATION_URI = "gs://{}/{}/".format(BUCKET, OUTPUT_PREFIX) def test_sample_asyn_batch_annotate_images(capsys): - storage_client = storage.Client() - bucket = storage_client.get_bucket(BUCKET) - if len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) > 0: - for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): - blob.delete() + storage_client = storage.Client() + bucket = storage_client.get_bucket(BUCKET) + if len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) > 0: + for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): + blob.delete() - assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0 + assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0 - input_image_uri = os.path.join(GCS_ROOT, 'label/wakeupcat.jpg') + input_image_uri = os.path.join(GCS_ROOT, "label/wakeupcat.jpg") - vision_async_batch_annotate_images.sample_async_batch_annotate_images( - input_image_uri=input_image_uri, output_uri=GCS_DESTINATION_URI) + vision_async_batch_annotate_images.sample_async_batch_annotate_images( + input_image_uri=input_image_uri, output_uri=GCS_DESTINATION_URI + ) - out, _ = capsys.readouterr() - - assert 'Output written to GCS' in out - assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) > 0 + out, _ = capsys.readouterr() - for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): - blob.delete() + assert "Output written to GCS" in out + assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) > 0 - assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0 + for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): + blob.delete() + assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0 diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py index 677beb67aa38..3f6c714ff0c4 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py @@ -43,6 +43,7 @@ def sample_batch_annotate_files(storage_uri): gs://[bucket]/ [file] """ # [START vision_batch_annotate_files_gcs_core] + mime_type = 'application/pdf' client = vision_v1.ImageAnnotatorClient() @@ -53,7 +54,8 @@ def sample_batch_annotate_files(storage_uri): if isinstance(storage_uri, six.binary_type): storage_uri = storage_uri.decode("utf-8") gcs_source = {"uri": storage_uri} - input_config = {"gcs_source": gcs_source} + input_config = {"gcs_source": gcs_source, + "mime_type": mime_type} type_ = enums.Feature.Type.DOCUMENT_TEXT_DETECTION features_element = {"type": type_} features = [features_element] diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py new file mode 100644 index 000000000000..51fa44ca3f91 --- /dev/null +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py @@ -0,0 +1,38 @@ +# Copyright 2020 Google +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import uuid + +import vision_batch_annotate_files_gcs + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") +GCS_ROOT = "gs://cloud-samples-data/vision/" + +BUCKET = os.environ["CLOUD_STORAGE_BUCKET"] +OUTPUT_PREFIX = "TEST_OUTPUT_{}".format(uuid.uuid4()) +GCS_DESTINATION_URI = "gs://{}/{}/".format(BUCKET, OUTPUT_PREFIX) + + +def test_sample_batch_annotate_files_gcs(capsys): + storage_uri = os.path.join(GCS_ROOT, "document_understanding/kafka.pdf") + + vision_batch_annotate_files_gcs.sample_batch_annotate_files( + storage_uri=storage_uri + ) + + out, _ = capsys.readouterr() + + assert "Full text" in out + assert "Block confidence" in out From e24b100984f391a1aac7c925ab7c07d9553572d3 Mon Sep 17 00:00:00 2001 From: Michelle Casbon Date: Wed, 22 Jan 2020 01:37:36 +0000 Subject: [PATCH 06/12] vision: add test for batch_annotate_files fix: encoding error in py2 docs: update year in copyright header --- ...vision_async_batch_annotate_images_test.py | 2 +- .../detect/vision_batch_annotate_files.py | 11 +++--- .../detect/vision_batch_annotate_files_gcs.py | 16 ++++---- .../vision_batch_annotate_files_gcs_test.py | 4 +- .../vision_batch_annotate_files_test.py | 37 +++++++++++++++++++ 5 files changed, 51 insertions(+), 19 deletions(-) create mode 100644 vision/cloud-client/detect/vision_batch_annotate_files_test.py diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py b/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py index 48ed661542d5..bdfbb94dee07 100644 --- a/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py +++ b/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google +# Copyright 2020 Google # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/vision/cloud-client/detect/vision_batch_annotate_files.py b/vision/cloud-client/detect/vision_batch_annotate_files.py index cfff97f84cb3..2ab18a23d1fe 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files.py @@ -74,18 +74,17 @@ def sample_batch_annotate_files(file_path): response = client.batch_annotate_files(requests) for image_response in response.responses[0].responses: - print("Full text: {}".format(image_response.full_text_annotation.text)) + print(u"Full text: {}".format(image_response.full_text_annotation.text)) for page in image_response.full_text_annotation.pages: for block in page.blocks: - print("\nBlock confidence: {}".format(block.confidence)) + print(u"\nBlock confidence: {}".format(block.confidence)) for par in block.paragraphs: - print("\tParagraph confidence: {}".format(par.confidence)) + print(u"\tParagraph confidence: {}".format(par.confidence)) for word in par.words: - print( - "\t\tWord confidence: {}".format(word.confidence)) + print(u"\t\tWord confidence: {}".format(word.confidence)) for symbol in word.symbols: print( - "\t\t\tSymbol: {}, (confidence: {})".format( + u"\t\t\tSymbol: {}, (confidence: {})".format( symbol.text, symbol.confidence ) ) diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py index 3f6c714ff0c4..9548d6230b09 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py @@ -43,7 +43,7 @@ def sample_batch_annotate_files(storage_uri): gs://[bucket]/ [file] """ # [START vision_batch_annotate_files_gcs_core] - mime_type = 'application/pdf' + mime_type = "application/pdf" client = vision_v1.ImageAnnotatorClient() @@ -54,8 +54,7 @@ def sample_batch_annotate_files(storage_uri): if isinstance(storage_uri, six.binary_type): storage_uri = storage_uri.decode("utf-8") gcs_source = {"uri": storage_uri} - input_config = {"gcs_source": gcs_source, - "mime_type": mime_type} + input_config = {"gcs_source": gcs_source, "mime_type": mime_type} type_ = enums.Feature.Type.DOCUMENT_TEXT_DETECTION features_element = {"type": type_} features = [features_element] @@ -76,18 +75,17 @@ def sample_batch_annotate_files(storage_uri): response = client.batch_annotate_files(requests) for image_response in response.responses[0].responses: - print("Full text: {}".format(image_response.full_text_annotation.text)) + print(u"Full text: {}".format(image_response.full_text_annotation.text)) for page in image_response.full_text_annotation.pages: for block in page.blocks: - print("\nBlock confidence: {}".format(block.confidence)) + print(u"\nBlock confidence: {}".format(block.confidence)) for par in block.paragraphs: - print("\tParagraph confidence: {}".format(par.confidence)) + print(u"\tParagraph confidence: {}".format(par.confidence)) for word in par.words: - print( - "\t\tWord confidence: {}".format(word.confidence)) + print(u"\t\tWord confidence: {}".format(word.confidence)) for symbol in word.symbols: print( - "\t\t\tSymbol: {}, (confidence: {})".format( + u"\t\t\tSymbol: {}, (confidence: {})".format( symbol.text, symbol.confidence ) ) diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py index 51fa44ca3f91..c04ef1e64482 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py @@ -28,9 +28,7 @@ def test_sample_batch_annotate_files_gcs(capsys): storage_uri = os.path.join(GCS_ROOT, "document_understanding/kafka.pdf") - vision_batch_annotate_files_gcs.sample_batch_annotate_files( - storage_uri=storage_uri - ) + vision_batch_annotate_files_gcs.sample_batch_annotate_files(storage_uri=storage_uri) out, _ = capsys.readouterr() diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_test.py b/vision/cloud-client/detect/vision_batch_annotate_files_test.py new file mode 100644 index 000000000000..b17bfc4927ac --- /dev/null +++ b/vision/cloud-client/detect/vision_batch_annotate_files_test.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import uuid + +import vision_batch_annotate_files + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") +GCS_ROOT = "gs://cloud-samples-data/vision/" + +BUCKET = os.environ["CLOUD_STORAGE_BUCKET"] +OUTPUT_PREFIX = "TEST_OUTPUT_{}".format(uuid.uuid4()) +GCS_DESTINATION_URI = "gs://{}/{}/".format(BUCKET, OUTPUT_PREFIX) + + +def test_sample_batch_annotate_files(capsys): + file_path = "resources/kafka.pdf" + + vision_batch_annotate_files.sample_batch_annotate_files(file_path=file_path) + + out, _ = capsys.readouterr() + + assert "Full text" in out + assert "Block confidence" in out From 4e6f203f4ef4bc26e4ed13ce058cd5e1d13c3244 Mon Sep 17 00:00:00 2001 From: Michelle Casbon Date: Tue, 21 Jan 2020 17:45:03 -0800 Subject: [PATCH 07/12] fix: pesky lint issues --- vision/cloud-client/detect/vision_batch_annotate_files.py | 6 ++++-- .../cloud-client/detect/vision_batch_annotate_files_gcs.py | 6 ++++-- .../detect/vision_batch_annotate_files_gcs_test.py | 3 ++- .../cloud-client/detect/vision_batch_annotate_files_test.py | 3 ++- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/vision/cloud-client/detect/vision_batch_annotate_files.py b/vision/cloud-client/detect/vision_batch_annotate_files.py index 2ab18a23d1fe..06880e7e64bc 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files.py @@ -74,14 +74,16 @@ def sample_batch_annotate_files(file_path): response = client.batch_annotate_files(requests) for image_response in response.responses[0].responses: - print(u"Full text: {}".format(image_response.full_text_annotation.text)) + print( + u"Full text: {}".format(image_response.full_text_annotation.text)) for page in image_response.full_text_annotation.pages: for block in page.blocks: print(u"\nBlock confidence: {}".format(block.confidence)) for par in block.paragraphs: print(u"\tParagraph confidence: {}".format(par.confidence)) for word in par.words: - print(u"\t\tWord confidence: {}".format(word.confidence)) + print( + u"\t\tWord confidence: {}".format(word.confidence)) for symbol in word.symbols: print( u"\t\t\tSymbol: {}, (confidence: {})".format( diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py index 9548d6230b09..0dbd65ea4bdc 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py @@ -75,14 +75,16 @@ def sample_batch_annotate_files(storage_uri): response = client.batch_annotate_files(requests) for image_response in response.responses[0].responses: - print(u"Full text: {}".format(image_response.full_text_annotation.text)) + print( + u"Full text: {}".format(image_response.full_text_annotation.text)) for page in image_response.full_text_annotation.pages: for block in page.blocks: print(u"\nBlock confidence: {}".format(block.confidence)) for par in block.paragraphs: print(u"\tParagraph confidence: {}".format(par.confidence)) for word in par.words: - print(u"\t\tWord confidence: {}".format(word.confidence)) + print( + u"\t\tWord confidence: {}".format(word.confidence)) for symbol in word.symbols: print( u"\t\t\tSymbol: {}, (confidence: {})".format( diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py index c04ef1e64482..30c6e1c0ce7e 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py @@ -28,7 +28,8 @@ def test_sample_batch_annotate_files_gcs(capsys): storage_uri = os.path.join(GCS_ROOT, "document_understanding/kafka.pdf") - vision_batch_annotate_files_gcs.sample_batch_annotate_files(storage_uri=storage_uri) + vision_batch_annotate_files_gcs.sample_batch_annotate_files( + storage_uri=storage_uri) out, _ = capsys.readouterr() diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_test.py b/vision/cloud-client/detect/vision_batch_annotate_files_test.py index b17bfc4927ac..34a5d7521c6c 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_test.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_test.py @@ -29,7 +29,8 @@ def test_sample_batch_annotate_files(capsys): file_path = "resources/kafka.pdf" - vision_batch_annotate_files.sample_batch_annotate_files(file_path=file_path) + vision_batch_annotate_files.sample_batch_annotate_files( + file_path=file_path) out, _ = capsys.readouterr() From bed231dbc326883441063a4daf4ae5827ec65459 Mon Sep 17 00:00:00 2001 From: Michelle Casbon Date: Wed, 22 Jan 2020 20:35:30 +0000 Subject: [PATCH 08/12] Address review cleanup Remove autogenerated warnings Remove coding: utf-8 line Remove argument encoding checks Update copyright to 2020 --- .../vision_async_batch_annotate_images.py | 23 +------------------ .../detect/vision_batch_annotate_files.py | 20 +--------------- .../detect/vision_batch_annotate_files_gcs.py | 20 +--------------- .../vision_batch_annotate_files_test.py | 1 - 4 files changed, 3 insertions(+), 61 deletions(-) diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images.py b/vision/cloud-client/detect/vision_async_batch_annotate_images.py index d6cc1d4b0575..1efb234ce052 100644 --- a/vision/cloud-client/detect/vision_async_batch_annotate_images.py +++ b/vision/cloud-client/detect/vision_async_batch_annotate_images.py @@ -1,6 +1,4 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,25 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -# DO NOT EDIT! This is a generated sample -# ("LongRunningPromise", "vision_async_batch_annotate_images") - -# To install the latest published package dependency, execute the following: -# pip install google-cloud-vision - -# sample-metadata -# title: Async Batch Image Annotation -# description: Perform async batch image annotation -# usage: python3 samples/v1/vision_async_batch_annotate_images.py \ -# [--input_image_uri \ -# "gs://cloud-samples-data/vision/label/wakeupcat.jpg"] \ -# [--output_uri "gs://your-bucket/prefix/"] - # [START vision_async_batch_annotate_images] from google.cloud import vision_v1 from google.cloud.vision_v1 import enums -import six def sample_async_batch_annotate_images(input_image_uri, output_uri): @@ -44,10 +27,6 @@ def sample_async_batch_annotate_images(input_image_uri, output_uri): # input_image_uri = 'gs://cloud-samples-data/vision/label/wakeupcat.jpg' # output_uri = 'gs://your-bucket/prefix/' - if isinstance(input_image_uri, six.binary_type): - input_image_uri = input_image_uri.decode("utf-8") - if isinstance(output_uri, six.binary_type): - output_uri = output_uri.decode("utf-8") source = {"image_uri": input_image_uri} image = {"source": source} type_ = enums.Feature.Type.LABEL_DETECTION diff --git a/vision/cloud-client/detect/vision_batch_annotate_files.py b/vision/cloud-client/detect/vision_batch_annotate_files.py index 06880e7e64bc..08a878905d82 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files.py @@ -1,6 +1,4 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,24 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -# DO NOT EDIT! This is a generated sample -# ("Request", "vision_batch_annotate_files") - -# To install the latest published package dependency, execute the following: -# pip install google-cloud-vision - -# sample-metadata -# title: -# description: Perform batch file annotation -# usage: python3 samples/v1/vision_batch_annotate_files.py \ -# [--file_path "resources/kafka.pdf"] - # [START vision_batch_annotate_files] from google.cloud import vision_v1 from google.cloud.vision_v1 import enums import io -import six def sample_batch_annotate_files(file_path): @@ -47,9 +32,6 @@ def sample_batch_annotate_files(file_path): # file_path = 'resources/kafka.pdf' - if isinstance(file_path, six.binary_type): - file_path = file_path.decode("utf-8") - # Supported mime_type: application/pdf, image/tiff, image/gif mime_type = "application/pdf" with io.open(file_path, "rb") as f: diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py index 0dbd65ea4bdc..9ce5c2cec78d 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py @@ -1,6 +1,4 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,24 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -# DO NOT EDIT! This is a generated sample -# ("Request", "vision_batch_annotate_files_gcs") - -# To install the latest published package dependency, execute the following: -# pip install google-cloud-vision - -# sample-metadata -# title: -# description: Perform batch file annotation -# usage: python3 samples/v1/vision_batch_annotate_files_gcs.py \ -# [--storage_uri \ -# "gs://cloud-samples-data/vision/document_understanding/kafka.pdf"] - # [START vision_batch_annotate_files_gcs] from google.cloud import vision_v1 from google.cloud.vision_v1 import enums -import six def sample_batch_annotate_files(storage_uri): @@ -51,8 +35,6 @@ def sample_batch_annotate_files(storage_uri): # 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf' # ) - if isinstance(storage_uri, six.binary_type): - storage_uri = storage_uri.decode("utf-8") gcs_source = {"uri": storage_uri} input_config = {"gcs_source": gcs_source, "mime_type": mime_type} type_ = enums.Feature.Type.DOCUMENT_TEXT_DETECTION diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_test.py b/vision/cloud-client/detect/vision_batch_annotate_files_test.py index 34a5d7521c6c..105b0d247130 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_test.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_test.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2020 Google # # Licensed under the Apache License, Version 2.0 (the "License"); From c28e115acd12c5e3106632a03bf358e504226fbd Mon Sep 17 00:00:00 2001 From: Michelle Casbon Date: Wed, 22 Jan 2020 22:10:13 +0000 Subject: [PATCH 09/12] Remove CLI --- .../vision_async_batch_annotate_images.py | 32 ++++--------------- .../detect/vision_batch_annotate_files.py | 24 +++----------- .../detect/vision_batch_annotate_files_gcs.py | 32 ++++--------------- 3 files changed, 17 insertions(+), 71 deletions(-) diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images.py b/vision/cloud-client/detect/vision_async_batch_annotate_images.py index 1efb234ce052..6b3a378939f4 100644 --- a/vision/cloud-client/detect/vision_async_batch_annotate_images.py +++ b/vision/cloud-client/detect/vision_async_batch_annotate_images.py @@ -18,14 +18,17 @@ from google.cloud.vision_v1 import enums -def sample_async_batch_annotate_images(input_image_uri, output_uri): +def sample_async_batch_annotate_images( + input_image_uri="gs://cloud-samples-data/vision/label/wakeupcat.jpg", + output_uri="gs://your-bucket/prefix/", +): """Perform async batch image annotation""" # [START vision_async_batch_annotate_images_core] client = vision_v1.ImageAnnotatorClient() - # input_image_uri = 'gs://cloud-samples-data/vision/label/wakeupcat.jpg' - # output_uri = 'gs://your-bucket/prefix/' + # input_image_uri = "gs://cloud-samples-data/vision/label/wakeupcat.jpg" + # output_uri = "gs://your-bucket/prefix/" source = {"image_uri": input_image_uri} image = {"source": source} @@ -40,8 +43,7 @@ def sample_async_batch_annotate_images(input_image_uri, output_uri): # The max number of responses to output in each JSON file batch_size = 2 - output_config = { - "gcs_destination": gcs_destination, "batch_size": batch_size} + output_config = {"gcs_destination": gcs_destination, "batch_size": batch_size} operation = client.async_batch_annotate_images(requests, output_config) @@ -56,23 +58,3 @@ def sample_async_batch_annotate_images(input_image_uri, output_uri): # [END vision_async_batch_annotate_images] - - -def main(): - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument( - "--input_image_uri", - type=str, - default="gs://cloud-samples-data/vision/label/wakeupcat.jpg", - ) - parser.add_argument( - "--output_uri", type=str, default="gs://your-bucket/prefix/") - args = parser.parse_args() - - sample_async_batch_annotate_images(args.input_image_uri, args.output_uri) - - -if __name__ == "__main__": - main() diff --git a/vision/cloud-client/detect/vision_batch_annotate_files.py b/vision/cloud-client/detect/vision_batch_annotate_files.py index 08a878905d82..0cdf55ff9a66 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files.py @@ -19,7 +19,7 @@ import io -def sample_batch_annotate_files(file_path): +def sample_batch_annotate_files(file_path="resources/kafka.pdf"): """ Perform batch file annotation @@ -30,7 +30,7 @@ def sample_batch_annotate_files(file_path): client = vision_v1.ImageAnnotatorClient() - # file_path = 'resources/kafka.pdf' + # file_path = "resources/kafka.pdf" # Supported mime_type: application/pdf, image/tiff, image/gif mime_type = "application/pdf" @@ -56,16 +56,14 @@ def sample_batch_annotate_files(file_path): response = client.batch_annotate_files(requests) for image_response in response.responses[0].responses: - print( - u"Full text: {}".format(image_response.full_text_annotation.text)) + print(u"Full text: {}".format(image_response.full_text_annotation.text)) for page in image_response.full_text_annotation.pages: for block in page.blocks: print(u"\nBlock confidence: {}".format(block.confidence)) for par in block.paragraphs: print(u"\tParagraph confidence: {}".format(par.confidence)) for word in par.words: - print( - u"\t\tWord confidence: {}".format(word.confidence)) + print(u"\t\tWord confidence: {}".format(word.confidence)) for symbol in word.symbols: print( u"\t\t\tSymbol: {}, (confidence: {})".format( @@ -77,17 +75,3 @@ def sample_batch_annotate_files(file_path): # [END vision_batch_annotate_files] - - -def main(): - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument("--file_path", type=str, default="resources/kafka.pdf") - args = parser.parse_args() - - sample_batch_annotate_files(args.file_path) - - -if __name__ == "__main__": - main() diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py index 9ce5c2cec78d..ecfcf54cf9e9 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py @@ -18,7 +18,9 @@ from google.cloud.vision_v1 import enums -def sample_batch_annotate_files(storage_uri): +def sample_batch_annotate_files( + storage_uri="gs://cloud-samples-data/vision/document_understanding/kafka.pdf", +): """ Perform batch file annotation @@ -32,7 +34,7 @@ def sample_batch_annotate_files(storage_uri): client = vision_v1.ImageAnnotatorClient() # storage_uri = ( - # 'gs://cloud-samples-data/vision/document_understanding/kafka.pdf' + # "gs://cloud-samples-data/vision/document_understanding/kafka.pdf" # ) gcs_source = {"uri": storage_uri} @@ -57,16 +59,14 @@ def sample_batch_annotate_files(storage_uri): response = client.batch_annotate_files(requests) for image_response in response.responses[0].responses: - print( - u"Full text: {}".format(image_response.full_text_annotation.text)) + print(u"Full text: {}".format(image_response.full_text_annotation.text)) for page in image_response.full_text_annotation.pages: for block in page.blocks: print(u"\nBlock confidence: {}".format(block.confidence)) for par in block.paragraphs: print(u"\tParagraph confidence: {}".format(par.confidence)) for word in par.words: - print( - u"\t\tWord confidence: {}".format(word.confidence)) + print(u"\t\tWord confidence: {}".format(word.confidence)) for symbol in word.symbols: print( u"\t\t\tSymbol: {}, (confidence: {})".format( @@ -78,23 +78,3 @@ def sample_batch_annotate_files(storage_uri): # [END vision_batch_annotate_files_gcs] - - -def main(): - import argparse - - storage_uri_default = ( - "gs://cloud-samples-data/vision/document_understanding/kafka.pdf" - ) - - parser = argparse.ArgumentParser() - parser.add_argument( - "--storage_uri", type=str, default=storage_uri_default, - ) - args = parser.parse_args() - - sample_batch_annotate_files(args.storage_uri) - - -if __name__ == "__main__": - main() From f2abf4e88eb2abb87959880ff5bb55d069c90094 Mon Sep 17 00:00:00 2001 From: Michelle Casbon Date: Sat, 25 Jan 2020 00:38:25 +0000 Subject: [PATCH 10/12] Add test fixtures Remove unnecessary statics, variables, and imports Blacken with l=88 --- .../vision_async_batch_annotate_images.py | 12 ++++----- ...vision_async_batch_annotate_images_test.py | 27 ++++++++++++------- .../detect/vision_batch_annotate_files.py | 16 +++-------- .../detect/vision_batch_annotate_files_gcs.py | 16 +++-------- .../vision_batch_annotate_files_gcs_test.py | 9 +------ .../vision_batch_annotate_files_test.py | 11 ++------ 6 files changed, 32 insertions(+), 59 deletions(-) diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images.py b/vision/cloud-client/detect/vision_async_batch_annotate_images.py index 6b3a378939f4..66f7e1e85aad 100644 --- a/vision/cloud-client/detect/vision_async_batch_annotate_images.py +++ b/vision/cloud-client/detect/vision_async_batch_annotate_images.py @@ -32,13 +32,11 @@ def sample_async_batch_annotate_images( source = {"image_uri": input_image_uri} image = {"source": source} - type_ = enums.Feature.Type.LABEL_DETECTION - features_element = {"type": type_} - type_2 = enums.Feature.Type.IMAGE_PROPERTIES - features_element_2 = {"type": type_2} - features = [features_element, features_element_2] - requests_element = {"image": image, "features": features} - requests = [requests_element] + features = [ + {"type": enums.Feature.Type.LABEL_DETECTION}, + {"type": enums.Feature.Type.IMAGE_PROPERTIES}, + ] + requests = [{"image": image, "features": features}] gcs_destination = {"uri": output_uri} # The max number of responses to output in each JSON file diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py b/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py index bdfbb94dee07..003a6fa894cb 100644 --- a/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py +++ b/vision/cloud-client/detect/vision_async_batch_annotate_images_test.py @@ -16,6 +16,7 @@ import uuid from google.cloud import storage +import pytest import vision_async_batch_annotate_images @@ -27,15 +28,28 @@ GCS_DESTINATION_URI = "gs://{}/{}/".format(BUCKET, OUTPUT_PREFIX) -def test_sample_asyn_batch_annotate_images(capsys): - storage_client = storage.Client() +@pytest.fixture() +def storage_client(): + yield storage.Client() + + +@pytest.fixture() +def bucket(storage_client): bucket = storage_client.get_bucket(BUCKET) - if len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) > 0: + + try: for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): blob.delete() + except Exception: + pass - assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0 + yield bucket + + for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): + blob.delete() + +def test_sample_asyn_batch_annotate_images(storage_client, bucket, capsys): input_image_uri = os.path.join(GCS_ROOT, "label/wakeupcat.jpg") vision_async_batch_annotate_images.sample_async_batch_annotate_images( @@ -46,8 +60,3 @@ def test_sample_asyn_batch_annotate_images(capsys): assert "Output written to GCS" in out assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) > 0 - - for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): - blob.delete() - - assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0 diff --git a/vision/cloud-client/detect/vision_batch_annotate_files.py b/vision/cloud-client/detect/vision_batch_annotate_files.py index 0cdf55ff9a66..55f4e1319b1a 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files.py @@ -37,22 +37,12 @@ def sample_batch_annotate_files(file_path="resources/kafka.pdf"): with io.open(file_path, "rb") as f: content = f.read() input_config = {"mime_type": mime_type, "content": content} - type_ = enums.Feature.Type.DOCUMENT_TEXT_DETECTION - features_element = {"type": type_} - features = [features_element] + features = [{"type": enums.Feature.Type.DOCUMENT_TEXT_DETECTION}] # The service can process up to 5 pages per document file. Here we specify # the first, second, and last page of the document to be processed. - pages_element = 1 - pages_element_2 = 2 - pages_element_3 = -1 - pages = [pages_element, pages_element_2, pages_element_3] - requests_element = { - "input_config": input_config, - "features": features, - "pages": pages, - } - requests = [requests_element] + pages = [1, 2, -1] + requests = [{"input_config": input_config, "features": features, "pages": pages}] response = client.batch_annotate_files(requests) for image_response in response.responses[0].responses: diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py index ecfcf54cf9e9..03477c8c1101 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py @@ -39,23 +39,13 @@ def sample_batch_annotate_files( gcs_source = {"uri": storage_uri} input_config = {"gcs_source": gcs_source, "mime_type": mime_type} - type_ = enums.Feature.Type.DOCUMENT_TEXT_DETECTION - features_element = {"type": type_} - features = [features_element] + features = [{"type": enums.Feature.Type.DOCUMENT_TEXT_DETECTION}] # The service can process up to 5 pages per document file. # Here we specify the first, second, and last page of the document to be # processed. - pages_element = 1 - pages_element_2 = 2 - pages_element_3 = -1 - pages = [pages_element, pages_element_2, pages_element_3] - requests_element = { - "input_config": input_config, - "features": features, - "pages": pages, - } - requests = [requests_element] + pages = [1, 2, -1] + requests = [{"input_config": input_config, "features": features, "pages": pages}] response = client.batch_annotate_files(requests) for image_response in response.responses[0].responses: diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py index 30c6e1c0ce7e..6e86176232e1 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs_test.py @@ -13,23 +13,16 @@ # limitations under the License. import os -import uuid import vision_batch_annotate_files_gcs -RESOURCES = os.path.join(os.path.dirname(__file__), "resources") GCS_ROOT = "gs://cloud-samples-data/vision/" -BUCKET = os.environ["CLOUD_STORAGE_BUCKET"] -OUTPUT_PREFIX = "TEST_OUTPUT_{}".format(uuid.uuid4()) -GCS_DESTINATION_URI = "gs://{}/{}/".format(BUCKET, OUTPUT_PREFIX) - def test_sample_batch_annotate_files_gcs(capsys): storage_uri = os.path.join(GCS_ROOT, "document_understanding/kafka.pdf") - vision_batch_annotate_files_gcs.sample_batch_annotate_files( - storage_uri=storage_uri) + vision_batch_annotate_files_gcs.sample_batch_annotate_files(storage_uri=storage_uri) out, _ = capsys.readouterr() diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_test.py b/vision/cloud-client/detect/vision_batch_annotate_files_test.py index 105b0d247130..db07c42ffa26 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_test.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_test.py @@ -13,23 +13,16 @@ # limitations under the License. import os -import uuid import vision_batch_annotate_files RESOURCES = os.path.join(os.path.dirname(__file__), "resources") -GCS_ROOT = "gs://cloud-samples-data/vision/" - -BUCKET = os.environ["CLOUD_STORAGE_BUCKET"] -OUTPUT_PREFIX = "TEST_OUTPUT_{}".format(uuid.uuid4()) -GCS_DESTINATION_URI = "gs://{}/{}/".format(BUCKET, OUTPUT_PREFIX) def test_sample_batch_annotate_files(capsys): - file_path = "resources/kafka.pdf" + file_path = os.path.join(RESOURCES, "kafka.pdf") - vision_batch_annotate_files.sample_batch_annotate_files( - file_path=file_path) + vision_batch_annotate_files.sample_batch_annotate_files(file_path=file_path) out, _ = capsys.readouterr() From 7921c89a18e537b1495cf3448112e88880780248 Mon Sep 17 00:00:00 2001 From: Michelle Casbon Date: Mon, 27 Jan 2020 18:41:53 +0000 Subject: [PATCH 11/12] Remove unused region tag and comments Verified that there are no published links pointing to these region tags: vision_async_batch_annotate_images_core vision_batch_annotate_files_core vision_batch_annotate_files_gcs_core --- .../detect/vision_async_batch_annotate_images.py | 6 ------ vision/cloud-client/detect/vision_batch_annotate_files.py | 6 ------ .../cloud-client/detect/vision_batch_annotate_files_gcs.py | 7 ------- 3 files changed, 19 deletions(-) diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images.py b/vision/cloud-client/detect/vision_async_batch_annotate_images.py index 66f7e1e85aad..f1eae7fb4685 100644 --- a/vision/cloud-client/detect/vision_async_batch_annotate_images.py +++ b/vision/cloud-client/detect/vision_async_batch_annotate_images.py @@ -23,13 +23,9 @@ def sample_async_batch_annotate_images( output_uri="gs://your-bucket/prefix/", ): """Perform async batch image annotation""" - # [START vision_async_batch_annotate_images_core] client = vision_v1.ImageAnnotatorClient() - # input_image_uri = "gs://cloud-samples-data/vision/label/wakeupcat.jpg" - # output_uri = "gs://your-bucket/prefix/" - source = {"image_uri": input_image_uri} image = {"source": source} features = [ @@ -52,7 +48,5 @@ def sample_async_batch_annotate_images( gcs_output_uri = response.output_config.gcs_destination.uri print("Output written to GCS with prefix: {}".format(gcs_output_uri)) - # [END vision_async_batch_annotate_images_core] - # [END vision_async_batch_annotate_images] diff --git a/vision/cloud-client/detect/vision_batch_annotate_files.py b/vision/cloud-client/detect/vision_batch_annotate_files.py index 55f4e1319b1a..0cdacec606e2 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files.py @@ -26,12 +26,8 @@ def sample_batch_annotate_files(file_path="resources/kafka.pdf"): Args: file_path Path to local pdf file, e.g. /path/document.pdf """ - # [START vision_batch_annotate_files_core] - client = vision_v1.ImageAnnotatorClient() - # file_path = "resources/kafka.pdf" - # Supported mime_type: application/pdf, image/tiff, image/gif mime_type = "application/pdf" with io.open(file_path, "rb") as f: @@ -61,7 +57,5 @@ def sample_batch_annotate_files(file_path="resources/kafka.pdf"): ) ) - # [END vision_batch_annotate_files_core] - # [END vision_batch_annotate_files] diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py index 03477c8c1101..df23dd0d21b8 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py @@ -28,15 +28,10 @@ def sample_batch_annotate_files( storage_uri Cloud Storage URI to source image in the format gs://[bucket]/ [file] """ - # [START vision_batch_annotate_files_gcs_core] mime_type = "application/pdf" client = vision_v1.ImageAnnotatorClient() - # storage_uri = ( - # "gs://cloud-samples-data/vision/document_understanding/kafka.pdf" - # ) - gcs_source = {"uri": storage_uri} input_config = {"gcs_source": gcs_source, "mime_type": mime_type} features = [{"type": enums.Feature.Type.DOCUMENT_TEXT_DETECTION}] @@ -64,7 +59,5 @@ def sample_batch_annotate_files( ) ) - # [END vision_batch_annotate_files_gcs_core] - # [END vision_batch_annotate_files_gcs] From 4b4838c6a1277566c75697d8db94d5afcaf200d2 Mon Sep 17 00:00:00 2001 From: Michelle Casbon Date: Tue, 28 Jan 2020 18:15:23 +0000 Subject: [PATCH 12/12] Shorten docstring. Replace concrete file path with "path/to/your/document.pdf". --- .../detect/vision_async_batch_annotate_images.py | 3 +-- .../cloud-client/detect/vision_batch_annotate_files.py | 9 ++------- .../detect/vision_batch_annotate_files_gcs.py | 8 +------- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/vision/cloud-client/detect/vision_async_batch_annotate_images.py b/vision/cloud-client/detect/vision_async_batch_annotate_images.py index f1eae7fb4685..bdbf41e10f79 100644 --- a/vision/cloud-client/detect/vision_async_batch_annotate_images.py +++ b/vision/cloud-client/detect/vision_async_batch_annotate_images.py @@ -22,8 +22,7 @@ def sample_async_batch_annotate_images( input_image_uri="gs://cloud-samples-data/vision/label/wakeupcat.jpg", output_uri="gs://your-bucket/prefix/", ): - """Perform async batch image annotation""" - + """Perform async batch image annotation.""" client = vision_v1.ImageAnnotatorClient() source = {"image_uri": input_image_uri} diff --git a/vision/cloud-client/detect/vision_batch_annotate_files.py b/vision/cloud-client/detect/vision_batch_annotate_files.py index 0cdacec606e2..a612ed9d4bfe 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files.py @@ -19,13 +19,8 @@ import io -def sample_batch_annotate_files(file_path="resources/kafka.pdf"): - """ - Perform batch file annotation - - Args: - file_path Path to local pdf file, e.g. /path/document.pdf - """ +def sample_batch_annotate_files(file_path="path/to/your/document.pdf"): + """Perform batch file annotation.""" client = vision_v1.ImageAnnotatorClient() # Supported mime_type: application/pdf, image/tiff, image/gif diff --git a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py index df23dd0d21b8..76c80a381b04 100644 --- a/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py +++ b/vision/cloud-client/detect/vision_batch_annotate_files_gcs.py @@ -21,13 +21,7 @@ def sample_batch_annotate_files( storage_uri="gs://cloud-samples-data/vision/document_understanding/kafka.pdf", ): - """ - Perform batch file annotation - - Args: - storage_uri Cloud Storage URI to source image in the format - gs://[bucket]/ [file] - """ + """Perform batch file annotation.""" mime_type = "application/pdf" client = vision_v1.ImageAnnotatorClient()