- On January 1, 2020 this library will no longer support Python 2 on the latest released version. - Previously released library versions will continue to be available. For more information please + As of January 1, 2020 this library no longer supports Python 2 on the latest released version. + Library versions released prior to that date will continue to be available. For more information please visit Python 2 support on Google Cloud.
{% block body %} {% endblock %} diff --git a/packages/google-cloud-datalabeling/docs/conf.py b/packages/google-cloud-datalabeling/docs/conf.py index ccd6acf345e0..7f935934fc5a 100644 --- a/packages/google-cloud-datalabeling/docs/conf.py +++ b/packages/google-cloud-datalabeling/docs/conf.py @@ -20,6 +20,10 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath("..")) +# For plugins that can not read conf.py. +# See also: https://github.com/docascode/sphinx-docfx-yaml/issues/85 +sys.path.insert(0, os.path.abspath(".")) + __version__ = "" # -- General configuration ------------------------------------------------ @@ -90,7 +94,12 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ["_build"] +exclude_patterns = [ + "_build", + "samples/AUTHORING_GUIDE.md", + "samples/CONTRIBUTING.md", + "samples/snippets/README.rst", +] # The reST default role (used for this markup: `text`) to use for all # documents. diff --git a/packages/google-cloud-datalabeling/noxfile.py b/packages/google-cloud-datalabeling/noxfile.py index cd19801576c7..5849759f9ab3 100644 --- a/packages/google-cloud-datalabeling/noxfile.py +++ b/packages/google-cloud-datalabeling/noxfile.py @@ -100,6 +100,10 @@ def system(session): """Run the system test suite.""" system_test_path = os.path.join("tests", "system.py") system_test_folder_path = os.path.join("tests", "system") + + # Check the value of `RUN_SYSTEM_TESTS` env var. It defaults to true. + if os.environ.get("RUN_SYSTEM_TESTS", "true") == "false": + session.skip("RUN_SYSTEM_TESTS is set to false, skipping") # Sanity check: Only run tests if the environment variable is set. 
if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): session.skip("Credentials must be set via environment variable") @@ -145,14 +149,47 @@ def docs(session): """Build the docs for this library.""" session.install("-e", ".") - session.install("sphinx<3.0.0", "alabaster", "recommonmark") + session.install("sphinx", "alabaster", "recommonmark") + + shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + session.run( + "sphinx-build", + # "-W", # warnings as errors + "-T", # show full traceback on exception + "-N", # no colors + "-b", + "html", + "-d", + os.path.join("docs", "_build", "doctrees", ""), + os.path.join("docs", ""), + os.path.join("docs", "_build", "html", ""), + ) + + +@nox.session(python=DEFAULT_PYTHON_VERSION) +def docfx(session): + """Build the docfx yaml files for this library.""" + + session.install("-e", ".") + session.install("sphinx", "alabaster", "recommonmark", "sphinx-docfx-yaml") shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) session.run( "sphinx-build", - "-W", # warnings as errors "-T", # show full traceback on exception "-N", # no colors + "-D", + ( + "extensions=sphinx.ext.autodoc," + "sphinx.ext.autosummary," + "docfx_yaml.extension," + "sphinx.ext.intersphinx," + "sphinx.ext.coverage," + "sphinx.ext.napoleon," + "sphinx.ext.todo," + "sphinx.ext.viewcode," + "recommonmark" + ), "-b", "html", "-d", diff --git a/packages/google-cloud-datalabeling/samples/AUTHORING_GUIDE.md b/packages/google-cloud-datalabeling/samples/AUTHORING_GUIDE.md new file mode 100644 index 000000000000..55c97b32f4c1 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/AUTHORING_GUIDE.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/AUTHORING_GUIDE.md \ No newline at end of file diff --git a/packages/google-cloud-datalabeling/samples/CONTRIBUTING.md b/packages/google-cloud-datalabeling/samples/CONTRIBUTING.md new file mode 100644 index 000000000000..34c882b6f1a3 --- /dev/null 
+++ b/packages/google-cloud-datalabeling/samples/CONTRIBUTING.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/CONTRIBUTING.md \ No newline at end of file diff --git a/packages/google-cloud-datalabeling/samples/snippets/README.rst b/packages/google-cloud-datalabeling/samples/snippets/README.rst new file mode 100644 index 000000000000..bf5949b8cb79 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/README.rst @@ -0,0 +1,78 @@ +.. This file is automatically generated. Do not edit this file directly. + +Google Cloud Data Labeling Service Python Samples +=============================================================================== + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=datalabeling/README.rst + + +This directory contains samples for Google Cloud Data Labeling Service. `Google Cloud Data Labeling Service`_ allows developers to request having human labelers label a collection of data that you plan to use to train a custom machine learning model. + + + + +.. _Google Cloud Data Labeling Service: https://cloud.google.com/data-labeling/docs/ + +Setup +------------------------------------------------------------------------------- + + +Authentication +++++++++++++++ + +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. + +.. _Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started + +Install Dependencies +++++++++++++++++++++ + +#. Clone python-docs-samples and change directory to the sample directory you want to use. + + .. code-block:: bash + + $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + +#. 
Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ + + + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. _Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. _browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. _report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/packages/google-cloud-datalabeling/samples/snippets/README.rst.in b/packages/google-cloud-datalabeling/samples/snippets/README.rst.in new file mode 100644 index 000000000000..c87a1ff89b49 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/README.rst.in @@ -0,0 +1,18 @@ +# This file is used to generate README.rst + +product: + name: Google Cloud Data Labeling Service + short_name: Cloud Data Labeling + url: https://cloud.google.com/data-labeling/docs/ + description: > + `Google Cloud Data Labeling Service`_ allows developers to request having + human labelers label a collection of data that you plan to use to train a + custom machine learning model. 
+ +setup: +- auth +- install_deps + +cloud_client_library: true + +folder: datalabeling \ No newline at end of file diff --git a/packages/google-cloud-datalabeling/samples/snippets/create_annotation_spec_set.py b/packages/google-cloud-datalabeling/samples/snippets/create_annotation_spec_set.py new file mode 100644 index 000000000000..4a8add772e6a --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/create_annotation_spec_set.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_create_annotation_spec_set_beta] +def create_annotation_spec_set(project_id): + """Creates a data labeling annotation spec set for the given + Google Cloud project. 
+ """ + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_create_annotation_spec_set_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if 'DATALABELING_ENDPOINT' in os.environ: + opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT')) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_create_annotation_spec_set_beta] + + project_path = client.project_path(project_id) + + annotation_spec_1 = datalabeling.types.AnnotationSpec( + display_name='label_1', + description='label_description_1' + ) + + annotation_spec_2 = datalabeling.types.AnnotationSpec( + display_name='label_2', + description='label_description_2' + ) + + annotation_spec_set = datalabeling.types.AnnotationSpecSet( + display_name='YOUR_ANNOTATION_SPEC_SET_DISPLAY_NAME', + description='YOUR_DESCRIPTION', + annotation_specs=[annotation_spec_1, annotation_spec_2] + ) + + response = client.create_annotation_spec_set( + project_path, annotation_spec_set) + + # The format of the resource name: + # projects/{project_id}/annotationSpecSets/{annotation_spec_set_id} + print('The annotation_spec_set resource name: {}'.format(response.name)) + print('Display name: {}'.format(response.display_name)) + print('Description: {}'.format(response.description)) + print('Annotation specs:') + for annotation_spec in response.annotation_specs: + print('\tDisplay name: {}'.format(annotation_spec.display_name)) + print('\tDescription: {}\n'.format(annotation_spec.description)) + + return response +# [END datalabeling_create_annotation_spec_set_beta] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + '--project-id', + help='Project ID. 
Required.', + required=True + ) + + args = parser.parse_args() + + create_annotation_spec_set(args.project_id) diff --git a/packages/google-cloud-datalabeling/samples/snippets/create_annotation_spec_set_test.py b/packages/google-cloud-datalabeling/samples/snippets/create_annotation_spec_set_test.py new file mode 100644 index 000000000000..6ae5ee5d1b3c --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/create_annotation_spec_set_test.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +# Copyright 2019 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import backoff +from google.api_core.exceptions import ServerError +import pytest + +import create_annotation_spec_set +import testing_lib + + +PROJECT_ID = os.getenv('GOOGLE_CLOUD_PROJECT') + + +@pytest.fixture(scope='module') +def cleaner(): + resource_names = [] + + yield resource_names + + for resource_name in resource_names: + testing_lib.delete_annotation_spec_set(resource_name) + + +def test_create_annotation_spec_set(cleaner, capsys): + + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE) + def run_sample(): + return create_annotation_spec_set.create_annotation_spec_set(PROJECT_ID) + + response = run_sample() + + # For cleanup + cleaner.append(response.name) + + out, _ = capsys.readouterr() + assert 'The annotation_spec_set resource name:' in out diff --git a/packages/google-cloud-datalabeling/samples/snippets/create_instruction.py b/packages/google-cloud-datalabeling/samples/snippets/create_instruction.py new file mode 100644 index 000000000000..5495acbaf18d --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/create_instruction.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_create_instruction_beta] +def create_instruction(project_id, data_type, instruction_gcs_uri): + """ Creates a data labeling PDF instruction for the given Google Cloud + project. The PDF file should be uploaded to the project in + Google Cloud Storage. + """ + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_create_instruction_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if 'DATALABELING_ENDPOINT' in os.environ: + opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT')) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_create_instruction_beta] + + project_path = client.project_path(project_id) + + pdf_instruction = datalabeling.types.PdfInstruction( + gcs_file_uri=instruction_gcs_uri) + + instruction = datalabeling.types.Instruction( + display_name='YOUR_INSTRUCTION_DISPLAY_NAME', + description='YOUR_DESCRIPTION', + data_type=data_type, + pdf_instruction=pdf_instruction + ) + + operation = client.create_instruction(project_path, instruction) + + result = operation.result() + + # The format of the resource name: + # projects/{project_id}/instructions/{instruction_id} + print('The instruction resource name: {}'.format(result.name)) + print('Display name: {}'.format(result.display_name)) + print('Description: {}'.format(result.description)) + print('Create time:') + print('\tseconds: {}'.format(result.create_time.seconds)) + print('\tnanos: {}'.format(result.create_time.nanos)) + print('Data type: {}'.format( + datalabeling.enums.DataType(result.data_type).name)) + print('Pdf instruction:') + print('\tGcs file uri: {}\n'.format( + result.pdf_instruction.gcs_file_uri)) + + return result +# [END 
datalabeling_create_instruction_beta] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + '--project-id', + help='Project ID. Required.', + required=True + ) + + parser.add_argument( + '--data-type', + help='Data type. Only support IMAGE, VIDEO, TEXT and AUDIO. Required.', + required=True + ) + + parser.add_argument( + '--instruction-gcs-uri', + help='The URI of Google Cloud Storage of the instruction. Required.', + required=True + ) + + args = parser.parse_args() + + create_instruction( + args.project_id, + args.data_type, + args.instruction_gcs_uri + ) diff --git a/packages/google-cloud-datalabeling/samples/snippets/create_instruction_test.py b/packages/google-cloud-datalabeling/samples/snippets/create_instruction_test.py new file mode 100644 index 000000000000..b164da0c13ba --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/create_instruction_test.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +# Copyright 2019 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import backoff +from google.api_core.exceptions import ServerError +import pytest + +import create_instruction +import testing_lib + + +PROJECT_ID = os.getenv('GOOGLE_CLOUD_PROJECT') +INSTRUCTION_GCS_URI = ('gs://cloud-samples-data/datalabeling' + '/instruction/test.pdf') + + +@pytest.fixture(scope='module') +def cleaner(): + resource_names = [] + + yield resource_names + + for resource_name in resource_names: + testing_lib.delete_instruction(resource_name) + + +def test_create_instruction(cleaner, capsys): + + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE) + def run_sample(): + return create_instruction.create_instruction( + PROJECT_ID, 'IMAGE', INSTRUCTION_GCS_URI) + + instruction = run_sample() + cleaner.append(instruction.name) + + out, _ = capsys.readouterr() + assert 'The instruction resource name: ' in out diff --git a/packages/google-cloud-datalabeling/samples/snippets/export_data.py b/packages/google-cloud-datalabeling/samples/snippets/export_data.py new file mode 100644 index 000000000000..f70dc9c588d7 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/export_data.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_export_data_beta] +def export_data(dataset_resource_name, annotated_dataset_resource_name, + export_gcs_uri): + """Exports a dataset from the given Google Cloud project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_export_data_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if 'DATALABELING_ENDPOINT' in os.environ: + opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT')) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_export_data_beta] + + gcs_destination = datalabeling.types.GcsDestination( + output_uri=export_gcs_uri, mime_type='text/csv') + + output_config = datalabeling.types.OutputConfig( + gcs_destination=gcs_destination) + + response = client.export_data( + dataset_resource_name, + annotated_dataset_resource_name, + output_config + ) + + print('Dataset ID: {}\n'.format(response.result().dataset)) + print('Output config:') + print('\tGcs destination:') + print('\t\tOutput URI: {}\n'.format( + response.result().output_config.gcs_destination.output_uri)) +# [END datalabeling_export_data_beta] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + '--dataset-resource-name', + help='Dataset resource name. Required.', + required=True + ) + + parser.add_argument( + '--annotated-dataset-resource-name', + help='Annotated Dataset resource name. Required.', + required=True + ) + + parser.add_argument( + '--export-gcs-uri', + help='The export GCS URI. 
Required.', + required=True + ) + + args = parser.parse_args() + + export_data( + args.dataset_resource_name, + args.annotated_dataset_resource_name, + args.export_gcs_uri + ) diff --git a/packages/google-cloud-datalabeling/samples/snippets/import_data.py b/packages/google-cloud-datalabeling/samples/snippets/import_data.py new file mode 100644 index 000000000000..01c3201845f0 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/import_data.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_import_data_beta] +def import_data(dataset_resource_name, data_type, input_gcs_uri): + """Imports data to the given Google Cloud project and dataset.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_import_data_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if 'DATALABELING_ENDPOINT' in os.environ: + opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT')) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_import_data_beta] + + gcs_source = datalabeling.types.GcsSource( + input_uri=input_gcs_uri, mime_type='text/csv') + + csv_input_config = datalabeling.types.InputConfig( + data_type=data_type, gcs_source=gcs_source) + + response = client.import_data(dataset_resource_name, csv_input_config) + + result = response.result() + + # The format of the resource name: + # projects/{project_id}/datasets/{dataset_id} + print('Dataset resource name: {}\n'.format(result.dataset)) + + return result +# [END datalabeling_import_data_beta] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + '--dataset-resource-name', + help='Dataset resource name. Required.', + required=True + ) + + parser.add_argument( + '--data-type', + help='Data type. Only support IMAGE, VIDEO, TEXT and AUDIO. Required.', + required=True + ) + + parser.add_argument( + '--input-gcs-uri', + help='The GCS URI of the input dataset. 
Required.', + required=True + ) + + args = parser.parse_args() + + import_data(args.dataset_resource_name, args.data_type, args.input_gcs_uri) diff --git a/packages/google-cloud-datalabeling/samples/snippets/import_data_test.py b/packages/google-cloud-datalabeling/samples/snippets/import_data_test.py new file mode 100644 index 000000000000..246cfba4b573 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/import_data_test.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +# Copyright 2019 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import backoff +from google.api_core.exceptions import ServerError +import pytest + +import import_data +import testing_lib + + +PROJECT_ID = os.getenv('GOOGLE_CLOUD_PROJECT') +INPUT_GCS_URI = 'gs://cloud-samples-data/datalabeling/image/image_dataset.csv' + + +@pytest.fixture(scope='module') +def dataset(): + # create a temporary dataset + dataset = testing_lib.create_dataset(PROJECT_ID) + + yield dataset + + # tear down + testing_lib.delete_dataset(dataset.name) + + +def test_import_data(capsys, dataset): + + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE) + def run_sample(): + import_data.import_data(dataset.name, 'IMAGE', INPUT_GCS_URI) + + run_sample() + out, _ = capsys.readouterr() + assert 'Dataset resource name: ' in out diff --git a/packages/google-cloud-datalabeling/samples/snippets/label_image.py b/packages/google-cloud-datalabeling/samples/snippets/label_image.py new file mode 100644 index 000000000000..19a10ebc83c5 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/label_image.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_label_image_beta] +def label_image(dataset_resource_name, instruction_resource_name, + annotation_spec_set_resource_name): + """Labels an image dataset.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_label_image_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if 'DATALABELING_ENDPOINT' in os.environ: + opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT')) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_label_image_beta] + + basic_config = datalabeling.types.HumanAnnotationConfig( + instruction=instruction_resource_name, + annotated_dataset_display_name='YOUR_ANNOTATED_DATASET_DISPLAY_NAME', + label_group='YOUR_LABEL_GROUP', + replica_count=1 + ) + + feature = datalabeling.enums.LabelImageRequest.Feature.CLASSIFICATION + + config = datalabeling.types.ImageClassificationConfig( + annotation_spec_set=annotation_spec_set_resource_name, + allow_multi_label=False, + answer_aggregation_type=datalabeling.enums.StringAggregationType + .MAJORITY_VOTE + ) + + response = client.label_image( + dataset_resource_name, + basic_config, + feature, + image_classification_config=config + ) + + print('Label_image operation name: {}'.format(response.operation.name)) + return response +# [END datalabeling_label_image_beta] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + '--dataset-resource-name', + help='Dataset resource name. Required.', + required=True + ) + + parser.add_argument( + '--instruction-resource-name', + help='Instruction resource name. 
Required.', + required=True + ) + + parser.add_argument( + '--annotation-spec-set-resource-name', + help='Annotation spec set resource name. Required.', + required=True + ) + + args = parser.parse_args() + + label_image( + args.dataset_resource_name, + args.instruction_resource_name, + args.annotation_spec_set_resource_name + ) diff --git a/packages/google-cloud-datalabeling/samples/snippets/label_image_test.py b/packages/google-cloud-datalabeling/samples/snippets/label_image_test.py new file mode 100644 index 000000000000..a6feee42c1d8 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/label_image_test.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python + +# Copyright 2019 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import backoff +from google.api_core.exceptions import ServerError +import pytest + +import label_image +import testing_lib + + +PROJECT_ID = os.getenv('GOOGLE_CLOUD_PROJECT') +INPUT_GCS_URI = 'gs://cloud-samples-data/datalabeling/image/image_dataset.csv' +INSTRUCTION_GCS_URI = ('gs://cloud-samples-data/datalabeling' + '/instruction/test.pdf') + + +@pytest.fixture(scope='module') +def dataset(): + # create a temporary dataset + dataset = testing_lib.create_dataset(PROJECT_ID) + + testing_lib.import_data(dataset.name, 'IMAGE', INPUT_GCS_URI) + yield dataset + + # tear down + testing_lib.delete_dataset(dataset.name) + + +@pytest.fixture(scope='module') +def annotation_spec_set(): + # create a temporary annotation_spec_set + response = testing_lib.create_annotation_spec_set(PROJECT_ID) + + yield response + + testing_lib.delete_annotation_spec_set(response.name) + + +@pytest.fixture(scope='module') +def instruction(): + # create a temporary instruction + instruction = testing_lib.create_instruction( + PROJECT_ID, 'IMAGE', INSTRUCTION_GCS_URI) + + yield instruction + + # tear down + testing_lib.delete_instruction(instruction.name) + + +@pytest.fixture(scope='module') +def cleaner(): + resource_names = [] + + yield resource_names + + for resource_name in resource_names: + testing_lib.cancel_operation(resource_name) + + +# Passing in dataset after annotation_spec_set in test_label_image since it +# needs to be deleted before the annotation_spec_set can be deleted. +def test_label_image( + capsys, annotation_spec_set, instruction, dataset, cleaner): + + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE) + def run_sample(): + # Start labeling. 
+ return label_image.label_image( + dataset.name, instruction.name, annotation_spec_set.name) + + response = run_sample() + cleaner.append(response.operation.name) + + out, _ = capsys.readouterr() + assert 'Label_image operation name: ' in out diff --git a/packages/google-cloud-datalabeling/samples/snippets/label_text.py b/packages/google-cloud-datalabeling/samples/snippets/label_text.py new file mode 100644 index 000000000000..6b8659035975 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/label_text.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_label_text_beta] +def label_text(dataset_resource_name, instruction_resource_name, + annotation_spec_set_resource_name): + """Labels a text dataset.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_label_text_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if 'DATALABELING_ENDPOINT' in os.environ: + opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT')) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_label_text_beta] + + basic_config = datalabeling.types.HumanAnnotationConfig( + instruction=instruction_resource_name, + annotated_dataset_display_name='YOUR_ANNOTATED_DATASET_DISPLAY_NAME', + label_group='YOUR_LABEL_GROUP', + replica_count=1 + ) + + feature = (datalabeling.enums.LabelTextRequest. + Feature.TEXT_ENTITY_EXTRACTION) + + config = datalabeling.types.TextEntityExtractionConfig( + annotation_spec_set=annotation_spec_set_resource_name) + + response = client.label_text( + parent=dataset_resource_name, + basic_config=basic_config, + feature=feature, + text_entity_extraction_config=config + ) + + print('Label_text operation name: {}'.format(response.operation.name)) + return response +# [END datalabeling_label_text_beta] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + '--dataset-resource-name', + help='Dataset resource name. Required.', + required=True + ) + + parser.add_argument( + '--instruction-resource-name', + help='Instruction resource name. 
Required.', + required=True + ) + + parser.add_argument( + '--annotation-spec-set-resource-name', + help='Annotation spec set resource name. Required.', + required=True + ) + + args = parser.parse_args() + + label_text( + args.dataset_resource_name, + args.instruction_resource_name, + args.annotation_spec_set_resource_name + ) diff --git a/packages/google-cloud-datalabeling/samples/snippets/label_text_test.py b/packages/google-cloud-datalabeling/samples/snippets/label_text_test.py new file mode 100644 index 000000000000..c90024b97336 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/label_text_test.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python + +# Copyright 2019 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import backoff +from google.api_core.exceptions import ServerError +import pytest + +import label_text +import testing_lib + +PROJECT_ID = os.getenv('GOOGLE_CLOUD_PROJECT') +INPUT_GCS_URI = 'gs://cloud-samples-data/datalabeling/text/input.csv' +INSTRUCTION_GCS_URI = ('gs://cloud-samples-data/datalabeling' + '/instruction/test.pdf') + + +@pytest.fixture(scope='module') +def dataset(): + # create a temporary dataset + dataset = testing_lib.create_dataset(PROJECT_ID) + + testing_lib.import_data(dataset.name, 'TEXT', INPUT_GCS_URI) + + yield dataset + + # tear down + testing_lib.delete_dataset(dataset.name) + + +@pytest.fixture(scope='module') +def annotation_spec_set(): + # create a temporary annotation_spec_set + response = testing_lib.create_annotation_spec_set(PROJECT_ID) + + yield response + + testing_lib.delete_annotation_spec_set(response.name) + + +@pytest.fixture(scope='module') +def instruction(): + # create a temporary instruction + instruction = testing_lib.create_instruction( + PROJECT_ID, 'IMAGE', INSTRUCTION_GCS_URI) + + yield instruction + + # tear down + testing_lib.delete_instruction(instruction.name) + + +@pytest.fixture(scope='module') +def cleaner(): + resource_names = [] + + yield resource_names + + for resource_name in resource_names: + testing_lib.cancel_operation(resource_name) + + +# Passing in dataset as the last argument in test_label_text since it needs +# to be deleted before the annotation_spec_set can be deleted. +@pytest.mark.skip("Constantly failing") +def test_label_text(capsys, annotation_spec_set, instruction, dataset, cleaner): + + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE) + def run_sample(): + # Start labeling. 
+ return label_text.label_text( + dataset.name, instruction.name, annotation_spec_set.name) + + response = run_sample() + cleaner.append(response.operation.name) + + out, _ = capsys.readouterr() + assert 'Label_text operation name: ' in out + + # Cancels the labeling operation. + response.cancel() + assert response.cancelled() is True diff --git a/packages/google-cloud-datalabeling/samples/snippets/label_video.py b/packages/google-cloud-datalabeling/samples/snippets/label_video.py new file mode 100644 index 000000000000..a3425b4745c0 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/label_video.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_label_video_beta] +def label_video(dataset_resource_name, instruction_resource_name, + annotation_spec_set_resource_name): + """Labels a video dataset.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_label_video_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if 'DATALABELING_ENDPOINT' in os.environ: + opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT')) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_label_video_beta] + + basic_config = datalabeling.types.HumanAnnotationConfig( + instruction=instruction_resource_name, + annotated_dataset_display_name='YOUR_ANNOTATED_DATASET_DISPLAY_NAME', + label_group='YOUR_LABEL_GROUP', + replica_count=1 + ) + + feature = datalabeling.enums.LabelVideoRequest.Feature.OBJECT_TRACKING + + config = datalabeling.types.ObjectTrackingConfig( + annotation_spec_set=annotation_spec_set_resource_name + ) + + response = client.label_video( + dataset_resource_name, + basic_config, + feature, + object_tracking_config=config + ) + + print('Label_video operation name: {}'.format(response.operation.name)) + return response +# [END datalabeling_label_video_beta] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + '--dataset-resource-name', + help='Dataset resource name. Required.', + required=True + ) + + parser.add_argument( + '--instruction-resource-name', + help='Instruction resource name. Required.', + required=True + ) + + parser.add_argument( + '--annotation-spec-set-resource-name', + help='Annotation spec set resource name. 
Required.', + required=True + ) + + args = parser.parse_args() + + label_video( + args.dataset_resource_name, + args.instruction_resource_name, + args.annotation_spec_set_resource_name + ) diff --git a/packages/google-cloud-datalabeling/samples/snippets/label_video_test.py b/packages/google-cloud-datalabeling/samples/snippets/label_video_test.py new file mode 100644 index 000000000000..05d3c5a3b124 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/label_video_test.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python + +# Copyright 2019 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import backoff +from google.api_core.exceptions import ServerError +import pytest + +import label_video +import testing_lib + +PROJECT_ID = os.getenv('GOOGLE_CLOUD_PROJECT') +INPUT_GCS_URI = 'gs://cloud-samples-data/datalabeling/videos/video_dataset.csv' +INSTRUCTION_GCS_URI = ('gs://cloud-samples-data/datalabeling' + '/instruction/test.pdf') + + +@pytest.fixture(scope='module') +def dataset(): + # create a temporary dataset + dataset = testing_lib.create_dataset(PROJECT_ID) + + testing_lib.import_data(dataset.name, 'VIDEO', INPUT_GCS_URI) + + yield dataset + + # tear down + testing_lib.delete_dataset(dataset.name) + + +@pytest.fixture(scope='module') +def annotation_spec_set(): + # create a temporary annotation_spec_set + response = testing_lib.create_annotation_spec_set(PROJECT_ID) + + yield response + + testing_lib.delete_annotation_spec_set(response.name) + + +@pytest.fixture(scope='module') +def instruction(): + # create a temporary instruction + instruction = testing_lib.create_instruction( + PROJECT_ID, 'VIDEO', INSTRUCTION_GCS_URI) + + yield instruction + + # tear down + testing_lib.delete_instruction(instruction.name) + + +@pytest.fixture(scope='module') +def cleaner(): + resource_names = [] + + yield resource_names + + for resource_name in resource_names: + testing_lib.cancel_operation(resource_name) + + +# Passing in dataset as the last argument in test_label_video since it needs +# to be deleted before the annotation_spec_set can be deleted. +def test_label_video( + capsys, annotation_spec_set, instruction, dataset, cleaner): + + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE) + def run_sample(): + # Start labeling. + return label_video.label_video( + dataset.name, instruction.name, annotation_spec_set.name) + + response = run_sample() + cleaner.append(response.operation.name) + + out, _ = capsys.readouterr() + assert 'Label_video operation name: ' in out + + # Cancels the labeling operation. 
+ response.cancel() + assert response.cancelled() is True diff --git a/packages/google-cloud-datalabeling/samples/snippets/manage_dataset.py b/packages/google-cloud-datalabeling/samples/snippets/manage_dataset.py new file mode 100644 index 000000000000..a100bf4b9b22 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/manage_dataset.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_create_dataset_beta] +def create_dataset(project_id): + """Creates a dataset for the given Google Cloud project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_create_dataset_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if 'DATALABELING_ENDPOINT' in os.environ: + opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT')) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_create_dataset_beta] + + formatted_project_name = client.project_path(project_id) + + dataset = datalabeling.types.Dataset( + display_name='YOUR_DATASET_SET_DISPLAY_NAME', + description='YOUR_DESCRIPTION' + ) + + response = client.create_dataset(formatted_project_name, dataset) 
+ + # The format of resource name: + # project_id/{project_id}/datasets/{dataset_id} + print('The dataset resource name: {}'.format(response.name)) + print('Display name: {}'.format(response.display_name)) + print('Description: {}'.format(response.description)) + print('Create time:') + print('\tseconds: {}'.format(response.create_time.seconds)) + print('\tnanos: {}\n'.format(response.create_time.nanos)) + + return response +# [END datalabeling_create_dataset_beta] + + +# [START datalabeling_list_datasets_beta] +def list_datasets(project_id): + """Lists datasets for the given Google Cloud project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_list_datasets_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if 'DATALABELING_ENDPOINT' in os.environ: + opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT')) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_list_datasets_beta] + + formatted_project_name = client.project_path(project_id) + + response = client.list_datasets(formatted_project_name) + for element in response: + # The format of resource name: + # project_id/{project_id}/datasets/{dataset_id} + print('The dataset resource name: {}\n'.format(element.name)) + print('Display name: {}'.format(element.display_name)) + print('Description: {}'.format(element.description)) + print('Create time:') + print('\tseconds: {}'.format(element.create_time.seconds)) + print('\tnanos: {}'.format(element.create_time.nanos)) +# [END datalabeling_list_datasets_beta] + + +# [START datalabeling_get_dataset_beta] +def get_dataset(dataset_resource_name): + """Gets a dataset for the given Google Cloud project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + # [END 
datalabeling_get_dataset_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if 'DATALABELING_ENDPOINT' in os.environ: + opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT')) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_get_dataset_beta] + + response = client.get_dataset(dataset_resource_name) + + print('The dataset resource name: {}\n'.format(response.name)) + print('Display name: {}'.format(response.display_name)) + print('Description: {}'.format(response.description)) + print('Create time:') + print('\tseconds: {}'.format(response.create_time.seconds)) + print('\tnanos: {}'.format(response.create_time.nanos)) +# [END datalabeling_get_dataset_beta] + + +# [START datalabeling_delete_dataset_beta] +def delete_dataset(dataset_resource_name): + """Deletes a dataset for the given Google Cloud project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_delete_dataset_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if 'DATALABELING_ENDPOINT' in os.environ: + opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT')) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_delete_dataset_beta] + + response = client.delete_dataset(dataset_resource_name) + + print('Dataset deleted. 
{}\n'.format(response)) +# [END datalabeling_delete_dataset_beta] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + subparsers = parser.add_subparsers(dest='command') + + create_parser = subparsers.add_parser( + 'create', help='Create a new dataset.') + create_parser.add_argument( + '--project-id', + help='Project ID. Required.', + required=True + ) + + list_parser = subparsers.add_parser('list', help='List all datasets.') + list_parser.add_argument( + '--project-id', + help='Project ID. Required.', + required=True + ) + + get_parser = subparsers.add_parser( + 'get', help='Get a dataset by the dataset resource name.') + get_parser.add_argument( + '--dataset-resource-name', + help='The dataset resource name. Used in the get or delete operation.', + required=True + ) + + delete_parser = subparsers.add_parser( + 'delete', help='Delete a dataset by the dataset resource name.') + delete_parser.add_argument( + '--dataset-resource-name', + help='The dataset resource name. Used in the get or delete operation.', + required=True + ) + + args = parser.parse_args() + + if args.command == 'create': + create_dataset(args.project_id) + elif args.command == 'list': + list_datasets(args.project_id) + elif args.command == 'get': + get_dataset(args.dataset_resource_name) + elif args.command == 'delete': + delete_dataset(args.dataset_resource_name) diff --git a/packages/google-cloud-datalabeling/samples/snippets/manage_dataset_test.py b/packages/google-cloud-datalabeling/samples/snippets/manage_dataset_test.py new file mode 100644 index 000000000000..b6abc25829bb --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/manage_dataset_test.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python + +# Copyright 2019 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import backoff +from google.api_core.exceptions import RetryError +from google.api_core.exceptions import ServerError +import pytest + +import manage_dataset +import testing_lib + + +PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT") + + +@pytest.fixture(scope='module') +def dataset(): + # create a temporary dataset + dataset = testing_lib.create_dataset(PROJECT_ID) + + yield dataset + + # tear down + testing_lib.delete_dataset(dataset.name) + + +@pytest.fixture(scope='module') +def cleaner(): + # First delete old datasets. + try: + testing_lib.delete_old_datasets(PROJECT_ID) + # We see occasional RetryError while deleting old datasets. + # We can just ignore it and move on. 
+ except RetryError as e: + print("delete_old_datasets failed: detail {}".format(e)) + + resource_names = [] + + yield resource_names + + for resource_name in resource_names: + testing_lib.delete_dataset(resource_name) + + +def test_create_dataset(cleaner, capsys): + + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE) + def run_sample(): + return manage_dataset.create_dataset(PROJECT_ID) + + response = run_sample() + cleaner.append(response.name) + + out, _ = capsys.readouterr() + assert "The dataset resource name:" in out + + +@pytest.mark.skip("Constantly failing") +def test_list_dataset(capsys, dataset): + + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE) + def run_sample(): + manage_dataset.list_datasets(PROJECT_ID) + + run_sample() + out, _ = capsys.readouterr() + assert dataset.name in out + + +def test_get_dataset(capsys, dataset): + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE) + def run_sample(): + manage_dataset.get_dataset(dataset.name) + + run_sample() + out, _ = capsys.readouterr() + assert "The dataset resource name:" in out + + +def test_delete_dataset(capsys, dataset): + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE) + def run_sample(): + manage_dataset.delete_dataset(dataset.name) + + run_sample() + out, _ = capsys.readouterr() + assert "Dataset deleted." in out diff --git a/packages/google-cloud-datalabeling/samples/snippets/noxfile.py b/packages/google-cloud-datalabeling/samples/snippets/noxfile.py new file mode 100644 index 000000000000..ba55d7ce53ca --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/noxfile.py @@ -0,0 +1,224 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +from pathlib import Path +import sys + +import nox + + +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING +# DO NOT EDIT THIS FILE EVER! +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING + +# Copy `noxfile_config.py` to your directory and modify it instead. + + +# `TEST_CONFIG` dict is a configuration hook that allows users to +# modify the test configurations. The values here should be in sync +# with `noxfile_config.py`. Users will copy `noxfile_config.py` into +# their directory and modify it. + +TEST_CONFIG = { + # You can opt out from the test for specific Python versions. + 'ignored_versions': ["2.7"], + + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + 'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT', + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + 'envs': {}, +} + + +try: + # Ensure we can import noxfile_config in the project's directory. 
+ sys.path.append('.') + from noxfile_config import TEST_CONFIG_OVERRIDE +except ImportError as e: + print("No user noxfile_config found: detail: {}".format(e)) + TEST_CONFIG_OVERRIDE = {} + +# Update the TEST_CONFIG with the user supplied values. +TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) + + +def get_pytest_env_vars(): + """Returns a dict for pytest invocation.""" + ret = {} + + # Override the GCLOUD_PROJECT and the alias. + env_key = TEST_CONFIG['gcloud_project_env'] + # This should error out if not set. + ret['GOOGLE_CLOUD_PROJECT'] = os.environ[env_key] + + # Apply user supplied envs. + ret.update(TEST_CONFIG['envs']) + return ret + + +# DO NOT EDIT - automatically generated. +# All versions used to tested samples. +ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"] + +# Any default versions that should be ignored. +IGNORED_VERSIONS = TEST_CONFIG['ignored_versions'] + +TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) + +INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +# +# Style Checks +# + + +def _determine_local_import_names(start_dir): + """Determines all import names that should be considered "local". + + This is used when running the linter to insure that import order is + properly checked. + """ + file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] + return [ + basename + for basename, extension in file_ext_pairs + if extension == ".py" + or os.path.isdir(os.path.join(start_dir, basename)) + and basename not in ("__pycache__") + ] + + +# Linting with flake8. 
+# +# We ignore the following rules: +# E203: whitespace before ‘:’ +# E266: too many leading ‘#’ for block comment +# E501: line too long +# I202: Additional newline in a section of imports +# +# We also need to specify the rules which are ignored by default: +# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121'] +FLAKE8_COMMON_ARGS = [ + "--show-source", + "--builtin=gettext", + "--max-complexity=20", + "--import-order-style=google", + "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", + "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", + "--max-line-length=88", +] + + +@nox.session +def lint(session): + session.install("flake8", "flake8-import-order") + + local_names = _determine_local_import_names(".") + args = FLAKE8_COMMON_ARGS + [ + "--application-import-names", + ",".join(local_names), + "." + ] + session.run("flake8", *args) + + +# +# Sample Tests +# + + +PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] + + +def _session_tests(session, post_install=None): + """Runs py.test for a particular project.""" + if os.path.exists("requirements.txt"): + session.install("-r", "requirements.txt") + + if os.path.exists("requirements-test.txt"): + session.install("-r", "requirements-test.txt") + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. 
+ # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars() + ) + + +@nox.session(python=ALL_VERSIONS) +def py(session): + """Runs py.test for a sample using the specified version of Python.""" + if session.python in TESTED_VERSIONS: + _session_tests(session) + else: + session.skip("SKIPPED: {} tests are disabled for this sample.".format( + session.python + )) + + +# +# Readmegen +# + + +def _get_repo_root(): + """ Returns the root folder of the project. """ + # Get root of this repository. Assume we don't have directories nested deeper than 10 items. + p = Path(os.getcwd()) + for i in range(10): + if p is None: + break + if Path(p / ".git").exists(): + return str(p) + p = p.parent + raise Exception("Unable to detect repository root.") + + +GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")]) + + +@nox.session +@nox.parametrize("path", GENERATED_READMES) +def readmegen(session, path): + """(Re-)generates the readme for a sample.""" + session.install("jinja2", "pyyaml") + dir_ = os.path.dirname(path) + + if os.path.exists(os.path.join(dir_, "requirements.txt")): + session.install("-r", os.path.join(dir_, "requirements.txt")) + + in_file = os.path.join(dir_, "README.rst.in") + session.run( + "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file + ) diff --git a/packages/google-cloud-datalabeling/samples/snippets/noxfile_config.py b/packages/google-cloud-datalabeling/samples/snippets/noxfile_config.py new file mode 100644 index 000000000000..58569885fabf --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/noxfile_config.py @@ -0,0 +1,41 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be imported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7"], + # Declare optional test sessions you want to opt-in. Currently we + # have the following optional test sessions: + # 'cloud_run' # Test session for Cloud Run application. + "opt_in_sessions": [], + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + # 'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT', + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. 
+ "envs": { + # For Datalabeling samples to hit the testing endpoint + "DATALABELING_ENDPOINT": "test-datalabeling.sandbox.googleapis.com:443" + }, +} diff --git a/packages/google-cloud-datalabeling/samples/snippets/requirements-test.txt b/packages/google-cloud-datalabeling/samples/snippets/requirements-test.txt new file mode 100644 index 000000000000..d0029c6de49e --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/requirements-test.txt @@ -0,0 +1,2 @@ +backoff==1.10.0 +pytest==6.0.1 diff --git a/packages/google-cloud-datalabeling/samples/snippets/requirements.txt b/packages/google-cloud-datalabeling/samples/snippets/requirements.txt new file mode 100644 index 000000000000..56ba790e276d --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/requirements.txt @@ -0,0 +1 @@ +google-cloud-datalabeling==0.4.0 diff --git a/packages/google-cloud-datalabeling/samples/snippets/testing_lib.py b/packages/google-cloud-datalabeling/samples/snippets/testing_lib.py new file mode 100644 index 000000000000..c9674a9bd785 --- /dev/null +++ b/packages/google-cloud-datalabeling/samples/snippets/testing_lib.py @@ -0,0 +1,105 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Shared helpers for the Data Labeling sample tests.

Most helpers wrap a sample entry point (or a direct client call) with
``backoff`` so that ``DeadlineExceeded`` errors are retried with exponential
backoff for up to ``RETRY_DEADLINE`` seconds before being re-raised.
"""

import os
import time

import backoff
from google.api_core.client_options import ClientOptions
from google.api_core.exceptions import DeadlineExceeded
from google.api_core.exceptions import FailedPrecondition
from google.cloud import datalabeling_v1beta1 as datalabeling

import create_annotation_spec_set as annotation_spec_set_sample
import create_instruction as instruction_sample
import import_data as import_sample
import manage_dataset as dataset_sample


# Upper bound, in seconds, on the total time ``backoff`` keeps retrying.
RETRY_DEADLINE = 60


def create_client():
    """Return a ``DataLabelingServiceClient`` for the tests.

    If the ``DATALABELING_ENDPOINT`` environment variable is set, the client
    is pointed at that endpoint. Using a test endpoint prevents tests on
    these snippets from triggering any action by a real human. Otherwise the
    client is built with its default options.
    """
    # Read the variable once instead of testing membership and then reading.
    endpoint = os.environ.get('DATALABELING_ENDPOINT')
    if endpoint is None:
        return datalabeling.DataLabelingServiceClient()
    opts = ClientOptions(api_endpoint=endpoint)
    return datalabeling.DataLabelingServiceClient(client_options=opts)


@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE)
def create_dataset(project_id):
    """Run the dataset-creation sample for ``project_id``, with retries.

    Returns whatever ``manage_dataset.create_dataset`` returns.
    """
    return dataset_sample.create_dataset(project_id)


@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE)
def delete_dataset(name):
    """Run the dataset-deletion sample for the dataset ``name``, with retries.

    Returns whatever ``manage_dataset.delete_dataset`` returns.
    """
    return dataset_sample.delete_dataset(name)


def delete_old_datasets(project_id, max_age_seconds=7200):
    """Delete datasets in ``project_id`` older than ``max_age_seconds``.

    Args:
        project_id: Project whose datasets are listed and cleaned up.
        max_age_seconds: Datasets whose creation time is more than this many
            seconds in the past are deleted. Defaults to 7200 (2 hours).

    A ``FailedPrecondition`` raised while deleting a dataset is printed and
    the loop moves on to the next dataset. Any other error propagates.
    """
    client = create_client()
    formatted_project_name = client.project_path(project_id)

    response = client.list_datasets(formatted_project_name)
    cutoff_time = time.time() - max_age_seconds
    for element in response:
        if element.create_time.seconds >= cutoff_time:
            # Recent enough to possibly still be in use; leave it alone.
            continue
        print("Deleting {}".format(element.name))
        try:
            # Use this module's retrying wrapper so that a transient
            # DeadlineExceeded does not abort the whole cleanup pass.
            delete_dataset(element.name)
        except FailedPrecondition as e:
            # We're always getting FailedPrecondition with 400
            # resource conflict. The cause is unknown, so report it and
            # keep going.
            print("Deleting {} failed.".format(element.name))
            print("Detail: {}".format(e))
        # To avoid quota error
        time.sleep(1)


@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE)
def create_annotation_spec_set(project_id):
    """Run the annotation-spec-set creation sample, with retries.

    Returns whatever
    ``create_annotation_spec_set.create_annotation_spec_set`` returns.
    """
    return annotation_spec_set_sample.create_annotation_spec_set(project_id)


@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE)
def delete_annotation_spec_set(name):
    """Delete the annotation spec set ``name`` via a fresh client, with retries."""
    client = create_client()
    client.delete_annotation_spec_set(name)


@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE)
def create_instruction(project_id, data_type, gcs_uri):
    """Run the instruction-creation sample, with retries.

    Returns whatever ``create_instruction.create_instruction`` returns.
    """
    return instruction_sample.create_instruction(project_id, data_type, gcs_uri)


@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE)
def delete_instruction(name):
    """Delete the instruction ``name`` via a fresh client, with retries."""
    client = create_client()
    client.delete_instruction(name)


@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE)
def cancel_operation(name):
    """Cancel the long-running operation ``name``, with retries."""
    client = create_client()
    # NOTE(review): this reaches into the transport's private
    # ``_operations_client`` attribute; confirm it still exists whenever the
    # client library is upgraded.
    client.transport._operations_client.cancel_operation(name)


@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE)
def import_data(dataset_name, data_type, gcs_uri):
    """Run the data-import sample, with retries.

    Returns whatever ``import_data.import_data`` returns, so callers can
    inspect the result just like with the other sample wrappers.
    """
    return import_sample.import_data(dataset_name, data_type, gcs_uri)
"d13e2da37268f7ed2808a7288d934274627cdefd", - "internalRef": "318386808" + "name": "synthtool", + "remote": "https://github.com/googleapis/synthtool.git", + "sha": "5f2f711c91199ba2f609d3f06a2fe22aee4e5be3" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "652d446edabb0ea07de0ce542c6b37ab7dad3a19" + "sha": "5f2f711c91199ba2f609d3f06a2fe22aee4e5be3" } } ], diff --git a/packages/google-cloud-datalabeling/synth.py b/packages/google-cloud-datalabeling/synth.py index 0eef5a9204c9..436414536a65 100644 --- a/packages/google-cloud-datalabeling/synth.py +++ b/packages/google-cloud-datalabeling/synth.py @@ -16,6 +16,7 @@ import synthtool as s from synthtool import gcp +from synthtool.languages import python gapic = gcp.GAPICBazel() common = gcp.CommonTemplates() @@ -83,7 +84,14 @@ # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- -templated_files = common.py_library(cov_level=79) +templated_files = common.py_library(cov_level=79, samples=True) s.move(templated_files) +# ---------------------------------------------------------------------------- +# Samples templates +# ---------------------------------------------------------------------------- +python.py_samples(skip_readmes=True) + +s.replace("noxfile.py", '[\"\']-W[\"\']', '# "-W"') + s.shell.run(["nox", "-s", "blacken"], hide_output=False)