diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8dd83b4d65b0..80a407f6a570 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -73,4 +73,5 @@ /talent/**/* @GoogleCloudPlatform/python-samples-reviewers /vision/**/* @GoogleCloudPlatform/python-samples-reviewers /workflows/**/* @GoogleCloudPlatform/python-samples-reviewers +/datacatalog/**/* @GoogleCloudPlatform/python-samples-reviewers /kms/**/** @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/python-samples-reviewers diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml index b9c52c6cfdd3..f1f402fcf9dd 100644 --- a/.github/blunderbuss.yml +++ b/.github/blunderbuss.yml @@ -130,6 +130,10 @@ assign_issues_by: - 'api: monitoring' to: - GoogleCloudPlatform/dee-observability +- labels: + - 'api: datacatalog' + to: + - GoogleCloudPlatform/python-samples-reviewers - labels: - 'api: kms' - 'api: cloudkms' diff --git a/datacatalog/quickstart/conftest.py b/datacatalog/quickstart/conftest.py new file mode 100644 index 000000000000..138ead5d605e --- /dev/null +++ b/datacatalog/quickstart/conftest.py @@ -0,0 +1,76 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# File: datacatalog/quickstart/conftest.py
# Shared pytest fixtures for the Data Catalog quickstart sample test.

import datetime
import uuid

import google.auth
from google.cloud import bigquery, datacatalog_v1
import pytest


def temp_suffix():
    """Return a ``<YYYYmmddHHMMSS>_<8 hex chars>`` suffix for temporary IDs."""
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    random_part = uuid.uuid4().hex[:8]
    return f"{timestamp}_{random_part}"


@pytest.fixture(scope="session")
def client(credentials):
    """Session-wide Data Catalog client built from the shared credentials."""
    catalog_client = datacatalog_v1.DataCatalogClient(credentials=credentials)
    return catalog_client


@pytest.fixture(scope="session")
def bigquery_client(credentials, project_id):
    """Session-wide BigQuery client bound to the default project."""
    bq_client = bigquery.Client(project=project_id, credentials=credentials)
    return bq_client


@pytest.fixture(scope="session")
def default_credentials():
    """Return the ``google.auth.default`` result for the cloud-platform scope."""
    scopes = ["https://www.googleapis.com/auth/cloud-platform"]
    return google.auth.default(scopes=scopes)


@pytest.fixture(scope="session")
def credentials(default_credentials):
    """First element of ``default_credentials`` (the credentials object)."""
    resolved_credentials = default_credentials[0]
    return resolved_credentials


@pytest.fixture(scope="session")
def project_id(default_credentials):
    """Second element of ``default_credentials`` (the project ID)."""
    resolved_project = default_credentials[1]
    return resolved_project


@pytest.fixture
def dataset_id(bigquery_client):
    """Create a throwaway BigQuery dataset, yield its ID, then delete it."""
    created = bigquery_client.create_dataset(
        f"python_data_catalog_sample_{temp_suffix()}"
    )
    yield created.dataset_id
    # Teardown: remove the dataset together with any tables still inside it.
    bigquery_client.delete_dataset(created, delete_contents=True, not_found_ok=True)


@pytest.fixture
def table_id(bigquery_client, project_id, dataset_id):
    """Create a throwaway table in the temporary dataset and yield its ID."""
    name = f"python_data_catalog_sample_{temp_suffix()}"
    full_table_id = f"{project_id}.{dataset_id}.{name}"
    created = bigquery_client.create_table(bigquery.Table(full_table_id))
    yield created.table_id
    bigquery_client.delete_table(created, not_found_ok=True)


@pytest.fixture
def random_tag_template_id():
    """Yield a unique tag template ID; the template itself is not created here."""
    yield f"python_sample_{temp_suffix()}"
# File: datacatalog/quickstart/quickstart.py


def quickstart(override_values):
    """Creates a tag template and attaches a tag to a BigQuery table.

    Args:
        override_values: Mapping used by the test harness to replace the
            hard-coded sample values. Recognised keys are ``project_id``,
            ``dataset_id``, ``table_id`` and ``tag_template_id``; missing keys
            keep the sample defaults.

    Prints the name of the created tag template and of the created tag.
    """
    # [START data_catalog_quickstart]
    # Import required modules.
    from google.cloud import datacatalog_v1

    # TODO: Set these values before running the sample.
    # Google Cloud Platform project.
    project_id = "my_project"
    # Set dataset_id to the ID of existing dataset.
    dataset_id = "demo_dataset"
    # Set table_id to the ID of existing table.
    table_id = "trips"
    # Tag template to create.
    tag_template_id = "example_tag_template"

    # [END data_catalog_quickstart]

    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    project_id = override_values.get("project_id", project_id)
    dataset_id = override_values.get("dataset_id", dataset_id)
    table_id = override_values.get("table_id", table_id)
    tag_template_id = override_values.get("tag_template_id", tag_template_id)

    # [START data_catalog_quickstart]
    # For all regions available, see:
    # https://cloud.google.com/data-catalog/docs/concepts/regions
    location = "us-central1"

    # Use Application Default Credentials to create a new
    # Data Catalog client. GOOGLE_APPLICATION_CREDENTIALS
    # environment variable must be set with the location
    # of a service account key file.
    datacatalog_client = datacatalog_v1.DataCatalogClient()

    # Create a Tag Template.
    tag_template = datacatalog_v1.types.TagTemplate()

    tag_template.display_name = "Demo Tag Template"

    tag_template.fields["source"] = datacatalog_v1.types.TagTemplateField()
    tag_template.fields["source"].display_name = "Source of data asset"
    tag_template.fields[
        "source"
    ].type_.primitive_type = datacatalog_v1.types.FieldType.PrimitiveType.STRING

    tag_template.fields["num_rows"] = datacatalog_v1.types.TagTemplateField()
    tag_template.fields["num_rows"].display_name = "Number of rows in data asset"
    tag_template.fields[
        "num_rows"
    ].type_.primitive_type = datacatalog_v1.types.FieldType.PrimitiveType.DOUBLE

    tag_template.fields["has_pii"] = datacatalog_v1.types.TagTemplateField()
    tag_template.fields["has_pii"].display_name = "Has PII"
    tag_template.fields[
        "has_pii"
    ].type_.primitive_type = datacatalog_v1.types.FieldType.PrimitiveType.BOOL

    tag_template.fields["pii_type"] = datacatalog_v1.types.TagTemplateField()
    tag_template.fields["pii_type"].display_name = "PII type"

    for display_name in ["EMAIL", "SOCIAL SECURITY NUMBER", "NONE"]:
        enum_value = datacatalog_v1.types.FieldType.EnumType.EnumValue(
            display_name=display_name
        )
        tag_template.fields["pii_type"].type_.enum_type.allowed_values.append(
            enum_value
        )

    expected_template_name = datacatalog_v1.DataCatalogClient.tag_template_path(
        project_id, location, tag_template_id
    )

    # Create the Tag Template.
    # NOTE(review): only OSError is handled here; API-level failures are
    # presumably raised as google.api_core exceptions and still propagate to
    # the caller - confirm that this is the intended split.
    try:
        tag_template = datacatalog_client.create_tag_template(
            parent=f"projects/{project_id}/locations/{location}",
            tag_template_id=tag_template_id,
            tag_template=tag_template,
        )
        print(f"Created template: {tag_template.name}")
    except OSError as e:
        print(f"Cannot create template: {expected_template_name}")
        print(f"{e}")

    # Lookup Data Catalog's Entry referring to the table.
    resource_name = (
        f"//bigquery.googleapis.com/projects/{project_id}"
        f"/datasets/{dataset_id}/tables/{table_id}"
    )
    table_entry = datacatalog_client.lookup_entry(
        request={"linked_resource": resource_name}
    )

    # Attach a Tag to the table.
    tag = datacatalog_v1.types.Tag()

    # If the creation above failed, tag_template is still the local, unsaved
    # message whose name is empty; fall back to the name the template is
    # expected to have so the tag never carries an empty template reference.
    tag.template = tag_template.name or expected_template_name
    tag.name = "my_super_cool_tag"

    tag.fields["source"] = datacatalog_v1.types.TagField()
    tag.fields["source"].string_value = "Copied from tlc_yellow_trips_2018"

    tag.fields["num_rows"] = datacatalog_v1.types.TagField()
    tag.fields["num_rows"].double_value = 113496874

    tag.fields["has_pii"] = datacatalog_v1.types.TagField()
    tag.fields["has_pii"].bool_value = False

    tag.fields["pii_type"] = datacatalog_v1.types.TagField()
    tag.fields["pii_type"].enum_value.display_name = "NONE"

    tag = datacatalog_client.create_tag(parent=table_entry.name, tag=tag)
    print(f"Created tag: {tag.name}")
    # [END data_catalog_quickstart]
# File: datacatalog/quickstart/quickstart_test.py

from google.api_core.exceptions import NotFound

import quickstart


def test_quickstart(
    capsys, client, project_id, dataset_id, table_id, random_tag_template_id
):
    """Run the quickstart sample against temporary resources and check its output.

    The tag template created by the sample is deleted in a ``finally`` clause
    so that a failing assertion (or a failure inside the sample) does not
    leave the template behind in the test project.
    """
    location = "us-central1"
    override_values = {
        "project_id": project_id,
        "dataset_id": dataset_id,
        "table_id": table_id,
        "tag_template_id": random_tag_template_id,
    }
    tag_template_name = client.tag_template_path(
        project_id, location, random_tag_template_id
    )
    try:
        quickstart.quickstart(override_values)
        out, _ = capsys.readouterr()
        assert "Created template: {}".format(tag_template_name) in out
        assert "Created tag:" in out
    finally:
        try:
            client.delete_tag_template(name=tag_template_name, force=True)
        except NotFound:
            # The sample failed before the template existed; nothing to clean
            # up, and the original error must not be masked.
            pass
# File: datacatalog/snippets/conftest.py
# Shared pytest fixtures for the Data Catalog snippet tests.

import datetime
import uuid

from google.api_core.exceptions import NotFound, PermissionDenied
import google.auth
from google.cloud import datacatalog_v1
import pytest

LOCATION = "us-central1"


def temp_suffix():
    """Return a ``<YYYYmmddHHMMSS>_<8 hex chars>`` suffix for temporary IDs."""
    now = datetime.datetime.now()
    return "{}_{}".format(now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8])


@pytest.fixture(scope="session")
def client(credentials):
    """Session-wide Data Catalog client built from the shared credentials."""
    return datacatalog_v1.DataCatalogClient(credentials=credentials)


@pytest.fixture(scope="session")
def default_credentials():
    """Return the ``google.auth.default`` result for the cloud-platform scope."""
    return google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )


@pytest.fixture(scope="session")
def credentials(default_credentials):
    """First element of ``default_credentials`` (the credentials object)."""
    return default_credentials[0]


@pytest.fixture(scope="session")
def project_id(default_credentials):
    """Second element of ``default_credentials`` (the project ID)."""
    return default_credentials[1]


@pytest.fixture
def valid_member_id(client, project_id, random_existing_tag_template_id):
    """Yield the first member of the first binding of the template's IAM policy."""
    template_name = datacatalog_v1.DataCatalogClient.tag_template_path(
        project_id, LOCATION, random_existing_tag_template_id
    )

    # Retrieve Template's current IAM Policy.
    policy = client.get_iam_policy(resource=template_name)
    # NOTE(review): indexes bindings[0].members[0] unconditionally, which
    # raises IndexError when the policy has no bindings - confirm that a
    # freshly created template always carries at least one binding.
    yield policy.bindings[0].members[0]


@pytest.fixture
def resources_to_delete(client, project_id):
    """Yield a registry of resource names that are deleted after the test.

    Tests append full resource names under ``entries``, ``entry_groups`` and
    ``templates``. On teardown entries are deleted first, then entry groups,
    then templates; ``NotFound`` and ``PermissionDenied`` are ignored so that
    cleanup stays best-effort.
    """
    doomed = {
        "entries": [],
        "entry_groups": [],
        "templates": [],
    }
    yield doomed

    for entry_name in doomed["entries"]:
        try:
            client.delete_entry(name=entry_name)
        except (NotFound, PermissionDenied):
            pass
    for group_name in doomed["entry_groups"]:
        try:
            client.delete_entry_group(name=group_name)
        except (NotFound, PermissionDenied):
            pass
    for template_name in doomed["templates"]:
        try:
            client.delete_tag_template(name=template_name, force=True)
        except (NotFound, PermissionDenied):
            pass


@pytest.fixture
def random_entry_id():
    """Yield a unique entry ID; the entry itself is not created here."""
    random_entry_id = f"python_sample_entry_{temp_suffix()}"
    yield random_entry_id


@pytest.fixture
def random_entry_group_id():
    """Yield a unique entry group ID; the group itself is not created here."""
    random_entry_group_id = f"python_sample_group_{temp_suffix()}"
    yield random_entry_group_id


@pytest.fixture
def random_tag_template_id():
    """Yield a unique tag template ID; the template itself is not created here."""
    random_tag_template_id = f"python_sample_{temp_suffix()}"
    yield random_tag_template_id


@pytest.fixture
def random_existing_tag_template_id(client, project_id, resources_to_delete):
    """Create a one-field tag template, yield its ID and schedule its deletion."""
    random_tag_template_id = f"python_sample_{temp_suffix()}"
    random_tag_template = datacatalog_v1.types.TagTemplate()
    random_tag_template.fields["source"] = datacatalog_v1.types.TagTemplateField()
    random_tag_template.fields[
        "source"
    ].type_.primitive_type = datacatalog_v1.FieldType.PrimitiveType.STRING.value
    random_tag_template = client.create_tag_template(
        parent=datacatalog_v1.DataCatalogClient.common_location_path(
            project_id, LOCATION
        ),
        tag_template_id=random_tag_template_id,
        tag_template=random_tag_template,
    )
    yield random_tag_template_id
    # Hand the template to resources_to_delete; this fixture depends on it, so
    # its teardown (the actual deletion) runs after this line.
    resources_to_delete["templates"].append(random_tag_template.name)


@pytest.fixture(scope="session")
def policy_tag_manager_client(credentials):
    """Session-wide Policy Tag Manager client built from the shared credentials."""
    return datacatalog_v1.PolicyTagManagerClient(credentials=credentials)


@pytest.fixture
def random_taxonomy_display_name(policy_tag_manager_client, project_id):
    """Yield a unique taxonomy display name and delete the matching taxonomy.

    After the test, taxonomies in the ``us`` location of the project are
    listed and the first one whose display name equals the yielded value, if
    any, is deleted.
    """
    # Same "<timestamp>_<8 hex chars>" suffix as every other random ID here.
    random_display_name = f"example_taxonomy_{temp_suffix()}"
    yield random_display_name
    parent = datacatalog_v1.PolicyTagManagerClient.common_location_path(
        project_id, "us"
    )
    taxonomies = policy_tag_manager_client.list_taxonomies(parent=parent)
    taxonomy = next(
        (t for t in taxonomies if t.display_name == random_display_name), None
    )
    if taxonomy:
        policy_tag_manager_client.delete_taxonomy(name=taxonomy.name)


# File: datacatalog/snippets/create_custom_entry.py


def create_custom_entry(override_values):
    """Creates a custom entry within an entry group.

    Args:
        override_values: Mapping used by the test harness to replace the
            hard-coded sample values. Recognised keys are ``project_id``,
            ``entry_id`` and ``entry_group_id``; missing keys keep the sample
            defaults.

    Prints the names of the created entry group and entry.
    """
    # [START data_catalog_create_custom_entry]
    # Import required modules.
    from google.cloud import datacatalog_v1

    # Google Cloud Platform project.
    project_id = "my-project"
    # Entry Group to be created.
    entry_group_id = "my_new_entry_group_id"
    # Entry to be created.
    entry_id = "my_new_entry_id"
    # For all regions available, see:
    # https://cloud.google.com/data-catalog/docs/concepts/regions
    location = "us-central1"

    # [END data_catalog_create_custom_entry]

    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    project_id = override_values.get("project_id", project_id)
    entry_id = override_values.get("entry_id", entry_id)
    entry_group_id = override_values.get("entry_group_id", entry_group_id)

    # [START data_catalog_create_custom_entry]
    datacatalog = datacatalog_v1.DataCatalogClient()

    # Create an Entry Group.
    entry_group_obj = datacatalog_v1.types.EntryGroup()
    entry_group_obj.display_name = "My awesome Entry Group"
    entry_group_obj.description = "This Entry Group represents an external system"

    entry_group = datacatalog.create_entry_group(
        parent=datacatalog_v1.DataCatalogClient.common_location_path(
            project_id, location
        ),
        entry_group_id=entry_group_id,
        entry_group=entry_group_obj,
    )
    entry_group_name = entry_group.name
    print("Created entry group: {}".format(entry_group_name))

    # Create an Entry.
    entry = datacatalog_v1.types.Entry()
    entry.user_specified_system = "onprem_data_system"
    entry.user_specified_type = "onprem_data_asset"
    entry.display_name = "My awesome data asset"
    entry.description = "This data asset is managed by an external system."
    entry.linked_resource = "//my-onprem-server.com/dataAssets/my-awesome-data-asset"

    # Create the Schema, this is optional.
    entry.schema.columns.append(
        datacatalog_v1.types.ColumnSchema(
            column="first_column",
            type_="STRING",
            description="This columns consists of ....",
            mode=None,
        )
    )

    entry.schema.columns.append(
        datacatalog_v1.types.ColumnSchema(
            column="second_column",
            type_="DOUBLE",
            description="This columns consists of ....",
            mode=None,
        )
    )

    entry = datacatalog.create_entry(
        parent=entry_group_name, entry_id=entry_id, entry=entry
    )
    print("Created entry: {}".format(entry.name))
    # [END data_catalog_create_custom_entry]
# File: datacatalog/snippets/create_custom_entry_test.py

import create_custom_entry


def test_create_custom_entry(
    capsys,
    client,
    project_id,
    random_entry_group_id,
    random_entry_id,
    resources_to_delete,
):
    """Run the custom-entry sample and check that it reports both resources."""
    location = "us-central1"
    override_values = {
        "project_id": project_id,
        "entry_id": random_entry_id,
        "entry_group_id": random_entry_group_id,
    }
    expected_entry_group = client.entry_group_path(
        project_id, location, random_entry_group_id
    )
    expected_entry = client.entry_path(
        project_id, location, random_entry_group_id, random_entry_id
    )
    # Register both resources before running the sample: the names are known
    # up front, so cleanup also happens when the sample or an assertion below
    # fails. The sample creates an entry group as well as an entry, so the
    # group has to be registered too or it is left behind after every run.
    resources_to_delete["entry_groups"].append(expected_entry_group)
    resources_to_delete["entries"].append(expected_entry)

    create_custom_entry.create_custom_entry(override_values)
    out, _ = capsys.readouterr()
    assert f"Created entry group: {expected_entry_group}" in out
    assert f"Created entry: {expected_entry}" in out


# File: datacatalog/snippets/create_fileset.py


def create_fileset(override_values):
    """Creates a fileset within an entry group.

    Args:
        override_values: Mapping used by the test harness to replace the
            hard-coded sample values. Recognised keys are ``project_id``,
            ``fileset_entry_group_id`` and ``fileset_entry_id``; missing keys
            keep the sample defaults.

    Prints the names of the created entry group and fileset entry.
    """
    # [START data_catalog_create_fileset]
    # Import required modules.
    from google.cloud import datacatalog_v1

    # TODO: Set these values before running the sample.
    project_id = "project_id"
    fileset_entry_group_id = "entry_group_id"
    fileset_entry_id = "entry_id"

    # [END data_catalog_create_fileset]

    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    project_id = override_values.get("project_id", project_id)
    fileset_entry_group_id = override_values.get(
        "fileset_entry_group_id", fileset_entry_group_id
    )
    fileset_entry_id = override_values.get("fileset_entry_id", fileset_entry_id)

    # [START data_catalog_create_fileset]
    # For all regions available, see:
    # https://cloud.google.com/data-catalog/docs/concepts/regions
    location = "us-central1"

    datacatalog = datacatalog_v1.DataCatalogClient()

    # Create an Entry Group.
    entry_group_obj = datacatalog_v1.types.EntryGroup()
    entry_group_obj.display_name = "My Fileset Entry Group"
    entry_group_obj.description = "This Entry Group consists of ...."

    entry_group = datacatalog.create_entry_group(
        parent=datacatalog_v1.DataCatalogClient.common_location_path(
            project_id, location
        ),
        entry_group_id=fileset_entry_group_id,
        entry_group=entry_group_obj,
    )
    print(f"Created entry group: {entry_group.name}")

    # Create a Fileset Entry.
    entry = datacatalog_v1.types.Entry()
    entry.display_name = "My Fileset"
    entry.description = "This fileset consists of ...."
    entry.gcs_fileset_spec.file_patterns.append("gs://my_bucket/*.csv")
    entry.type_ = datacatalog_v1.EntryType.FILESET

    # Create the Schema, for example when you have a csv file.
    entry.schema.columns.append(
        datacatalog_v1.types.ColumnSchema(
            column="first_name",
            description="First name",
            mode="REQUIRED",
            type_="STRING",
        )
    )

    entry.schema.columns.append(
        datacatalog_v1.types.ColumnSchema(
            column="last_name", description="Last name", mode="REQUIRED", type_="STRING"
        )
    )

    # Create the addresses parent column
    addresses_column = datacatalog_v1.types.ColumnSchema(
        column="addresses", description="Addresses", mode="REPEATED", type_="RECORD"
    )

    # Create sub columns for the addresses parent column
    addresses_column.subcolumns.append(
        datacatalog_v1.types.ColumnSchema(
            column="city", description="City", mode="NULLABLE", type_="STRING"
        )
    )

    addresses_column.subcolumns.append(
        datacatalog_v1.types.ColumnSchema(
            column="state", description="State", mode="NULLABLE", type_="STRING"
        )
    )

    entry.schema.columns.append(addresses_column)

    entry = datacatalog.create_entry(
        parent=entry_group.name, entry_id=fileset_entry_id, entry=entry
    )
    print(f"Created fileset entry: {entry.name}")
    # [END data_catalog_create_fileset]
# File: datacatalog/snippets/create_fileset_test.py

import create_fileset


def test_create_fileset(
    capsys,
    client,
    project_id,
    random_entry_group_id,
    random_entry_id,
    resources_to_delete,
):
    """Run the fileset sample and check that it reports both resources."""
    location = "us-central1"
    override_values = {
        "project_id": project_id,
        "fileset_entry_group_id": random_entry_group_id,
        "fileset_entry_id": random_entry_id,
    }
    expected_group_name = client.entry_group_path(
        project_id, location, random_entry_group_id
    )
    expected_entry_name = client.entry_path(
        project_id, location, random_entry_group_id, random_entry_id
    )
    # Register the resources before running the sample: the names are known
    # up front, so cleanup also happens when the sample or an assertion below
    # fails instead of leaking the entry group and entry.
    resources_to_delete["entry_groups"].append(expected_group_name)
    resources_to_delete["entries"].append(expected_entry_name)

    create_fileset.create_fileset(override_values)
    out, _ = capsys.readouterr()
    assert f"Created entry group: {expected_group_name}" in out
    assert f"Created fileset entry: {expected_entry_name}" in out


# File: datacatalog/snippets/data_catalog_ptm_create_taxonomy.py

# [START data_catalog_ptm_create_taxonomy]
from google.cloud import datacatalog_v1


def create_taxonomy(
    # TODO(developer): Set project_id to the ID of the project the
    # taxonomy will belong to.
    project_id: str = "your-project-id",
    # TODO(developer): Specify the geographic location where the
    # taxonomy should reside.
    location_id: str = "us",
    # TODO(developer): Set the display name of the taxonomy.
    display_name: str = "example-taxonomy",
):
    """Creates a taxonomy in the given project and location and prints its name.

    Args:
        project_id: ID of the project the taxonomy will belong to.
        location_id: Location the taxonomy is created in.
        display_name: Display name given to the new taxonomy.
    """
    # TODO(developer): Construct a Policy Tag Manager client object. To avoid
    # extra delays due to authentication, create a single client for your
    # program and share it across operations.
    client = datacatalog_v1.PolicyTagManagerClient()

    # Construct a full location path to be the parent of the taxonomy.
    parent = datacatalog_v1.PolicyTagManagerClient.common_location_path(
        project_id, location_id
    )

    # TODO(developer): Construct a full Taxonomy object to send to the API.
    taxonomy = datacatalog_v1.Taxonomy()
    taxonomy.display_name = display_name
    taxonomy.description = "This Taxonomy represents ..."

    # Send the taxonomy to the API for creation.
    taxonomy = client.create_taxonomy(parent=parent, taxonomy=taxonomy)
    print(f"Created taxonomy {taxonomy.name}")


# [END data_catalog_ptm_create_taxonomy]
# File: datacatalog/snippets/data_catalog_ptm_create_taxonomy_test.py

import data_catalog_ptm_create_taxonomy


def test_create_taxonomy(capsys, project_id: str, random_taxonomy_display_name: str):
    """Run the create-taxonomy sample in ``us`` and check the printed name prefix."""

    data_catalog_ptm_create_taxonomy.create_taxonomy(
        project_id=project_id,
        location_id="us",
        display_name=random_taxonomy_display_name,
    )
    out, _ = capsys.readouterr()
    # Only the prefix is checked; the trailing taxonomy ID is not compared.
    assert f"Created taxonomy projects/{project_id}/locations/us/taxonomies/" in out


# File: datacatalog/snippets/grant_tag_template_user_role.py


def grant_tag_template_user_role(override_values):
    """Grants a user the Tag Template User role for a given template.

    Args:
        override_values: Mapping used by the test harness to replace the
            hard-coded sample values. Recognised keys are ``project_id``,
            ``tag_template_id`` and ``member_id``; missing keys keep the
            sample defaults.

    Prints one ``Member: ..., Role: ...`` line for every member of every
    binding in the policy returned by the update call.
    """
    # [START data_catalog_grant_tag_template_user_role]
    from google.cloud import datacatalog_v1
    from google.iam.v1 import iam_policy_pb2 as iam_policy
    from google.iam.v1 import policy_pb2

    datacatalog = datacatalog_v1.DataCatalogClient()

    # TODO: Set these values before running the sample.
    project_id = "project_id"
    tag_template_id = "existing_tag_template_id"
    # For a full list of values a member can have, see:
    # https://cloud.google.com/iam/docs/reference/rest/v1/Policy?hl=en#binding
    member_id = "user:super-cool.test-user@gmail.com"

    # [END data_catalog_grant_tag_template_user_role]

    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    project_id = override_values.get("project_id", project_id)
    tag_template_id = override_values.get("tag_template_id", tag_template_id)
    member_id = override_values.get("member_id", member_id)

    # [START data_catalog_grant_tag_template_user_role]
    # For all regions available, see:
    # https://cloud.google.com/data-catalog/docs/concepts/regions
    location = "us-central1"

    # Format the Template name.
    template_name = datacatalog_v1.DataCatalogClient.tag_template_path(
        project_id, location, tag_template_id
    )

    # Retrieve Template's current IAM Policy.
    policy = datacatalog.get_iam_policy(resource=template_name)

    # Add Tag Template User role and member to the policy.
    # A new binding is appended to the bindings already present in the
    # retrieved policy; existing bindings are sent back unchanged.
    binding = policy_pb2.Binding()
    binding.role = "roles/datacatalog.tagTemplateUser"
    binding.members.append(member_id)
    policy.bindings.append(binding)

    set_policy_request = iam_policy.SetIamPolicyRequest(
        resource=template_name, policy=policy
    )

    # Update Template's policy.
    policy = datacatalog.set_iam_policy(set_policy_request)

    # Report every member/role pair of the policy returned by the update.
    for binding in policy.bindings:
        for member in binding.members:
            print(f"Member: {member}, Role: {binding.role}")
    # [END data_catalog_grant_tag_template_user_role]
def lookup_entry(override_values):
    """Retrieves Data Catalog entry for the given Google Cloud Platform resource.

    Looks up a BigQuery dataset, a BigQuery table and a Pub/Sub topic, each
    one twice: first by its full ``linked_resource`` name and then by its
    ``sql_resource`` name. One "Retrieved entry ..." line is printed per
    lookup, six in total.

    Args:
        override_values: Mapping read with ``.get()`` for the optional keys
            ``bigquery_project_id``, ``dataset_id``, ``table_id``,
            ``pubsub_project_id`` and ``topic_id``. A missing key falls back
            to the placeholder value assigned below.
    """
    # The data_catalog_lookup_entry region is opened and closed twice so that
    # the test-only override code in the middle stays outside of it.
    # [START data_catalog_lookup_dataset]
    # [START data_catalog_lookup_entry]
    from google.cloud import datacatalog_v1

    datacatalog = datacatalog_v1.DataCatalogClient()

    # Placeholder identifiers; replaced by override_values when provided.
    bigquery_project_id = "my_bigquery_project"
    dataset_id = "my_dataset"
    table_id = "my_table"
    pubsub_project_id = "my_pubsub_project"
    topic_id = "my_topic"

    # [END data_catalog_lookup_entry]

    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    bigquery_project_id = override_values.get(
        "bigquery_project_id", bigquery_project_id
    )
    dataset_id = override_values.get("dataset_id", dataset_id)
    table_id = override_values.get("table_id", table_id)
    pubsub_project_id = override_values.get("pubsub_project_id", pubsub_project_id)
    topic_id = override_values.get("topic_id", topic_id)

    # [START data_catalog_lookup_entry]
    # BigQuery Dataset via linked_resource
    resource_name = f"//bigquery.googleapis.com/projects/{bigquery_project_id}/datasets/{dataset_id}"

    entry = datacatalog.lookup_entry(request={"linked_resource": resource_name})
    print(
        f"Retrieved entry {entry.name} for BigQuery Dataset resource {entry.linked_resource}"
    )

    # BigQuery Dataset via sql_resource
    sql_resource = f"bigquery.dataset.`{bigquery_project_id}`.`{dataset_id}`"

    entry = datacatalog.lookup_entry(request={"sql_resource": sql_resource})
    print(
        f"Retrieved entry {entry.name} for BigQuery Dataset resource {entry.linked_resource}"
    )

    # BigQuery Table via linked_resource
    resource_name = (
        f"//bigquery.googleapis.com/projects/{bigquery_project_id}/datasets/{dataset_id}"
        f"/tables/{table_id}"
    )

    entry = datacatalog.lookup_entry(request={"linked_resource": resource_name})
    # NOTE(review): this is the only message without the word "resource"
    # ("BigQuery Table <name>" rather than "BigQuery Table resource <name>").
    # Confirm whether that is intentional before relying on the output format.
    print(f"Retrieved entry {entry.name} for BigQuery Table {entry.linked_resource}")

    # BigQuery Table via sql_resource
    sql_resource = f"bigquery.table.`{bigquery_project_id}`.`{dataset_id}`.`{table_id}`"

    entry = datacatalog.lookup_entry(request={"sql_resource": sql_resource})
    print(
        f"Retrieved entry {entry.name} for BigQuery Table resource {entry.linked_resource}"
    )

    # Pub/Sub Topic via linked_resource
    resource_name = (
        f"//pubsub.googleapis.com/projects/{pubsub_project_id}/topics/{topic_id}"
    )

    entry = datacatalog.lookup_entry(request={"linked_resource": resource_name})
    print(
        f"Retrieved entry {entry.name} for Pub/Sub Topic resource {entry.linked_resource}"
    )

    # Pub/Sub Topic via sql_resource
    sql_resource = f"pubsub.topic.`{pubsub_project_id}`.`{topic_id}`"

    entry = datacatalog.lookup_entry(request={"sql_resource": sql_resource})
    print(
        f"Retrieved entry {entry.name} for Pub/Sub Topic resource {entry.linked_resource}"
    )
    # [END data_catalog_lookup_entry]
    # [END data_catalog_lookup_dataset]
import re

import lookup_entry

BIGQUERY_PROJECT = "bigquery-public-data"
BIGQUERY_DATASET = "new_york_taxi_trips"
BIGQUERY_TABLE = "taxi_zone_geom"

PUBSUB_PROJECT = "pubsub-public-data"
PUBSUB_TOPIC = "taxirides-realtime"


def test_lookup_entry(capsys):
    """Run the lookup sample against public resources and check its output.

    The sample is pointed at the public BigQuery dataset/table and Pub/Sub
    topic named by the module constants; the captured stdout must contain a
    "Retrieved entry ..." line for each of the three resource kinds.
    """
    override_values = {
        "bigquery_project_id": BIGQUERY_PROJECT,
        "dataset_id": BIGQUERY_DATASET,
        "table_id": BIGQUERY_TABLE,
        "pubsub_project_id": PUBSUB_PROJECT,
        "topic_id": PUBSUB_TOPIC,
    }
    dataset_resource = f"//bigquery.googleapis.com/projects/{BIGQUERY_PROJECT}/datasets/{BIGQUERY_DATASET}"
    table_resource = f"//bigquery.googleapis.com/projects/{BIGQUERY_PROJECT}/datasets/{BIGQUERY_DATASET}/tables/{BIGQUERY_TABLE}"
    topic_resource = (
        f"//pubsub.googleapis.com/projects/{PUBSUB_PROJECT}/topics/{PUBSUB_TOPIC}"
    )
    lookup_entry.lookup_entry(override_values)
    out, _ = capsys.readouterr()

    # The resource names are literal text, not patterns: escape them so that
    # "." in "bigquery.googleapis.com" cannot match an arbitrary character.
    assert re.search(
        f"(Retrieved entry .+ for BigQuery Dataset resource {re.escape(dataset_resource)})",
        out,
    )
    assert re.search(
        f"(Retrieved entry .+ for BigQuery Table resource {re.escape(table_resource)})",
        out,
    )
    assert re.search(
        f"(Retrieved entry .+ for Pub/Sub Topic resource {re.escape(topic_resource)})",
        out,
    )
# ---- datacatalog/snippets/search_assets.py ----


def search_assets(override_values):
    """Searches Data Catalog entries for a given project.

    Runs a catalog search scoped to one project and prints every result
    after a "Results in project:" header line.

    Args:
        override_values: Mapping supplied by the test harness. ``project_id``
            replaces the placeholder project. ``tag_template_id``, when
            present, replaces the query with ``name:<tag_template_id>``.
            Without it the sample's own ``type=dataset`` query is used.
    """
    # [START data_catalog_search_assets]
    from google.cloud import datacatalog_v1

    datacatalog = datacatalog_v1.DataCatalogClient()

    # TODO: Set these values before running the sample.
    project_id = "project_id"

    # Set custom query.
    search_string = "type=dataset"
    # [END data_catalog_search_assets]

    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    project_id = override_values.get("project_id", project_id)
    # Only rewrite the query when the harness actually supplies a template
    # ID. Rewriting unconditionally turned the default into the meaningless
    # "name:type=dataset".
    if "tag_template_id" in override_values:
        search_string = f"name:{override_values['tag_template_id']}"

    # [START data_catalog_search_assets]
    scope = datacatalog_v1.types.SearchCatalogRequest.Scope()
    scope.include_project_ids.append(project_id)

    # Alternatively, search using organization scopes.
    # scope.include_org_ids.append("my_organization_id")

    search_results = datacatalog.search_catalog(scope=scope, query=search_string)

    print("Results in project:")
    for result in search_results:
        print(result)
    # [END data_catalog_search_assets]


# ---- datacatalog/snippets/search_assets_test.py ----

import search_assets


def test_search_assets(capsys, project_id, random_existing_tag_template_id):
    """Search for an existing tag template by name and expect it in the output."""
    override_values = {
        "project_id": project_id,
        "tag_template_id": random_existing_tag_template_id,
    }
    search_assets.search_assets(override_values)
    out, err = capsys.readouterr()
    assert "Results in project:" in out
    assert random_existing_tag_template_id in out
import datetime
import uuid

import google.auth
from google.cloud import datacatalog_v1beta1
import pytest


def _timestamp_and_token():
    """Return a (YYYYmmddHHMMSS timestamp, 8-hex-char token) pair for unique IDs."""
    moment = datetime.datetime.now()
    return moment.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8]


@pytest.fixture(scope="session")
def client(credentials):
    """Session-wide v1beta1 Data Catalog client built from ``credentials``."""
    return datacatalog_v1beta1.DataCatalogClient(credentials=credentials)


@pytest.fixture(scope="session")
def default_credentials():
    """The ``(credentials, project_id)`` pair returned by ``google.auth.default()``."""
    return google.auth.default()


@pytest.fixture(scope="session")
def credentials(default_credentials):
    """First element of ``default_credentials``."""
    found_credentials, _ = default_credentials
    return found_credentials


@pytest.fixture(scope="session")
def project_id(default_credentials):
    """Second element of ``default_credentials``."""
    _, found_project = default_credentials
    return found_project


@pytest.fixture
def random_entry_id(client, project_id, random_entry_group_id):
    """Yield a fresh entry ID; on teardown delete that entry in us-central1."""
    generated_id = "example_entry_{}_{}".format(*_timestamp_and_token())
    yield generated_id
    # Teardown: rebuild the full entry path from its parts and delete it.
    doomed_entry = datacatalog_v1beta1.DataCatalogClient.entry_path(
        project_id, "us-central1", random_entry_group_id, generated_id
    )
    client.delete_entry(request={"name": doomed_entry})


@pytest.fixture
def random_entry_group_id(client, project_id):
    """Yield a fresh entry group ID; on teardown delete that group in us-central1."""
    generated_id = "example_entry_group_{}_{}".format(*_timestamp_and_token())
    yield generated_id
    # Teardown: rebuild the full entry group path and delete it.
    doomed_group = datacatalog_v1beta1.DataCatalogClient.entry_group_path(
        project_id, "us-central1", generated_id
    )
    client.delete_entry_group(request={"name": doomed_group})


@pytest.fixture
def random_entry_name(client, entry_group_name):
    """Yield a full entry name under ``entry_group_name``; delete it on teardown."""
    generated_id = "example_entry_{}_{}".format(*_timestamp_and_token())
    full_name = "{}/entries/{}".format(entry_group_name, generated_id)
    yield full_name
    client.delete_entry(request={"name": full_name})


@pytest.fixture
def entry(client, entry_group_name):
    """Create a DATA_STREAM entry, yield its name, and delete it on teardown."""
    generated_id = "example_entry_{}_{}".format(*_timestamp_and_token())

    new_entry = datacatalog_v1beta1.Entry(
        type_=datacatalog_v1beta1.EntryType.DATA_STREAM,
        name="samples_test_entry",
        user_specified_system="sample_system",
    )
    create_request = datacatalog_v1beta1.CreateEntryRequest(
        parent=entry_group_name,
        entry_id=generated_id,
        entry=new_entry,
    )

    created = client.create_entry(create_request)

    yield created.name
    client.delete_entry(request={"name": created.name})


@pytest.fixture
def entry_group_name(client, project_id):
    """Create an entry group in us-central1, yield its name, delete it on teardown."""
    generated_id = "python_entry_group_sample_{}_{}".format(*_timestamp_and_token())
    create_request = {
        "parent": f"projects/{project_id}/locations/us-central1",
        "entry_group_id": generated_id,
        "entry_group": datacatalog_v1beta1.EntryGroup(),
    }
    created = client.create_entry_group(request=create_request)
    yield created.name
    client.delete_entry_group(request={"name": created.name})
# ---- datacatalog/v1beta1/create_entry_group.py ----


def create_entry_group(project_id, entry_group_id):
    """Create an entry group in us-central1 and print its resource name.

    Args:
        project_id: ID of the project used to build the parent path
            ``projects/<project_id>/locations/us-central1``.
        entry_group_id: ID to give the new entry group.
    """
    # [START data_catalog_create_entry_group_v1beta1]
    from google.cloud import datacatalog_v1beta1

    client = datacatalog_v1beta1.DataCatalogClient()

    # TODO(developer): Set project_id to the ID of the project in which
    # the entry group is created.
    # project_id = "your-project-id"

    # TODO(developer): Specify the geographic location where the
    # entry group should reside.
    # Currently, Data Catalog stores metadata in the us-central1 region.
    location_id = "us-central1"

    # TODO(developer): Set entry_group_id to the ID of the
    # entry group to create.
    # entry_group_id = "your_entry_group_id"

    # Construct a full location path to be the parent of the entry group.
    parent = f"projects/{project_id}/locations/{location_id}"

    # Construct a full EntryGroup object to send to the API.
    entry_group = datacatalog_v1beta1.EntryGroup()
    entry_group.display_name = "My Entry Group"
    entry_group.description = "This Entry Group consists of ..."

    # Send the entry group to the API for creation.
    # Raises google.api_core.exceptions.AlreadyExists if the Entry Group
    # already exists within the project.
    entry_group = client.create_entry_group(
        request={
            "parent": parent,
            "entry_group_id": entry_group_id,
            "entry_group": entry_group,
        }
    )  # Make an API request.
    print("Created entry group {}".format(entry_group.name))
    # [END data_catalog_create_entry_group_v1beta1]


# ---- datacatalog/v1beta1/create_fileset_entry.py ----


def create_fileset_entry(client, entry_group_name, entry_id):
    """Create a FILESET entry with a small example schema and print its name.

    The schema has two required string columns (first_name, last_name) and a
    repeated RECORD column (addresses) with nullable city/state sub-columns.

    Args:
        client: Data Catalog client used for the ``create_entry`` call.
        entry_group_name: Resource name of the parent entry group.
        entry_id: ID to give the new entry.
    """
    # [START data_catalog_create_fileset_v1beta1]
    from google.cloud import datacatalog_v1beta1

    # TODO(developer): Construct a Data Catalog client object.
    # client = datacatalog_v1beta1.DataCatalogClient()
    # TODO(developer): Set entry_group_name to the name of the entry group
    # the entry will belong to.
    # entry_group_name = "your_entry_group_name"
    # TODO(developer): Set entry_id to the ID of the entry to create.
    # entry_id = "your_entry_id"
    # Construct a full Entry object to send to the API.
    entry = datacatalog_v1beta1.types.Entry()
    entry.display_name = "My Fileset"
    entry.description = "This Fileset consists of ..."
    entry.gcs_fileset_spec.file_patterns.append("gs://my_bucket/*")
    entry.type_ = datacatalog_v1beta1.EntryType.FILESET

    # Create the Schema, for example when you have a csv file.
    columns = []
    columns.append(
        datacatalog_v1beta1.types.ColumnSchema(
            column="first_name",
            description="First name",
            mode="REQUIRED",
            type_="STRING",
        )
    )

    columns.append(
        datacatalog_v1beta1.types.ColumnSchema(
            column="last_name", description="Last name", mode="REQUIRED", type_="STRING"
        )
    )

    # Create sub columns for the addresses parent column
    subcolumns = []
    subcolumns.append(
        datacatalog_v1beta1.types.ColumnSchema(
            column="city", description="City", mode="NULLABLE", type_="STRING"
        )
    )

    subcolumns.append(
        datacatalog_v1beta1.types.ColumnSchema(
            column="state", description="State", mode="NULLABLE", type_="STRING"
        )
    )

    # The parent column carries the sub columns built above.
    columns.append(
        datacatalog_v1beta1.types.ColumnSchema(
            column="addresses",
            description="Addresses",
            mode="REPEATED",
            subcolumns=subcolumns,
            type_="RECORD",
        )
    )

    entry.schema.columns.extend(columns)

    # Send the entry to the API for creation.
    # Raises google.api_core.exceptions.AlreadyExists if the Entry already
    # exists within the project.
    entry = client.create_entry(
        request={"parent": entry_group_name, "entry_id": entry_id, "entry": entry}
    )
    print("Created entry {}".format(entry.name))
    # [END data_catalog_create_fileset_v1beta1]
# ---- datacatalog/v1beta1/get_entry.py ----

# To install the latest published package dependency, execute the following:
#   pip install google-cloud-datacatalog

# sample-metadata
#   title:
#   description: Get Entry
#   usage: python3 samples/v1beta1/datacatalog_get_entry.py [--project_id "[Google Cloud Project ID]"] [--location_id "[Google Cloud Location ID]"] [--entry_group_id "[Entry Group ID]"] [--entry_id "[Entry ID]"]


def sample_get_entry(
    project_id: str, location_id: str, entry_group_id: str, entry_id: str
):
    """Fetch one entry by its IDs, print its name, type and linked resource, and return it."""
    # [START data_catalog_get_entry_v1beta1]
    from google.cloud import datacatalog_v1beta1

    """
    Get Entry

    Args:
      project_id (str): Your Google Cloud project ID
      location_id (str): Google Cloud region, e.g. us-central1
      entry_group_id (str): ID of the Entry Group, e.g. @bigquery, @pubsub, my_entry_group
      entry_id (str): ID of the Entry
    """

    client = datacatalog_v1beta1.DataCatalogClient()

    # project_id = '[Google Cloud Project ID]'
    # location_id = '[Google Cloud Location ID]'
    # entry_group_id = '[Entry Group ID]'
    # entry_id = '[Entry ID]'
    name = client.entry_path(project_id, location_id, entry_group_id, entry_id)

    entry = client.get_entry(request={"name": name})
    print(f"Entry name: {entry.name}")
    print(f"Entry type: {datacatalog_v1beta1.EntryType(entry.type_).name}")
    print(f"Linked resource: {entry.linked_resource}")
    # [END data_catalog_get_entry_v1beta1]
    return entry


def main(argv=None):
    """Parse the command-line flags and run ``sample_get_entry``.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``.
    """
    import argparse

    parser = argparse.ArgumentParser()
    # argparse's converter keyword is ``type``. The earlier ``type_`` spelling
    # made every add_argument() call raise TypeError before any parsing.
    parser.add_argument("--project_id", type=str, default="[Google Cloud Project ID]")
    parser.add_argument(
        "--location_id", type=str, default="[Google Cloud Location ID]"
    )
    parser.add_argument("--entry_group_id", type=str, default="[Entry Group ID]")
    parser.add_argument("--entry_id", type=str, default="[Entry ID]")
    args = parser.parse_args(argv)

    sample_get_entry(
        args.project_id, args.location_id, args.entry_group_id, args.entry_id
    )


if __name__ == "__main__":
    main()


# ---- datacatalog/v1beta1/lookup_entry.py ----

# To install the latest published package dependency, execute the following:
#   pip install google-cloud-datacatalog

# sample-metadata
#   title:
#   description: Lookup Entry
#   usage: python3 samples/v1beta1/datacatalog_lookup_entry.py [--resource_name "[Full Resource Name]"]


def sample_lookup_entry(resource_name: str):
    """Look up the entry for a full resource name, print its details, and return it."""
    # [START data_catalog_lookup_entry_v1beta1]
    from google.cloud import datacatalog_v1beta1

    """
    Lookup Entry

    Args:
      resource_name (str): The full name of the Google Cloud Platform resource the Data
      Catalog entry represents.
      See: https://cloud.google.com/apis/design/resource_names#full_resource_name
      Examples:
      //bigquery.googleapis.com/projects/bigquery-public-data/datasets/new_york_taxi_trips/tables/taxi_zone_geom
      //pubsub.googleapis.com/projects/pubsub-public-data/topics/taxirides-realtime
    """

    client = datacatalog_v1beta1.DataCatalogClient()
    entry = client.lookup_entry(request={"linked_resource": resource_name})
    print(f"Entry name: {entry.name}")
    print(f"Entry type: {datacatalog_v1beta1.EntryType(entry.type_).name}")
    print(f"Linked resource: {entry.linked_resource}")
    # [END data_catalog_lookup_entry_v1beta1]
    return entry


def main(argv=None):
    """Parse ``--resource_name`` and run ``sample_lookup_entry``.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``.
    """
    import argparse

    parser = argparse.ArgumentParser()
    # ``type`` (not ``type_``) is the keyword argparse accepts; the old
    # spelling raised TypeError as soon as the script started.
    parser.add_argument("--resource_name", type=str, default="[Full Resource Name]")
    args = parser.parse_args(argv)

    sample_lookup_entry(args.resource_name)


if __name__ == "__main__":
    main()
# ---- datacatalog/v1beta1/lookup_entry_sql_resource.py ----

# To install the latest published package dependency, execute the following:
#   pip install google-cloud-datacatalog

# sample-metadata
#   title:
#   description: Lookup Entry using SQL resource
#   usage: python3 samples/v1beta1/datacatalog_lookup_entry_sql_resource.py [--sql_name "[SQL Resource Name]"]


def sample_lookup_entry(sql_name: str):
    """Look up the entry for a SQL resource name, print its details, and return it."""
    # [START data_catalog_lookup_entry_sql_resource_v1beta1]
    from google.cloud import datacatalog_v1beta1

    """
    Lookup Entry using SQL resource

    Args:
      sql_name (str): The SQL name of the Google Cloud Platform resource the Data Catalog
      entry represents.
      Examples:
      bigquery.table.`bigquery-public-data`.new_york_taxi_trips.taxi_zone_geom
      pubsub.topic.`pubsub-public-data`.`taxirides-realtime`
    """

    catalog = datacatalog_v1beta1.DataCatalogClient()

    # sql_name = '[SQL Resource Name]'
    lookup_request = {"sql_resource": sql_name}
    entry = catalog.lookup_entry(request=lookup_request)
    print(f"Entry name: {entry.name}")
    print(f"Entry type: {datacatalog_v1beta1.EntryType(entry.type_).name}")
    print(f"Linked resource: {entry.linked_resource}")
    # [END data_catalog_lookup_entry_sql_resource_v1beta1]
    return entry


def main():
    """Read ``--sql_name`` from the command line and run the lookup sample."""
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--sql_name", type=str, default="[SQL Resource Name]")
    parsed = cli.parse_args()

    sample_lookup_entry(parsed.sql_name)


if __name__ == "__main__":
    main()
# ---- datacatalog/v1beta1/search.py ----

# To install the latest published package dependency, execute the following:
#   pip install google-cloud-datacatalog

# sample-metadata
#   title:
#   description: Search Catalog
#   usage: python3 samples/v1beta1/datacatalog_search.py [--include_project_id "[Google Cloud Project ID]"] [--include_gcp_public_datasets false] [--query "[String in search query syntax]"]


def sample_search_catalog(
    include_project_id: str, include_gcp_public_datasets: bool, query: str
):
    """Search the catalog within one project, print each result, and return the results."""
    # [START data_catalog_search_v1beta1]
    from google.cloud import datacatalog_v1beta1

    """
    Search Catalog

    Args:
      include_project_id (str): Your Google Cloud project ID.
      include_gcp_public_datasets (bool): If true, include Google Cloud Platform (GCP) public
      datasets in the search results.
      query (str): Your query string.
      See: https://cloud.google.com/data-catalog/docs/how-to/search-reference
      Example: system=bigquery type=dataset
    """

    client = datacatalog_v1beta1.DataCatalogClient()

    # include_project_id = '[Google Cloud Project ID]'
    # include_gcp_public_datasets = False
    # query = '[String in search query syntax]'
    include_project_ids = [include_project_id]
    scope = {
        "include_project_ids": include_project_ids,
        "include_gcp_public_datasets": include_gcp_public_datasets,
    }

    # Iterate over all results
    results = client.search_catalog(request={"scope": scope, "query": query})
    for response_item in results:
        print(
            f"Result type: {datacatalog_v1beta1.SearchResultType(response_item.search_result_type).name}"
        )
        print(f"Result subtype: {response_item.search_result_subtype}")
        print(f"Relative resource name: {response_item.relative_resource_name}")
        print(f"Linked resource: {response_item.linked_resource}\n")
    # [END data_catalog_search_v1beta1]
    return results


def _str_to_bool(value: str) -> bool:
    """Convert a command-line true/false literal to ``bool``.

    Raises:
        ValueError: If ``value`` is not a recognised boolean literal.
            argparse reports this as an invalid argument value.
    """
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    if normalized in ("false", "f", "no", "n", "0"):
        return False
    raise ValueError(f"not a boolean literal: {value!r}")


def main(argv=None):
    """Parse the command-line flags and run ``sample_search_catalog``.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--include_project_id", type=str, default="[Google Cloud Project ID]"
    )
    # ``type=bool`` would call bool() on the raw string, so the documented
    # "--include_gcp_public_datasets false" evaluated to True. Parse the
    # literal explicitly instead.
    parser.add_argument(
        "--include_gcp_public_datasets", type=_str_to_bool, default=False
    )
    parser.add_argument("--query", type=str, default="[String in search query syntax]")
    args = parser.parse_args(argv)

    sample_search_catalog(
        args.include_project_id, args.include_gcp_public_datasets, args.query
    )


if __name__ == "__main__":
    main()
import create_entry_group


def test_create_entry_group(capsys, client, project_id, random_entry_group_id):
    """Run the sample and expect the new group's us-central1 resource name in stdout."""
    create_entry_group.create_entry_group(project_id, random_entry_group_id)

    # Build the expected line first, then compare against captured stdout.
    message_template = "Created entry group" " projects/{}/locations/{}/entryGroups/{}"
    expected_message = message_template.format(
        project_id, "us-central1", random_entry_group_id
    )
    captured = capsys.readouterr()
    assert expected_message in captured.out
import re

import create_fileset_entry


def test_create_fileset_entry(capsys, client, random_entry_name):
    """Create a fileset entry under the fixture's name and check the printed name.

    ``random_entry_name`` is split at ``/entries/`` into the entry group name
    and the entry ID that the sample takes as separate arguments.
    """
    # The named groups must be spelled out: "(?P.+?)" is not valid regex
    # syntax, and the .group() calls below look the groups up by name.
    entry_name_pattern = r"(?P<entry_group_name>.+?)/entries/(?P<entry_id>.+?$)"
    entry_name_matches = re.match(entry_name_pattern, random_entry_name)
    assert entry_name_matches is not None, random_entry_name
    entry_group_name = entry_name_matches.group("entry_group_name")
    entry_id = entry_name_matches.group("entry_id")

    create_fileset_entry.create_fileset_entry(client, entry_group_name, entry_id)
    out, err = capsys.readouterr()
    assert "Created entry {}".format(random_entry_name) in out
import get_entry


def test_get_entry(client, entry):
    """Fetch the fixture entry through the get_entry sample and check its name.

    The entry name is split into its components with
    ``client.parse_entry_path``; those components are passed to the sample,
    and the returned entry must carry the original name.
    """
    path_parts = client.parse_entry_path(entry)
    fetched = get_entry.sample_get_entry(
        path_parts["project"],
        path_parts["location"],
        path_parts["entry_group"],
        path_parts["entry"],
    )
    assert fetched.name == entry


# diff --git a/datacatalog/v1beta1/test_lookup_entry.py b/datacatalog/v1beta1/test_lookup_entry.py
# new file mode 100644
# index 000000000000..5091cd2b0255
# --- /dev/null
# +++ b/datacatalog/v1beta1/test_lookup_entry.py
# @@ -0,0 +1,27 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import lookup_entry

BIGQUERY_PROJECT = "bigquery-public-data"
BIGQUERY_DATASET = "new_york_taxi_trips"


def test_lookup_entry(client, entry, project_id):
    """Look up a BigQuery dataset through the lookup_entry sample.

    The linked-resource name is built from the module-level project and
    dataset constants; the entry returned by the sample must report that same
    ``linked_resource``.
    """
    # NOTE(review): ``client``, ``entry`` and ``project_id`` are requested but
    # not referenced in the body; presumably only for fixture setup -- confirm.
    dataset_path = f"projects/{BIGQUERY_PROJECT}/datasets/{BIGQUERY_DATASET}"
    linked_resource = f"//bigquery.googleapis.com/{dataset_path}"

    looked_up = lookup_entry.sample_lookup_entry(linked_resource)
    assert looked_up.linked_resource == linked_resource


# diff --git a/datacatalog/v1beta1/test_lookup_entry_sql_resource.py b/datacatalog/v1beta1/test_lookup_entry_sql_resource.py
# new file mode 100644
# index 000000000000..daf45523a2a1
# --- /dev/null
# +++ b/datacatalog/v1beta1/test_lookup_entry_sql_resource.py
# @@ -0,0 +1,26 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import lookup_entry_sql_resource

BIGQUERY_PROJECT = "bigquery-public-data"
BIGQUERY_DATASET = "new_york_taxi_trips"


def test_lookup_entry():
    """Look up a BigQuery dataset by its SQL name through the sample.

    The sample is called with the backtick-quoted SQL resource name; the
    returned entry must report the matching ``//bigquery.googleapis.com/...``
    linked-resource name.
    """
    sql_resource = f"bigquery.dataset.`{BIGQUERY_PROJECT}`.`{BIGQUERY_DATASET}`"
    looked_up = lookup_entry_sql_resource.sample_lookup_entry(sql_resource)

    expected_resource = f"//bigquery.googleapis.com/projects/{BIGQUERY_PROJECT}/datasets/{BIGQUERY_DATASET}"
    assert looked_up.linked_resource == expected_resource


# diff --git a/datacatalog/v1beta1/test_search.py b/datacatalog/v1beta1/test_search.py
# new file mode 100644
# index 000000000000..c0ba8eb5ad2b
# --- /dev/null
# +++ b/datacatalog/v1beta1/test_search.py
# @@ -0,0 +1,23 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import search


def test_search_catalog(client, project_id, entry_group_name):
    """Run the search sample with a ``name:`` query and check it returns a value.

    The sample is called with the project id, ``False`` as its second
    positional argument (its meaning is defined by the sample, not here), and
    a query built from the entry group name. Only a non-``None`` result is
    required.
    """
    # NOTE(review): ``client`` is requested but not referenced in the body;
    # presumably it is only needed for fixture setup -- confirm.
    query = f"name:{entry_group_name}"
    search_results = search.sample_search_catalog(project_id, False, query)
    assert search_results is not None