Skip to content

Commit

Permalink
Merge pull request #8508 from GoogleCloudPlatform/python-datacatalog-migration
Browse files Browse the repository at this point in the history

migrate code from googleapis/python-datacatalog
  • Loading branch information
dandhlee authored Nov 15, 2022
2 parents 1c76780 + 074f650 commit a8c4afb
Show file tree
Hide file tree
Showing 37 changed files with 1,780 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,5 @@
/talent/**/* @GoogleCloudPlatform/python-samples-reviewers
/vision/**/* @GoogleCloudPlatform/python-samples-reviewers
/workflows/**/* @GoogleCloudPlatform/python-samples-reviewers
/datacatalog/**/* @GoogleCloudPlatform/python-samples-reviewers
/kms/**/** @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/python-samples-reviewers
4 changes: 4 additions & 0 deletions .github/blunderbuss.yml
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ assign_issues_by:
- 'api: monitoring'
to:
- GoogleCloudPlatform/dee-observability
- labels:
- 'api: datacatalog'
to:
- GoogleCloudPlatform/python-samples-reviewers
- labels:
- 'api: kms'
- 'api: cloudkms'
Expand Down
76 changes: 76 additions & 0 deletions datacatalog/quickstart/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import datetime
import uuid

import google.auth
from google.cloud import bigquery, datacatalog_v1
import pytest


def temp_suffix():
    """Build a unique suffix for naming temporary test resources.

    The suffix is the current local time formatted as ``%Y%m%d%H%M%S``,
    an underscore, and the first eight hex characters of a random UUID.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    random_part = uuid.uuid4().hex[:8]
    return f"{timestamp}_{random_part}"


@pytest.fixture(scope="session")
def client(credentials):
    """Provide a Data Catalog client built once per test session."""
    datacatalog_client = datacatalog_v1.DataCatalogClient(credentials=credentials)
    return datacatalog_client


@pytest.fixture(scope="session")
def bigquery_client(credentials, project_id):
    """Provide a BigQuery client for ``project_id``, built once per test session."""
    bq_client = bigquery.Client(credentials=credentials, project=project_id)
    return bq_client


@pytest.fixture(scope="session")
def default_credentials():
    """Resolve the default Google credentials once per test session.

    Returns whatever ``google.auth.default`` produces for the
    cloud-platform scope; the ``credentials`` and ``project_id`` fixtures
    read items 0 and 1 of that result respectively.
    """
    cloud_platform_scope = "https://www.googleapis.com/auth/cloud-platform"
    return google.auth.default(scopes=[cloud_platform_scope])


@pytest.fixture(scope="session")
def credentials(default_credentials):
    """Expose the first item of ``default_credentials`` (the credentials)."""
    resolved_credentials = default_credentials[0]
    return resolved_credentials


@pytest.fixture(scope="session")
def project_id(default_credentials):
    """Expose the second item of ``default_credentials`` (the project ID)."""
    resolved_project = default_credentials[1]
    return resolved_project


@pytest.fixture
def dataset_id(bigquery_client):
    """Create a temporary BigQuery dataset and yield its ID.

    After the requesting test finishes, the dataset is deleted together
    with its contents; a dataset that is already gone is not an error.
    """
    temp_dataset_name = f"python_data_catalog_sample_{temp_suffix()}"
    created_dataset = bigquery_client.create_dataset(temp_dataset_name)

    yield created_dataset.dataset_id

    # Teardown: remove the dataset and anything created inside it.
    bigquery_client.delete_dataset(
        created_dataset, delete_contents=True, not_found_ok=True
    )


@pytest.fixture
def table_id(bigquery_client, project_id, dataset_id):
    """Create a temporary BigQuery table in ``dataset_id`` and yield its ID.

    After the requesting test finishes, the table is deleted; a table
    that is already gone is not an error.
    """
    temp_table_name = f"python_data_catalog_sample_{temp_suffix()}"
    full_table_path = f"{project_id}.{dataset_id}.{temp_table_name}"
    created_table = bigquery_client.create_table(bigquery.Table(full_table_path))

    yield created_table.table_id

    # Teardown: remove the table created above.
    bigquery_client.delete_table(created_table, not_found_ok=True)


@pytest.fixture
def random_tag_template_id():
    """Yield a unique tag template ID; this fixture creates nothing and cleans up nothing."""
    yield f"python_sample_{temp_suffix()}"
131 changes: 131 additions & 0 deletions datacatalog/quickstart/quickstart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def quickstart(override_values=None):
    """Create a tag template and attach a tag to a BigQuery table.

    Args:
        override_values: Optional mapping whose ``project_id``,
            ``dataset_id``, ``table_id`` and ``tag_template_id`` entries
            replace the placeholder values below. The test harness uses
            it; when omitted, the placeholders are used as written.

    Raises:
        OSError: Re-raised, after being reported on stdout, if creating
            the tag template fails with it.
    """
    # [START data_catalog_quickstart]
    # Import required modules.
    from google.cloud import datacatalog_v1

    # TODO: Set these values before running the sample.
    # Google Cloud Platform project.
    project_id = "my_project"
    # Set dataset_id to the ID of existing dataset.
    dataset_id = "demo_dataset"
    # Set table_id to the ID of existing table.
    table_id = "trips"
    # Tag template to create.
    tag_template_id = "example_tag_template"

    # [END data_catalog_quickstart]

    # To facilitate testing, we replace values with alternatives
    # provided by the testing harness.
    override_values = override_values or {}
    project_id = override_values.get("project_id", project_id)
    dataset_id = override_values.get("dataset_id", dataset_id)
    table_id = override_values.get("table_id", table_id)
    tag_template_id = override_values.get("tag_template_id", tag_template_id)

    # [START data_catalog_quickstart]
    # For all regions available, see:
    # https://cloud.google.com/data-catalog/docs/concepts/regions
    location = "us-central1"

    # Use Application Default Credentials to create a new
    # Data Catalog client, for example by setting the
    # GOOGLE_APPLICATION_CREDENTIALS environment variable to the
    # location of a service account key file.
    datacatalog_client = datacatalog_v1.DataCatalogClient()

    # Create a Tag Template.
    tag_template = datacatalog_v1.types.TagTemplate()

    tag_template.display_name = "Demo Tag Template"

    tag_template.fields["source"] = datacatalog_v1.types.TagTemplateField()
    tag_template.fields["source"].display_name = "Source of data asset"
    tag_template.fields[
        "source"
    ].type_.primitive_type = datacatalog_v1.types.FieldType.PrimitiveType.STRING

    tag_template.fields["num_rows"] = datacatalog_v1.types.TagTemplateField()
    tag_template.fields["num_rows"].display_name = "Number of rows in data asset"
    tag_template.fields[
        "num_rows"
    ].type_.primitive_type = datacatalog_v1.types.FieldType.PrimitiveType.DOUBLE

    tag_template.fields["has_pii"] = datacatalog_v1.types.TagTemplateField()
    tag_template.fields["has_pii"].display_name = "Has PII"
    tag_template.fields[
        "has_pii"
    ].type_.primitive_type = datacatalog_v1.types.FieldType.PrimitiveType.BOOL

    tag_template.fields["pii_type"] = datacatalog_v1.types.TagTemplateField()
    tag_template.fields["pii_type"].display_name = "PII type"

    for display_name in ["EMAIL", "SOCIAL SECURITY NUMBER", "NONE"]:
        enum_value = datacatalog_v1.types.FieldType.EnumType.EnumValue(
            display_name=display_name
        )
        tag_template.fields["pii_type"].type_.enum_type.allowed_values.append(
            enum_value
        )

    expected_template_name = datacatalog_v1.DataCatalogClient.tag_template_path(
        project_id, location, tag_template_id
    )

    # Create the Tag Template.
    try:
        tag_template = datacatalog_client.create_tag_template(
            parent=f"projects/{project_id}/locations/{location}",
            tag_template_id=tag_template_id,
            tag_template=tag_template,
        )
        print(f"Created template: {tag_template.name}")
    except OSError as e:
        print(f"Cannot create template: {expected_template_name}")
        print(f"{e}")
        # The locally built template never had its name set, so the tag
        # below would have no template to reference. Stop here instead of
        # continuing with an unusable template.
        raise

    # Lookup Data Catalog's Entry referring to the table.
    resource_name = (
        f"//bigquery.googleapis.com/projects/{project_id}"
        f"/datasets/{dataset_id}/tables/{table_id}"
    )
    table_entry = datacatalog_client.lookup_entry(
        request={"linked_resource": resource_name}
    )

    # Attach a Tag to the table.
    tag = datacatalog_v1.types.Tag()

    tag.template = tag_template.name
    tag.name = "my_super_cool_tag"

    tag.fields["source"] = datacatalog_v1.types.TagField()
    tag.fields["source"].string_value = "Copied from tlc_yellow_trips_2018"

    tag.fields["num_rows"] = datacatalog_v1.types.TagField()
    tag.fields["num_rows"].double_value = 113496874

    tag.fields["has_pii"] = datacatalog_v1.types.TagField()
    tag.fields["has_pii"].bool_value = False

    tag.fields["pii_type"] = datacatalog_v1.types.TagField()
    tag.fields["pii_type"].enum_value.display_name = "NONE"

    tag = datacatalog_client.create_tag(parent=table_entry.name, tag=tag)
    print(f"Created tag: {tag.name}")
    # [END data_catalog_quickstart]
35 changes: 35 additions & 0 deletions datacatalog/quickstart/quickstart_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import quickstart


def test_quickstart(
    capsys, client, project_id, dataset_id, table_id, random_tag_template_id
):
    """Run the quickstart against temporary resources and check its output.

    The tag template created by the sample is deleted even when the sample
    or an assertion fails, so a failing run does not leave it behind.
    """
    location = "us-central1"
    override_values = {
        "project_id": project_id,
        "dataset_id": dataset_id,
        "table_id": table_id,
        "tag_template_id": random_tag_template_id,
    }
    tag_template_name = client.tag_template_path(
        project_id, location, random_tag_template_id
    )
    try:
        quickstart.quickstart(override_values)
        out, err = capsys.readouterr()
        assert "Created template: {}".format(tag_template_name) in out
        assert "Created tag:" in out
    finally:
        # Cleanup runs on failure too. If the delete itself raises, Python
        # still reports the original failure as the exception's context.
        client.delete_tag_template(name=tag_template_name, force=True)
2 changes: 2 additions & 0 deletions datacatalog/quickstart/requirements-test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pytest==7.2.0
google-cloud-bigquery==3.3.5
1 change: 1 addition & 0 deletions datacatalog/quickstart/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
google-cloud-datacatalog==3.9.3
Loading

0 comments on commit a8c4afb

Please sign in to comment.