From 3e130166f43dcc06704fe90edf9068dfd44842a6 Mon Sep 17 00:00:00 2001 From: Keunsoo Park <43742836+keunsoopark@users.noreply.github.com> Date: Tue, 14 Jan 2025 22:17:13 +0100 Subject: [PATCH] feat: resource tags in dataset (#2090) * feat: resource tags in dataset * fix: fix unittets * Delete dataset/pyvenv.cfg * Update google/cloud/bigquery/dataset.py Co-authored-by: Lingqing Gan * Update google/cloud/bigquery/dataset.py Co-authored-by: Lingqing Gan * added system tests & fix unittest for none * add missing assert * remove venv * include resourcemanager in noxfile.py * fix fixture for tag keys * register tags before using in tests * handle alreadyexist error * fix: tag keys & values creation & deletion * fix comment * make tag keys unique * remove unused import --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/dataset.py | 23 ++++++++ noxfile.py | 4 ++ tests/system/test_client.py | 90 ++++++++++++++++++++++++++++++- tests/unit/test_client.py | 6 +++ tests/unit/test_create_dataset.py | 5 ++ tests/unit/test_dataset.py | 22 ++++++++ 6 files changed, 148 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index c49a52faf..4d06d729d 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -530,6 +530,7 @@ class Dataset(object): "storage_billing_model": "storageBillingModel", "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", + "resource_tags": "resourceTags", } def __init__(self, dataset_ref) -> None: @@ -801,6 +802,28 @@ def labels(self, value): raise ValueError("Pass a dict") self._properties["labels"] = value + @property + def resource_tags(self): + """Dict[str, str]: Resource tags of the dataset. + + Optional. The tags attached to this dataset. Tag keys are globally + unique. Tag key is expected to be in the namespaced format, for + example "123456789012/environment" where 123456789012 is + the ID of the parent organization or project resource for this tag + key. Tag value is expected to be the short name, for example + "Production". + + Raises: + ValueError: for invalid value types. + """ + return self._properties.setdefault("resourceTags", {}) + + @resource_tags.setter + def resource_tags(self, value): + if not isinstance(value, dict) and value is not None: + raise ValueError("Pass a dict") + self._properties["resourceTags"] = value + @property def default_encryption_configuration(self): """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom diff --git a/noxfile.py b/noxfile.py index 750a6b459..e08956b11 100644 --- a/noxfile.py +++ b/noxfile.py @@ -219,6 +219,9 @@ def system(session): # Data Catalog needed for the column ACL test with a real Policy Tag. session.install("google-cloud-datacatalog", "-c", constraints_path) + # Resource Manager needed for test with a real Resource Tag. + session.install("google-cloud-resource-manager", "-c", constraints_path) + if session.python in ["3.11", "3.12"]: extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: @@ -366,6 +369,7 @@ def prerelease_deps(session): session.install( "freezegun", "google-cloud-datacatalog", + "google-cloud-resource-manager", "google-cloud-storage", "google-cloud-testutils", "psutil", diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 95c679a14..c0dd83b12 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -25,6 +25,8 @@ import time import unittest import uuid +import random +import string from typing import Optional from google.api_core.exceptions import PreconditionFailed @@ -45,6 +47,8 @@ from google.cloud import storage from google.cloud.datacatalog_v1 import types as datacatalog_types from google.cloud.datacatalog_v1 import PolicyTagManagerClient +from google.cloud.resourcemanager_v3 import types as resourcemanager_types +from google.cloud.resourcemanager_v3 import TagKeysClient, TagValuesClient import psutil import pytest from test_utils.retry import RetryErrors @@ -156,9 +160,12 @@ def setUpModule(): class TestBigQuery(unittest.TestCase): def setUp(self): self.to_delete = [] + self.to_delete_tag_keys_values = [] def tearDown(self): policy_tag_client = PolicyTagManagerClient() + tag_keys_client = TagKeysClient() + tag_values_client = TagValuesClient() def _still_in_use(bad_request): return any( @@ -181,6 +188,18 @@ def _still_in_use(bad_request): else: doomed.delete() + # The TagKey cannot be deleted if it has any child TagValues. + for key_values in self.to_delete_tag_keys_values: + tag_key = key_values.pop() + + # Delete tag values first + [ + tag_values_client.delete_tag_value(name=tag_value.name).result() + for tag_value in key_values + ] + + tag_keys_client.delete_tag_key(name=tag_key.name).result() + def test_get_service_account_email(self): client = Config.CLIENT @@ -278,24 +297,74 @@ def test_create_dataset_with_default_rounding_mode(self): self.assertTrue(_dataset_exists(dataset)) self.assertEqual(dataset.default_rounding_mode, "ROUND_HALF_EVEN") + def _create_resource_tag_key_and_values(self, key, values): + tag_key_client = TagKeysClient() + tag_value_client = TagValuesClient() + + tag_key_parent = f"projects/{Config.CLIENT.project}" + new_tag_key = resourcemanager_types.TagKey( + short_name=key, parent=tag_key_parent + ) + tag_key = tag_key_client.create_tag_key(tag_key=new_tag_key).result() + self.to_delete_tag_keys_values.insert(0, [tag_key]) + + for value in values: + new_tag_value = resourcemanager_types.TagValue( + short_name=value, parent=tag_key.name + ) + tag_value = tag_value_client.create_tag_value( + tag_value=new_tag_value + ).result() + self.to_delete_tag_keys_values[0].insert(0, tag_value) + def test_update_dataset(self): dataset = self.temp_dataset(_make_dataset_id("update_dataset")) self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) self.assertEqual(dataset.labels, {}) + self.assertEqual(dataset.resource_tags, {}) self.assertIs(dataset.is_case_insensitive, False) + # This creates unique tag keys for each of test runnings for different Python versions + tag_postfix = "".join(random.choices(string.ascii_letters + string.digits, k=4)) + tag_1 = f"env_{tag_postfix}" + tag_2 = f"component_{tag_postfix}" + tag_3 = f"project_{tag_postfix}" + + # Tags need to be created before they can be used in a dataset. + self._create_resource_tag_key_and_values(tag_1, ["prod", "dev"]) + self._create_resource_tag_key_and_values(tag_2, ["batch"]) + self._create_resource_tag_key_and_values(tag_3, ["atlas"]) + dataset.friendly_name = "Friendly" dataset.description = "Description" dataset.labels = {"priority": "high", "color": "blue"} + dataset.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "prod", + f"{Config.CLIENT.project}/{tag_2}": "batch", + } dataset.is_case_insensitive = True ds2 = Config.CLIENT.update_dataset( - dataset, ("friendly_name", "description", "labels", "is_case_insensitive") + dataset, + ( + "friendly_name", + "description", + "labels", + "resource_tags", + "is_case_insensitive", + ), ) self.assertEqual(ds2.friendly_name, "Friendly") self.assertEqual(ds2.description, "Description") self.assertEqual(ds2.labels, {"priority": "high", "color": "blue"}) + self.assertEqual( + ds2.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "prod", + f"{Config.CLIENT.project}/{tag_2}": "batch", + }, + ) self.assertIs(ds2.is_case_insensitive, True) ds2.labels = { @@ -303,8 +372,25 @@ def test_update_dataset(self): "shape": "circle", # add "priority": None, # delete } - ds3 = Config.CLIENT.update_dataset(ds2, ["labels"]) + ds2.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "dev", # change + f"{Config.CLIENT.project}/{tag_3}": "atlas", # add + f"{Config.CLIENT.project}/{tag_2}": None, # delete + } + ds3 = Config.CLIENT.update_dataset(ds2, ["labels", "resource_tags"]) self.assertEqual(ds3.labels, {"color": "green", "shape": "circle"}) + self.assertEqual( + ds3.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "dev", + f"{Config.CLIENT.project}/{tag_3}": "atlas", + }, + ) + + # Remove all tags + ds3.resource_tags = None + ds4 = Config.CLIENT.update_dataset(ds3, ["resource_tags"]) + self.assertEqual(ds4.resource_tags, {}) # If we try to update using d2 again, it will fail because the # previous update changed the ETag. diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index cd336b73f..14089b031 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2028,6 +2028,7 @@ def test_update_dataset(self): LABELS = {"priority": "high"} ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}] EXP = 17 + RESOURCE_TAGS = {"123456789012/key": "value"} RESOURCE = { "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, "etag": "etag", @@ -2037,6 +2038,7 @@ def test_update_dataset(self): "defaultTableExpirationMs": EXP, "labels": LABELS, "access": ACCESS, + "resourceTags": RESOURCE_TAGS, } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -2048,12 +2050,14 @@ def test_update_dataset(self): ds.default_table_expiration_ms = EXP ds.labels = LABELS ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] + ds.resource_tags = RESOURCE_TAGS fields = [ "description", "friendly_name", "location", "labels", "access_entries", + "resource_tags", ] with mock.patch( @@ -2077,6 +2081,7 @@ def test_update_dataset(self): "location": LOCATION, "labels": LABELS, "access": ACCESS, + "resourceTags": RESOURCE_TAGS, }, path="/" + PATH, timeout=7.5, @@ -2086,6 +2091,7 @@ def test_update_dataset(self): self.assertEqual(ds2.location, ds.location) self.assertEqual(ds2.labels, ds.labels) self.assertEqual(ds2.access_entries, ds.access_entries) + self.assertEqual(ds2.resource_tags, ds.resource_tags) # ETag becomes If-Match header. ds._properties["etag"] = "etag" diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index a2491a812..bd7c6a8f8 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -65,6 +65,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "tableId": "northern-hemisphere", } DEFAULT_ROUNDING_MODE = "ROUND_HALF_EVEN" + RESOURCE_TAGS = {"123456789012/foo": "bar"} RESOURCE = { "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, "etag": "etag", @@ -76,6 +77,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "labels": LABELS, "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], "defaultRoundingMode": DEFAULT_ROUNDING_MODE, + "resourceTags": RESOURCE_TAGS, } conn = client._connection = make_connection(RESOURCE) entries = [ @@ -91,6 +93,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): before.default_table_expiration_ms = 3600 before.location = LOCATION before.labels = LABELS + before.resource_tags = RESOURCE_TAGS before.default_rounding_mode = DEFAULT_ROUNDING_MODE after = client.create_dataset(before) assert after.dataset_id == DS_ID @@ -103,6 +106,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): assert after.default_table_expiration_ms == 3600 assert after.labels == LABELS assert after.default_rounding_mode == DEFAULT_ROUNDING_MODE + assert after.resource_tags == RESOURCE_TAGS conn.api_request.assert_called_once_with( method="POST", @@ -119,6 +123,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): {"view": VIEW, "role": None}, ], "labels": LABELS, + "resourceTags": RESOURCE_TAGS, }, timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index c0164bc73..46bcd6611 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -894,6 +894,28 @@ def test_location_setter(self): dataset.location = "LOCATION" self.assertEqual(dataset.location, "LOCATION") + def test_resource_tags_update_in_place(self): + dataset = self._make_one(self.DS_REF) + tags = dataset.resource_tags + tags["123456789012/foo"] = "bar" # update in place + self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"}) + + def test_resource_tags_setter(self): + dataset = self._make_one(self.DS_REF) + dataset.resource_tags = {"123456789012/foo": "bar"} + self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"}) + + def test_resource_tags_setter_bad_value(self): + dataset = self._make_one(self.DS_REF) + with self.assertRaises(ValueError): + dataset.resource_tags = "invalid" + with self.assertRaises(ValueError): + dataset.resource_tags = 123 + + def test_resource_tags_getter_missing_value(self): + dataset = self._make_one(self.DS_REF) + self.assertEqual(dataset.resource_tags, {}) + def test_labels_update_in_place(self): dataset = self._make_one(self.DS_REF) del dataset._properties["labels"] # don't start w/ existing dict