Skip to content

Commit

Permalink
feat: resource tags in dataset (#2090)
Browse files Browse the repository at this point in the history
* feat: resource tags in dataset

* fix: fix unittets

* Delete dataset/pyvenv.cfg

* Update google/cloud/bigquery/dataset.py

Co-authored-by: Lingqing Gan <lingqing.gan@gmail.com>

* Update google/cloud/bigquery/dataset.py

Co-authored-by: Lingqing Gan <lingqing.gan@gmail.com>

* added system tests & fix unittest for none

* add missing assert

* remove venv

* include resourcemanager in noxfile.py

* fix fixture for tag keys

* register tags before using in tests

* handle alreadyexist error

* fix: tag keys & values creation & deletion

* fix comment

* make tag keys unique

* remove unused import

---------

Co-authored-by: Lingqing Gan <lingqing.gan@gmail.com>
  • Loading branch information
keunsoopark and Linchin authored Jan 14, 2025
1 parent 6be0272 commit 3e13016
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 2 deletions.
23 changes: 23 additions & 0 deletions google/cloud/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,7 @@ class Dataset(object):
"storage_billing_model": "storageBillingModel",
"max_time_travel_hours": "maxTimeTravelHours",
"default_rounding_mode": "defaultRoundingMode",
"resource_tags": "resourceTags",
}

def __init__(self, dataset_ref) -> None:
Expand Down Expand Up @@ -801,6 +802,28 @@ def labels(self, value):
raise ValueError("Pass a dict")
self._properties["labels"] = value

@property
def resource_tags(self):
"""Dict[str, str]: Resource tags of the dataset.
Optional. The tags attached to this dataset. Tag keys are globally
unique. Tag key is expected to be in the namespaced format, for
example "123456789012/environment" where 123456789012 is
the ID of the parent organization or project resource for this tag
key. Tag value is expected to be the short name, for example
"Production".
Raises:
ValueError: for invalid value types.
"""
return self._properties.setdefault("resourceTags", {})

@resource_tags.setter
def resource_tags(self, value):
if not isinstance(value, dict) and value is not None:
raise ValueError("Pass a dict")
self._properties["resourceTags"] = value

@property
def default_encryption_configuration(self):
"""google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom
Expand Down
4 changes: 4 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,9 @@ def system(session):
# Data Catalog needed for the column ACL test with a real Policy Tag.
session.install("google-cloud-datacatalog", "-c", constraints_path)

# Resource Manager needed for test with a real Resource Tag.
session.install("google-cloud-resource-manager", "-c", constraints_path)

if session.python in ["3.11", "3.12"]:
extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]"
else:
Expand Down Expand Up @@ -366,6 +369,7 @@ def prerelease_deps(session):
session.install(
"freezegun",
"google-cloud-datacatalog",
"google-cloud-resource-manager",
"google-cloud-storage",
"google-cloud-testutils",
"psutil",
Expand Down
90 changes: 88 additions & 2 deletions tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import time
import unittest
import uuid
import random
import string
from typing import Optional

from google.api_core.exceptions import PreconditionFailed
Expand All @@ -45,6 +47,8 @@
from google.cloud import storage
from google.cloud.datacatalog_v1 import types as datacatalog_types
from google.cloud.datacatalog_v1 import PolicyTagManagerClient
from google.cloud.resourcemanager_v3 import types as resourcemanager_types
from google.cloud.resourcemanager_v3 import TagKeysClient, TagValuesClient
import psutil
import pytest
from test_utils.retry import RetryErrors
Expand Down Expand Up @@ -156,9 +160,12 @@ def setUpModule():
class TestBigQuery(unittest.TestCase):
def setUp(self):
self.to_delete = []
self.to_delete_tag_keys_values = []

def tearDown(self):
policy_tag_client = PolicyTagManagerClient()
tag_keys_client = TagKeysClient()
tag_values_client = TagValuesClient()

def _still_in_use(bad_request):
return any(
Expand All @@ -181,6 +188,18 @@ def _still_in_use(bad_request):
else:
doomed.delete()

# The TagKey cannot be deleted if it has any child TagValues.
for key_values in self.to_delete_tag_keys_values:
tag_key = key_values.pop()

# Delete tag values first
[
tag_values_client.delete_tag_value(name=tag_value.name).result()
for tag_value in key_values
]

tag_keys_client.delete_tag_key(name=tag_key.name).result()

def test_get_service_account_email(self):
client = Config.CLIENT

Expand Down Expand Up @@ -278,33 +297,100 @@ def test_create_dataset_with_default_rounding_mode(self):
self.assertTrue(_dataset_exists(dataset))
self.assertEqual(dataset.default_rounding_mode, "ROUND_HALF_EVEN")

def _create_resource_tag_key_and_values(self, key, values):
tag_key_client = TagKeysClient()
tag_value_client = TagValuesClient()

tag_key_parent = f"projects/{Config.CLIENT.project}"
new_tag_key = resourcemanager_types.TagKey(
short_name=key, parent=tag_key_parent
)
tag_key = tag_key_client.create_tag_key(tag_key=new_tag_key).result()
self.to_delete_tag_keys_values.insert(0, [tag_key])

for value in values:
new_tag_value = resourcemanager_types.TagValue(
short_name=value, parent=tag_key.name
)
tag_value = tag_value_client.create_tag_value(
tag_value=new_tag_value
).result()
self.to_delete_tag_keys_values[0].insert(0, tag_value)

def test_update_dataset(self):
dataset = self.temp_dataset(_make_dataset_id("update_dataset"))
self.assertTrue(_dataset_exists(dataset))
self.assertIsNone(dataset.friendly_name)
self.assertIsNone(dataset.description)
self.assertEqual(dataset.labels, {})
self.assertEqual(dataset.resource_tags, {})
self.assertIs(dataset.is_case_insensitive, False)

# This creates unique tag keys for each of test runnings for different Python versions
tag_postfix = "".join(random.choices(string.ascii_letters + string.digits, k=4))
tag_1 = f"env_{tag_postfix}"
tag_2 = f"component_{tag_postfix}"
tag_3 = f"project_{tag_postfix}"

# Tags need to be created before they can be used in a dataset.
self._create_resource_tag_key_and_values(tag_1, ["prod", "dev"])
self._create_resource_tag_key_and_values(tag_2, ["batch"])
self._create_resource_tag_key_and_values(tag_3, ["atlas"])

dataset.friendly_name = "Friendly"
dataset.description = "Description"
dataset.labels = {"priority": "high", "color": "blue"}
dataset.resource_tags = {
f"{Config.CLIENT.project}/{tag_1}": "prod",
f"{Config.CLIENT.project}/{tag_2}": "batch",
}
dataset.is_case_insensitive = True
ds2 = Config.CLIENT.update_dataset(
dataset, ("friendly_name", "description", "labels", "is_case_insensitive")
dataset,
(
"friendly_name",
"description",
"labels",
"resource_tags",
"is_case_insensitive",
),
)
self.assertEqual(ds2.friendly_name, "Friendly")
self.assertEqual(ds2.description, "Description")
self.assertEqual(ds2.labels, {"priority": "high", "color": "blue"})
self.assertEqual(
ds2.resource_tags,
{
f"{Config.CLIENT.project}/{tag_1}": "prod",
f"{Config.CLIENT.project}/{tag_2}": "batch",
},
)
self.assertIs(ds2.is_case_insensitive, True)

ds2.labels = {
"color": "green", # change
"shape": "circle", # add
"priority": None, # delete
}
ds3 = Config.CLIENT.update_dataset(ds2, ["labels"])
ds2.resource_tags = {
f"{Config.CLIENT.project}/{tag_1}": "dev", # change
f"{Config.CLIENT.project}/{tag_3}": "atlas", # add
f"{Config.CLIENT.project}/{tag_2}": None, # delete
}
ds3 = Config.CLIENT.update_dataset(ds2, ["labels", "resource_tags"])
self.assertEqual(ds3.labels, {"color": "green", "shape": "circle"})
self.assertEqual(
ds3.resource_tags,
{
f"{Config.CLIENT.project}/{tag_1}": "dev",
f"{Config.CLIENT.project}/{tag_3}": "atlas",
},
)

# Remove all tags
ds3.resource_tags = None
ds4 = Config.CLIENT.update_dataset(ds3, ["resource_tags"])
self.assertEqual(ds4.resource_tags, {})

# If we try to update using d2 again, it will fail because the
# previous update changed the ETag.
Expand Down
6 changes: 6 additions & 0 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2028,6 +2028,7 @@ def test_update_dataset(self):
LABELS = {"priority": "high"}
ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}]
EXP = 17
RESOURCE_TAGS = {"123456789012/key": "value"}
RESOURCE = {
"datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
"etag": "etag",
Expand All @@ -2037,6 +2038,7 @@ def test_update_dataset(self):
"defaultTableExpirationMs": EXP,
"labels": LABELS,
"access": ACCESS,
"resourceTags": RESOURCE_TAGS,
}
creds = _make_credentials()
client = self._make_one(project=self.PROJECT, credentials=creds)
Expand All @@ -2048,12 +2050,14 @@ def test_update_dataset(self):
ds.default_table_expiration_ms = EXP
ds.labels = LABELS
ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")]
ds.resource_tags = RESOURCE_TAGS
fields = [
"description",
"friendly_name",
"location",
"labels",
"access_entries",
"resource_tags",
]

with mock.patch(
Expand All @@ -2077,6 +2081,7 @@ def test_update_dataset(self):
"location": LOCATION,
"labels": LABELS,
"access": ACCESS,
"resourceTags": RESOURCE_TAGS,
},
path="/" + PATH,
timeout=7.5,
Expand All @@ -2086,6 +2091,7 @@ def test_update_dataset(self):
self.assertEqual(ds2.location, ds.location)
self.assertEqual(ds2.labels, ds.labels)
self.assertEqual(ds2.access_entries, ds.access_entries)
self.assertEqual(ds2.resource_tags, ds.resource_tags)

# ETag becomes If-Match header.
ds._properties["etag"] = "etag"
Expand Down
5 changes: 5 additions & 0 deletions tests/unit/test_create_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
"tableId": "northern-hemisphere",
}
DEFAULT_ROUNDING_MODE = "ROUND_HALF_EVEN"
RESOURCE_TAGS = {"123456789012/foo": "bar"}
RESOURCE = {
"datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
"etag": "etag",
Expand All @@ -76,6 +77,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
"labels": LABELS,
"access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}],
"defaultRoundingMode": DEFAULT_ROUNDING_MODE,
"resourceTags": RESOURCE_TAGS,
}
conn = client._connection = make_connection(RESOURCE)
entries = [
Expand All @@ -91,6 +93,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
before.default_table_expiration_ms = 3600
before.location = LOCATION
before.labels = LABELS
before.resource_tags = RESOURCE_TAGS
before.default_rounding_mode = DEFAULT_ROUNDING_MODE
after = client.create_dataset(before)
assert after.dataset_id == DS_ID
Expand All @@ -103,6 +106,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
assert after.default_table_expiration_ms == 3600
assert after.labels == LABELS
assert after.default_rounding_mode == DEFAULT_ROUNDING_MODE
assert after.resource_tags == RESOURCE_TAGS

conn.api_request.assert_called_once_with(
method="POST",
Expand All @@ -119,6 +123,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
{"view": VIEW, "role": None},
],
"labels": LABELS,
"resourceTags": RESOURCE_TAGS,
},
timeout=DEFAULT_TIMEOUT,
)
Expand Down
22 changes: 22 additions & 0 deletions tests/unit/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,6 +894,28 @@ def test_location_setter(self):
dataset.location = "LOCATION"
self.assertEqual(dataset.location, "LOCATION")

def test_resource_tags_update_in_place(self):
dataset = self._make_one(self.DS_REF)
tags = dataset.resource_tags
tags["123456789012/foo"] = "bar" # update in place
self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"})

def test_resource_tags_setter(self):
dataset = self._make_one(self.DS_REF)
dataset.resource_tags = {"123456789012/foo": "bar"}
self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"})

def test_resource_tags_setter_bad_value(self):
dataset = self._make_one(self.DS_REF)
with self.assertRaises(ValueError):
dataset.resource_tags = "invalid"
with self.assertRaises(ValueError):
dataset.resource_tags = 123

def test_resource_tags_getter_missing_value(self):
dataset = self._make_one(self.DS_REF)
self.assertEqual(dataset.resource_tags, {})

def test_labels_update_in_place(self):
dataset = self._make_one(self.DS_REF)
del dataset._properties["labels"] # don't start w/ existing dict
Expand Down

0 comments on commit 3e13016

Please sign in to comment.