From 62960f255d05b15940a8d2cdc595592175fada11 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 10 Jan 2025 13:22:06 -0500 Subject: [PATCH] feat: adds the SerDeInfo class and tests (#2108) * feat: adds SerDeInfo class and tests * cleans up type hints and some minor tweaks --- google/cloud/bigquery/schema.py | 88 +++++++++++++++++++++++++++++++ tests/unit/test_schema.py | 92 +++++++++++++++++++++++++++++++-- 2 files changed, 176 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index b062396cf..f93877d45 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -14,8 +14,10 @@ """Schemas for BigQuery tables / queries.""" +from __future__ import annotations import collections import enum +import typing from typing import Any, cast, Dict, Iterable, Optional, Union from google.cloud.bigquery import _helpers @@ -556,3 +558,89 @@ def to_api_repr(self) -> dict: """ answer = {"names": list(self.names)} return answer + + +class SerDeInfo: + """Serializer and deserializer information. + + Args: + serialization_library (str): Required. Specifies a fully-qualified class + name of the serialization library that is responsible for the + translation of data between table representation and the underlying + low-level input and output format structures. The maximum length is + 256 characters. + name (Optional[str]): Name of the SerDe. The maximum length is 256 + characters. + parameters (Optional[dict[str, str]]): Key-value pairs that define the initialization + parameters for the serialization library. Maximum size 10 Kib. + """ + + def __init__( + self, + serialization_library: str, + name: Optional[str] = None, + parameters: Optional[dict[str, str]] = None, + ): + self._properties: Dict[str, Any] = {} + self.serialization_library = serialization_library + self.name = name + self.parameters = parameters + + @property + def serialization_library(self) -> str: + """Required. 
Specifies a fully-qualified class name of the serialization + library that is responsible for the translation of data between table + representation and the underlying low-level input and output format + structures. The maximum length is 256 characters.""" + + return typing.cast(str, self._properties.get("serializationLibrary")) + + @serialization_library.setter + def serialization_library(self, value: str): + value = _helpers._isinstance_or_raise(value, str, none_allowed=False) + self._properties["serializationLibrary"] = value + + @property + def name(self) -> Optional[str]: + """Optional. Name of the SerDe. The maximum length is 256 characters.""" + + return self._properties.get("name") + + @name.setter + def name(self, value: Optional[str] = None): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["name"] = value + + @property + def parameters(self) -> Optional[dict[str, str]]: + """Optional. Key-value pairs that define the initialization parameters + for the serialization library. Maximum size 10 Kib.""" + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Optional[dict[str, str]] = None): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> SerDeInfo: + """Factory: constructs an instance of the class (cls) + given its API representation. + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + Returns: + An instance of the class initialized with data from 'api_repr'. 
+ """ + config = cls("PLACEHOLDER") + config._properties = api_repr + return config diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 4b0b28158..380067dc8 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -20,6 +20,7 @@ from google.cloud import bigquery from google.cloud.bigquery.standard_sql import StandardSqlStructType +from google.cloud.bigquery import schema from google.cloud.bigquery.schema import PolicyTagList @@ -130,8 +131,6 @@ def test_constructor_range_str(self): self.assertEqual(field.range_element_type.element_type, "DATETIME") def test_to_api_repr(self): - from google.cloud.bigquery.schema import PolicyTagList - policy = PolicyTagList(names=("foo", "bar")) self.assertEqual( policy.to_api_repr(), @@ -886,8 +885,6 @@ def test_valid_mapping_representation(self): class TestPolicyTags(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery.schema import PolicyTagList - return PolicyTagList def _make_one(self, *args, **kw): @@ -1129,3 +1126,90 @@ def test_to_api_repr_parameterized(field, api): from google.cloud.bigquery.schema import SchemaField assert SchemaField(**field).to_api_repr() == api + + +class TestSerDeInfo: + """Tests for the SerDeInfo class.""" + + @staticmethod + def _get_target_class(): + return schema.SerDeInfo + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + ("testpath.to.LazySimpleSerDe", None, None), + ("testpath.to.LazySimpleSerDe", "serde_name", None), + ("testpath.to.LazySimpleSerDe", None, {"key": "value"}), + ("testpath.to.LazySimpleSerDe", "serde_name", {"key": "value"}), + ], + ) + def test_ctor_valid_input(self, serialization_library, name, parameters): + serde_info = self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + assert serde_info.serialization_library == 
serialization_library + assert serde_info.name == name + assert serde_info.parameters == parameters + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + (123, None, None), + ("testpath.to.LazySimpleSerDe", 123, None), + ("testpath.to.LazySimpleSerDe", None, ["test", "list"]), + ("testpath.to.LazySimpleSerDe", None, 123), + ], + ) + def test_ctor_invalid_input(self, serialization_library, name, parameters): + with pytest.raises(TypeError) as e: + self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + serde_info = self._make_one( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_name", + parameters={"key": "value"}, + ) + expected_repr = { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_name", + "parameters": {"key": "value"}, + } + assert serde_info.to_api_repr() == expected_repr + + def test_from_api_repr(self): + """GIVEN an api representation of a SerDeInfo object (i.e. resource) + WHEN converted into a SerDeInfo object using from_api_repr() + THEN it will have the representation in dict format as a SerDeInfo + object made directly (via _make_one()) and represented in dict format. + """ + api_repr = { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_name", + "parameters": {"key": "value"}, + } + + expected = self._make_one( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_name", + parameters={"key": "value"}, + ) + + klass = self._get_target_class() + result = klass.from_api_repr(api_repr) + + # We convert both to dict format because these classes do not have a + # __eq__() method to facilitate direct equality comparisons. + assert result.to_api_repr() == expected.to_api_repr()