diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 1eda80712..6fd8d525d 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -22,7 +22,7 @@ import re import os import warnings -from typing import Optional, Union +from typing import Optional, Union, Any, Tuple, Type from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -1004,3 +1004,36 @@ def _verify_job_config_type(job_config, expected_type, param_name="job_config"): job_config=job_config, ) ) + + +def _isinstance_or_raise( + value: Any, + dtype: Union[Type, Tuple[Type, ...]], + none_allowed: Optional[bool] = False, +) -> Any: + """Determine whether a value type matches a given datatype or None. + + Args: + value (Any): Value to be checked. + dtype (type): Expected data type or tuple of data types. + none_allowed Optional(bool): whether value is allowed to be None. Default + is False. + + Returns: + Any: Returns the input value if the type check is successful. + + Raises: + TypeError: If the input value's type does not match the expected data type(s). + """ + if none_allowed and value is None: + return value + + if isinstance(value, dtype): + return value + + or_none = "" + if none_allowed: + or_none = " (or None)" + + msg = f"Pass {value} as a '{dtype}'{or_none}. Got {type(value)}." + raise TypeError(msg) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index c49a52faf..c790c74e2 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -23,10 +23,13 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers +from google.cloud.bigquery._helpers import _isinstance_or_raise from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.routine import Routine, RoutineReference from google.cloud.bigquery.table import Table, TableReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + from typing import Optional, List, Dict, Any, Union @@ -530,6 +533,7 @@ class Dataset(object): "storage_billing_model": "storageBillingModel", "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", + "external_catalog_dataset_options": "externalCatalogDatasetOptions", } def __init__(self, dataset_ref) -> None: @@ -937,10 +941,31 @@ def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" return _helpers._build_resource_from_properties(self, filter_fields) - table = _get_table_reference + @property + def external_catalog_dataset_options(self): + """Options defining open source compatible datasets living in the + BigQuery catalog. 
Contains metadata of open source database, schema
+        or namespace represented by the current dataset."""
-    model = _get_model_reference
+        prop = _helpers._get_sub_prop(
+            self._properties, ["externalCatalogDatasetOptions"]
+        )
+        if prop is not None:
+            prop = ExternalCatalogDatasetOptions.from_api_repr(prop)
+        return prop
+
+    @external_catalog_dataset_options.setter
+    def external_catalog_dataset_options(self, value):
+        value = _isinstance_or_raise(
+            value, ExternalCatalogDatasetOptions, none_allowed=True
+        )
+        if value is not None:
+            value = value.to_api_repr()
+        self._properties[
+            self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"]
+        ] = value
+
+    table = _get_table_reference
+    model = _get_model_reference
     routine = _get_routine_reference

     def __repr__(self):
diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py
index d8cbe9969..bb594bea2 100644
--- a/google/cloud/bigquery/enums.py
+++ b/google/cloud/bigquery/enums.py
@@ -246,6 +246,11 @@ class KeyResultStatementKind:


 class StandardSqlTypeNames(str, enum.Enum):
+    """Enum of allowed SQL type names in schema.SchemaField.
+
+    Datatypes used in GoogleSQL.
+    """
+
     def _generate_next_value_(name, start, count, last_values):
         return name

@@ -267,6 +272,9 @@ def _generate_next_value_(name, start, count, last_values):
     ARRAY = enum.auto()
     STRUCT = enum.auto()
     RANGE = enum.auto()
+    # NOTE: FOREIGN acts as a wrapper for data types
+    # not natively understood by BigQuery unless translated.
+    FOREIGN = enum.auto()


 class EntityTypes(str, enum.Enum):
@@ -285,7 +293,10 @@ class EntityTypes(str, enum.Enum):
 # See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
 # and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
 class SqlTypeNames(str, enum.Enum):
-    """Enum of allowed SQL type names in schema.SchemaField."""
+    """Enum of allowed SQL type names in schema.SchemaField.
+
+    Datatypes used in Legacy SQL.
+    """

     STRING = "STRING"
     BYTES = "BYTES"
@@ -306,6 +317,9 @@ class SqlTypeNames(str, enum.Enum):
     DATETIME = "DATETIME"
     INTERVAL = "INTERVAL"  # NOTE: not available in legacy types
     RANGE = "RANGE"  # NOTE: not available in legacy types
+    # NOTE: FOREIGN acts as a wrapper for data types
+    # not natively understood by BigQuery unless translated.
+    FOREIGN = "FOREIGN"


 class WriteDisposition(object):
@@ -344,3 +358,9 @@ class DeterminismLevel:

     NOT_DETERMINISTIC = "NOT_DETERMINISTIC"
     """The UDF is not deterministic."""
+
+
+class RoundingMode(enum.Enum):
+    """Rounding mode options used when storing NUMERIC or BIGNUMERIC values."""
+
+    ROUNDING_MODE_UNSPECIFIED = 0
+    ROUND_HALF_AWAY_FROM_ZERO = 1
+    ROUND_HALF_EVEN = 2
diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py
index a891bc232..8d66f8347 100644
--- a/google/cloud/bigquery/external_config.py
+++ b/google/cloud/bigquery/external_config.py
@@ -18,18 +18,22 @@
 Job.configuration.query.tableDefinitions.
""" -from __future__ import absolute_import +from __future__ import absolute_import, annotations import base64 import copy from typing import Any, Dict, FrozenSet, Iterable, Optional, Union -from google.cloud.bigquery._helpers import _to_bytes -from google.cloud.bigquery._helpers import _bytes_to_json -from google.cloud.bigquery._helpers import _int_or_none -from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery._helpers import ( + _to_bytes, + _bytes_to_json, + _int_or_none, + _str_or_none, + _isinstance_or_raise, + _get_sub_prop, +) from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions -from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.schema import SchemaField, StorageDescriptor class ExternalSourceFormat(object): @@ -1003,3 +1007,186 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig": config = cls(resource["sourceFormat"]) config._properties = copy.deepcopy(resource) return config + + +class ExternalCatalogDatasetOptions: + """Options defining open source compatible datasets living in the BigQuery catalog. + Contains metadata of open source database, schema or namespace represented + by the current dataset. + + Args: + default_storage_location_uri (Optional[str]): The storage location URI for all + tables in the dataset. Equivalent to hive metastore's database + locationUri. Maximum length of 1024 characters. (str) + parameters (Optional[dict[str, Any]]): A map of key value pairs defining the parameters + and properties of the open source schema. Maximum size of 2Mib. + """ + + def __init__( + self, + default_storage_location_uri: Optional[str] = None, + parameters: Optional[Dict[str, Any]] = None, + ): + self._properties: Dict[str, Any] = {} + self.default_storage_location_uri = default_storage_location_uri + self.parameters = parameters + + @property + def default_storage_location_uri(self) -> Any: + """Optional. The storage location URI for all tables in the dataset. + Equivalent to hive metastore's database locationUri. Maximum length of + 1024 characters.""" + + return self._properties.get("defaultStorageLocationUri") + + @default_storage_location_uri.setter + def default_storage_location_uri(self, value: str): + value = _isinstance_or_raise(value, str, none_allowed=True) + self._properties["defaultStorageLocationUri"] = value + + @property + def parameters(self) -> Any: + """Optional. A map of key value pairs defining the parameters and + properties of the open source schema. Maximum size of 2Mib.""" + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: dict[str, Any]): + value = _isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + return config + + @classmethod + def from_api_repr(cls, resource: dict) -> ExternalCatalogDatasetOptions: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + +class ExternalCatalogTableOptions: + """Metadata about open source compatible table. 
The fields contained in these + options correspond to hive metastore's table level properties. + + Args: + connection_id (Optional[str]): The connection specifying the credentials to be + used to read external storage, such as Azure Blob, Cloud Storage, or + S3. The connection is needed to read the open source table from + BigQuery Engine. The connection_id can have the form `..` or + `projects//locations//connections/`. + parameters (Union[Dict[str, Any], None]): A map of key value pairs defining the parameters + and properties of the open source table. Corresponds with hive meta + store table parameters. Maximum size of 4Mib. + storage_descriptor (Optional[StorageDescriptor]): A storage descriptor containing information + about the physical storage of this table. + """ + + def __init__( + self, + connection_id: Optional[str] = None, + parameters: Union[Dict[str, Any], None] = None, + storage_descriptor: Optional[ + StorageDescriptor + ] = None, # TODO implement StorageDescriptor, then correct this type hint + ): + self._properties = {} # type: Dict[str, Any] + self.connection_id = connection_id + self.parameters = parameters + self.storage_descriptor = storage_descriptor + + @property + def connection_id(self): + """Optional. The connection specifying the credentials to be + used to read external storage, such as Azure Blob, Cloud Storage, or + S3. The connection is needed to read the open source table from + BigQuery Engine. The connection_id can have the form `..` or + `projects//locations//connections/`. (str) + """ + return self._properties.get("connectionId") + + @connection_id.setter + def connection_id(self, value: Optional[str]): + value = _isinstance_or_raise(value, str, none_allowed=True) + self._properties["connectionId"] = value + + @property + def parameters(self) -> Any: + """Optional. A map of key value pairs defining the parameters and + properties of the open source table. Corresponds with hive meta + store table parameters. Maximum size of 4Mib. + """ + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Union[Dict[str, Any], None]): + value = _isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + @property + def storage_descriptor(self) -> Any: + """Optional. A storage descriptor containing information about the + physical storage of this table.""" + + prop = _get_sub_prop(self._properties, ["storageDescriptor"]) + + if prop is not None: + prop = StorageDescriptor().from_api_repr(prop) + return prop + + @storage_descriptor.setter + def storage_descriptor(self, value): + value = _isinstance_or_raise(value, StorageDescriptor, none_allowed=True) + if value is not None: + self._properties["storageDescriptor"] = value.to_api_repr() + else: + self._properties["storageDescriptor"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + + config = copy.deepcopy(self._properties) + return config + + @classmethod + def from_api_repr(cls, resource: dict) -> ExternalCatalogTableOptions: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. 
+ """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + def __eq__(self, value): + return self.to_api_repr() == value.to_api_repr() diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index f1090a7dc..a95c91346 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -1003,7 +1003,6 @@ def __init__( ): self.name = name self.range_element_type = self._parse_range_element_type(range_element_type) - print(self.range_element_type.type_._type) self.start = start self.end = end diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index b062396cf..3003af250 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -14,20 +14,27 @@ """Schemas for BigQuery tables / queries.""" +from __future__ import annotations + import collections +import copy import enum from typing import Any, cast, Dict, Iterable, Optional, Union from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql -from google.cloud.bigquery.enums import StandardSqlTypeNames +from google.cloud.bigquery._helpers import ( + _isinstance_or_raise, + _get_sub_prop, +) +from google.cloud.bigquery.enums import StandardSqlTypeNames, RoundingMode _STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: -# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types -# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types +# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types +# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types LEGACY_TO_STANDARD_TYPES = { "STRING": StandardSqlTypeNames.STRING, "BYTES": StandardSqlTypeNames.BYTES, @@ -46,6 +53,7 @@ "DATE": StandardSqlTypeNames.DATE, "TIME": StandardSqlTypeNames.TIME, "DATETIME": StandardSqlTypeNames.DATETIME, + "FOREIGN": StandardSqlTypeNames.FOREIGN, # no direct conversion from ARRAY, the latter is represented by mode="REPEATED" } """String names of the legacy SQL types to integer codes of Standard SQL standard_sql.""" @@ -164,6 +172,34 @@ class SchemaField(object): the type is RANGE, this field is required. Possible values for the field element type of a RANGE include `DATE`, `DATETIME` and `TIMESTAMP`. + + rounding_mode: Union[RoundingMode, str, None] + Specifies the rounding mode to be used when storing values of + NUMERIC and BIGNUMERIC type. + + Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO. + + ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero + when applying precision and scale upon writing of NUMERIC and BIGNUMERIC + values. + For Scale: 0 + 1.1, 1.2, 1.3, 1.4 => 1 + 1.5, 1.6, 1.7, 1.8, 1.9 => 2 + + ROUND_HALF_EVEN rounds half values to the nearest even value + when applying precision and scale upon writing of NUMERIC and BIGNUMERIC + values. + For Scale: 0 + 1.1, 1.2, 1.3, 1.4 => 1 + 1.5 => 2 + 1.6, 1.7, 1.8, 1.9 => 2 + 2.5 => 2 + + foreign_type_definition: Optional[str] + Definition of the foreign data type. + + Only valid for top-level schema fields (not nested fields). + If the type is FOREIGN, this field is required. 
""" def __init__( @@ -179,11 +215,15 @@ def __init__( scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, range_element_type: Union[FieldElementType, str, None] = None, + rounding_mode: Union[RoundingMode, str, None] = None, + foreign_type_definition: Optional[str] = None, ): self._properties: Dict[str, Any] = { "name": name, "type": field_type, } + + self._properties["name"] = name if mode is not None: self._properties["mode"] = mode.upper() if description is not _DEFAULT_VALUE: @@ -204,6 +244,22 @@ def __init__( self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): self._properties["rangeElementType"] = range_element_type.to_api_repr() + if isinstance(rounding_mode, RoundingMode): + self._properties["roundingMode"] = rounding_mode.name + if isinstance(rounding_mode, str): + self._properties["roundingMode"] = rounding_mode + if isinstance(foreign_type_definition, str): + self._properties["foreignTypeDefinition"] = foreign_type_definition + + # The order of operations is important: + # If field_type is FOREIGN, then foreign_type_definition must be set. + if field_type != "FOREIGN": + self._properties["type"] = field_type + else: + if self._properties.get("foreignTypeDefinition") is None: + raise ValueError( + "If the 'field_type' is 'FOREIGN', then 'foreign_type_definition' is required." + ) if fields: # Don't set the property if it's not set. self._properties["fields"] = [field.to_api_repr() for field in fields] @@ -296,6 +352,22 @@ def range_element_type(self): ret = self._properties.get("rangeElementType") return FieldElementType.from_api_repr(ret) + @property + def rounding_mode(self): + """Enum that specifies the rounding mode to be used when storing values of + NUMERIC and BIGNUMERIC type. + """ + return self._properties.get("roundingMode") + + @property + def foreign_type_definition(self): + """Definition of the foreign data type. + + Only valid for top-level schema fields (not nested fields). + If the type is FOREIGN, this field is required. + """ + return self._properties.get("foreignTypeDefinition") + @property def fields(self): """Optional[tuple]: Subfields contained in this field. @@ -461,6 +533,7 @@ def _to_schema_fields(schema): sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` instance or a compatible mapping representation of the field. """ + for field in schema: if not isinstance(field, (SchemaField, collections.abc.Mapping)): raise ValueError( @@ -556,3 +629,260 @@ def to_api_repr(self) -> dict: """ answer = {"names": list(self.names)} return answer + + +class ForeignTypeInfo: + """Metadata about the foreign data type definition such as the system in which the + type is defined. + + Args: + typeSystem (str): Required. Specifies the system which defines the + foreign data type. + """ + + def __init__(self, type_system="TYPE_SYSTEM_UNSPECIFIED"): + self._properties = {} + self.type_system = type_system + + @property + def type_system(self): + """Required. Specifies the system which defines the foreign data + type.""" + + return self._properties.get("typeSystem") + + @type_system.setter + def type_system(self, value: str): + value = _isinstance_or_raise(value, str, none_allowed=True) + self._properties["typeSystem"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. 
+ """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource): + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + +class StorageDescriptor: + """Contains information about how a table's data is stored and accessed by open + source query engines. + + Args: + inputFormat (Optional[str]): Specifies the fully qualified class name of + the InputFormat (e.g. + "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum + length is 128 characters. + locationUri (Optional[str]): The physical location of the table (e.g. + 'gs://spark-dataproc-data/pangea-data/case_sensitive/' or + 'gs://spark-dataproc-data/pangea-data/'). The maximum length is + 2056 bytes. + outputFormat (Optional[str]): Specifies the fully qualified class name + of the OutputFormat (e.g. + "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). The maximum + length is 128 characters. + serdeInfo (Optional[Any]): Serializer and deserializer information. + """ + + def __init__( + self, + input_format: Optional[str] = None, + location_uri: Optional[str] = None, + output_format: Optional[str] = None, + serde_info: Optional[SerDeInfo] = None, + ): + self._properties: Dict[str, Any] = {} + self.input_format = input_format + self.location_uri = location_uri + self.output_format = output_format + self.serde_info = serde_info + + @property + def input_format(self) -> Any: + """Optional. Specifies the fully qualified class name of the InputFormat + (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum + length is 128 characters.""" + + return self._properties.get("inputFormat") + + @input_format.setter + def input_format(self, value: Optional[str]): + value = _isinstance_or_raise(value, str, none_allowed=True) + self._properties["inputFormat"] = value + + @property + def location_uri(self) -> Any: + """Optional. The physical location of the table (e.g. 'gs://spark- + dataproc-data/pangea-data/case_sensitive/' or 'gs://spark-dataproc- + data/pangea-data/'). The maximum length is 2056 bytes.""" + + return self._properties.get("locationUri") + + @location_uri.setter + def location_uri(self, value: Optional[str]): + value = _isinstance_or_raise(value, str, none_allowed=True) + self._properties["locationUri"] = value + + @property + def output_format(self) -> Any: + """Optional. Specifies the fully qualified class name of the + OutputFormat (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). + The maximum length is 128 characters.""" + + return self._properties.get("outputFormat") + + @output_format.setter + def output_format(self, value: Optional[str]): + value = _isinstance_or_raise(value, str, none_allowed=True) + self._properties["outputFormat"] = value + + @property + def serde_info(self) -> Any: + """Optional. 
Serializer and deserializer information."""
+
+        prop = _get_sub_prop(self._properties, ["serDeInfo"])
+        if prop is not None:
+            prop = SerDeInfo.from_api_repr(prop)
+
+        return prop
+
+    @serde_info.setter
+    def serde_info(self, value):
+        value = _isinstance_or_raise(value, SerDeInfo, none_allowed=True)
+        if value is not None:
+            self._properties["serDeInfo"] = value.to_api_repr()
+        else:
+            self._properties["serDeInfo"] = value
+
+    def to_api_repr(self) -> dict:
+        """Build an API representation of this object.
+
+        Returns:
+            Dict[str, Any]:
+                A dictionary in the format used by the BigQuery API.
+        """
+        return copy.deepcopy(self._properties)
+
+    @classmethod
+    def from_api_repr(cls, resource: dict) -> StorageDescriptor:
+        """Factory: constructs an instance of the class (cls)
+        given its API representation.
+
+        Args:
+            resource (Dict[str, Any]):
+                API representation of the object to be instantiated.
+
+        Returns:
+            An instance of the class initialized with data from 'resource'.
+        """
+        config = cls()
+        config._properties = copy.deepcopy(resource)
+        return config
+
+
+class SerDeInfo:
+    """Serializer and deserializer information.
+
+    Args:
+        serialization_library (str): Required. Specifies a fully-qualified class
+            name of the serialization library that is responsible for the
+            translation of data between table representation and the underlying
+            low-level input and output format structures. The maximum length is
+            256 characters.
+        name (Optional[str]): Name of the SerDe. The maximum length is 256
+            characters.
+        parameters (Optional[dict[str, str]]): Key-value pairs that define the
+            initialization parameters for the serialization library. Maximum
+            size 10 KiB.
+    """
+
+    def __init__(
+        self,
+        serialization_library: str,
+        name: Optional[str] = None,
+        parameters: Optional[dict[str, str]] = None,
+    ):
+        self._properties: Dict[str, Any] = {}
+        self.serialization_library = serialization_library
+        self.name = name
+        self.parameters = parameters
+
+    @property
+    def serialization_library(self) -> Any:
+        """Required. Specifies a fully-qualified class name of the serialization
+        library that is responsible for the translation of data between table
+        representation and the underlying low-level input and output format
+        structures. The maximum length is 256 characters."""
+
+        return self._properties.get("serializationLibrary")
+
+    @serialization_library.setter
+    def serialization_library(self, value: str):
+        value = _isinstance_or_raise(value, str, none_allowed=False)
+        self._properties["serializationLibrary"] = value
+
+    @property
+    def name(self) -> Any:
+        """Optional. Name of the SerDe. The maximum length is 256 characters."""
+
+        return self._properties.get("name")
+
+    @name.setter
+    def name(self, value: Optional[str] = None):
+        value = _isinstance_or_raise(value, str, none_allowed=True)
+        self._properties["name"] = value
+
+    @property
+    def parameters(self) -> Any:
+        """Optional. Key-value pairs that define the initialization parameters
+        for the serialization library. Maximum size 10 KiB."""
+
+        return self._properties.get("parameters")
+
+    @parameters.setter
+    def parameters(self, value: Optional[dict[str, str]] = None):
+        value = _isinstance_or_raise(value, dict, none_allowed=True)
+        self._properties["parameters"] = value
+
+    def to_api_repr(self) -> dict:
+        """Build an API representation of this object.
+
+        Returns:
+            Dict[str, Any]:
+                A dictionary in the format used by the BigQuery API.
+ """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource: dict) -> SerDeInfo: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls("") + config._properties = copy.deepcopy(resource) + return config diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 80ab330ba..c3393e5fe 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -59,6 +59,7 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers +from google.cloud.bigquery._helpers import _isinstance_or_raise from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions as bq_exceptions @@ -69,6 +70,7 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields +from google.cloud.bigquery.external_config import ExternalCatalogTableOptions if typing.TYPE_CHECKING: # pragma: NO COVER # Unconditionally import optional dependencies again to tell pytype that @@ -407,6 +409,7 @@ class Table(_TableBase): "view_query": "view", "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", + "external_catalog_table_options": "externalCatalogTableOptions", "max_staleness": "maxStaleness", } @@ -1023,6 +1026,28 @@ def table_constraints(self) -> Optional["TableConstraints"]: table_constraints = TableConstraints.from_api_repr(table_constraints) return table_constraints + @property + def external_catalog_table_options(self): + """Options defining open source compatible datasets living in the + BigQuery catalog. Contains metadata of open source database, schema + or namespace represented by the current dataset.""" + + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ) + if prop is not None: + prop = ExternalCatalogTableOptions.from_api_repr(prop) + return prop + + @external_catalog_table_options.setter + def external_catalog_table_options(self, value): + value = _isinstance_or_raise( + value, ExternalCatalogTableOptions, none_allowed=False + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ] = value.to_api_repr() + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. 
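The library changes above add the new external-catalog surface to Dataset, Table, and the schema module; the remaining hunks are tests. A minimal usage sketch, not part of the patch: the project, dataset, table, bucket, and connection IDs, the parameter values, and the Hive class names below are placeholders, not values taken from this diff.

from google.cloud import bigquery
from google.cloud.bigquery.external_config import (
    ExternalCatalogDatasetOptions,
    ExternalCatalogTableOptions,
)
from google.cloud.bigquery.schema import SerDeInfo, StorageDescriptor

# Attach open source catalog metadata to a dataset (placeholder IDs).
dataset = bigquery.Dataset("my-project.my_dataset")
dataset.external_catalog_dataset_options = ExternalCatalogDatasetOptions(
    default_storage_location_uri="gs://my-bucket/my-path",
    parameters={"owner": "data-eng"},
)

# Describe how an open source table's files are laid out and parsed.
serde_info = SerDeInfo(
    serialization_library="org.apache.hadoop.hive.ql.io.orc.OrcSerde",
    parameters={"field.delim": ","},
)
storage_descriptor = StorageDescriptor(
    input_format="org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
    output_format="org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
    location_uri="gs://my-bucket/my-path/",
    serde_info=serde_info,
)

table = bigquery.Table("my-project.my_dataset.my_table")  # placeholder IDs
table.external_catalog_table_options = ExternalCatalogTableOptions(
    connection_id="my-connection",  # placeholder connection
    parameters={"owner": "data-eng"},
    storage_descriptor=storage_descriptor,
)

Both option objects round-trip through to_api_repr() and from_api_repr(), which is what the unit tests below exercise.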
diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py
index 0a307498f..8e83047c2 100644
--- a/tests/unit/test__helpers.py
+++ b/tests/unit/test__helpers.py
@@ -24,6 +24,9 @@
 from unittest import mock

 import google.api_core
+from google.cloud.bigquery._helpers import (
+    _isinstance_or_raise,
+)


 @pytest.mark.skipif(
@@ -1661,3 +1664,35 @@ def test_w_env_var(self):
         host = self._call_fut()

         self.assertEqual(host, HOST)
+
+
+class Test__isinstance_or_raise:
+    @pytest.mark.parametrize(
+        "value,dtype,none_allowed,expected",
+        [
+            (None, str, True, None),
+            ("hello world.uri", str, True, "hello world.uri"),
+            ("hello world.uri", str, False, "hello world.uri"),
+            (None, (str, float), True, None),
+            ("hello world.uri", (str, float), True, "hello world.uri"),
+            ("hello world.uri", (str, float), False, "hello world.uri"),
+        ],
+    )
+    def test__valid_isinstance_or_raise(self, value, dtype, none_allowed, expected):
+        result = _isinstance_or_raise(value, dtype, none_allowed=none_allowed)
+
+        assert result == expected
+
+    @pytest.mark.parametrize(
+        "value,dtype,none_allowed,expected",
+        [
+            (None, str, False, pytest.raises(TypeError)),
+            ({"key": "value"}, str, True, pytest.raises(TypeError)),
+            ({"key": "value"}, str, False, pytest.raises(TypeError)),
+            ({"key": "value"}, (str, float), True, pytest.raises(TypeError)),
+            ({"key": "value"}, (str, float), False, pytest.raises(TypeError)),
+        ],
+    )
+    def test__invalid_isinstance_or_raise(self, value, dtype, none_allowed, expected):
+        with expected:
+            _isinstance_or_raise(value, dtype, none_allowed=none_allowed)
diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py
index c0164bc73..010bc9ab5 100644
--- a/tests/unit/test_dataset.py
+++ b/tests/unit/test_dataset.py
@@ -650,6 +650,16 @@ class TestDataset(unittest.TestCase):
     DS_ID = "dataset-id"
     DS_REF = DatasetReference(PROJECT, DS_ID)
     KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1"
+    DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path"
+    PARAMETERS = {"key": "value"}
+    API_REPR = {
+        "datasetReference": {"projectId": "project", "datasetId": "dataset-id"},
+        "labels": {},
+        "externalCatalogDatasetOptions": {
+            "defaultStorageLocationUri": DEFAULT_STORAGE_LOCATION_URI,
+            "parameters": PARAMETERS,
+        },
+    }

     @staticmethod
     def _get_target_class():
@@ -1014,6 +1024,66 @@ def test_from_string_legacy_string(self):
         with self.assertRaises(ValueError):
             cls.from_string("string-project:string_dataset")

+    def test_external_catalog_dataset_options_setter(self):
+        from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions
+
+        dataset = self._make_one(self.DS_REF)
+
+        # GIVEN the parameters DEFAULT_STORAGE_LOCATION_URI and PARAMETERS
+        # WHEN an ExternalCatalogDatasetOptions obj is created
+        # and added to a dataset.
+ ecdo_obj = ExternalCatalogDatasetOptions( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + dataset.external_catalog_dataset_options = ecdo_obj + + # THEN the api representation of the dataset will match API_REPR + result = dataset.to_api_repr() + expected = self.API_REPR + assert result == expected + + def test_external_catalog_dataset_options_getter_prop_exists(self): + # GIVEN default dataset PLUS an ExternalCatalogDatasetOptions + # THEN confirm that external_catalog_dataset_options is set + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + dataset = self._make_one(self.DS_REF) + ecdo_obj = ExternalCatalogDatasetOptions( + default_storage_location_uri="gs://test-bucket/test-path", + parameters={"key": "value"}, + ) + dataset.external_catalog_dataset_options = ecdo_obj + expected = ecdo_obj._properties + result = dataset.external_catalog_dataset_options._properties + + assert result == expected + + def test_external_catalog_dataset_options_getter_prop_is_none(self): + # GIVEN only the default dataset + # THEN confirm that external_catalog_dataset_options is None + dataset = self._make_one(self.DS_REF) + expected = None + result = dataset.external_catalog_dataset_options + + assert result == expected + + def test_external_catalog_dataset_options_from_api_repr(self): + resource = self.API_REPR + klass = self._get_target_class() + dataset = klass.from_api_repr(resource) + result = dataset.external_catalog_dataset_options.to_api_repr() + expected = resource["externalCatalogDatasetOptions"] + assert result == expected + + def test_external_catalog_dataset_options_to_api_repr(self): + resource = self.API_REPR + klass = self._get_target_class() + dataset = klass.from_api_repr(resource) + result = dataset.external_catalog_dataset_options._properties + expected = resource["externalCatalogDatasetOptions"] + assert result == expected + def test__build_resource_w_custom_field(self): dataset = self._make_one(self.DS_REF) dataset._properties["newAlphaProperty"] = "unreleased property" diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 9fd16e699..260784756 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -14,10 +14,18 @@ import base64 import copy +from typing import Any, Dict, Optional import unittest from google.cloud.bigquery import external_config +from google.cloud.bigquery.external_config import ( + ExternalCatalogDatasetOptions, + ExternalCatalogTableOptions, +) from google.cloud.bigquery import schema +from google.cloud.bigquery.schema import StorageDescriptor, SerDeInfo + +import pytest class TestExternalConfig(unittest.TestCase): @@ -890,3 +898,213 @@ def _copy_and_update(d, u): d = copy.deepcopy(d) d.update(u) return d + + +class TestExternalCatalogDatasetOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + return ExternalCatalogDatasetOptions + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + @pytest.mark.parametrize( + "default_storage_location_uri,parameters", + [ + ("gs://test-bucket/test-path", {"key": "value"}), # set all params + ("gs://test-bucket/test-path", None), # set only one parameter at a time + (None, {"key": "value"}), + (None, None), # use default parameters + ], + ) + def test_ctor_initialization( + self, + default_storage_location_uri, + parameters, + ): + """Test 
ExternalCatalogDatasetOptions constructor with explicit values.""" + instance = self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + + assert instance.default_storage_location_uri == default_storage_location_uri + assert instance.parameters == parameters + + def test_ctor_invalid_input(self): + """Test ExternalCatalogDatasetOptions constructor with invalid input.""" + with pytest.raises(TypeError) as e: + self._make_one(default_storage_location_uri=123) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + with pytest.raises(TypeError) as e: + self._make_one(parameters=123) + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + """Test ExternalCatalogDatasetOptions.to_api_repr method.""" + + default_storage_location_uri = "gs://test-bucket/test-path" + parameters = {"key": "value"} + + instance = self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + resource = instance.to_api_repr() + assert resource["defaultStorageLocationUri"] == default_storage_location_uri + assert resource["parameters"] == parameters + + def test_from_api_repr(self): + instance = self._make_one() + resource = { + "defaultStorageLocationUri": "gs://test-bucket/test-path", + "parameters": {"key": "value"}, + } + result = instance.from_api_repr(resource) + + assert isinstance(result, ExternalCatalogDatasetOptions) + assert result._properties == resource + + +@pytest.fixture +def _make_storage_descriptor(): + serdeinfo = SerDeInfo( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_lib_name", + parameters={"key": "value"}, + ) + + obj = StorageDescriptor( + input_format="testpath.to.OrcInputFormat", + location_uri="gs://test/path/", + output_format="testpath.to.OrcOutputFormat", + serde_info=serdeinfo, + ) + return obj + + +class TestExternalCatalogTableOptions: + @staticmethod + def _get_target_class(): + return ExternalCatalogTableOptions + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + @pytest.mark.parametrize( + "connection_id,parameters,storage_descriptor", + [ + ( + "connection123", + {"key": "value"}, + "_make_storage_descriptor", + ), # set all params + ("connection123", None, None), # set only one parameter at a time + (None, {"key": "value"}, None), + (None, None, "_make_storage_descriptor"), + (None, None, None), # use default parameters + ], + ) + def test_ctor_initialization( + self, connection_id, parameters, storage_descriptor, request + ): + if storage_descriptor == "_make_storage_descriptor": + storage_descriptor = request.getfixturevalue(storage_descriptor) + + instance = self._make_one( + connection_id=connection_id, + parameters=parameters, + storage_descriptor=storage_descriptor, + ) + + assert instance.connection_id == connection_id + assert instance.parameters == parameters + if storage_descriptor is not None: + assert ( + instance.storage_descriptor.to_api_repr() + == storage_descriptor.to_api_repr() + ) + else: + assert instance.storage_descriptor is None + + @pytest.mark.parametrize( + "connection_id, parameters, storage_descriptor", + [ + pytest.param( + 123, + {"test_key": "test_value"}, + "_make_storage_descriptor", + id="connection_id-invalid-type", + ), + pytest.param( + "connection123", + 123, + "_make_storage_descriptor", + id="parameters-invalid-type", + ), + pytest.param( + "connection123", + {"test_key": 
"test_value"}, + 123, + id="storage_descriptor-invalid-type", + ), + ], + ) + def test_ctor_invalid_input( + self, + connection_id: str, + parameters: Dict[str, Any], + storage_descriptor: Optional[StorageDescriptor], + request, + ): + if storage_descriptor == "_make_storage_descriptor": + storage_descriptor = request.getfixturevalue(storage_descriptor) + with pytest.raises(TypeError) as e: + external_config.ExternalCatalogTableOptions( + connection_id=connection_id, + parameters=parameters, + storage_descriptor=storage_descriptor, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self, _make_storage_descriptor): + instance = self._make_one() + + instance._properties = { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": _make_storage_descriptor.to_api_repr(), + } + + resource = instance.to_api_repr() + expected = { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": _make_storage_descriptor.to_api_repr(), + } + assert resource == expected + + def test_from_api_repr(self, _make_storage_descriptor): + instance = self._make_one() + storage_descriptor = _make_storage_descriptor + resource = { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": storage_descriptor, + } + result = instance.from_api_repr(resource) + assert isinstance(result, ExternalCatalogTableOptions) + assert result._properties["connectionId"] == "connection123" + assert result._properties["parameters"] == {"key": "value"} + assert ( + result._properties["storageDescriptor"].to_api_repr() + == storage_descriptor.to_api_repr() + ) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 4b0b28158..07cea2d9e 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+
 import copy
 import unittest
 from unittest import mock
@@ -19,8 +20,14 @@
 import pytest

 from google.cloud import bigquery
+from google.cloud.bigquery.enums import RoundingMode
 from google.cloud.bigquery.standard_sql import StandardSqlStructType
-from google.cloud.bigquery.schema import PolicyTagList
+from google.cloud.bigquery.schema import (
+    PolicyTagList,
+    ForeignTypeInfo,
+    StorageDescriptor,
+    SerDeInfo,
+)


 class TestSchemaField(unittest.TestCase):
@@ -48,9 +55,12 @@ def test_constructor_defaults(self):
         self.assertEqual(field.fields, ())
         self.assertIsNone(field.policy_tags)
         self.assertIsNone(field.default_value_expression)
+        self.assertIsNone(field.rounding_mode)
+        self.assertIsNone(field.foreign_type_definition)

     def test_constructor_explicit(self):
         FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field"
+        ROUNDINGMODE = RoundingMode.ROUNDING_MODE_UNSPECIFIED
         field = self._make_one(
             "test",
             "STRING",
@@ -63,6 +73,8 @@ def test_constructor_explicit(self):
                 )
             ),
             default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION,
+            rounding_mode=ROUNDINGMODE,
+            foreign_type_definition="INTEGER",
         )
         self.assertEqual(field.name, "test")
         self.assertEqual(field.field_type, "STRING")
@@ -79,9 +91,16 @@ def test_constructor_explicit(self):
                 )
             ),
         )
+        self.assertEqual(field.rounding_mode, ROUNDINGMODE.name)
+        self.assertEqual(field.foreign_type_definition, "INTEGER")

     def test_constructor_explicit_none(self):
-        field = self._make_one("test", "STRING", description=None, policy_tags=None)
+        field = self._make_one(
+            "test",
+            "STRING",
+            description=None,
+            policy_tags=None,
+        )
         self.assertIsNone(field.description)
         self.assertIsNone(field.policy_tags)

@@ -137,10 +156,18 @@ def test_to_api_repr(self):
             policy.to_api_repr(),
             {"names": ["foo", "bar"]},
         )
+        ROUNDINGMODE = RoundingMode.ROUNDING_MODE_UNSPECIFIED

         field = self._make_one(
-            "foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy
+            "foo",
+            "INTEGER",
+            "NULLABLE",
+            description="hello world",
+            policy_tags=policy,
+            rounding_mode=ROUNDINGMODE,
+            foreign_type_definition=None,
         )
         self.assertEqual(
             field.to_api_repr(),
             {
@@ -149,6 +176,7 @@ def test_to_api_repr(self):
                 "type": "INTEGER",
                 "description": "hello world",
                 "policyTags": {"names": ["foo", "bar"]},
+                "roundingMode": "ROUNDING_MODE_UNSPECIFIED",
             },
         )

@@ -182,6 +210,7 @@ def test_from_api_repr(self):
                 "description": "test_description",
                 "name": "foo",
                 "type": "record",
+                "roundingMode": "ROUNDING_MODE_UNSPECIFIED",
             }
         )
         self.assertEqual(field.name, "foo")
@@ -193,6 +222,7 @@ def test_from_api_repr(self):
         self.assertEqual(field.fields[0].field_type, "INTEGER")
         self.assertEqual(field.fields[0].mode, "NULLABLE")
         self.assertEqual(field.range_element_type, None)
+        self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED")

     def test_from_api_repr_policy(self):
         field = self._get_target_class().from_api_repr(
@@ -284,6 +314,11 @@ def test_fields_property(self):
         schema_field = self._make_one("boat", "RECORD", fields=fields)
         self.assertEqual(schema_field.fields, fields)

+    def test_roundingmode_property_str(self):
+        ROUNDINGMODE = "ROUNDING_MODE_UNSPECIFIED"
+        schema_field = self._make_one("test", "STRING", rounding_mode=ROUNDINGMODE)
+        self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE)
+
     def test_to_standard_sql_simple_type(self):
         examples = (
             # a few legacy types
@@ -458,6 +493,32 @@ def test_to_standard_sql_unknown_type(self):
             bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
         )

+    def 
test_to_standard_sql_foreign_type_valid(self): + legacy_type = "FOREIGN" + standard_type = bigquery.StandardSqlTypeNames.FOREIGN + foreign_type_definition = "INTEGER" + + field = self._make_one( + "some_field", + field_type=legacy_type, + foreign_type_definition=foreign_type_definition, + ) + standard_field = field.to_standard_sql() + self.assertEqual(standard_field.name, "some_field") + self.assertEqual(standard_field.type.type_kind, standard_type) + + def test_to_standard_sql_foreign_type_invalid(self): + legacy_type = "FOREIGN" + foreign_type_definition = None + + with self.assertRaises(ValueError) as context: + self._make_one( + "some_field", + field_type=legacy_type, + foreign_type_definition=foreign_type_definition, + ) + self.assertTrue("If the 'field_type'" in context.exception.args[0]) + def test___eq___wrong_type(self): field = self._make_one("test", "STRING") other = object() @@ -1129,3 +1190,295 @@ def test_to_api_repr_parameterized(field, api): from google.cloud.bigquery.schema import SchemaField assert SchemaField(**field).to_api_repr() == api + + +class TestForeignTypeInfo: + """Tests metadata re: the foreign data type definition in field schema. + + Specifies the system which defines the foreign data type. + + TypeSystems are external systems, such as query engines or table formats, + that have their own data types. + + TypeSystem may be: + TypeSystem not specified: TYPE_SYSTEM_UNSPECIFIED + Represents Hive data types: HIVE + """ + + @staticmethod + def _get_target_class(): + return ForeignTypeInfo + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + @pytest.mark.parametrize( + "type_system,expected", + [ + (None, None), + ("TYPE_SYSTEM_UNSPECIFIED", "TYPE_SYSTEM_UNSPECIFIED"), + ("HIVE", "HIVE"), + ], + ) + def test_ctor_valid_input(self, type_system, expected): + result = self._make_one(type_system=type_system) + + assert result.type_system == expected + + def test_ctor_invalid_input(self): + with pytest.raises(TypeError) as e: + self._make_one(type_system=123) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + @pytest.mark.parametrize( + "type_system,expected", + [ + ("TYPE_SYSTEM_UNSPECIFIED", {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}), + ("HIVE", {"typeSystem": "HIVE"}), + (None, {"typeSystem": None}), + ], + ) + def test_to_api_repr(self, type_system, expected): + result = self._make_one(type_system=type_system) + assert result.to_api_repr() == expected + + def test_from_api_repr(self): + """GIVEN an api representation of a ForeignTypeInfo object (i.e. resource) + WHEN converted into a ForeignTypeInfo object using from_api_repr() and + displayed as a dict + THEN it will have the same representation a ForeignTypeInfo object created + directly (via _make_one()) and displayed as a dict. 
+ """ + resource = {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"} + + expected = self._make_one(type_system="TYPE_SYSTEM_UNSPECIFIED") + + klass = self._get_target_class() + result = klass.from_api_repr(resource) + + assert result.to_api_repr() == expected.to_api_repr() + + +@pytest.fixture +def _make_storage_descriptor(): + serdeinfo = SerDeInfo( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_lib_name", + parameters={"key": "value"}, + ) + + obj = StorageDescriptor( + input_format="testpath.to.OrcInputFormat", + location_uri="gs://test/path/", + output_format="testpath.to.OrcOutputFormat", + serde_info=serdeinfo, + ) + return obj + + +class TestStorageDescriptor: + """Tests for the StorageDescriptor class.""" + + @staticmethod + def _get_target_class(): + return StorageDescriptor + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + SERDEINFO = SerDeInfo( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_lib_name", + parameters={"key": "value"}, + ) + + @pytest.mark.parametrize( + "input_format,location_uri,output_format,serde_info", + [ + (None, None, None, None), + ("testpath.to.OrcInputFormat", None, None, None), + (None, "gs://test/path/", None, None), + (None, None, "testpath.to.OrcOutputFormat", None), + (None, None, None, SERDEINFO), + ( + "testpath.to.OrcInputFormat", + "gs://test/path/", + "testpath.to.OrcOutputFormat", + SERDEINFO, + ), + ], + ) + def test_ctor_valid_input( + self, input_format, location_uri, output_format, serde_info + ): + storage_descriptor = self._make_one( + input_format=input_format, + location_uri=location_uri, + output_format=output_format, + serde_info=serde_info, + ) + assert storage_descriptor.input_format == input_format + assert storage_descriptor.location_uri == location_uri + assert storage_descriptor.output_format == output_format + if serde_info is not None: + assert ( + storage_descriptor.serde_info.to_api_repr() == serde_info.to_api_repr() + ) + else: + assert storage_descriptor.serde_info is None + + @pytest.mark.parametrize( + "input_format,location_uri,output_format,serde_info", + [ + (123, None, None, None), + (None, 123, None, None), + (None, None, 123, None), + (None, None, None, 123), + ], + ) + def test_ctor_invalid_input( + self, input_format, location_uri, output_format, serde_info + ): + with pytest.raises(TypeError) as e: + self._make_one( + input_format=input_format, + location_uri=location_uri, + output_format=output_format, + serde_info=serde_info, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + storage_descriptor = self._make_one( + input_format="input_format", + location_uri="location_uri", + output_format="output_format", + serde_info=self.SERDEINFO, + ) + expected_repr = { + "inputFormat": "input_format", + "locationUri": "location_uri", + "outputFormat": "output_format", + "serDeInfo": self.SERDEINFO.to_api_repr(), + } + + assert storage_descriptor.to_api_repr() == expected_repr + + SERDEINFO = SerDeInfo( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_lib_name", + parameters={"key": "value"}, + ) + + API_REPR = { + "inputFormat": "testpath.to.OrcInputFormat", + "locationUri": "gs://test/path/", + "outputFormat": "testpath.to.OrcOutputFormat", + "serDeInfo": SERDEINFO.to_api_repr(), + } + + def test_from_api_repr(self, _make_storage_descriptor): + """GIVEN an api representation of a StorageDescriptor (i.e. 
API_REPR) + WHEN converted into a StorageDescriptor using from_api_repr() and + displayed as a dict + THEN it will have the same representation a StorageDescriptor created + directly (via the fixture) and displayed as a dict. + """ + # generate via fixture + expected = _make_storage_descriptor + resource = self.API_REPR + klass = self._get_target_class() + # generate via API_REPR + result = klass.from_api_repr(resource) + + assert result.to_api_repr() == expected.to_api_repr() + + +class TestSerDeInfo: + """Tests for the SerDeInfo class.""" + + @staticmethod + def _get_target_class(): + return SerDeInfo + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + ("testpath.to.LazySimpleSerDe", None, None), + ("testpath.to.LazySimpleSerDe", "serde_name", None), + ("testpath.to.LazySimpleSerDe", None, {"key": "value"}), + ("testpath.to.LazySimpleSerDe", "serde_name", {"key": "value"}), + ], + ) + def test_ctor_valid_input(self, serialization_library, name, parameters): + serde_info = self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + assert serde_info.serialization_library == serialization_library + assert serde_info.name == name + assert serde_info.parameters == parameters + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + (123, None, None), + ("testpath.to.LazySimpleSerDe", 123, None), + ("testpath.to.LazySimpleSerDe", None, ["test", "list"]), + ("testpath.to.LazySimpleSerDe", None, 123), + ], + ) + def test_ctor_invalid_input(self, serialization_library, name, parameters): + with pytest.raises(TypeError) as e: + self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + serde_info = self._make_one( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_name", + parameters={"key": "value"}, + ) + expected_repr = { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_name", + "parameters": {"key": "value"}, + } + assert serde_info.to_api_repr() == expected_repr + + def test_from_api_repr(self, _make_storage_descriptor): + """GIVEN an api representation of a SerDeInfo object (i.e. resource) + WHEN converted into a SerDeInfo using from_api_repr() and + displayed as a dict + THEN it will have the same representation a SerDeInfo object created + directly (via _make_one()) and displayed as a dict. 
+ """ + resource = { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_name", + "parameters": {"key": "value"}, + } + + expected = self._make_one( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_name", + parameters={"key": "value"}, + ) + + klass = self._get_target_class() + result = klass.from_api_repr(resource) + + assert result.to_api_repr() == expected.to_api_repr() diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index e9d461e9d..19ed1c169 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -32,6 +32,7 @@ from google.cloud.bigquery import exceptions from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.schema import SerDeInfo, StorageDescriptor def _mock_client(): @@ -5899,6 +5900,110 @@ def test_table_reference_to_bqstorage_v1_stable(table_path): assert got == expected +@pytest.fixture +def _make_storage_descriptor(): + serdeinfo = SerDeInfo( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_lib_name", + parameters={"key": "value"}, + ) + + obj = StorageDescriptor( + input_format="testpath.to.OrcInputFormat", + location_uri="gs://test/path/", + output_format="testpath.to.OrcOutputFormat", + serde_info=serdeinfo, + ) + return obj + + +@pytest.fixture() +def external_catalog_table_options(_make_storage_descriptor): + from google.cloud.bigquery.external_config import ExternalCatalogTableOptions + + return ExternalCatalogTableOptions( + connection_id="connection123", + parameters={"key": "value"}, + storage_descriptor=_make_storage_descriptor, + ) + + +class TestExternalCatalogTableOptions: + PROJECT = "project_id" + DS_ID = "dataset_id" + TABLE_NAME = "table_name" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import Table + + return Table + + @classmethod + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_external_catalog_table_options_getter( + self, + external_catalog_table_options, + _make_storage_descriptor, + request, + ): + from google.cloud.bigquery.external_config import ExternalCatalogTableOptions + + # create objects for the test + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + expected = external_catalog_table_options + + # Confirm that external catalog table options have not been set + assert table.external_catalog_table_options is None + + # Add an ExternalCatalogTableOptions object to the table. + table._properties[ + "externalCatalogTableOptions" + ] = external_catalog_table_options.to_api_repr() + + # Extract the ecto object. + result = table.external_catalog_table_options + + # Confirm that external catalog table options are an + # ExternalCatalogTableOptions object + assert isinstance(result, ExternalCatalogTableOptions) + assert isinstance(expected, ExternalCatalogTableOptions) + assert result == expected + + def test_external_catalog_table_options_setter( + self, + external_catalog_table_options, + _make_storage_descriptor, + ): + # create objects for the test + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + # Add an ExternalCatalogTableOptions object to the table. 
+ table.external_catalog_table_options = external_catalog_table_options + expected = { + "tableReference": { + "projectId": "project_id", + "datasetId": "dataset_id", + "tableId": "table_name", + }, + "labels": {}, + "externalCatalogTableOptions": { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": _make_storage_descriptor.to_api_repr(), + }, + } + # Confirm that the api_repr of the ecto_output matches the inputs + result = table.to_api_repr() + assert result == expected + + @pytest.mark.parametrize("preserve_order", [True, False]) def test_to_arrow_iterable_w_bqstorage_max_stream_count(preserve_order): pytest.importorskip("pandas")
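For the SchemaField changes exercised in tests/unit/test_schema.py, a minimal sketch of how the new rounding_mode and FOREIGN arguments fit together; the column names and the foreign type string are illustrative, not taken from the patch.

from google.cloud.bigquery.enums import RoundingMode
from google.cloud.bigquery.schema import SchemaField

# NUMERIC column that rounds half values to the nearest even digit.
price = SchemaField("price", "NUMERIC", rounding_mode=RoundingMode.ROUND_HALF_EVEN)
assert price.rounding_mode == "ROUND_HALF_EVEN"  # stored as the enum member's name

# A FOREIGN field must carry a foreign_type_definition; omitting it raises ValueError.
hive_col = SchemaField("hive_col", "FOREIGN", foreign_type_definition="INTEGER")
assert hive_col.foreign_type_definition == "INTEGER"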