Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: preserve unknown fields from the REST API representation in SchemaField #2097

Merged
merged 5 commits into from
Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 25 additions & 57 deletions google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@

import collections
import enum
from typing import Any, Dict, Iterable, Optional, Union, cast
from typing import Any, cast, Dict, Iterable, Optional, Union

from google.cloud.bigquery import _helpers
from google.cloud.bigquery import standard_sql
from google.cloud.bigquery.enums import StandardSqlTypeNames

Expand Down Expand Up @@ -203,15 +204,8 @@ def __init__(
self._properties["rangeElementType"] = {"type": range_element_type}
if isinstance(range_element_type, FieldElementType):
self._properties["rangeElementType"] = range_element_type.to_api_repr()

self._fields = tuple(fields)

@staticmethod
def __get_int(api_repr, name):
v = api_repr.get(name, _DEFAULT_VALUE)
if v is not _DEFAULT_VALUE:
v = int(v)
return v
if fields: # Don't set the property if it's not set.
self._properties["fields"] = [field.to_api_repr() for field in fields]

@classmethod
def from_api_repr(cls, api_repr: dict) -> "SchemaField":
Expand All @@ -225,43 +219,19 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
Returns:
google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object.
"""
field_type = api_repr["type"].upper()

# Handle optional properties with default values
mode = api_repr.get("mode", "NULLABLE")
description = api_repr.get("description", _DEFAULT_VALUE)
fields = api_repr.get("fields", ())
policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)
placeholder = cls("this_will_be_replaced", "PLACEHOLDER")

default_value_expression = api_repr.get("defaultValueExpression", None)
# Note: we don't make a copy of api_repr because this can cause
# unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
# fields. See https://github.com/googleapis/python-bigquery/issues/6
placeholder._properties = api_repr

if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
policy_tags = PolicyTagList.from_api_repr(policy_tags)

if api_repr.get("rangeElementType"):
range_element_type = cast(dict, api_repr.get("rangeElementType"))
element_type = range_element_type.get("type")
else:
element_type = None

return cls(
field_type=field_type,
fields=[cls.from_api_repr(f) for f in fields],
mode=mode.upper(),
default_value_expression=default_value_expression,
description=description,
name=api_repr["name"],
policy_tags=policy_tags,
precision=cls.__get_int(api_repr, "precision"),
scale=cls.__get_int(api_repr, "scale"),
max_length=cls.__get_int(api_repr, "maxLength"),
range_element_type=element_type,
)
return placeholder

@property
def name(self):
"""str: The name of the field."""
return self._properties["name"]
return self._properties.get("name", "")

@property
def field_type(self):
Expand All @@ -270,7 +240,10 @@ def field_type(self):
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
"""
return self._properties["type"]
type_ = self._properties.get("type")
if type_ is None: # Shouldn't happen, but some unit tests do this.
return None
return cast(str, type_).upper()

@property
def mode(self):
Expand All @@ -279,7 +252,7 @@ def mode(self):
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
"""
return self._properties.get("mode")
return cast(str, self._properties.get("mode", "NULLABLE")).upper()

@property
def is_nullable(self):
Expand All @@ -299,17 +272,17 @@ def description(self):
@property
def precision(self):
"""Optional[int]: Precision (number of digits) for the NUMERIC field."""
return self._properties.get("precision")
return _helpers._int_or_none(self._properties.get("precision"))

@property
def scale(self):
"""Optional[int]: Scale (digits after decimal) for the NUMERIC field."""
return self._properties.get("scale")
return _helpers._int_or_none(self._properties.get("scale"))

@property
def max_length(self):
"""Optional[int]: Maximum length for the STRING or BYTES field."""
return self._properties.get("maxLength")
return _helpers._int_or_none(self._properties.get("maxLength"))

@property
def range_element_type(self):
Expand All @@ -329,7 +302,7 @@ def fields(self):
Must be empty unset if ``field_type`` is not 'RECORD'.
"""
return self._fields
return tuple(_to_schema_fields(self._properties.get("fields", [])))

@property
def policy_tags(self):
Expand All @@ -345,15 +318,10 @@ def to_api_repr(self) -> dict:
Returns:
Dict: A dictionary representing the SchemaField in a serialized form.
"""
answer = self._properties.copy()

# If this is a RECORD type, then sub-fields are also included,
# add this to the serialized representation.
if self.field_type.upper() in _STRUCT_TYPES:
answer["fields"] = [f.to_api_repr() for f in self.fields]

# Done; return the serialized dictionary.
return answer
# Note: we don't make a copy of _properties because this can cause
# unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
# fields. See https://github.com/googleapis/python-bigquery/issues/6
return self._properties

def _key(self):
"""A tuple key that uniquely describes this field.
Expand Down Expand Up @@ -389,7 +357,7 @@ def _key(self):
self.mode.upper(), # pytype: disable=attribute-error
self.default_value_expression,
self.description,
self._fields,
self.fields,
policy_tags,
)

Expand Down
29 changes: 24 additions & 5 deletions tests/unit/job/test_load_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import warnings

import pytest
Expand Down Expand Up @@ -571,16 +572,34 @@ def test_schema_setter_valid_mappings_list(self):
config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]}
)

def test_schema_setter_invalid_mappings_list(self):
def test_schema_setter_allows_unknown_properties(self):
config = self._get_target_class()()

schema = [
{"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
{"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"},
{
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
"someNewProperty": "test-value",
},
{
"name": "age",
# Note: This type should be included, too. Avoid client-side
# validation, as it could prevent backwards-compatible
# evolution of the server-side behavior.
"typo": "INTEGER",
"mode": "REQUIRED",
"anotherNewProperty": "another-test",
},
]

with self.assertRaises(Exception):
config.schema = schema
# Make sure the setter doesn't mutate schema.
expected_schema = copy.deepcopy(schema)

config.schema = schema

# _properties should include all fields, including unknown ones.
assert config._properties["load"]["schema"]["fields"] == expected_schema

def test_schema_setter_unsetting_schema(self):
from google.cloud.bigquery.schema import SchemaField
Expand Down
37 changes: 29 additions & 8 deletions tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from google.cloud import bigquery
from google.cloud.bigquery.standard_sql import StandardSqlStructType
from google.cloud.bigquery.schema import PolicyTagList
import copy
import unittest
from unittest import mock

import pytest

from google.cloud import bigquery
from google.cloud.bigquery.standard_sql import StandardSqlStructType
from google.cloud.bigquery.schema import PolicyTagList


class TestSchemaField(unittest.TestCase):
@staticmethod
Expand Down Expand Up @@ -821,13 +823,32 @@ def test_schema_fields_sequence(self):
result = self._call_fut(schema)
self.assertEqual(result, schema)

def test_invalid_mapping_representation(self):
def test_unknown_properties(self):
schema = [
{"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
{"name": "address", "typeooo": "STRING", "mode": "REQUIRED"},
{
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
"someNewProperty": "test-value",
},
{
"name": "age",
# Note: This type should be included, too. Avoid client-side
# validation, as it could prevent backwards-compatible
# evolution of the server-side behavior.
"typo": "INTEGER",
"mode": "REQUIRED",
"anotherNewProperty": "another-test",
},
]
with self.assertRaises(Exception):
self._call_fut(schema)

# Make sure the setter doesn't mutate schema.
expected_schema = copy.deepcopy(schema)

result = self._call_fut(schema)

for api_repr, field in zip(expected_schema, result):
assert field.to_api_repr() == api_repr

def test_valid_mapping_representation(self):
from google.cloud.bigquery.schema import SchemaField
Expand Down
32 changes: 27 additions & 5 deletions tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import datetime
import logging
import re
Expand Down Expand Up @@ -711,14 +712,35 @@ def test_schema_setter_valid_fields(self):
table.schema = [full_name, age]
self.assertEqual(table.schema, [full_name, age])

def test_schema_setter_invalid_mapping_representation(self):
def test_schema_setter_allows_unknown_properties(self):
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
table = self._make_one(table_ref)
full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}
invalid_field = {"name": "full_name", "typeooo": "STRING", "mode": "REQUIRED"}
with self.assertRaises(Exception):
table.schema = [full_name, invalid_field]
schema = [
{
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
"someNewProperty": "test-value",
},
{
"name": "age",
# Note: This type should be included, too. Avoid client-side
# validation, as it could prevent backwards-compatible
# evolution of the server-side behavior.
"typo": "INTEGER",
"mode": "REQUIRED",
"anotherNewProperty": "another-test",
},
]

# Make sure the setter doesn't mutate schema.
expected_schema = copy.deepcopy(schema)

table.schema = schema

# _properties should include all fields, including unknown ones.
assert table._properties["schema"]["fields"] == expected_schema

def test_schema_setter_valid_mapping_representation(self):
from google.cloud.bigquery.schema import SchemaField
Expand Down
Loading