From ec623958d61ed2f9f835c6d573859e8656b17af3 Mon Sep 17 00:00:00 2001 From: Linchin Date: Thu, 2 May 2024 15:09:10 -0700 Subject: [PATCH 1/5] feat: support insertAll for range --- google/cloud/bigquery/_helpers.py | 49 ++++++++++++- tests/unit/test__helpers.py | 110 +++++++++++++++++++++++++++++- 2 files changed, 154 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 083eb9f9d..1fada3ebf 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -46,10 +46,11 @@ # BigQuery sends INTERVAL data in "canonical format" # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type _INTERVAL_PATTERN = re.compile( - r"(?P-?)(?P\d+)-(?P\d+) " + r"(?P-?)(?Pprint(json_rows)\d+)-(?P\d+) " r"(?P-?\d+) " r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" ) +_RANGE_PATTERN = re.compile(r"\[.*, .*\)") BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST" """Environment variable defining host for emulator.""" @@ -334,9 +335,8 @@ def _range_from_json(value, field): The parsed range object from ``value`` if the ``field`` is not null (otherwise it is :data:`None`). """ - range_literal = re.compile(r"\[.*, .*\)") if _not_null(value, field): - if range_literal.match(value): + if _RANGE_PATTERN.match(value): start, end = value[1:-1].split(", ") start = _range_element_from_json(start, field.range_element_type) end = _range_element_from_json(end, field.range_element_type) @@ -531,6 +531,47 @@ def _time_to_json(value): return value +def _range_element_to_json(value, element_type=None): + """Coerce 'value' to an JSON-compatible representation.""" + if value is None: + return None + elif isinstance(value, str): + if value.upper() in ("UNBOUNDED", "NULL"): + return None + else: + # We do not enforce range element value to be valid to reduce + # redundancy with backend. + return value + elif element_type and element_type.element_type.upper() in _SUPPORTED_RANGE_ELEMENTS: + converter = _SCALAR_VALUE_TO_JSON_ROW.get(element_type.element_type.upper()) + return converter(value) + else: + raise ValueError(f"Unsupported RANGE element type {element_type}, or " + "element type is empty. Must be DATE, DATETIME, or " + "TIMESTAMP") + + +def _range_field_to_json(range_element_type, value): + """Coerce 'value' to an JSON-compatible representation.""" + if isinstance(value, str): + # string literal + if _RANGE_PATTERN.match(value): + start, end = value[1:-1].split(", ") + else: + raise ValueError(f"RANGE literal {value} has incorrect format") + elif isinstance(value, dict): + # dictionary + start = value.get("start") + end = value.get("end") + else: + raise ValueError(f"Unsupported type of RANGE value {value}, must be " + "string or dict") + + start = _range_element_to_json(start, range_element_type) + end = _range_element_to_json(end, range_element_type) + return {"start": start, "end": end} + + # Converters used for scalar values marshalled to the BigQuery API, such as in # query parameters or the tabledata.insert API. _SCALAR_VALUE_TO_JSON_ROW = { @@ -676,6 +717,8 @@ def _single_field_to_json(field, row_value): if field.field_type == "RECORD": return _record_field_to_json(field.fields, row_value) + if field.field_type == "RANGE": + return _range_field_to_json(field.range_element_type, row_value) return _scalar_field_to_json(field, row_value) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index a50625e2a..21392662c 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -1049,10 +1049,22 @@ def test_w_datetime(self): self.assertEqual(self._call_fut(when), "12:13:41") -def _make_field(field_type, mode="NULLABLE", name="testing", fields=()): +def _make_field( + field_type, + mode="NULLABLE", + name="testing", + fields=(), + range_element_type=None, + ): from google.cloud.bigquery.schema import SchemaField - return SchemaField(name=name, field_type=field_type, mode=mode, fields=fields) + return SchemaField( + name=name, + field_type=field_type, + mode=mode, + fields=fields, + range_element_type=range_element_type, + ) class Test_scalar_field_to_json(unittest.TestCase): @@ -1251,6 +1263,100 @@ def test_w_dict_unknown_fields(self): ) +class Test_range_field_to_json(unittest.TestCase): + def _call_fut(self, field, value): + from google.cloud.bigquery._helpers import _range_field_to_json + + return _range_field_to_json(field, value) + + def test_w_date(self): + field = _make_field("RANGE", range_element_type="DATE") + start = datetime.date(2016, 12, 3) + original = {"start": start} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03", "end": None} + self.assertEqual(converted, expected) + + def test_w_date_string(self): + field = _make_field("RANGE", range_element_type="DATE") + original = {"start": "2016-12-03"} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03", "end": None} + self.assertEqual(converted, expected) + + def test_w_datetime(self): + field = _make_field("RANGE", range_element_type="DATETIME") + start = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456) + original = {"start": start} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03T14:11:27.123456", "end": None} + self.assertEqual(converted, expected) + + def test_w_datetime_string(self): + field = _make_field("RANGE", range_element_type="DATETIME") + original = {"start": "2016-12-03T14:11:27.123456"} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03T14:11:27.123456", "end": None} + self.assertEqual(converted, expected) + + def test_w_timestamp(self): + from google.cloud._helpers import UTC + + field = _make_field("RANGE", range_element_type="TIMESTAMP") + start = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC) + original = {"start": start} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03T14:11:27.123456Z", "end": None} + self.assertEqual(converted, expected) + + def test_w_timestamp_string(self): + field = _make_field("RANGE", range_element_type="TIMESTAMP") + original = {"start": "2016-12-03T14:11:27.123456Z"} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03T14:11:27.123456Z", "end": None} + self.assertEqual(converted, expected) + + def test_w_timestamp_float(self): + from google.cloud._helpers import UTC + + field = _make_field("RANGE", range_element_type="TIMESTAMP") + original = {"start": 12.34567} + converted = self._call_fut(field.range_element_type, original) + expected = {"start": 12.34567, "end": None} + self.assertEqual(converted, expected) + + def test_w_string_literal(self): + field = _make_field("RANGE", range_element_type="DATE") + original = "[2016-12-03, UNBOUNDED)" + converted = self._call_fut(field.range_element_type, original) + expected = {"start": "2016-12-03", "end": None} + self.assertEqual(converted, expected) + + def test_w_unsupported_range_element_type(self): + field = _make_field("RANGE", range_element_type="TIME") + with self.assertRaises(ValueError): + self._call_fut( + field.range_element_type, + {"start": datetime.time(12, 13, 41)}, + ) + + def test_w_no_range_element_type(self): + field = _make_field("RANGE") + with self.assertRaises(ValueError): + self._call_fut(field.range_element_type, "2016-12-03") + + def test_w_incorrect_literal_format(self): + field = _make_field("RANGE", range_element_type="DATE") + original = "[2016-12-03, UNBOUNDED]" + with self.assertRaises(ValueError): + self._call_fut(field.range_element_type, original) + + def test_w_unsupported_representation(self): + field = _make_field("RANGE", range_element_type="DATE") + with self.assertRaises(ValueError): + self._call_fut(field.range_element_type, object()) + + class Test_field_to_json(unittest.TestCase): def _call_fut(self, field, value): from google.cloud.bigquery._helpers import _field_to_json From c50f241d654c31ea8e02d96271d041070ef980c4 Mon Sep 17 00:00:00 2001 From: Linchin Date: Fri, 3 May 2024 11:01:18 -0700 Subject: [PATCH 2/5] revert INTERVAL regex --- google/cloud/bigquery/_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 1fada3ebf..b256bc748 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -46,7 +46,7 @@ # BigQuery sends INTERVAL data in "canonical format" # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type _INTERVAL_PATTERN = re.compile( - r"(?P-?)(?Pprint(json_rows)\d+)-(?P\d+) " + r"(?P-?)(?P\d+)-(?P\d+) " r"(?P-?\d+) " r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" ) From 7a7710c9ffc451accbd2bc9262dcf280d96defdc Mon Sep 17 00:00:00 2001 From: Linchin Date: Fri, 3 May 2024 11:05:43 -0700 Subject: [PATCH 3/5] lint --- google/cloud/bigquery/_helpers.py | 17 +++++++++++------ tests/unit/test__helpers.py | 14 ++++++-------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index b256bc748..668b4ca3d 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -542,13 +542,17 @@ def _range_element_to_json(value, element_type=None): # We do not enforce range element value to be valid to reduce # redundancy with backend. return value - elif element_type and element_type.element_type.upper() in _SUPPORTED_RANGE_ELEMENTS: + elif ( + element_type and element_type.element_type.upper() in _SUPPORTED_RANGE_ELEMENTS + ): converter = _SCALAR_VALUE_TO_JSON_ROW.get(element_type.element_type.upper()) return converter(value) else: - raise ValueError(f"Unsupported RANGE element type {element_type}, or " - "element type is empty. Must be DATE, DATETIME, or " - "TIMESTAMP") + raise ValueError( + f"Unsupported RANGE element type {element_type}, or " + "element type is empty. Must be DATE, DATETIME, or " + "TIMESTAMP" + ) def _range_field_to_json(range_element_type, value): @@ -564,8 +568,9 @@ def _range_field_to_json(range_element_type, value): start = value.get("start") end = value.get("end") else: - raise ValueError(f"Unsupported type of RANGE value {value}, must be " - "string or dict") + raise ValueError( + f"Unsupported type of RANGE value {value}, must be " "string or dict" + ) start = _range_element_to_json(start, range_element_type) end = _range_element_to_json(end, range_element_type) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 21392662c..73f4decab 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -1050,12 +1050,12 @@ def test_w_datetime(self): def _make_field( - field_type, - mode="NULLABLE", - name="testing", - fields=(), - range_element_type=None, - ): + field_type, + mode="NULLABLE", + name="testing", + fields=(), + range_element_type=None, +): from google.cloud.bigquery.schema import SchemaField return SchemaField( @@ -1317,8 +1317,6 @@ def test_w_timestamp_string(self): self.assertEqual(converted, expected) def test_w_timestamp_float(self): - from google.cloud._helpers import UTC - field = _make_field("RANGE", range_element_type="TIMESTAMP") original = {"start": 12.34567} converted = self._call_fut(field.range_element_type, original) From a416e6523d0d6ef2167167c9161a10db43211568 Mon Sep 17 00:00:00 2001 From: Linchin Date: Fri, 3 May 2024 11:29:12 -0700 Subject: [PATCH 4/5] add unit test --- tests/unit/test__helpers.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 73f4decab..191d9543d 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -1389,6 +1389,11 @@ def test_w_scalar(self): converted = self._call_fut(field, original) self.assertEqual(converted, str(original)) + def test_w_range(self): + field = _make_field("RANGE", range_element_type="DATE") + original = {"start": "2016-12-03", "end": "2024-12-03"} + converted = self._call_fut(field, original) + self.assertEqual(converted, original) class Test_snake_to_camel_case(unittest.TestCase): def _call_fut(self, value): From e0a0351e179770f764525ae87b34096ac2dbb20e Mon Sep 17 00:00:00 2001 From: Linchin Date: Fri, 3 May 2024 11:29:47 -0700 Subject: [PATCH 5/5] lint --- tests/unit/test__helpers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 191d9543d..1bf21479f 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -1395,6 +1395,7 @@ def test_w_range(self): converted = self._call_fut(field, original) self.assertEqual(converted, original) + class Test_snake_to_camel_case(unittest.TestCase): def _call_fut(self, value): from google.cloud.bigquery._helpers import _snake_to_camel_case