From 8f5a41d283a965ca161019588d3a3b2947b04b5b Mon Sep 17 00:00:00 2001
From: Suzy Mueller <suzmue@google.com>
Date: Tue, 13 Aug 2024 15:24:24 -0700
Subject: [PATCH] fix: add warning when encountering unknown field types 
 (#1989)

* fix: add warning when encountering unknown field types

The types returned for currently unsupported field types may change
in the future, when support is added. Warn users that the types they
are using are not yet supported.

* fix: add warning for unknown subfield types as well

* fix: remove unused warnings

* fix: remove leftover debugging code

* move test case closer to related test

* add comments

* fix formatting

* fix test_table and use warnings.warn instead of pytest.warn

* add explicit warning about behavior subject to change in the future
add warning for write and warn about future behavior changes

* add default converter for _SCALAR_VALUE_TO_JSON_PARAM

* factor out shared warning

* fix test case and make coverage happy

* add unit test to StructQueryParameter class

---------

Co-authored-by: Lingqing Gan <lingqing.gan@gmail.com>
---
 google/cloud/bigquery/_helpers.py        | 38 +++++++++++----
 google/cloud/bigquery/_pandas_helpers.py |  4 +-
 google/cloud/bigquery/query.py           | 20 ++++----
 tests/unit/test__helpers.py              | 62 +++++++++++++++++++++++-
 tests/unit/test_query.py                 | 19 ++++++++
 tests/unit/test_table.py                 |  6 +--
 6 files changed, 123 insertions(+), 26 deletions(-)

diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py
index 5ee5e1850..1eda80712 100644
--- a/google/cloud/bigquery/_helpers.py
+++ b/google/cloud/bigquery/_helpers.py
@@ -21,6 +21,7 @@
 import math
 import re
 import os
+import warnings
 from typing import Optional, Union
 
 from dateutil import relativedelta
@@ -297,12 +298,7 @@ def _record_from_json(value, field):
         record = {}
         record_iter = zip(field.fields, value["f"])
         for subfield, cell in record_iter:
-            converter = _CELLDATA_FROM_JSON[subfield.field_type]
-            if subfield.mode == "REPEATED":
-                value = [converter(item["v"], subfield) for item in cell["v"]]
-            else:
-                value = converter(cell["v"], subfield)
-            record[subfield.name] = value
+            record[subfield.name] = _field_from_json(cell["v"], subfield)
         return record
 
 
@@ -382,7 +378,11 @@ def _field_to_index_mapping(schema):
 
 
 def _field_from_json(resource, field):
-    converter = _CELLDATA_FROM_JSON.get(field.field_type, lambda value, _: value)
+    def default_converter(value, field):
+        _warn_unknown_field_type(field)
+        return value
+
+    converter = _CELLDATA_FROM_JSON.get(field.field_type, default_converter)
     if field.mode == "REPEATED":
         return [converter(item["v"], field) for item in resource]
     else:
@@ -484,6 +484,11 @@ def _json_to_json(value):
     return json.dumps(value)
 
 
+def _string_to_json(value):
+    """NOOP string -> string coercion"""
+    return value
+
+
 def _timestamp_to_json_parameter(value):
     """Coerce 'value' to an JSON-compatible representation.
 
@@ -596,6 +601,7 @@ def _range_field_to_json(range_element_type, value):
     "DATE": _date_to_json,
     "TIME": _time_to_json,
     "JSON": _json_to_json,
+    "STRING": _string_to_json,
     # Make sure DECIMAL and BIGDECIMAL are handled, even though
     # requests for them should be converted to NUMERIC.  Better safe
     # than sorry.
@@ -609,6 +615,15 @@ def _range_field_to_json(range_element_type, value):
 _SCALAR_VALUE_TO_JSON_PARAM["TIMESTAMP"] = _timestamp_to_json_parameter
 
 
+def _warn_unknown_field_type(field):
+    warnings.warn(
+        "Unknown type '{}' for field '{}'. Behavior reading and writing this type is not officially supported and may change in the future.".format(
+            field.field_type, field.name
+        ),
+        FutureWarning,
+    )
+
+
 def _scalar_field_to_json(field, row_value):
     """Maps a field and value to a JSON-safe value.
 
@@ -621,9 +636,12 @@ def _scalar_field_to_json(field, row_value):
     Returns:
         Any: A JSON-serializable object.
     """
-    converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type)
-    if converter is None:  # STRING doesn't need converting
-        return row_value
+
+    def default_converter(value):
+        _warn_unknown_field_type(field)
+        return value
+
+    converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type, default_converter)
     return converter(row_value)
 
 
diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py
index 8395478fb..c21a02569 100644
--- a/google/cloud/bigquery/_pandas_helpers.py
+++ b/google/cloud/bigquery/_pandas_helpers.py
@@ -204,7 +204,9 @@ def bq_to_arrow_field(bq_field, array_type=None):
             metadata=metadata,
         )
 
-    warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name))
+    warnings.warn(
+        "Unable to determine Arrow type for field '{}'.".format(bq_field.name)
+    )
     return None
 
 
diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py
index 9c59056fd..f1090a7dc 100644
--- a/google/cloud/bigquery/query.py
+++ b/google/cloud/bigquery/query.py
@@ -591,9 +591,8 @@ def to_api_repr(self) -> dict:
             Dict: JSON mapping
         """
         value = self.value
-        converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_)
-        if converter is not None:
-            value = converter(value)  # type: ignore
+        converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_, lambda value: value)
+        value = converter(value)  # type: ignore
         resource: Dict[str, Any] = {
             "parameterType": {"type": self.type_},
             "parameterValue": {"value": value},
@@ -748,9 +747,10 @@ def to_api_repr(self) -> dict:
             else:
                 a_type = self.array_type.to_api_repr()
 
-            converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"])
-            if converter is not None:
-                values = [converter(value) for value in values]  # type: ignore
+            converter = _SCALAR_VALUE_TO_JSON_PARAM.get(
+                a_type["type"], lambda value: value
+            )
+            values = [converter(value) for value in values]  # type: ignore
             a_values = [{"value": value} for value in values]
 
         resource = {
@@ -792,7 +792,7 @@ def __repr__(self):
 
 
 class StructQueryParameter(_AbstractQueryParameter):
-    """Named / positional query parameters for struct values.
+    """Name / positional query parameters for struct values.
 
     Args:
         name (Optional[str]):
@@ -897,10 +897,8 @@ def to_api_repr(self) -> dict:
                 values[name] = repr_["parameterValue"]
             else:
                 s_types[name] = {"name": name, "type": {"type": type_}}
-                converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_)
-                if converter is not None:
-                    value = converter(value)  # type: ignore
-                values[name] = {"value": value}
+                converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_, lambda value: value)
+                values[name] = {"value": converter(value)}
 
         resource = {
             "parameterType": {
diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py
index 1bf21479f..0a307498f 100644
--- a/tests/unit/test__helpers.py
+++ b/tests/unit/test__helpers.py
@@ -17,6 +17,7 @@
 import decimal
 import json
 import os
+import warnings
 import pytest
 import packaging
 import unittest
@@ -640,6 +641,17 @@ def test_w_single_scalar_column(self):
         row = {"f": [{"v": "1"}]}
         self.assertEqual(self._call_fut(row, schema=[col]), (1,))
 
+    def test_w_unknown_type(self):
+        # SELECT 1 AS col
+        col = _Field("REQUIRED", "col", "UNKNOWN")
+        row = {"f": [{"v": "1"}]}
+        with warnings.catch_warnings(record=True) as warned:
+            self.assertEqual(self._call_fut(row, schema=[col]), ("1",))
+        self.assertEqual(len(warned), 1)
+        warning = warned[0]
+        self.assertTrue("UNKNOWN" in str(warning))
+        self.assertTrue("col" in str(warning))
+
     def test_w_single_scalar_geography_column(self):
         # SELECT 1 AS col
         col = _Field("REQUIRED", "geo", "GEOGRAPHY")
@@ -660,6 +672,17 @@ def test_w_single_array_column(self):
         row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]}
         self.assertEqual(self._call_fut(row, schema=[col]), ([1, 2, 3],))
 
+    def test_w_unknown_type_repeated(self):
+        # SELECT 1 AS col
+        col = _Field("REPEATED", "col", "UNKNOWN")
+        row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]}
+        with warnings.catch_warnings(record=True) as warned:
+            self.assertEqual(self._call_fut(row, schema=[col]), (["1", "2", "3"],))
+        self.assertEqual(len(warned), 1)
+        warning = warned[0]
+        self.assertTrue("UNKNOWN" in str(warning))
+        self.assertTrue("col" in str(warning))
+
     def test_w_struct_w_nested_array_column(self):
         # SELECT ([1, 2], 3, [4, 5]) as col
         first = _Field("REPEATED", "first", "INTEGER")
@@ -684,6 +707,39 @@ def test_w_struct_w_nested_array_column(self):
             ({"first": [1, 2], "second": 3, "third": [4, 5]},),
         )
 
+    def test_w_unknown_type_subfield(self):
+        # SELECT [(1, 2, 3), (4, 5, 6)] as col
+        first = _Field("REPEATED", "first", "UNKNOWN1")
+        second = _Field("REQUIRED", "second", "UNKNOWN2")
+        third = _Field("REPEATED", "third", "INTEGER")
+        col = _Field("REQUIRED", "col", "RECORD", fields=[first, second, third])
+        row = {
+            "f": [
+                {
+                    "v": {
+                        "f": [
+                            {"v": [{"v": "1"}, {"v": "2"}]},
+                            {"v": "3"},
+                            {"v": [{"v": "4"}, {"v": "5"}]},
+                        ]
+                    }
+                }
+            ]
+        }
+        with warnings.catch_warnings(record=True) as warned:
+            self.assertEqual(
+                self._call_fut(row, schema=[col]),
+                ({"first": ["1", "2"], "second": "3", "third": [4, 5]},),
+            )
+        self.assertEqual(len(warned), 2)  # 1 warning per unknown field.
+        warned = [str(warning) for warning in warned]
+        self.assertTrue(
+            any(["first" in warning and "UNKNOWN1" in warning for warning in warned])
+        )
+        self.assertTrue(
+            any(["second" in warning and "UNKNOWN2" in warning for warning in warned])
+        )
+
     def test_w_array_of_struct(self):
         # SELECT [(1, 2, 3), (4, 5, 6)] as col
         first = _Field("REQUIRED", "first", "INTEGER")
@@ -1076,8 +1132,12 @@ def _call_fut(self, field, value):
     def test_w_unknown_field_type(self):
         field = _make_field("UNKNOWN")
         original = object()
-        converted = self._call_fut(field, original)
+        with warnings.catch_warnings(record=True) as warned:
+            converted = self._call_fut(field, original)
         self.assertIs(converted, original)
+        self.assertEqual(len(warned), 1)
+        warning = warned[0]
+        self.assertTrue("UNKNOWN" in str(warning))
 
     def test_w_known_field_type(self):
         field = _make_field("INT64")
diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py
index 7c36eb75b..40ef080f7 100644
--- a/tests/unit/test_query.py
+++ b/tests/unit/test_query.py
@@ -1780,6 +1780,25 @@ def test_to_api_repr_w_nested_struct(self):
         param = self._make_one("foo", scalar_1, sub)
         self.assertEqual(param.to_api_repr(), EXPECTED)
 
+    def test_to_api_repr_w_unknown_type(self):
+        EXPECTED = {
+            "name": "foo",
+            "parameterType": {
+                "type": "STRUCT",
+                "structTypes": [
+                    {"name": "bar", "type": {"type": "INT64"}},
+                    {"name": "baz", "type": {"type": "UNKNOWN_TYPE"}},
+                ],
+            },
+            "parameterValue": {
+                "structValues": {"bar": {"value": "123"}, "baz": {"value": "abc"}}
+            },
+        }
+        sub_1 = _make_subparam("bar", "INT64", 123)
+        sub_2 = _make_subparam("baz", "UNKNOWN_TYPE", "abc")
+        param = self._make_one("foo", sub_1, sub_2)
+        self.assertEqual(param.to_api_repr(), EXPECTED)
+
     def test___eq___wrong_type(self):
         field = self._make_one("test", _make_subparam("bar", "STRING", "abc"))
         other = object()
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py
index 7a97c7b78..d6febcfb1 100644
--- a/tests/unit/test_table.py
+++ b/tests/unit/test_table.py
@@ -2751,9 +2751,9 @@ def test_to_arrow_w_unknown_type(self):
         self.assertEqual(ages, [33, 29])
         self.assertEqual(sports, ["volleyball", "basketball"])
 
-        self.assertEqual(len(warned), 1)
-        warning = warned[0]
-        self.assertTrue("sport" in str(warning))
+        # Expect warning from both the arrow conversion, and the json deserialization.
+        self.assertEqual(len(warned), 2)
+        self.assertTrue(all("sport" in str(warning) for warning in warned))
 
     def test_to_arrow_w_empty_table(self):
         pyarrow = pytest.importorskip(