From 95de884a909d5213d2eb7338d19b353d027ce499 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 21 Aug 2019 18:24:22 +0200 Subject: [PATCH] BigQuery: Raise helpful error when loading table from dataframe with STRUCT columns (#9053) * Issue warning if no schema when loading from DF * Raise error if serializing DF with struct fields * Rewrite test assertion to make coverage happy * Make the unsupported type message more general * Remove warning on missing schema The warning will be added once the support for partial schemas and automatic schema detection is implemented. --- bigquery/google/cloud/bigquery/client.py | 11 ++++++++ bigquery/tests/unit/test_client.py | 34 ++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 04c596975eec9..ae9adb4da15ff 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -60,6 +60,7 @@ from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference +from google.cloud.bigquery.schema import _STRUCT_TYPES from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import _table_arg_to_table from google.cloud.bigquery.table import _table_arg_to_table_ref @@ -1529,6 +1530,15 @@ def load_table_from_dataframe( os.close(tmpfd) try: + if job_config.schema: + for field in job_config.schema: + if field.field_type in _STRUCT_TYPES: + raise ValueError( + "Uploading dataframes with struct (record) column types " + "is not supported. See: " + "https://github.com/googleapis/google-cloud-python/issues/8191" + ) + if pyarrow and job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() @@ -1548,6 +1558,7 @@ def load_table_from_dataframe( PendingDeprecationWarning, stacklevel=2, ) + dataframe.to_parquet(tmppath, compression=parquet_compression) with open(tmppath, "rb") as parquet_file: diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index c4e9c5e830ac0..d7ff3d2a90b37 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -5328,6 +5328,40 @@ def test_load_table_from_dataframe_w_custom_job_config(self): assert sent_config is job_config assert sent_config.source_format == job.SourceFormat.PARQUET + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_struct_fields_error(self): + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + + records = [{"float_column": 3.14, "struct_column": [{"foo": 1}, {"bar": -1}]}] + dataframe = pandas.DataFrame(data=records) + + schema = [ + SchemaField("float_column", "FLOAT"), + SchemaField( + "agg_col", + "RECORD", + fields=[SchemaField("foo", "INTEGER"), SchemaField("bar", "INTEGER")], + ), + ] + job_config = job.LoadJobConfig(schema=schema) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with pytest.raises(ValueError) as exc_info, load_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + ) + + err_msg = str(exc_info.value) + assert "struct" in err_msg + assert "not support" in err_msg + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_wo_pyarrow(self):