
BigQuery: Allow subset of schema to be passed into load_table_from_dataframe. #9064

Merged: 9 commits, Aug 22, 2019
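For context before the file-by-file diff, here is a minimal usage sketch of what this change enables: passing only a subset of the schema to load_table_from_dataframe and letting the remaining column types be autodetected from the DataFrame dtypes. The project, dataset, and table names below are placeholders, not part of this PR.

import pandas
from google.cloud import bigquery

client = bigquery.Client()
dataframe = pandas.DataFrame([{"id": 1, "age": 100}, {"id": 2, "age": 60}])

# Only "age" is given explicitly; "id" is autodetected from its int64 dtype.
job_config = bigquery.LoadJobConfig()
job_config.schema = [bigquery.SchemaField("age", "INTEGER")]

load_job = client.load_table_from_dataframe(
    dataframe, "my-project.my_dataset.my_table", job_config=job_config
)
load_job.result()  # Waits for the load job to complete.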
15 changes: 11 additions & 4 deletions bigquery/google/cloud/bigquery/_pandas_helpers.py
@@ -198,11 +198,18 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
             type for some or all of the DataFrame columns.
 
     Returns:
-        Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]:
-            The automatically determined schema. Returns None if the type of
-            any column cannot be determined.
+        Sequence[google.cloud.bigquery.schema.SchemaField]:
+            The automatically determined schema. Returns empty tuple if the
+            type of any column cannot be determined.
     """
     if bq_schema:
+        for field in bq_schema:
+            if field.field_type in schema._STRUCT_TYPES:
+                raise ValueError(
+                    "Uploading dataframes with struct (record) column types "
+                    "is not supported. See: "
+                    "https://github.com/googleapis/google-cloud-python/issues/8191"
+                )
         bq_schema_index = {field.name: field for field in bq_schema}
     else:
         bq_schema_index = {}
@@ -220,7 +227,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
         bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name)
         if not bq_type:
             warnings.warn("Unable to determine type of column '{}'.".format(column))
-            return None
+            return ()
         bq_field = schema.SchemaField(column, bq_type)
         bq_schema_out.append(bq_field)
     return tuple(bq_schema_out)
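Below is a standalone sketch (not the library code) of the contract the updated helper follows: explicitly supplied fields take precedence, remaining columns fall back to a dtype mapping, and an empty tuple rather than None signals that autodetection failed, so the caller can assign the result directly to job_config.schema. The dtype map and the plain (name, type) tuples are simplified stand-ins for the real SchemaField handling.

import warnings
import pandas

_SIMPLE_DTYPE_TO_BQ = {"int64": "INTEGER", "float64": "FLOAT", "bool": "BOOLEAN"}

def sketch_dataframe_to_bq_schema(dataframe, explicit_fields):
    # Explicit fields win; everything else is looked up by pandas dtype.
    by_name = dict(explicit_fields)
    out = []
    for column, dtype in zip(dataframe.columns, dataframe.dtypes):
        if column in by_name:
            out.append((column, by_name[column]))
            continue
        bq_type = _SIMPLE_DTYPE_TO_BQ.get(dtype.name)
        if bq_type is None:
            warnings.warn("Unable to determine type of column '{}'.".format(column))
            return ()  # empty tuple, not None, so callers can assign it directly
        out.append((column, bq_type))
    return tuple(out)

df = pandas.DataFrame([{"id": 1, "age": 100}])
print(sketch_dataframe_to_bq_schema(df, [("age", "INTEGER")]))
# -> (('id', 'INTEGER'), ('age', 'INTEGER'))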
20 changes: 1 addition & 19 deletions bigquery/google/cloud/bigquery/client.py
@@ -61,7 +61,6 @@
 from google.cloud.bigquery.retry import DEFAULT_RETRY
 from google.cloud.bigquery.routine import Routine
 from google.cloud.bigquery.routine import RoutineReference
-from google.cloud.bigquery.schema import _STRUCT_TYPES
 from google.cloud.bigquery.schema import SchemaField
 from google.cloud.bigquery.table import _table_arg_to_table
 from google.cloud.bigquery.table import _table_arg_to_table_ref
@@ -1532,27 +1531,10 @@ def load_table_from_dataframe(
         if location is None:
             location = self.location
 
-        if job_config.schema:
-            for field in job_config.schema:
-                if field.field_type in _STRUCT_TYPES:
-                    raise ValueError(
-                        "Uploading dataframes with struct (record) column types "
-                        "is not supported. See: "
-                        "https://github.com/googleapis/google-cloud-python/issues/8191"
-                    )
-
-        autodetected_schema = _pandas_helpers.dataframe_to_bq_schema(
+        job_config.schema = _pandas_helpers.dataframe_to_bq_schema(
             dataframe, job_config.schema
         )
 
-        # Only use an explicit schema if we were able to determine one
-        # matching the dataframe. If not, fallback to the pandas to_parquet
-        # method.
-        if autodetected_schema:
-            job_config.schema = autodetected_schema
-        else:
-            job_config.schema = ()
-
         tmpfd, tmppath = tempfile.mkstemp(suffix="_job_{}.parquet".format(job_id[:8]))
         os.close(tmpfd)
 
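With this change, job_config.schema is always assigned whatever the helper returns, which only works because the helper now returns an empty tuple instead of None on failure. The struct check that moved into _pandas_helpers still surfaces as a ValueError from the client call. A hedged illustration of that behavior (the table path and field names are placeholders):

import pandas
from google.cloud import bigquery

client = bigquery.Client()
dataframe = pandas.DataFrame([{"id": 1}])

job_config = bigquery.LoadJobConfig()
job_config.schema = [
    bigquery.SchemaField(
        "address", "RECORD", fields=[bigquery.SchemaField("city", "STRING")]
    )
]

try:
    client.load_table_from_dataframe(
        dataframe, "my-project.my_dataset.my_table", job_config=job_config
    )
except ValueError as exc:
    print(exc)  # struct (record) column types are not supported for dataframe loads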
8 changes: 4 additions & 4 deletions bigquery/tests/unit/test_client.py
@@ -5230,7 +5230,7 @@ def test_load_table_from_dataframe(self):
         from google.cloud.bigquery import job
 
         client = self._make_client()
-        records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
+        records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
         dataframe = pandas.DataFrame(records)
 
         load_patch = mock.patch(
@@ -5265,7 +5265,7 @@ def test_load_table_from_dataframe_w_client_location(self):
         from google.cloud.bigquery import job
 
         client = self._make_client(location=self.LOCATION)
-        records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
+        records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
         dataframe = pandas.DataFrame(records)
 
         load_patch = mock.patch(
@@ -5300,7 +5300,7 @@ def test_load_table_from_dataframe_w_custom_job_config(self):
         from google.cloud.bigquery import job
 
         client = self._make_client()
-        records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
+        records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
         dataframe = pandas.DataFrame(records)
         job_config = job.LoadJobConfig()
 
@@ -5702,7 +5702,7 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self):
     @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
     def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self):
         client = self._make_client()
-        records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
+        records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
         dataframe = pandas.DataFrame(records)
 
         load_patch = mock.patch(
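A note on the fixture change in these tests: string columns have pandas dtype object, which the dtype-to-BigQuery mapping cannot resolve, so autodetection gives up (now returning an empty tuple and emitting the "Unable to determine type" warning). Switching the fixtures to integer id columns presumably keeps every column detectable. A quick plain-pandas illustration of that dtype difference:

import pandas

df_strings = pandas.DataFrame([{"name": "Monty", "age": 100}])
df_ints = pandas.DataFrame([{"id": 1, "age": 100}])

print(df_strings.dtypes["name"])  # object -> not in the dtype-to-BQ mapping
print(df_ints.dtypes["id"])       # int64  -> maps to INTEGER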