Skip to content

Commit

Permalink
Merge branch 'main' into fix-jobs-retry-tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kiraksi authored Feb 6, 2024
2 parents 015bb53 + fe3183e commit e15b7f2
Show file tree
Hide file tree
Showing 6 changed files with 276 additions and 7 deletions.
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,25 @@
[1]: https://pypi.org/project/google-cloud-bigquery/#history


## [3.17.2](https://github.com/googleapis/python-bigquery/compare/v3.17.1...v3.17.2) (2024-01-30)


### Bug Fixes

* Change load_table_from_json autodetect logic ([#1804](https://github.com/googleapis/python-bigquery/issues/1804)) ([6249032](https://github.com/googleapis/python-bigquery/commit/62490325f64e5d66303d9218992e28ac5f21cb3f))


### Documentation

* Update to use API ([#1781](https://github.com/googleapis/python-bigquery/issues/1781)) ([81563b0](https://github.com/googleapis/python-bigquery/commit/81563b06298fe3a64be6a89b583c3d64758ca12a))
* Update `client_query_destination_table.py` sample to use `query_and_wait` ([#1783](https://github.com/googleapis/python-bigquery/issues/1783)) ([68ebbe1](https://github.com/googleapis/python-bigquery/commit/68ebbe12d455ce8e9b1784fb11787c2fb842ef22))
* Update query_external_sheets_permanent_table.py to use query_and_wait API ([#1778](https://github.com/googleapis/python-bigquery/issues/1778)) ([a7be88a](https://github.com/googleapis/python-bigquery/commit/a7be88adf8a480ee61aa79789cb53df1b79bb091))
* Update sample for query_to_arrow to use query_and_wait API ([#1776](https://github.com/googleapis/python-bigquery/issues/1776)) ([dbf10de](https://github.com/googleapis/python-bigquery/commit/dbf10dee51a7635e9b98658f205ded2de087a06f))
* Update the query destination table legacy file to use query_and_wait API ([#1775](https://github.com/googleapis/python-bigquery/issues/1775)) ([ef89f9e](https://github.com/googleapis/python-bigquery/commit/ef89f9e58c22b3af5a7757b69daa030116012350))
* Update to use `query_and_wait` in `client_query_w_positional_params.py` ([#1786](https://github.com/googleapis/python-bigquery/issues/1786)) ([410f71e](https://github.com/googleapis/python-bigquery/commit/410f71e6b6e755928e363ed89c1044e14b0db9cc))
* Update to use `query_and_wait` in `samples/client_query_w_timestamp_params.py` ([#1785](https://github.com/googleapis/python-bigquery/issues/1785)) ([ba36948](https://github.com/googleapis/python-bigquery/commit/ba3694852c13c8a29fe0f9d923353e82acfd4278))
* Update to_geodataframe to use query_and_wait functionality ([#1800](https://github.com/googleapis/python-bigquery/issues/1800)) ([1298594](https://github.com/googleapis/python-bigquery/commit/12985942942b8f205ecd261fcdf620df9a640460))

## [3.17.1](https://github.com/googleapis/python-bigquery/compare/v3.17.0...v3.17.1) (2024-01-24)


Expand Down
18 changes: 16 additions & 2 deletions google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2833,8 +2833,22 @@ def load_table_from_json(

new_job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON

if new_job_config.schema is None:
new_job_config.autodetect = True
# In specific conditions, we check if the table already exists, and/or
# set the autodetect value for the user. For exact conditions, see table
# https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
if new_job_config.schema is None and new_job_config.autodetect is None:
if new_job_config.write_disposition in (
job.WriteDisposition.WRITE_TRUNCATE,
job.WriteDisposition.WRITE_EMPTY,
):
new_job_config.autodetect = True
else:
try:
self.get_table(destination)
except core_exceptions.NotFound:
new_job_config.autodetect = True
else:
new_job_config.autodetect = False

if project is None:
project = self.project
Expand Down
2 changes: 1 addition & 1 deletion google/cloud/bigquery/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "3.17.1"
__version__ = "3.17.2"
2 changes: 1 addition & 1 deletion samples/geography/to_geodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,6 @@ def get_austin_service_requests_as_geography() -> "pandas.DataFrame":
LIMIT 10
"""

df = client.query(sql).to_geodataframe()
df = client.query_and_wait(sql).to_geodataframe()
# [END bigquery_query_results_geodataframe]
return df
39 changes: 39 additions & 0 deletions tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,6 +994,45 @@ def test_load_table_from_json_schema_autodetect(self):
self.assertEqual(tuple(table.schema), table_schema)
self.assertEqual(table.num_rows, 2)

# Autodetect makes a best effort to infer the schema, but situations exist
# where the detected schema is wrong and does not match the existing schema.
# Thus the client sets autodetect = False when table exists and just uses
# the existing schema. This test case uses a special case where backend has
# no way to distinguish int from string.
def test_load_table_from_json_schema_autodetect_table_exists(self):
    """Loading JSON into an existing table must reuse the table's schema.

    Schema autodetection is best-effort and can mis-infer types (the
    backend cannot tell an int-like string such as "123" from an int),
    so the client is expected to set autodetect=False when the
    destination table already exists and rely on its schema instead.
    """
    rows = [
        {"name": "123", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
        {"name": "456", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
    ]

    dataset_id = _make_dataset_id("bq_system_test")
    self.temp_dataset(dataset_id)
    table_id = "{}.{}.load_table_from_json_basic_use".format(
        Config.CLIENT.project, dataset_id
    )

    # NULLABLE mode matches what schema autodetection would produce.
    expected_schema = (
        bigquery.SchemaField("name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
        bigquery.SchemaField("birthday", "DATE", mode="NULLABLE"),
        bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"),
    )

    # Pre-create the table so the column order is predictable and the
    # "table already exists" code path is exercised.
    table = helpers.retry_403(Config.CLIENT.create_table)(
        Table(table_id, schema=expected_schema)
    )
    self.to_delete.insert(0, table)

    # No explicit job config: the client must choose autodetect itself.
    Config.CLIENT.load_table_from_json(rows, table_id).result()

    table = Config.CLIENT.get_table(table)
    self.assertEqual(tuple(table.schema), expected_schema)
    self.assertEqual(table.num_rows, 2)

def test_load_avro_from_uri_then_dump_table(self):
from google.cloud.bigquery.job import CreateDisposition
from google.cloud.bigquery.job import SourceFormat
Expand Down
203 changes: 200 additions & 3 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8951,6 +8951,8 @@ def test_load_table_from_dataframe_w_higher_scale_decimal128_datatype(self):
SchemaField("x", "BIGNUMERIC", "NULLABLE", None),
)

# With autodetect specified, we pass the value as is. For more info, see
# https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
def test_load_table_from_json_basic_use(self):
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
Expand All @@ -8962,12 +8964,28 @@ def test_load_table_from_json_basic_use(self):
{"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True},
]

job_config = job.LoadJobConfig(autodetect=True)

load_patch = mock.patch(
"google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
)

with load_patch as load_table_from_file:
client.load_table_from_json(json_rows, self.TABLE_REF)
# mock: remote table already exists
get_table_reference = {
"projectId": "project_id",
"datasetId": "test_dataset",
"tableId": "test_table",
}
get_table_patch = mock.patch(
"google.cloud.bigquery.client.Client.get_table",
autospec=True,
return_value=mock.Mock(table_reference=get_table_reference),
)

with load_patch as load_table_from_file, get_table_patch:
client.load_table_from_json(
json_rows, self.TABLE_REF, job_config=job_config
)

load_table_from_file.assert_called_once_with(
client,
Expand Down Expand Up @@ -9066,6 +9084,174 @@ def test_load_table_from_json_w_invalid_job_config(self):
err_msg = str(exc.value)
assert "Expected an instance of LoadJobConfig" in err_msg

# When all following are true:
# (1) no schema provided;
# (2) no autodetect value provided;
# (3) writeDisposition == WRITE_APPEND or None;
# (4) table already exists,
# client sets autodetect == False
# For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
def test_load_table_from_json_wo_schema_wo_autodetect_write_append_w_table(self):
    """No schema, no autodetect, WRITE_APPEND, table exists => autodetect False.

    With neither a schema nor an autodetect value supplied and a
    WRITE_APPEND (or None) write disposition, the client probes the
    destination table; since it exists here, the sent config must have
    autodetect=False. See
    https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
    """
    from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
    from google.cloud.bigquery import job
    from google.cloud.bigquery.job import WriteDisposition

    client = self._make_client()

    config = job.LoadJobConfig(write_disposition=WriteDisposition.WRITE_APPEND)
    rows = [
        {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False},
        {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True},
    ]

    # Simulate an existing destination table.
    table_ref_resource = {
        "projectId": "project_id",
        "datasetId": "test_dataset",
        "tableId": "test_table",
    }
    get_table_patch = mock.patch(
        "google.cloud.bigquery.client.Client.get_table",
        autospec=True,
        return_value=mock.Mock(table_reference=table_ref_resource),
    )
    load_patch = mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
    )

    with load_patch as load_table_from_file, get_table_patch:
        client.load_table_from_json(rows, self.TABLE_REF, job_config=config)

    load_table_from_file.assert_called_once_with(
        client,
        mock.ANY,
        self.TABLE_REF,
        size=mock.ANY,
        num_retries=_DEFAULT_NUM_RETRIES,
        job_id=mock.ANY,
        job_id_prefix=None,
        location=client.location,
        project=client.project,
        job_config=mock.ANY,
        timeout=DEFAULT_TIMEOUT,
    )

    sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
    assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
    assert sent_config.schema is None
    assert not sent_config.autodetect

# When all following are true:
# (1) no schema provided;
# (2) no autodetect value provided;
# (3) writeDisposition == WRITE_APPEND or None;
# (4) table does NOT exist,
# client sets autodetect == True
# For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
def test_load_table_from_json_wo_schema_wo_autodetect_write_append_wo_table(self):
    """No schema, no autodetect, WRITE_APPEND, no table => autodetect True.

    With neither a schema nor an autodetect value supplied and a
    WRITE_APPEND (or None) write disposition, the client probes the
    destination table; since the lookup raises NotFound here, the sent
    config must have autodetect=True. See
    https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
    """
    import google.api_core.exceptions as core_exceptions
    from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
    from google.cloud.bigquery import job
    from google.cloud.bigquery.job import WriteDisposition

    client = self._make_client()

    config = job.LoadJobConfig(write_disposition=WriteDisposition.WRITE_APPEND)
    rows = [
        {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False},
        {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True},
    ]

    # Simulate a missing destination table.
    get_table_patch = mock.patch(
        "google.cloud.bigquery.client.Client.get_table",
        autospec=True,
        side_effect=core_exceptions.NotFound(""),
    )
    load_patch = mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
    )

    with load_patch as load_table_from_file, get_table_patch:
        client.load_table_from_json(rows, self.TABLE_REF, job_config=config)

    load_table_from_file.assert_called_once_with(
        client,
        mock.ANY,
        self.TABLE_REF,
        size=mock.ANY,
        num_retries=_DEFAULT_NUM_RETRIES,
        job_id=mock.ANY,
        job_id_prefix=None,
        location=client.location,
        project=client.project,
        job_config=mock.ANY,
        timeout=DEFAULT_TIMEOUT,
    )

    sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
    assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
    assert sent_config.schema is None
    assert sent_config.autodetect

# When all following are true:
# (1) no schema provided;
# (2) no autodetect value provided;
# (3) writeDisposition == WRITE_TRUNCATE or WRITE_EMPTY;
# client sets autodetect == True
# For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
def test_load_table_from_json_wo_schema_wo_autodetect_others(self):
    """No schema, no autodetect, WRITE_TRUNCATE/WRITE_EMPTY => autodetect True.

    When the write disposition overwrites or requires an empty table,
    the existing schema (if any) is irrelevant, so the client enables
    autodetect without probing the destination. See
    https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
    """
    from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
    from google.cloud.bigquery import job
    from google.cloud.bigquery.job import WriteDisposition

    client = self._make_client()

    config = job.LoadJobConfig(write_disposition=WriteDisposition.WRITE_TRUNCATE)
    rows = [
        {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False},
        {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True},
    ]

    load_patch = mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
    )

    # Note: no get_table patch — this path must not hit the backend.
    with load_patch as load_table_from_file:
        client.load_table_from_json(rows, self.TABLE_REF, job_config=config)

    load_table_from_file.assert_called_once_with(
        client,
        mock.ANY,
        self.TABLE_REF,
        size=mock.ANY,
        num_retries=_DEFAULT_NUM_RETRIES,
        job_id=mock.ANY,
        job_id_prefix=None,
        location=client.location,
        project=client.project,
        job_config=mock.ANY,
        timeout=DEFAULT_TIMEOUT,
    )

    sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
    assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
    assert sent_config.schema is None
    assert sent_config.autodetect

def test_load_table_from_json_w_explicit_job_config_override(self):
from google.cloud.bigquery import job
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
Expand Down Expand Up @@ -9190,8 +9376,19 @@ def test_load_table_from_json_unicode_emoji_data_case(self):
load_patch = mock.patch(
"google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
)
# mock: remote table already exists
get_table_reference = {
"projectId": "project_id",
"datasetId": "test_dataset",
"tableId": "test_table",
}
get_table_patch = mock.patch(
"google.cloud.bigquery.client.Client.get_table",
autospec=True,
return_value=mock.Mock(table_reference=get_table_reference),
)

with load_patch as load_table_from_file:
with load_patch as load_table_from_file, get_table_patch:
client.load_table_from_json(json_rows, self.TABLE_REF)

load_table_from_file.assert_called_once_with(
Expand Down

0 comments on commit e15b7f2

Please sign in to comment.