diff --git a/bigquery/google/cloud/bigquery/_helpers.py b/bigquery/google/cloud/bigquery/_helpers.py index 89eb390993c6..a92ca9f9b143 100644 --- a/bigquery/google/cloud/bigquery/_helpers.py +++ b/bigquery/google/cloud/bigquery/_helpers.py @@ -58,10 +58,11 @@ def _record_from_json(value, field): """Coerce 'value' to a mapping, if set or not nullable.""" if _not_null(value, field): record = {} - for subfield, cell in zip(field.fields, value['f']): + record_iter = zip(field.fields, value['f']) + for subfield, cell in record_iter: converter = _CELLDATA_FROM_JSON[subfield.field_type] - if field.mode == 'REPEATED': - value = [converter(item, subfield) for item in cell['v']] + if subfield.mode == 'REPEATED': + value = [converter(item['v'], subfield) for item in cell['v']] else: value = converter(cell['v'], subfield) record[subfield.name] = value @@ -103,7 +104,7 @@ def _row_from_json(row, schema): for field, cell in zip(schema, row['f']): converter = _CELLDATA_FROM_JSON[field.field_type] if field.mode == 'REPEATED': - row_data.append([converter(item, field) + row_data.append([converter(item['v'], field) for item in cell['v']]) else: row_data.append(converter(cell['v'], field)) diff --git a/bigquery/unit_tests/test__helpers.py b/bigquery/unit_tests/test__helpers.py index 46c58c8ea405..c3bae86e76ae 100644 --- a/bigquery/unit_tests/test__helpers.py +++ b/bigquery/unit_tests/test__helpers.py @@ -186,7 +186,7 @@ def test_w_scalar_subfield(self): def test_w_repeated_subfield(self): subfield = _Field('REPEATED', 'color', 'STRING') field = _Field('REQUIRED', fields=[subfield]) - value = {'f': [{'v': ['red', 'yellow', 'blue']}]} + value = {'f': [{'v': [{'v': 'red'}, {'v': 'yellow'}, {'v': 'blue'}]}]} coerced = self._call_fut(value, field) self.assertEqual(coerced, {'color': ['red', 'yellow', 'blue']}) @@ -234,6 +234,97 @@ def test_w_string_value(self): self.assertEqual(coerced, 'Wonderful!') +class Test_row_from_json(unittest.TestCase): + + def _call_fut(self, row, schema): + from google.cloud.bigquery._helpers import _row_from_json + return _row_from_json(row, schema) + + def test_w_single_scalar_column(self): + # SELECT 1 AS col + col = _Field('REQUIRED', 'col', 'INTEGER') + row = {u'f': [{u'v': u'1'}]} + self.assertEqual(self._call_fut(row, schema=[col]), (1,)) + + def test_w_single_struct_column(self): + # SELECT (1, 2) AS col + sub_1 = _Field('REQUIRED', 'sub_1', 'INTEGER') + sub_2 = _Field('REQUIRED', 'sub_2', 'INTEGER') + col = _Field('REQUIRED', 'col', 'RECORD', fields=[sub_1, sub_2]) + row = {u'f': [{u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}]}}]} + self.assertEqual(self._call_fut(row, schema=[col]), + ({'sub_1': 1, 'sub_2': 2},)) + + def test_w_single_array_column(self): + # SELECT [1, 2, 3] as col + col = _Field('REPEATED', 'col', 'INTEGER') + row = {u'f': [{u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}]} + self.assertEqual(self._call_fut(row, schema=[col]), + ([1, 2, 3],)) + + def test_w_struct_w_nested_array_column(self): + # SELECT ([1, 2], 3, [4, 5]) as col + first = _Field('REPEATED', 'first', 'INTEGER') + second = _Field('REQUIRED', 'second', 'INTEGER') + third = _Field('REPEATED', 'third', 'INTEGER') + col = _Field('REQUIRED', 'col', 'RECORD', + fields=[first, second, third]) + row = { + u'f': [ + {u'v': { + u'f': [ + {u'v': [{u'v': u'1'}, {u'v': u'2'}]}, + {u'v': u'3'}, + {u'v': [{u'v': u'4'}, {u'v': u'5'}]} + ] + }}, + ] + } + self.assertEqual( + self._call_fut(row, schema=[col]), + ({u'first': [1, 2], u'second': 3, u'third': [4, 5]},)) + + def test_w_array_of_struct(self): + # SELECT [(1, 2, 3), (4, 5, 6)] as col + first = _Field('REQUIRED', 'first', 'INTEGER') + second = _Field('REQUIRED', 'second', 'INTEGER') + third = _Field('REQUIRED', 'third', 'INTEGER') + col = _Field('REPEATED', 'col', 'RECORD', + fields=[first, second, third]) + row = {u'f': [{u'v': [ + {u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}}, + {u'v': {u'f': [{u'v': u'4'}, {u'v': u'5'}, {u'v': u'6'}]}}, + ]}]} + self.assertEqual( + self._call_fut(row, schema=[col]), + ([ + {u'first': 1, u'second': 2, u'third': 3}, + {u'first': 4, u'second': 5, u'third': 6}, + ],)) + + def test_w_array_of_struct_w_array(self): + # SELECT [([1, 2, 3], 4), ([5, 6], 7)] + first = _Field('REPEATED', 'first', 'INTEGER') + second = _Field('REQUIRED', 'second', 'INTEGER') + col = _Field('REPEATED', 'col', 'RECORD', fields=[first, second]) + row = {u'f': [{u'v': [ + {u'v': {u'f': [ + {u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}, + {u'v': u'4'} + ]}}, + {u'v': {u'f': [ + {u'v': [{u'v': u'5'}, {u'v': u'6'}]}, + {u'v': u'7'} + ]}} + ]}]} + self.assertEqual( + self._call_fut(row, schema=[col]), + ([ + {u'first': [1, 2, 3], u'second': 4}, + {u'first': [5, 6], u'second': 7}, + ],)) + + class Test_rows_from_json(unittest.TestCase): def _call_fut(self, value, field): @@ -253,12 +344,12 @@ def test_w_record_subfield(self): {'f': [ {'v': 'Phred Phlyntstone'}, {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}}, - {'v': ['orange', 'black']}, + {'v': [{'v': 'orange'}, {'v': 'black'}]}, ]}, {'f': [ {'v': 'Bharney Rhubble'}, {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}}, - {'v': ['brown']}, + {'v': [{'v': 'brown'}]}, ]}, {'f': [ {'v': 'Wylma Phlyntstone'}, diff --git a/bigquery/unit_tests/test_table.py b/bigquery/unit_tests/test_table.py index 73fd84cec5e3..9fcea12d2dce 100644 --- a/bigquery/unit_tests/test_table.py +++ b/bigquery/unit_tests/test_table.py @@ -1173,22 +1173,27 @@ def test_fetch_data_w_repeated_fields(self): 'pageToken': TOKEN, 'rows': [ {'f': [ - {'v': ['red', 'green']}, - {'v': [{'f': [{'v': ['1', '2']}, - {'v': ['3.1415', '1.414']}]}]}, + {'v': [{'v': 'red'}, {'v': 'green'}]}, + {'v': [{ + 'v': { + 'f': [ + {'v': [{'v': '1'}, {'v': '2'}]}, + {'v': [{'v': '3.1415'}, {'v': '1.414'}]}, + ]} + }]}, ]}, ] } conn = _Connection(DATA) client = _Client(project=self.PROJECT, connection=conn) dataset = _Dataset(client) - full_name = SchemaField('color', 'STRING', mode='REPEATED') + color = SchemaField('color', 'STRING', mode='REPEATED') index = SchemaField('index', 'INTEGER', 'REPEATED') score = SchemaField('score', 'FLOAT', 'REPEATED') struct = SchemaField('struct', 'RECORD', mode='REPEATED', fields=[index, score]) table = self._make_one(self.TABLE_NAME, dataset=dataset, - schema=[full_name, struct]) + schema=[color, struct]) iterator = table.fetch_data() page = six.next(iterator.pages) diff --git a/system_tests/bigquery.py b/system_tests/bigquery.py index 0020672c63a8..bf913d499afb 100644 --- a/system_tests/bigquery.py +++ b/system_tests/bigquery.py @@ -478,3 +478,49 @@ def _job_done(instance): # them here. The best we can do is not that the API call didn't # raise an error, and that the job completed (in the `retry()` # above). + + def test_sync_query_w_nested_arrays_and_structs(self): + EXAMPLES = [ + { + 'sql': 'SELECT 1', + 'expected': 1, + }, + { + 'sql': 'SELECT (1, 2)', + 'expected': {'_field_1': 1, '_field_2': 2}, + }, + { + 'sql': 'SELECT [1, 2, 3]', + 'expected': [1, 2, 3], + }, + { + 'sql': 'SELECT ([1, 2], 3, [4, 5])', + 'expected': + {'_field_1': [1, 2], '_field_2': 3, '_field_3': [4, 5]}, + }, + { + 'sql': 'SELECT [(1, 2, 3), (4, 5, 6)]', + 'expected': [ + {'_field_1': 1, '_field_2': 2, '_field_3': 3}, + {'_field_1': 4, '_field_2': 5, '_field_3': 6}, + ], + }, + { + 'sql': 'SELECT [([1, 2, 3], 4), ([5, 6], 7)]', + 'expected': [ + {u'_field_1': [1, 2, 3], u'_field_2': 4}, + {u'_field_1': [5, 6], u'_field_2': 7}, + ], + }, + { + 'sql': 'SELECT ARRAY(SELECT STRUCT([1, 2]))', + 'expected': [{u'_field_1': [1, 2]}], + }, + ] + for example in EXAMPLES: + query = Config.CLIENT.run_sync_query(example['sql']) + query.use_legacy_sql = False + query.run() + self.assertEqual(len(query.rows), 1) + self.assertEqual(len(query.rows[0]), 1) + self.assertEqual(query.rows[0][0], example['expected'])