diff --git a/docs/reading.rst b/docs/reading.rst index c5e814bf..6361280a 100644 --- a/docs/reading.rst +++ b/docs/reading.rst @@ -28,7 +28,7 @@ destination DataFrame as well as a preferred column order as follows: 'SELECT * FROM `test_dataset.test_table`', project_id=projectid, index_col='index_column_name', - col_order=['col1', 'col2', 'col3']) + columns=['col1', 'col2']) Querying with legacy SQL syntax ------------------------------- diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 3d43884a..d4a8d2b7 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -734,7 +734,7 @@ def read_gbq( query_or_table, project_id=None, index_col=None, - col_order=None, + columns=None, reauth=False, auth_local_webserver=True, dialect=None, @@ -750,6 +750,8 @@ def read_gbq( auth_redirect_uri=None, client_id=None, client_secret=None, + *, + col_order=None, ): r"""Load data from Google BigQuery using google-cloud-python @@ -773,7 +775,7 @@ def read_gbq( the environment. index_col : str, optional Name of result column to use for index in results DataFrame. - col_order : list(str), optional + columns : list(str), optional List of BigQuery column names in the desired order for results DataFrame. reauth : boolean, default False @@ -888,6 +890,8 @@ def read_gbq( client_secret : str The Client Secret associated with the Client ID for the Google Cloud Project the user is attempting to connect to. + col_order : list(str), optional + Alias for columns, retained for backwards compatibility. 
Returns ------- @@ -966,10 +970,19 @@ def read_gbq( 'Index column "{0}" does not exist in DataFrame.'.format(index_col) ) + # Treat col_order as a backwards-compatible alias for columns, raising an error if both are provided + if col_order and not columns: + columns = col_order + elif col_order and columns: + raise ValueError( + "Must specify either columns (preferred) or col_order, not both" + ) + # Change the order of columns in the DataFrame based on provided list - if col_order is not None: - if sorted(col_order) == sorted(final_df.columns): - final_df = final_df[col_order] + # TODO(kiraksi): allow columns to be a subset of all columns in the table, with follow up PR + if columns is not None: + if sorted(columns) == sorted(final_df.columns): + final_df = final_df[columns] else: raise InvalidColumnOrder("Column order does not match this DataFrame.") diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index 9aac2357..bc078264 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -600,6 +600,40 @@ def test_tokyo(self, tokyo_dataset, tokyo_table, project_id): ) assert df["max_year"][0] >= 2000 + def test_columns_as_alias(self, project_id): + query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" + columns = ["string_2", "string_1", "string_3"] + + df = gbq.read_gbq( + query, + project_id=project_id, + columns=columns, + credentials=self.credentials, + dialect="standard", + ) + + expected = DataFrame({"string_1": ["a"], "string_2": ["b"], "string_3": ["c"]})[ + columns + ] + + # Verify that df matches the expected DataFrame + tm.assert_frame_equal(df, expected) + + def test_columns_and_col_order_raises_error(self, project_id): + query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" + columns = ["string_2", "string_1"] + col_order = ["string_3", "string_1", "string_2"] + + with pytest.raises(ValueError): + gbq.read_gbq( + query, + project_id=project_id, + columns=columns, + col_order=col_order, + 
credentials=self.credentials, + dialect="standard", + ) + class TestToGBQIntegration(object): @pytest.fixture(autouse=True, scope="function")