Skip to content

Commit

Permalink
fix(automl): fix uploading pandas dataframe to AutoML Tables (#9647)
Browse files Browse the repository at this point in the history
pandas.DataFrame.to_csv() by default exports the data index as a column
with an empty column name. This causes uploading the exported CSV file to
fail because AutoML Tables does not allow empty column names. Given
that the data index is not useful for training the model, this PR
fixes the problem by setting the index argument to False so that the
index is not exported.
  • Loading branch information
helinwang authored Nov 12, 2019
1 parent cb9cdc9 commit 8fdd2a4
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
7 changes: 6 additions & 1 deletion automl/google/cloud/automl_v1beta1/tables/gcs_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,12 @@ def upload_pandas_dataframe(self, dataframe, uploaded_csv_name=None):
uploaded_csv_name = "automl-tables-dataframe-{}.csv".format(
int(time.time())
)
csv_string = dataframe.to_csv()

# Set index to False to skip exporting the data index:
# 1. The resulting column name for the index column is empty, and AutoML
# Tables does not allow empty column names.
# 2. The index is not useful training information.
csv_string = dataframe.to_csv(index=False)

bucket = self.client.get_bucket(self.bucket_name)
blob = bucket.blob(uploaded_csv_name)
Expand Down
4 changes: 2 additions & 2 deletions automl/tests/unit/gapic/v1beta1/test_gcs_client_v1beta1.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def test_upload_pandas_dataframe(self):

gcs_client.client.get_bucket.assert_called_with("my-bucket")
mock_bucket.blob.assert_called_with("my-file.csv")
mock_blob.upload_from_string.assert_called_with(",col1,col2\n0,1,3\n1,2,4\n")
mock_blob.upload_from_string.assert_called_with("col1,col2\n1,3\n2,4\n")
assert gcs_uri == "gs://my-bucket/my-file.csv"

def test_upload_pandas_dataframe_no_csv_name(self):
Expand All @@ -156,7 +156,7 @@ def test_upload_pandas_dataframe_no_csv_name(self):

gcs_client.client.get_bucket.assert_called_with("my-bucket")
mock_bucket.blob.assert_called_with(generated_csv_name)
mock_blob.upload_from_string.assert_called_with(",col1,col2\n0,1,3\n1,2,4\n")
mock_blob.upload_from_string.assert_called_with("col1,col2\n1,3\n2,4\n")
assert re.match("^gs://my-bucket/automl-tables-dataframe-[0-9]*.csv$", gcs_uri)

def test_upload_pandas_dataframe_not_type_dataframe(self):
Expand Down

0 comments on commit 8fdd2a4

Please sign in to comment.