Skip to content

Commit

Permalink
Merge pull request #1107 from jakevdp/col-names-2
Browse files Browse the repository at this point in the history
Appropriately handle non-string column names
  • Loading branch information
jakevdp authored Aug 30, 2018
2 parents 701c656 + b6486c2 commit afe7815
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 2 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

### Enhancements

- Better errors for non-string column names, as well as automatic conversion
of ``pandas.RangeIndex`` columns to strings (#1107)

### Backward-incompatible changes

### Maintenance
Expand Down
14 changes: 12 additions & 2 deletions altair/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def sanitize_dataframe(df):
"""Sanitize a DataFrame to prepare it for serialization.
* Make a copy
* Convert RangeIndex columns to strings
* Raise ValueError if column names are not strings
* Raise ValueError if it has a hierarchical index.
* Convert categoricals to strings.
* Convert np.bool_ dtypes to Python bool objects
Expand All @@ -101,9 +103,17 @@ def sanitize_dataframe(df):
"""
df = df.copy()

if isinstance(df.index, pd.core.index.MultiIndex):
if isinstance(df.columns, pd.RangeIndex):
df.columns = df.columns.astype(str)

for col in df.columns:
if not isinstance(col, six.string_types):
raise ValueError('Dataframe contains invalid column name: {0!r}. '
'Column names must be strings'.format(col))

if isinstance(df.index, pd.MultiIndex):
raise ValueError('Hierarchical indices not supported')
if isinstance(df.columns, pd.core.index.MultiIndex):
if isinstance(df.columns, pd.MultiIndex):
raise ValueError('Hierarchical indices not supported')

def to_list_if_array(val):
Expand Down
15 changes: 15 additions & 0 deletions altair/utils/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy as np
import pandas as pd
import six

from .. import infer_vegalite_type, sanitize_dataframe

Expand Down Expand Up @@ -80,6 +81,20 @@ def test_sanitize_dataframe():
assert df.equals(df2)


def test_sanitize_dataframe_colnames():
    """Check how ``sanitize_dataframe`` handles non-string column names.

    A frame built without explicit column names should come back with
    string column names, while a frame whose columns include a non-string
    label should raise ``ValueError`` naming the offending column.
    """
    df = pd.DataFrame(np.arange(12).reshape(4, 3))

    # Test that RangeIndex is converted to strings
    df = sanitize_dataframe(df)
    # all() is required here: asserting a bare (non-empty) list is always
    # truthy and would pass even if no column had been converted.
    assert all(isinstance(col, six.string_types) for col in df.columns)

    # Test that non-string columns result in an error
    df.columns = [4, 'foo', 'bar']
    with pytest.raises(ValueError) as err:
        sanitize_dataframe(df)
    # Checked after the ``with`` block so the assertion actually runs.
    assert str(err.value).startswith('Dataframe contains invalid column name: 4.')


def test_sanitize_dataframe_timedelta():
df = pd.DataFrame({'r': pd.timedelta_range(start='1 day', periods=4)})
with pytest.raises(ValueError) as err:
Expand Down

0 comments on commit afe7815

Please sign in to comment.