diff --git a/CHANGES.md b/CHANGES.md
index be8148ea3..7269eece2 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -4,6 +4,9 @@
 
 ### Enhancements
 
+- Better errors for non-string column names, as well as automatic conversion
+  of ``pandas.RangeIndex`` columns to strings (#1107)
+
 ### Backward-incompatible changes
 
 ### Maintenance
diff --git a/altair/utils/core.py b/altair/utils/core.py
index 006b4fd88..51c16589e 100644
--- a/altair/utils/core.py
+++ b/altair/utils/core.py
@@ -91,6 +91,8 @@ def sanitize_dataframe(df):
     """Sanitize a DataFrame to prepare it for serialization.
 
     * Make a copy
+    * Convert RangeIndex columns to strings
+    * Raise ValueError if column names are not strings
     * Raise ValueError if it has a hierarchical index.
     * Convert categoricals to strings.
     * Convert np.bool_ dtypes to Python bool objects
@@ -101,9 +103,19 @@ def sanitize_dataframe(df):
     """
     df = df.copy()
 
-    if isinstance(df.index, pd.core.index.MultiIndex):
+    if isinstance(df.index, pd.MultiIndex):
         raise ValueError('Hierarchical indices not supported')
-    if isinstance(df.columns, pd.core.index.MultiIndex):
+    if isinstance(df.columns, pd.MultiIndex):
         raise ValueError('Hierarchical indices not supported')
 
+    # Column names are validated after the MultiIndex checks: MultiIndex
+    # labels are tuples and would otherwise be reported as invalid names.
+    if isinstance(df.columns, pd.RangeIndex):
+        df.columns = df.columns.astype(str)
+
+    for col in df.columns:
+        if not isinstance(col, six.string_types):
+            raise ValueError('Dataframe contains invalid column name: {0!r}. '
+                             'Column names must be strings'.format(col))
+
     def to_list_if_array(val):
diff --git a/altair/utils/tests/test_utils.py b/altair/utils/tests/test_utils.py
index bd1245a51..8185ad2d2 100644
--- a/altair/utils/tests/test_utils.py
+++ b/altair/utils/tests/test_utils.py
@@ -4,6 +4,7 @@
 
 import numpy as np
 import pandas as pd
+import six
 
 from .. import infer_vegalite_type, sanitize_dataframe
 
@@ -80,6 +81,27 @@ def test_sanitize_dataframe():
     assert df.equals(df2)
 
 
+def test_sanitize_dataframe_colnames():
+    df = pd.DataFrame(np.arange(12).reshape(4, 3))
+
+    # Test that RangeIndex is converted to strings
+    df = sanitize_dataframe(df)
+    assert all(isinstance(col, six.string_types) for col in df.columns)
+
+    # Test that non-string columns result in an error
+    df.columns = [4, 'foo', 'bar']
+    with pytest.raises(ValueError) as err:
+        sanitize_dataframe(df)
+    assert str(err.value).startswith('Dataframe contains invalid column name: 4.')
+
+    # Test that hierarchical columns keep their dedicated error
+    df.columns = pd.MultiIndex.from_tuples(
+        [('a', 'x'), ('a', 'y'), ('b', 'x')])
+    with pytest.raises(ValueError) as err:
+        sanitize_dataframe(df)
+    assert str(err.value).startswith('Hierarchical indices not supported')
+
+
 def test_sanitize_dataframe_timedelta():
     df = pd.DataFrame({'r': pd.timedelta_range(start='1 day', periods=4)})
     with pytest.raises(ValueError) as err: