Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Deprecate ordered=None for CategoricalDtype #26403

Merged
merged 14 commits into from
Jul 3, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,6 @@ Deprecations
- The deprecated ``.ix[]`` indexer now raises a more visible FutureWarning instead of DeprecationWarning (:issue:`26438`).
- Deprecated the ``unit='M'`` (months) and ``unit='Y'`` (years) values for the ``unit`` parameter of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
- The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`)
- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version.


.. _whatsnew_0250.prior_deprecations:
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,8 +538,15 @@ def update_dtype(self, dtype):
new_categories = self.categories

new_ordered = dtype.ordered

# TODO(GH26336): remove this if block when ordered=None is removed
if new_ordered is None:
new_ordered = self.ordered
if self.ordered:
msg = ("ordered=None is deprecated and will default to False "
"in a future version; ordered=True must be explicitly "
"passed in order to be retained")
warnings.warn(msg, FutureWarning, stacklevel=3)

return CategoricalDtype(new_categories, new_ordered)

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,10 @@ def is_copy(self, msg):
def _validate_dtype(self, dtype):
""" validate the passed dtype """

if dtype is not None:
# GH 26336: don't convert 'category' to CategoricalDtype
if isinstance(dtype, str) and dtype == 'category':
jschendel marked this conversation as resolved.
Show resolved Hide resolved
pass
elif dtype is not None:
dtype = pandas_dtype(dtype)

# a compound dtype
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,10 @@ def sanitize_array(data, index, dtype=None, copy=False,
Sanitize input data to an ndarray, copy if specified, coerce to the
dtype if specified.
"""
if dtype is not None:
# GH 26336: don't convert 'category' to CategoricalDtype
if isinstance(dtype, str) and dtype == 'category':
pass
elif dtype is not None:
dtype = pandas_dtype(dtype)

if isinstance(data, ma.MaskedArray):
Expand Down
7 changes: 6 additions & 1 deletion pandas/io/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,12 @@ def decode(obj):
return Interval(obj['left'], obj['right'], obj['closed'])
elif typ == 'series':
dtype = dtype_for(obj['dtype'])
pd_dtype = pandas_dtype(dtype)

# GH 26336: don't convert 'category' to CategoricalDtype
if isinstance(dtype, str) and dtype == 'category':
pd_dtype = dtype
else:
pd_dtype = pandas_dtype(dtype)

index = obj['index']
result = Series(unconvert(obj['data'], dtype, obj['compress']),
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/arrays/categorical/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,14 @@ def test_astype_category(self, dtype_ordered, cat_ordered):
expected = cat
tm.assert_categorical_equal(result, expected)

def test_astype_category_ordered_none_deprecated(self):
    """Check that ``Categorical.astype`` emits a FutureWarning (GH 26336).

    The source categorical is explicitly ordered, while the target dtype
    is built without passing ``ordered`` at all.
    """
    ordered_dtype = CategoricalDtype(categories=list('cdab'), ordered=True)
    unspecified_dtype = CategoricalDtype(categories=list('cedafb'))
    values = Categorical(list('abcdaba'), dtype=ordered_dtype)

    with tm.assert_produces_warning(FutureWarning):
        values.astype(unspecified_dtype)

def test_iter_python_types(self):
# GH-19909
cat = Categorical([1, 2])
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,7 +817,14 @@ def test_update_dtype(self, ordered_fixture, new_categories, new_ordered):
if expected_ordered is None:
expected_ordered = dtype.ordered

result = dtype.update_dtype(new_dtype)
# GH 26336
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you give some explanation of what you are testing here (the cases)?

if new_ordered is None and ordered_fixture is True:
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = dtype.update_dtype(new_dtype)
else:
result = dtype.update_dtype(new_dtype)

tm.assert_index_equal(result.categories, expected_categories)
assert result.ordered is expected_ordered

Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,14 @@ def test_astype_category(self, name, dtype_ordered, index_ordered):
expected = index
tm.assert_index_equal(result, expected)

def test_astype_category_ordered_none_deprecated(self):
    """Check that ``CategoricalIndex.astype`` emits a FutureWarning.

    GH 26336: the index starts out with an explicitly ordered dtype and
    is cast to a dtype constructed without an ``ordered`` argument.
    """
    ordered_dtype = CategoricalDtype(categories=list('cdab'), ordered=True)
    unspecified_dtype = CategoricalDtype(categories=list('cedafb'))
    index = CategoricalIndex(list('abcdaba'), dtype=ordered_dtype)

    with tm.assert_produces_warning(FutureWarning):
        index.astype(unspecified_dtype)

def test_reindex_base(self):
# Determined by cat ordering.
idx = CategoricalIndex(list("cab"), categories=list("cab"))
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,33 @@ def test_constructor_categorical_dtype(self):
dtype=CategoricalDtype(['a', 'b'], ordered=True))
tm.assert_series_equal(result, expected, check_categorical=True)

def test_constructor_categorical_string(self):
    """``dtype='category'`` must keep the CategoricalDtype already present.

    GH 26336: exercised for both a Categorical input and a
    categorical-backed Series input.
    """
    letters = 'abcabc'
    existing_dtype = CategoricalDtype(categories=list('dabc'), ordered=True)
    expected = Series(list(letters), dtype=existing_dtype)

    # Categorical passed together with the plain 'category' string
    from_categorical = Series(
        Categorical(list(letters), dtype=existing_dtype), dtype='category')
    tm.assert_series_equal(from_categorical, expected)

    # Same check, feeding the resulting Series back into the constructor
    from_series = Series(from_categorical, dtype='category')
    tm.assert_series_equal(from_series, expected)

def test_categorical_ordered_none_deprecated(self):
    """Check that the Series constructor emits a FutureWarning (GH 26336).

    Ordered categorical data is passed together with a dtype that was
    built without ``ordered``; both a Categorical and a Series wrapping
    it are tried. The stack level of the warning is not checked.
    """
    ordered_dtype = CategoricalDtype(categories=list('cdab'), ordered=True)
    unspecified_dtype = CategoricalDtype(categories=list('cedafb'))
    values = Categorical(list('abcdaba'), dtype=ordered_dtype)

    # Categorical input
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        Series(values, dtype=unspecified_dtype)

    # Series input wrapping the same Categorical
    wrapped = Series(values)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        Series(wrapped, dtype=unspecified_dtype)

def test_categorical_sideeffects_free(self):
# Passing a categorical to a Series and then changing values in either
# the series or the categorical should not change the values in the
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/series/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,14 @@ def test_astype_categories_deprecation(self):
result = s.astype('category', categories=['a', 'b'], ordered=True)
tm.assert_series_equal(result, expected)

def test_astype_category_ordered_none_deprecated(self):
    """Check that ``Series.astype`` emits a FutureWarning (GH 26336).

    The Series holds an explicitly ordered categorical dtype and is cast
    to a dtype constructed without ``ordered``. The stack level of the
    warning is not checked.
    """
    ordered_dtype = CategoricalDtype(categories=list('cdab'), ordered=True)
    unspecified_dtype = CategoricalDtype(categories=list('cedafb'))
    ser = Series(list('abcdaba'), dtype=ordered_dtype)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        ser.astype(unspecified_dtype)

def test_astype_from_categorical(self):
items = ["a", "b", "c", "a"]
s = Series(items)
Expand Down