Skip to content

Commit

Permalink
DEPR: CategoricalBlock; combine Block.replace methods (#40527)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Mar 20, 2021
1 parent 38640d1 commit 09e2036
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 53 deletions.
17 changes: 15 additions & 2 deletions pandas/core/internals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
)
from pandas.core.internals.blocks import ( # io.pytables, io.packers
Block,
CategoricalBlock,
DatetimeBlock,
DatetimeTZBlock,
ExtensionBlock,
Expand All @@ -28,7 +27,6 @@

__all__ = [
"Block",
"CategoricalBlock",
"NumericBlock",
"DatetimeBlock",
"DatetimeTZBlock",
Expand All @@ -48,3 +46,18 @@
"create_block_manager_from_arrays",
"create_block_manager_from_blocks",
]


def __getattr__(name: str):
    """
    Module-level attribute hook (PEP 562) for names not defined in the module.

    Keeps the deprecated ``CategoricalBlock`` name reachable: looking it up
    emits a ``FutureWarning`` and returns ``ExtensionBlock`` in its place.

    Parameters
    ----------
    name : str
        Attribute being looked up on the module.

    Returns
    -------
    type
        ``ExtensionBlock``, when ``name`` is ``"CategoricalBlock"``.

    Raises
    ------
    AttributeError
        For every name other than ``"CategoricalBlock"``.
    """
    if name != "CategoricalBlock":
        raise AttributeError(
            f"module 'pandas.core.internals' has no attribute '{name}'"
        )

    # Imported lazily: only the deprecated lookup needs it.
    import warnings

    # stacklevel=2 reports the warning against the frame that triggered the
    # lookup instead of against this hook.
    warnings.warn(
        "CategoricalBlock is deprecated and will be removed in a future version. "
        "Use ExtensionBlock instead.",
        FutureWarning,
        stacklevel=2,
    )
    return ExtensionBlock
64 changes: 22 additions & 42 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,7 @@ def copy(self, deep: bool = True):
# ---------------------------------------------------------------------
# Replace

@final
def replace(
self,
to_replace,
Expand All @@ -687,15 +688,30 @@ def replace(
"""
inplace = validate_bool_kwarg(inplace, "inplace")

# Note: the checks we do in NDFrame.replace ensure we never get
# here with listlike to_replace or value, as those cases
# go through _replace_list

values = self.values

if isinstance(values, Categorical):
# TODO: avoid special-casing
blk = self if inplace else self.copy()
blk.values.replace(to_replace, value, inplace=True)
return [blk]

regex = should_use_regex(regex, to_replace)

if regex:
return self._replace_regex(to_replace, value, inplace=inplace)

if not self._can_hold_element(to_replace):
# We cannot hold `to_replace`, so we know immediately that
# replacing it is a no-op.
# Note: If to_replace were a list, NDFrame.replace would call
# replace_list instead of replace.
return [self] if inplace else [self.copy()]

values = self.values

mask = missing.mask_missing(values, to_replace)
if not mask.any():
# Note: we get here with test_replace_extension_other incorrectly
Expand All @@ -720,7 +736,7 @@ def replace(
else:
# split so that we only upcast where necessary
return self.split_and_operate(
type(self).replace, to_replace, value, inplace=inplace, regex=regex
type(self).replace, to_replace, value, inplace=True, regex=regex
)

@final
Expand Down Expand Up @@ -1223,7 +1239,7 @@ def take_nd(
Take values according to indexer and return them as a block.
"""
# algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock
# algos.take_nd dispatches for DatetimeTZBlock
# so need to preserve types
# sparse is treated like an ndarray, but needs .get_values() shaping

Expand Down Expand Up @@ -1422,7 +1438,7 @@ class ExtensionBlock(Block):
Notes
-----
This holds all 3rd-party extension array types. It's also the immediate
parent class for our internal extension types' blocks, CategoricalBlock.
parent class for our internal extension types' blocks.
ExtensionArrays are limited to 1-D.
"""
Expand Down Expand Up @@ -1579,7 +1595,6 @@ def take_nd(

def _can_hold_element(self, element: Any) -> bool:
# TODO: We may need to think about pushing this onto the array.
# We're doing the same as CategoricalBlock here.
return True

def _slice(self, slicer):
Expand Down Expand Up @@ -2019,41 +2034,6 @@ def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]:
def _can_hold_element(self, element: Any) -> bool:
    """
    Report whether ``element`` can be stored in this block.

    This implementation does not inspect ``element``; it always returns True.
    """
    return True

def replace(
    self,
    to_replace,
    value,
    inplace: bool = False,
    regex: bool = False,
) -> List[Block]:
    """
    Replace ``to_replace`` with ``value``, choosing the regex or plain path.

    Parameters
    ----------
    to_replace : object
        Value or pattern to look for.
    value : object
        Replacement value.
    inplace : bool, default False
        Forwarded unchanged to whichever implementation handles the call.
    regex : bool, default False
        Requested regex mode; the effective mode is whatever
        ``should_use_regex(regex, to_replace)`` returns.

    Returns
    -------
    List[Block]
        Result of ``self._replace_regex`` when regex mode is in effect,
        otherwise of the parent class's ``replace`` called with ``regex=False``.
    """
    # Note: the checks we do in NDFrame.replace ensure we never get
    #  here with listlike to_replace or value, as those cases
    #  go through _replace_list

    regex = should_use_regex(regex, to_replace)

    if regex:
        return self._replace_regex(to_replace, value, inplace=inplace)
    return super().replace(to_replace, value, inplace=inplace, regex=False)


class CategoricalBlock(ExtensionBlock):
    """
    ExtensionBlock subclass whose ``replace`` is delegated to the held values.
    """

    __slots__ = ()

    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        regex: bool = False,
    ) -> List[Block]:
        """
        Replace ``to_replace`` with ``value`` through ``self.values.replace``.

        Parameters
        ----------
        to_replace : object
            Value to look for; passed straight to ``values.replace``.
        value : object
            Replacement; passed straight to ``values.replace``.
        inplace : bool, default False
            If True, modify this block; otherwise work on ``self.copy()``.
            Validated with ``validate_bool_kwarg``.
        regex : bool, default False
            Accepted in the signature but not used by this method.

        Returns
        -------
        List[Block]
            One-element list holding the block that was modified.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        result = self if inplace else self.copy()

        # ``result`` is already the block to modify (self or a fresh copy),
        # so its values are always updated in place.
        result.values.replace(to_replace, value, inplace=True)
        return [result]


# -----------------------------------------------------------------
# Constructor Helpers
Expand Down Expand Up @@ -2116,7 +2096,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None):
# Need this first(ish) so that Sparse[datetime] is sparse
cls = ExtensionBlock
elif isinstance(dtype, CategoricalDtype):
cls = CategoricalBlock
cls = ExtensionBlock
elif vtype is Timestamp:
cls = DatetimeTZBlock
elif vtype is Interval or vtype is Period:
Expand Down
8 changes: 0 additions & 8 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@
)
from pandas.core.internals.blocks import (
Block,
CategoricalBlock,
DatetimeTZBlock,
ExtensionBlock,
ObjectValuesExtensionBlock,
Expand Down Expand Up @@ -1867,13 +1866,6 @@ def _form_blocks(
object_blocks = _simple_blockify(items_dict["ObjectBlock"], np.object_)
blocks.extend(object_blocks)

if len(items_dict["CategoricalBlock"]) > 0:
cat_blocks = [
new_block(array, klass=CategoricalBlock, placement=i, ndim=2)
for i, array in items_dict["CategoricalBlock"]
]
blocks.extend(cat_blocks)

if len(items_dict["ExtensionBlock"]):
external_blocks = [
new_block(array, klass=ExtensionBlock, placement=i, ndim=2)
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/internals/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ def test_namespace():
]
expected = [
"Block",
"CategoricalBlock",
"NumericBlock",
"DatetimeBlock",
"DatetimeTZBlock",
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ def test_read_expands_user_home_dir(
),
],
)
@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:FutureWarning")
def test_read_fspath_all(self, reader, module, path, datapath):
pytest.importorskip(module)
path = datapath(*path)
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

@filter_sparse
@pytest.mark.single
@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:FutureWarning")
class TestFeather:
def check_error_on_write(self, df, exc, err_msg):
# check that we are raising the exception
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,7 @@ def test_write_column_index_nonstring(self, pa):
self.check_error_on_write(df, engine, ValueError, msg)


@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:FutureWarning")
class TestParquetPyArrow(Base):
def test_basic(self, pa, df_full):

Expand Down

0 comments on commit 09e2036

Please sign in to comment.