diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index fd23b89365496..ed77a210b6913 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -813,17 +813,50 @@ def replace( ) return blocks - def _replace_single( + def _replace_regex( self, to_replace, value, inplace: bool = False, - regex: bool = False, convert: bool = True, mask=None, ) -> List["Block"]: - """ no-op on a non-ObjectBlock """ - return [self] if inplace else [self.copy()] + """ + Replace elements by the given value, treating `to_replace` as a regex. + + Parameters + ---------- + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object. + inplace : bool, default False + Perform inplace modification. + convert : bool, default True + If true, try to coerce any object types to better types. + mask : array-like of bool, optional + True indicates the corresponding element is ignored (TODO confirm). + + Returns + ------- + List[Block] + """ + if not self._can_hold_element(to_replace): + # i.e. 
only ObjectBlock, but could in principle include a + # String ExtensionBlock + return [self] if inplace else [self.copy()] + + rx = re.compile(to_replace) + + new_values = self.values if inplace else self.values.copy() + replace_regex(new_values, rx, value, mask) + + block = self.make_block(new_values) + if convert: + nbs = block.convert(numeric=False) + else: + nbs = [block] + return nbs def _replace_list( self, @@ -1598,14 +1631,16 @@ def _replace_coerce( self = self.coerce_to_target_dtype(value) return self.putmask(mask, value, inplace=inplace) else: - return self._replace_single( - to_replace, - value, - inplace=inplace, - regex=regex, - convert=False, - mask=mask, - ) + regex = _should_use_regex(regex, to_replace) + if regex: + return self._replace_regex( + to_replace, + value, + inplace=inplace, + convert=False, + mask=mask, + ) + return self.replace(to_replace, value, inplace=inplace, regex=False) return [self] @@ -2506,72 +2541,26 @@ def replace( # here with listlike to_replace or value, as those cases # go through _replace_list - if is_re(to_replace) or regex: - return self._replace_single(to_replace, value, inplace=inplace, regex=True) - else: - return super().replace(to_replace, value, inplace=inplace, regex=regex) - - def _replace_single( - self, - to_replace, - value, - inplace: bool = False, - regex: bool = False, - convert: bool = True, - mask=None, - ) -> List["Block"]: - """ - Replace elements by the given value. - - Parameters - ---------- - to_replace : object or pattern - Scalar to replace or regular expression to match. - value : object - Replacement object. - inplace : bool, default False - Perform inplace modification. - regex : bool, default False - If true, perform regular expression substitution. - convert : bool, default True - If true, try to coerce any object types to better types. - mask : array-like of bool, optional - True indicate corresponding element is ignored. 
- - Returns - ------- - List[Block] - """ - inplace = validate_bool_kwarg(inplace, "inplace") - - # to_replace is regex compilable - regex = regex and is_re_compilable(to_replace) + regex = _should_use_regex(regex, to_replace) - # try to get the pattern attribute (compiled re) or it's a string - if is_re(to_replace): - pattern = to_replace.pattern + if regex: + return self._replace_regex(to_replace, value, inplace=inplace) else: - pattern = to_replace + return super().replace(to_replace, value, inplace=inplace, regex=False) - # if the pattern is not empty and to_replace is either a string or a - # regex - if regex and pattern: - rx = re.compile(to_replace) - else: - # if the thing to replace is not a string or compiled regex call - # the superclass method -> to_replace is some kind of object - return super().replace(to_replace, value, inplace=inplace, regex=regex) - new_values = self.values if inplace else self.values.copy() - replace_regex(new_values, rx, value, mask) +def _should_use_regex(regex: bool, to_replace: Any) -> bool: + """ + Decide whether to treat `to_replace` as a regular expression. + """ + if is_re(to_replace): + regex = True - # convert - block = self.make_block(new_values) - if convert: - nbs = block.convert(numeric=False) - else: - nbs = [block] - return nbs + regex = regex and is_re_compilable(to_replace) + + # Don't use regex if the pattern is empty. + regex = regex and re.compile(to_replace).pattern != "" + return regex class CategoricalBlock(ExtensionBlock):