From a85a386b780856842dd81155f7ea066df5ccca6d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Dec 2022 21:36:16 -0800 Subject: [PATCH] REF: avoid _with_infer constructor (#50001) --- pandas/_testing/__init__.py | 2 +- pandas/core/algorithms.py | 2 +- pandas/core/indexes/base.py | 17 +++++++++-------- pandas/core/indexes/multi.py | 2 +- pandas/core/strings/accessor.py | 2 +- pandas/core/util/hashing.py | 4 +--- pandas/tests/arithmetic/test_numeric.py | 6 ++++++ pandas/tests/arrays/integer/test_dtypes.py | 2 +- pandas/tests/extension/base/groupby.py | 4 ++-- pandas/tests/extension/test_string.py | 2 +- pandas/tests/io/test_stata.py | 3 +-- 11 files changed, 25 insertions(+), 21 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 02ee13d60427e..43020ae471f10 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -274,7 +274,7 @@ def box_expected(expected, box_cls, transpose: bool = True): else: expected = pd.array(expected, copy=False) elif box_cls is Index: - expected = Index._with_infer(expected) + expected = Index(expected) elif box_cls is Series: expected = Series(expected) elif box_cls is DataFrame: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c94b1068e5e65..cd719a5256ea3 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -894,7 +894,7 @@ def value_counts( # For backwards compatibility, we let Index do its normal type # inference, _except_ for if if infers from object to bool. - idx = Index._with_infer(keys) + idx = Index(keys) if idx.dtype == bool and keys.dtype == object: idx = idx.astype(object) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 01a1ebd459616..0b55416d2bd7e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2678,6 +2678,7 @@ def fillna(self, value=None, downcast=None): if downcast is None: # no need to care metadata other than name # because it can't have freq if it has NaTs + # _with_infer needed for test_fillna_categorical return Index._with_infer(result, name=self.name) raise NotImplementedError( f"{type(self).__name__}.fillna does not support 'downcast' " @@ -4230,10 +4231,10 @@ def _reindex_non_unique( new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp) new_indexer[~check] = -1 - if isinstance(self, ABCMultiIndex): - new_index = type(self).from_tuples(new_labels, names=self.names) + if not isinstance(self, ABCMultiIndex): + new_index = Index(new_labels, name=self.name) else: - new_index = Index._with_infer(new_labels, name=self.name) + new_index = type(self).from_tuples(new_labels, names=self.names) return new_index, indexer, new_indexer # -------------------------------------------------------------------- @@ -6477,7 +6478,7 @@ def insert(self, loc: int, item) -> Index: if self._typ == "numericindex": # Use self._constructor instead of Index to retain NumericIndex GH#43921 # TODO(2.0) can use Index instead of self._constructor - return self._constructor._with_infer(new_values, name=self.name) + return self._constructor(new_values, name=self.name) else: return Index._with_infer(new_values, name=self.name) @@ -6850,7 +6851,7 @@ def ensure_index_from_sequences(sequences, names=None) -> Index: if len(sequences) == 1: if names is not None: names = names[0] - return Index._with_infer(sequences[0], name=names) + return Index(sequences[0], name=names) else: return MultiIndex.from_arrays(sequences, names=names) @@ -6893,7 +6894,7 @@ def ensure_index(index_like: Axes, copy: bool = False) -> Index: if isinstance(index_like, ABCSeries): name = index_like.name - return Index._with_infer(index_like, name=name, copy=copy) + return Index(index_like, name=name, copy=copy) if is_iterator(index_like): index_like = list(index_like) @@ -6909,9 +6910,9 @@ def ensure_index(index_like: Axes, copy: bool = False) -> Index: return MultiIndex.from_arrays(index_like) else: - return Index._with_infer(index_like, copy=copy, tupleize_cols=False) + return Index(index_like, copy=copy, tupleize_cols=False) else: - return Index._with_infer(index_like, copy=copy) + return Index(index_like, copy=copy) def ensure_has_len(seq): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f0b0ec23dba1a..012a92793acf9 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2112,7 +2112,7 @@ def append(self, other): # setting names to None automatically return MultiIndex.from_tuples(new_tuples) except (TypeError, IndexError): - return Index._with_infer(new_tuples) + return Index(new_tuples) def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: if len(args) == 0 and len(kwargs) == 0: diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 71a50c69bfee1..8cd4cb976503d 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -319,7 +319,7 @@ def cons_row(x): out = out.get_level_values(0) return out else: - return Index._with_infer(result, name=name) + return Index(result, name=name) else: index = self._orig.index # This is a mess. diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 5a5e46e0227aa..e0b18047aa0ec 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -344,9 +344,7 @@ def _hash_ndarray( ) codes, categories = factorize(vals, sort=False) - cat = Categorical( - codes, Index._with_infer(categories), ordered=False, fastpath=True - ) + cat = Categorical(codes, Index(categories), ordered=False, fastpath=True) return _hash_categorical(cat, encoding, hash_key) try: diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 529dd6baa70c0..f2af85c2e388d 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -1147,6 +1147,9 @@ def test_numarr_with_dtype_add_nan(self, dtype, box_with_array): ser = tm.box_expected(ser, box) expected = tm.box_expected(expected, box) + if box is Index and dtype is object: + # TODO: avoid this; match behavior with Series + expected = expected.astype(np.float64) result = np.nan + ser tm.assert_equal(result, expected) @@ -1162,6 +1165,9 @@ def test_numarr_with_dtype_add_int(self, dtype, box_with_array): ser = tm.box_expected(ser, box) expected = tm.box_expected(expected, box) + if box is Index and dtype is object: + # TODO: avoid this; match behavior with Series + expected = expected.astype(np.int64) result = 1 + ser tm.assert_equal(result, expected) diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 1566476c32989..f34953876f5f4 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -89,7 +89,7 @@ def test_astype_index(all_data, dropna): other = all_data dtype = all_data.dtype - idx = pd.Index._with_infer(np.array(other)) + idx = pd.Index(np.array(other)) assert isinstance(idx, ABCIndex) result = idx.astype(dtype) diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index 1f46442ee13b0..339c6560d6212 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -33,7 +33,7 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping): _, uniques = pd.factorize(data_for_grouping, sort=True) if as_index: - index = pd.Index._with_infer(uniques, name="B") + index = pd.Index(uniques, name="B") expected = pd.Series([3.0, 1.0, 4.0], index=index, name="A") self.assert_series_equal(result, expected) else: @@ -61,7 +61,7 @@ def test_groupby_extension_no_sort(self, data_for_grouping): result = df.groupby("B", sort=False).A.mean() _, index = pd.factorize(data_for_grouping, sort=False) - index = pd.Index._with_infer(index, name="B") + index = pd.Index(index, name="B") expected = pd.Series([1.0, 3.0, 4.0], index=index, name="A") self.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index ecc69113882c5..de7967a8578b5 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -391,7 +391,7 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping): _, uniques = pd.factorize(data_for_grouping, sort=True) if as_index: - index = pd.Index._with_infer(uniques, name="B") + index = pd.Index(uniques, name="B") expected = pd.Series([3.0, 1.0, 4.0], index=index, name="A") self.assert_series_equal(result, expected) else: diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 535c2d3e7e0f3..530934df72606 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -20,7 +20,6 @@ DataFrame, Series, ) -from pandas.core.indexes.api import ensure_index from pandas.tests.io.test_compression import _compression_to_extension from pandas.io.parsers import read_csv @@ -1144,7 +1143,7 @@ def _convert_categorical(from_frame: DataFrame) -> DataFrame: if is_categorical_dtype(ser.dtype): cat = ser._values.remove_unused_categories() if cat.categories.dtype == object: - categories = ensure_index(cat.categories._values) + categories = pd.Index._with_infer(cat.categories._values) cat = cat.set_categories(categories) from_frame[col] = cat return from_frame