diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 19e3eed2d3444..6d0a522becddc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1603,16 +1603,21 @@ def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series: if isinstance(other, DataFrame): return self._constructor( - np.dot(lvals, rvals), index=left.index, columns=other.columns + np.dot(lvals, rvals), + index=left.index, + columns=other.columns, + copy=False, ) elif isinstance(other, Series): - return self._constructor_sliced(np.dot(lvals, rvals), index=left.index) + return self._constructor_sliced( + np.dot(lvals, rvals), index=left.index, copy=False + ) elif isinstance(rvals, (np.ndarray, Index)): result = np.dot(lvals, rvals) if result.ndim == 2: - return self._constructor(result, index=left.index) + return self._constructor(result, index=left.index, copy=False) else: - return self._constructor_sliced(result, index=left.index) + return self._constructor_sliced(result, index=left.index, copy=False) else: # pragma: no cover raise TypeError(f"unsupported type: {type(other)}") @@ -3610,10 +3615,15 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: else: new_arr = self.values.T - if copy: + if copy and not using_copy_on_write(): new_arr = new_arr.copy() result = self._constructor( - new_arr, index=self.columns, columns=self.index, dtype=new_arr.dtype + new_arr, + index=self.columns, + columns=self.index, + dtype=new_arr.dtype, + # We already made a copy (more than one block) + copy=False, ) return result.__finalize__(self, method="transpose") @@ -3839,7 +3849,7 @@ def _getitem_multilevel(self, key): else: new_values = self._values[:, loc] result = self._constructor( - new_values, index=self.index, columns=result_columns + new_values, index=self.index, columns=result_columns, copy=False ) if using_copy_on_write() and isinstance(loc, slice): result._mgr.add_references(self._mgr) # type: ignore[arg-type] @@ -4079,7 +4089,7 @@ def _setitem_frame(self, key, value): if isinstance(key, np.ndarray): if key.shape != self.shape: raise ValueError("Array conditional must be same shape as self") - key = self._constructor(key, **self._construct_axes_dict()) + key = self._constructor(key, **self._construct_axes_dict(), copy=False) if key.size and not all(is_bool_dtype(dtype) for dtype in key.dtypes): raise TypeError( @@ -4997,7 +5007,9 @@ def _reindex_multi( # condition more specific. indexer = row_indexer, col_indexer new_values = take_2d_multi(self.values, indexer, fill_value=fill_value) - return self._constructor(new_values, index=new_index, columns=new_columns) + return self._constructor( + new_values, index=new_index, columns=new_columns, copy=False + ) else: return self._reindex_with_indexers( {0: [new_index, row_indexer], 1: [new_columns, col_indexer]}, @@ -10527,7 +10539,7 @@ def corr( f"'{method}' was supplied" ) - result = self._constructor(correl, index=idx, columns=cols) + result = self._constructor(correl, index=idx, columns=cols, copy=False) return result.__finalize__(self, method="corr") def cov( @@ -10658,7 +10670,7 @@ def cov( else: base_cov = libalgos.nancorr(mat, cov=True, minp=min_periods) - result = self._constructor(base_cov, index=idx, columns=cols) + result = self._constructor(base_cov, index=idx, columns=cols, copy=False) return result.__finalize__(self, method="cov") def corrwith( @@ -10771,7 +10783,9 @@ def c(x): return nanops.nancorr(x[0], x[1], method=method) correl = self._constructor_sliced( - map(c, zip(left.values.T, right.values.T)), index=left.columns + map(c, zip(left.values.T, right.values.T)), + index=left.columns, + copy=False, ) else: @@ -10882,7 +10896,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False): series_counts = notna(frame).sum(axis=axis) counts = series_counts._values result = self._constructor_sliced( - counts, index=frame._get_agg_axis(axis) + counts, index=frame._get_agg_axis(axis), copy=False ) return result.astype("int64").__finalize__(self, method="count") @@ -10991,7 +11005,7 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series: middle = func(arr, axis=0, skipna=skipna) result = ufunc(result, middle) - res_ser = self._constructor_sliced(result, index=self.index) + res_ser = self._constructor_sliced(result, index=self.index, copy=False) return res_ser def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series: @@ -11673,6 +11687,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: ).reshape(self.shape), self.index, self.columns, + copy=False, ) return result.__finalize__(self, method="isin") diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c3818a0a425b9..b070f1a9b7d90 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -779,6 +779,8 @@ def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self return self._constructor( new_values, *new_axes, + # The no-copy case for CoW is handled above + copy=False, ).__finalize__(self, method="swapaxes") @final @@ -9686,7 +9688,7 @@ def _where( cond = np.asanyarray(cond) if cond.shape != self.shape: raise ValueError("Array conditional must be same shape as self") - cond = self._constructor(cond, **self._construct_axes_dict()) + cond = self._constructor(cond, **self._construct_axes_dict(), copy=False) # make sure we are boolean fill_value = bool(inplace) @@ -9767,7 +9769,9 @@ def _where( # we are the same shape, so create an actual object for alignment else: - other = self._constructor(other, **self._construct_axes_dict()) + other = self._constructor( + other, **self._construct_axes_dict(), copy=False + ) if axis is None: axis = 0 diff --git a/pandas/core/series.py b/pandas/core/series.py index 23d553c6fecc4..75a009447e037 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -843,7 +843,7 @@ def view(self, dtype: Dtype | None = None) -> Series: # self.array instead of self._values so we piggyback on PandasArray # implementation res_values = self.array.view(dtype) - res_ser = self._constructor(res_values, index=self.index) + res_ser = self._constructor(res_values, index=self.index, copy=False) if isinstance(res_ser._mgr, SingleBlockManager) and using_copy_on_write(): blk = res_ser._mgr._block blk.refs = cast("BlockValuesRefs", self._references) @@ -1044,7 +1044,7 @@ def _get_values_tuple(self, key: tuple): # If key is contained, would have returned by now indexer, new_index = self.index.get_loc_level(key) - new_ser = self._constructor(self._values[indexer], index=new_index) + new_ser = self._constructor(self._values[indexer], index=new_index, copy=False) if using_copy_on_write() and isinstance(indexer, slice): new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type] return new_ser.__finalize__(self) @@ -1084,7 +1084,9 @@ def _get_value(self, label, takeable: bool = False): new_index = mi[loc] new_index = maybe_droplevels(new_index, label) - new_ser = self._constructor(new_values, index=new_index, name=self.name) + new_ser = self._constructor( + new_values, index=new_index, name=self.name, copy=False + ) if using_copy_on_write() and isinstance(loc, slice): new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type] return new_ser.__finalize__(self) @@ -1384,7 +1386,7 @@ def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series: nv.validate_repeat((), {"axis": axis}) new_index = self.index.repeat(repeats) new_values = self._values.repeat(repeats) - return self._constructor(new_values, index=new_index).__finalize__( + return self._constructor(new_values, index=new_index, copy=False).__finalize__( self, method="repeat" ) @@ -1550,7 +1552,7 @@ def reset_index( self.index = new_index else: return self._constructor( - self._values.copy(), index=new_index + self._values.copy(), index=new_index, copy=False ).__finalize__(self, method="reset_index") elif inplace: raise TypeError( @@ -2072,7 +2074,7 @@ def mode(self, dropna: bool = True) -> Series: # Ensure index is type stable (should always use int index) return self._constructor( - res_values, index=range(len(res_values)), name=self.name + res_values, index=range(len(res_values)), name=self.name, copy=False ) def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation @@ -2336,7 +2338,7 @@ def duplicated(self, keep: DropKeep = "first") -> Series: dtype: bool """ res = self._duplicated(keep=keep) - result = self._constructor(res, index=self.index) + result = self._constructor(res, index=self.index, copy=False) return result.__finalize__(self, method="duplicated") def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable: @@ -2514,7 +2516,7 @@ def round(self, decimals: int = 0, *args, **kwargs) -> Series: """ nv.validate_round(args, kwargs) result = self._values.round(decimals) - result = self._constructor(result, index=self.index).__finalize__( + result = self._constructor(result, index=self.index, copy=False).__finalize__( self, method="round" ) @@ -2820,7 +2822,7 @@ def diff(self, periods: int = 1) -> Series: {examples} """ result = algorithms.diff(self._values, periods) - return self._constructor(result, index=self.index).__finalize__( + return self._constructor(result, index=self.index, copy=False).__finalize__( self, method="diff" ) @@ -2938,7 +2940,7 @@ def dot(self, other: AnyArrayLike) -> Series | np.ndarray: if isinstance(other, ABCDataFrame): return self._constructor( - np.dot(lvals, rvals), index=other.columns + np.dot(lvals, rvals), index=other.columns, copy=False ).__finalize__(self, method="dot") elif isinstance(other, Series): return np.dot(lvals, rvals) @@ -3167,7 +3169,7 @@ def combine( # try_float=False is to match agg_series npvalues = lib.maybe_convert_objects(new_values, try_float=False) res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False) - return self._constructor(res_values, index=new_index, name=new_name) + return self._constructor(res_values, index=new_index, name=new_name, copy=False) def combine_first(self, other) -> Series: """ @@ -3528,7 +3530,7 @@ def sort_values( return self.copy(deep=None) result = self._constructor( - self._values[sorted_index], index=self.index[sorted_index] + self._values[sorted_index], index=self.index[sorted_index], copy=False ) if ignore_index: @@ -3776,7 +3778,9 @@ def argsort( else: result = np.argsort(values, kind=kind) - res = self._constructor(result, index=self.index, name=self.name, dtype=np.intp) + res = self._constructor( + result, index=self.index, name=self.name, dtype=np.intp, copy=False + ) return res.__finalize__(self, method="argsort") def nlargest( @@ -4151,7 +4155,7 @@ def explode(self, ignore_index: bool = False) -> Series: else: index = self.index.repeat(counts) - return self._constructor(values, index=index, name=self.name) + return self._constructor(values, index=index, name=self.name, copy=False) def unstack(self, level: IndexLabel = -1, fill_value: Hashable = None) -> DataFrame: """ @@ -4282,7 +4286,7 @@ def map( dtype: object """ new_values = self._map_values(arg, na_action=na_action) - return self._constructor(new_values, index=self.index).__finalize__( + return self._constructor(new_values, index=self.index, copy=False).__finalize__( self, method="map" ) @@ -4576,7 +4580,7 @@ def _reindex_indexer( new_values = algorithms.take_nd( self._values, indexer, allow_fill=True, fill_value=None ) - return self._constructor(new_values, index=new_index) + return self._constructor(new_values, index=new_index, copy=False) def _needs_reindex_multi(self, axes, method, level) -> bool: """ @@ -5291,7 +5295,7 @@ def isin(self, values) -> Series: dtype: bool """ result = algorithms.isin(self._values, values) - return self._constructor(result, index=self.index).__finalize__( + return self._constructor(result, index=self.index, copy=False).__finalize__( self, method="isin" )