diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ac229c5f50d58..59cea8fc8d99c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1592,16 +1592,21 @@ def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series: if isinstance(other, DataFrame): return self._constructor( - np.dot(lvals, rvals), index=left.index, columns=other.columns + np.dot(lvals, rvals), + index=left.index, + columns=other.columns, + copy=False, ) elif isinstance(other, Series): - return self._constructor_sliced(np.dot(lvals, rvals), index=left.index) + return self._constructor_sliced( + np.dot(lvals, rvals), index=left.index, copy=False + ) elif isinstance(rvals, (np.ndarray, Index)): result = np.dot(lvals, rvals) if result.ndim == 2: - return self._constructor(result, index=left.index) + return self._constructor(result, index=left.index, copy=False) else: - return self._constructor_sliced(result, index=left.index) + return self._constructor_sliced(result, index=left.index, copy=False) else: # pragma: no cover raise TypeError(f"unsupported type: {type(other)}") @@ -3571,9 +3576,15 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: else: new_arr = self.values.T - if copy: + if copy and not using_copy_on_write(): new_arr = new_arr.copy() - result = self._constructor(new_arr, index=self.columns, columns=self.index) + result = self._constructor( + new_arr, + index=self.columns, + columns=self.index, + # We already made a copy (more than one block) + copy=False, + ) return result.__finalize__(self, method="transpose") @@ -3795,7 +3806,7 @@ def _getitem_multilevel(self, key): else: new_values = self._values[:, loc] result = self._constructor( - new_values, index=self.index, columns=result_columns + new_values, index=self.index, columns=result_columns, copy=False ) if using_copy_on_write() and isinstance(loc, slice): result._mgr.add_references(self._mgr) # type: ignore[arg-type] @@ -4029,7 +4040,7 @@ def _setitem_frame(self, key, value): if isinstance(key, np.ndarray): if key.shape != self.shape: raise ValueError("Array conditional must be same shape as self") - key = self._constructor(key, **self._construct_axes_dict()) + key = self._constructor(key, **self._construct_axes_dict(), copy=False) if key.size and not all(is_bool_dtype(dtype) for dtype in key.dtypes): raise TypeError( @@ -4939,7 +4950,9 @@ def _reindex_multi( # condition more specific. indexer = row_indexer, col_indexer new_values = take_2d_multi(self.values, indexer, fill_value=fill_value) - return self._constructor(new_values, index=new_index, columns=new_columns) + return self._constructor( + new_values, index=new_index, columns=new_columns, copy=False + ) else: return self._reindex_with_indexers( {0: [new_index, row_indexer], 1: [new_columns, col_indexer]}, @@ -10060,7 +10073,7 @@ def corr( f"'{method}' was supplied" ) - result = self._constructor(correl, index=idx, columns=cols) + result = self._constructor(correl, index=idx, columns=cols, copy=False) return result.__finalize__(self, method="corr") def cov( @@ -10191,7 +10204,7 @@ def cov( else: base_cov = libalgos.nancorr(mat, cov=True, minp=min_periods) - result = self._constructor(base_cov, index=idx, columns=cols) + result = self._constructor(base_cov, index=idx, columns=cols, copy=False) return result.__finalize__(self, method="cov") def corrwith( @@ -10304,7 +10317,9 @@ def c(x): return nanops.nancorr(x[0], x[1], method=method) correl = self._constructor_sliced( - map(c, zip(left.values.T, right.values.T)), index=left.columns + map(c, zip(left.values.T, right.values.T)), + index=left.columns, + copy=False, ) else: @@ -10415,7 +10430,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False): series_counts = notna(frame).sum(axis=axis) counts = series_counts._values result = self._constructor_sliced( - counts, index=frame._get_agg_axis(axis) + counts, index=frame._get_agg_axis(axis), copy=False ) return result.astype("int64").__finalize__(self, method="count") @@ -10524,7 +10539,7 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series: middle = func(arr, axis=0, skipna=skipna) result = ufunc(result, middle) - res_ser = self._constructor_sliced(result, index=self.index) + res_ser = self._constructor_sliced(result, index=self.index, copy=False) return res_ser def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series: @@ -11206,6 +11221,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: ).reshape(self.shape), self.index, self.columns, + copy=False, ) return result.__finalize__(self, method="isin") diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f8423a7d804af..497bacfa24812 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -779,6 +779,8 @@ def swapaxes( return self._constructor( new_values, *new_axes, + # The no-copy case for CoW is handled above + copy=False, ).__finalize__(self, method="swapaxes") @final @@ -9629,7 +9631,7 @@ def _where( cond = np.asanyarray(cond) if cond.shape != self.shape: raise ValueError("Array conditional must be same shape as self") - cond = self._constructor(cond, **self._construct_axes_dict()) + cond = self._constructor(cond, **self._construct_axes_dict(), copy=False) # make sure we are boolean fill_value = bool(inplace) @@ -9704,7 +9706,9 @@ def _where( # we are the same shape, so create an actual object for alignment else: - other = self._constructor(other, **self._construct_axes_dict()) + other = self._constructor( + other, **self._construct_axes_dict(), copy=False + ) if axis is None: axis = 0 diff --git a/pandas/core/series.py b/pandas/core/series.py index 76debb2a61435..58e0954a56f97 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -840,7 +840,7 @@ def view(self, dtype: Dtype | None = None) -> Series: # self.array instead of self._values so we piggyback on PandasArray # implementation res_values = self.array.view(dtype) - res_ser = self._constructor(res_values, index=self.index) + res_ser = self._constructor(res_values, index=self.index, copy=False) if isinstance(res_ser._mgr, SingleBlockManager) and using_copy_on_write(): blk = res_ser._mgr._block blk.refs = cast("BlockValuesRefs", self._references) @@ -1073,7 +1073,7 @@ def _get_values_tuple(self, key: tuple): # If key is contained, would have returned by now indexer, new_index = self.index.get_loc_level(key) - new_ser = self._constructor(self._values[indexer], index=new_index) + new_ser = self._constructor(self._values[indexer], index=new_index, copy=False) if using_copy_on_write() and isinstance(indexer, slice): new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type] return new_ser.__finalize__(self) @@ -1113,7 +1113,9 @@ def _get_value(self, label, takeable: bool = False): new_index = mi[loc] new_index = maybe_droplevels(new_index, label) - new_ser = self._constructor(new_values, index=new_index, name=self.name) + new_ser = self._constructor( + new_values, index=new_index, name=self.name, copy=False + ) if using_copy_on_write() and isinstance(loc, slice): new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type] return new_ser.__finalize__(self) @@ -1413,7 +1415,7 @@ def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series: nv.validate_repeat((), {"axis": axis}) new_index = self.index.repeat(repeats) new_values = self._values.repeat(repeats) - return self._constructor(new_values, index=new_index).__finalize__( + return self._constructor(new_values, index=new_index, copy=False).__finalize__( self, method="repeat" ) @@ -1579,7 +1581,7 @@ def reset_index( self.index = new_index else: return self._constructor( - self._values.copy(), index=new_index + self._values.copy(), index=new_index, copy=False ).__finalize__(self, method="reset_index") elif inplace: raise TypeError( @@ -2101,7 +2103,7 @@ def mode(self, dropna: bool = True) -> Series: # Ensure index is type stable (should always use int index) return self._constructor( - res_values, index=range(len(res_values)), name=self.name + res_values, index=range(len(res_values)), name=self.name, copy=False ) def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation @@ -2365,7 +2367,7 @@ def duplicated(self, keep: DropKeep = "first") -> Series: dtype: bool """ res = self._duplicated(keep=keep) - result = self._constructor(res, index=self.index) + result = self._constructor(res, index=self.index, copy=False) return result.__finalize__(self, method="duplicated") def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable: @@ -2543,7 +2545,7 @@ def round(self, decimals: int = 0, *args, **kwargs) -> Series: """ nv.validate_round(args, kwargs) result = self._values.round(decimals) - result = self._constructor(result, index=self.index).__finalize__( + result = self._constructor(result, index=self.index, copy=False).__finalize__( self, method="round" ) @@ -2844,7 +2846,7 @@ def diff(self, periods: int = 1) -> Series: {examples} """ result = algorithms.diff(self._values, periods) - return self._constructor(result, index=self.index).__finalize__( + return self._constructor(result, index=self.index, copy=False).__finalize__( self, method="diff" ) @@ -2962,7 +2964,7 @@ def dot(self, other: AnyArrayLike) -> Series | np.ndarray: if isinstance(other, ABCDataFrame): return self._constructor( - np.dot(lvals, rvals), index=other.columns + np.dot(lvals, rvals), index=other.columns, copy=False ).__finalize__(self, method="dot") elif isinstance(other, Series): return np.dot(lvals, rvals) @@ -3264,7 +3266,7 @@ def combine( # try_float=False is to match agg_series npvalues = lib.maybe_convert_objects(new_values, try_float=False) res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False) - return self._constructor(res_values, index=new_index, name=new_name) + return self._constructor(res_values, index=new_index, name=new_name, copy=False) def combine_first(self, other) -> Series: """ @@ -3615,7 +3617,7 @@ def sort_values( return self.copy(deep=None) result = self._constructor( - self._values[sorted_index], index=self.index[sorted_index] + self._values[sorted_index], index=self.index[sorted_index], copy=False ) if ignore_index: @@ -3863,7 +3865,9 @@ def argsort( else: result = np.argsort(values, kind=kind) - res = self._constructor(result, index=self.index, name=self.name, dtype=np.intp) + res = self._constructor( + result, index=self.index, name=self.name, dtype=np.intp, copy=False + ) return res.__finalize__(self, method="argsort") def nlargest( @@ -4238,7 +4242,7 @@ def explode(self, ignore_index: bool = False) -> Series: else: index = self.index.repeat(counts) - return self._constructor(values, index=index, name=self.name) + return self._constructor(values, index=index, name=self.name, copy=False) def unstack(self, level: IndexLabel = -1, fill_value: Hashable = None) -> DataFrame: """ @@ -4369,7 +4373,7 @@ def map( dtype: object """ new_values = self._map_values(arg, na_action=na_action) - return self._constructor(new_values, index=self.index).__finalize__( + return self._constructor(new_values, index=self.index, copy=False).__finalize__( self, method="map" ) @@ -4663,7 +4667,7 @@ def _reindex_indexer( new_values = algorithms.take_nd( self._values, indexer, allow_fill=True, fill_value=None ) - return self._constructor(new_values, index=new_index) + return self._constructor(new_values, index=new_index, copy=False) def _needs_reindex_multi(self, axes, method, level) -> bool: """ @@ -5378,7 +5382,7 @@ def isin(self, values) -> Series: dtype: bool """ result = algorithms.isin(self._values, values) - return self._constructor(result, index=self.index).__finalize__( + return self._constructor(result, index=self.index, copy=False).__finalize__( self, method="isin" )