Skip to content

Commit

Permalink
REF: remove axes from Managers
Browse files: browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Aug 17, 2022
1 parent ff441ec commit a017402
Show file tree
Hide file tree
Showing 14 changed files with 199 additions and 62 deletions.
5 changes: 3 additions & 2 deletions pandas/_libs/properties.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,10 @@ cdef class AxisProperty:
if obj is None:
# Only instances have _mgr, not classes
return self
if self.axis == 0:
return obj._index
else:
axes = obj._mgr.axes
return axes[self.axis]
return obj._columns

def __set__(self, obj, value):
obj._set_axis(self.axis, value)
6 changes: 6 additions & 0 deletions pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,12 @@ def _reconstruct(result):
return result
if isinstance(result, BlockManager):
# we went through BlockManager.apply e.g. np.sqrt
# TODO: any cases that aren't index/columns-preserving?
if self.ndim == 1:
reconstruct_kwargs["index"] = self.index
else:
reconstruct_kwargs["index"] = self.index
reconstruct_kwargs["columns"] = self.columns
result = self._constructor(result, **reconstruct_kwargs, copy=False)
else:
# we converted an array, lost our axes
Expand Down
47 changes: 31 additions & 16 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ class DataFrame(NDFrame, OpsMixin):
2 2 3
"""

_internal_names_set = {"columns", "index"} | NDFrame._internal_names_set
_internal_names_set = {"_columns", "columns", "_index", "index"} | NDFrame._internal_names_set
_typ = "dataframe"
_HANDLED_TYPES = (Series, Index, ExtensionArray, np.ndarray)
_accessors: set[str] = {"sparse"}
Expand Down Expand Up @@ -617,11 +617,20 @@ def __init__(
dtype = self._validate_dtype(dtype)

if isinstance(data, DataFrame):
if index is None and columns is None:
index = data.index
columns = data.columns
data = data._mgr

if isinstance(data, (BlockManager, ArrayManager)):
# first check if a Manager is passed without any other arguments
# -> use fastpath (without checking Manager type)
if index is None or columns is None:
assert False
if not index.equals(data.axes[-1]):#index is not data.axes[-1]:
assert False
if not columns.equals(data.axes[0]):#columns is not data.axes[0]:
assert False
if index is None and columns is None and dtype is None and not copy:
# GH#33357 fastpath
NDFrame.__init__(self, data)
Expand Down Expand Up @@ -747,7 +756,7 @@ def __init__(
index, # type: ignore[arg-type]
dtype,
)
mgr = arrays_to_mgr(
mgr, _, _ = arrays_to_mgr(
arrays,
columns,
index,
Expand Down Expand Up @@ -790,7 +799,7 @@ def __init__(
construct_1d_arraylike_from_scalar(data, len(index), dtype)
for _ in range(len(columns))
]
mgr = arrays_to_mgr(values, columns, index, dtype=None, typ=manager)
mgr, _, _ = arrays_to_mgr(values, columns, index, dtype=None, typ=manager)
else:
arr2d = construct_2d_arraylike_from_scalar(
data,
Expand Down Expand Up @@ -2354,9 +2363,10 @@ def maybe_reorder(
columns = columns.drop(exclude)

manager = get_option("mode.data_manager")
mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager)
mgr, index, columns = arrays_to_mgr(arrays, columns, result_index, typ=manager)

return cls(mgr)
# FIXME: get axes without mgr.axes
return cls(mgr, index=index, columns=columns)

def to_records(
self, index: bool = True, column_dtypes=None, index_dtypes=None
Expand Down Expand Up @@ -2558,15 +2568,15 @@ def _from_arrays(
columns = ensure_index(columns)
if len(columns) != len(arrays):
raise ValueError("len(columns) must match len(arrays)")
mgr = arrays_to_mgr(
mgr, index, columns = arrays_to_mgr(
arrays,
columns,
index,
dtype=dtype,
verify_integrity=verify_integrity,
typ=manager,
)
return cls(mgr)
return cls(mgr, index=index, columns=columns)

@doc(
storage_options=_shared_docs["storage_options"],
Expand Down Expand Up @@ -3683,7 +3693,7 @@ def _ixs(self, i: int, axis: int = 0) -> Series:

# if we are a copy, mark as such
copy = isinstance(new_mgr.array, np.ndarray) and new_mgr.array.base is None
result = self._constructor_sliced(new_mgr, name=self.index[i]).__finalize__(
result = self._constructor_sliced(new_mgr, index=self.columns, name=self.index[i]).__finalize__(
self
)
result._set_is_copy(self, copy=copy)
Expand Down Expand Up @@ -4198,7 +4208,7 @@ def _box_col_values(self, values: SingleDataManager, loc: int) -> Series:
name = self.columns[loc]
klass = self._constructor_sliced
# We get index=self.index bc values is a SingleDataManager
return klass(values, name=name, fastpath=True).__finalize__(self)
return klass(values, name=name, index=self.index, fastpath=True).__finalize__(self)

# ----------------------------------------------------------------------
# Lookup Caching
Expand Down Expand Up @@ -6884,8 +6894,12 @@ def sort_values( # type: ignore[override]
new_data.set_axis(
self._get_block_manager_axis(axis), default_index(len(indexer))
)

result = self._constructor(new_data)
# FIXME: get axes without mgr.axes
axes_dict = {}
axes_dict["index"] = new_data.axes[-1]
if self.ndim == 2:
axes_dict["columns"] = new_data.axes[0]
result = self._constructor(new_data, **axes_dict)
if inplace:
return self._update_inplace(result)
else:
Expand Down Expand Up @@ -7569,7 +7583,7 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None):
# i.e. scalar, faster than checking np.ndim(right) == 0
with np.errstate(all="ignore"):
bm = self._mgr.apply(array_op, right=right)
return self._constructor(bm)
return self._constructor(bm, index=self.index, columns=self.columns)

elif isinstance(right, DataFrame):
assert self.index.equals(right.index)
Expand All @@ -7590,7 +7604,7 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None):
right._mgr, # type: ignore[arg-type]
array_op,
)
return self._constructor(bm)
return self._constructor(bm, index=self.index, columns=self.columns)

elif isinstance(right, Series) and axis == 1:
# axis=1 means we want to operate row-by-row
Expand Down Expand Up @@ -10833,7 +10847,8 @@ def _get_data() -> DataFrame:
# After possibly _get_data and transposing, we are now in the
# simple case where we can use BlockManager.reduce
res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures)
out = df._constructor(res).iloc[0]
# FIXME: get axes without mgr.axes
out = df._constructor(res, index=res.axes[1], columns=res.axes[0]).iloc[0]
if out_dtype is not None:
out = out.astype(out_dtype)
if axis == 0 and len(self) == 0 and name in ["sum", "prod"]:
Expand Down Expand Up @@ -11541,9 +11556,9 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
_info_axis_name = "columns"

index = properties.AxisProperty(
axis=1, doc="The index (row labels) of the DataFrame."
axis=0, doc="The index (row labels) of the DataFrame."
)
columns = properties.AxisProperty(axis=0, doc="The column labels of the DataFrame.")
columns = properties.AxisProperty(axis=1, doc="The column labels of the DataFrame.")

@property
def _AXIS_NUMBERS(self) -> dict[str, int]:
Expand Down
Loading

0 comments on commit a017402

Please sign in to comment.