Skip to content

Commit

Permalink
SQUASHED [ArrayManager] Add SingleArrayManager to back a Series
Browse files (browse the repository at this point in the history)
  • Loading branch information
jorisvandenbossche committed Mar 4, 2021
1 parent e742820 commit f466f0f
Show file tree
Hide file tree
Showing 15 changed files with 278 additions and 58 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,9 @@ jobs:
pytest pandas/tests/resample/ --array-manager
pytest pandas/tests/reshape/merge --array-manager
pytest pandas/tests/series/methods --array-manager
pytest pandas/tests/series/test_* --array-manager
# indexing subset (temporary since other tests don't pass yet)
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean --array-manager
pytest pandas/tests/frame/indexing/test_where.py --array-manager
Expand Down
12 changes: 9 additions & 3 deletions pandas/_libs/reduction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,20 @@ cdef class _BaseGrouper:
cached_typ = self.typ(
vslider.buf, dtype=vslider.buf.dtype, index=cached_ityp, name=self.name
)
self.has_block = hasattr(cached_typ._mgr, "_block")
else:
# See the comment in indexes/base.py about _index_data.
# We need this for EA-backed indexes that have a reference
# to a 1-d ndarray like datetime / timedelta / period.
object.__setattr__(cached_ityp, '_index_data', islider.buf)
cached_ityp._engine.clear_mapping()
cached_ityp._cache.clear() # e.g. inferred_freq must go
object.__setattr__(cached_typ._mgr._block, 'values', vslider.buf)
object.__setattr__(cached_typ._mgr._block, 'mgr_locs',
slice(len(vslider.buf)))
if self.has_block:
object.__setattr__(cached_typ._mgr._block, 'values', vslider.buf)
object.__setattr__(cached_typ._mgr._block, 'mgr_locs',
slice(len(vslider.buf)))
else:
cached_typ._mgr.arrays[0] = vslider.buf
object.__setattr__(cached_typ, '_index', cached_ityp)
object.__setattr__(cached_typ, 'name', self.name)

Expand Down Expand Up @@ -108,6 +112,7 @@ cdef class SeriesBinGrouper(_BaseGrouper):
cdef public:
ndarray arr, index, dummy_arr, dummy_index
object values, f, bins, typ, ityp, name
bint has_block

def __init__(self, object series, object f, object bins):

Expand Down Expand Up @@ -201,6 +206,7 @@ cdef class SeriesGrouper(_BaseGrouper):
cdef public:
ndarray arr, index, dummy_arr, dummy_index
object f, labels, values, typ, ityp, name
bint has_block

def __init__(self, object series, object f, object labels,
Py_ssize_t ngroups):
Expand Down
3 changes: 3 additions & 0 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@
from pandas.core.internals import (
ArrayManager,
BlockManager,
SingleArrayManager,
SingleBlockManager,
)
from pandas.core.resample import Resampler
from pandas.core.series import Series
Expand Down Expand Up @@ -184,3 +186,4 @@

# internals
Manager = Union["ArrayManager", "BlockManager"]
SingleManager = Union["SingleArrayManager", "SingleBlockManager"]
2 changes: 1 addition & 1 deletion pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,6 @@ def sanitize_array(
DataFrame constructor, as the dtype keyword there may be interpreted as only
applying to a subset of columns, see GH#24435.
"""

if isinstance(data, ma.MaskedArray):
data = sanitize_masked_array(data)

Expand Down Expand Up @@ -555,6 +554,7 @@ def sanitize_array(
inferred = lib.infer_dtype(subarr, skipna=False)
if inferred in {"interval", "period"}:
subarr = array(subarr)
subarr = extract_array(subarr, extract_numpy=True)

return subarr

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@
from pandas.core.internals import (
ArrayManager,
BlockManager,
SingleArrayManager,
)
from pandas.core.internals.construction import mgr_to_mgr
from pandas.core.missing import find_valid_index
Expand Down Expand Up @@ -5563,7 +5564,7 @@ def _protect_consolidate(self, f):
Consolidate _mgr -- if the blocks have changed, then clear the
cache
"""
if isinstance(self._mgr, ArrayManager):
if isinstance(self._mgr, (ArrayManager, SingleArrayManager)):
return f()
blocks_before = len(self._mgr.blocks)
result = f()
Expand Down
20 changes: 11 additions & 9 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1152,16 +1152,18 @@ def py_fallback(values: ArrayLike) -> ArrayLike:
result = result._consolidate()
assert isinstance(result, (Series, DataFrame)) # for mypy
mgr = result._mgr
assert isinstance(mgr, BlockManager)

# unwrap DataFrame to get array
if len(mgr.blocks) != 1:
# We've split an object block! Everything we've assumed
# about a single block input returning a single block output
# is a lie. See eg GH-39329
return mgr.as_array()
if isinstance(mgr, BlockManager):
# unwrap DataFrame to get array
if len(mgr.blocks) != 1:
# We've split an object block! Everything we've assumed
# about a single block input returning a single block output
# is a lie. See eg GH-39329
return mgr.as_array()
else:
result = mgr.blocks[0].values
return result
else:
result = mgr.blocks[0].values
result = mgr.arrays[0]
return result

def array_func(values: ArrayLike) -> ArrayLike:
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1590,7 +1590,11 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):

# if there is only one block/type, still have to take split path
# unless the block is one-dimensional or it can hold the value
if not take_split_path and self.obj._mgr.blocks and self.ndim > 1:
if (
not take_split_path
and getattr(self.obj._mgr, "blocks", False)
and self.ndim > 1
):
# in case of dict, keys are indices
val = list(value.values()) if isinstance(value, dict) else value
blk = self.obj._mgr.blocks[0]
Expand Down
12 changes: 10 additions & 2 deletions pandas/core/internals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
from pandas.core.internals.array_manager import ArrayManager
from pandas.core.internals.base import DataManager
from pandas.core.internals.array_manager import (
ArrayManager,
SingleArrayManager,
)
from pandas.core.internals.base import (
DataManager,
SingleDataManager,
)
from pandas.core.internals.blocks import ( # io.pytables, io.packers
Block,
CategoricalBlock,
Expand Down Expand Up @@ -34,7 +40,9 @@
"DataManager",
"ArrayManager",
"BlockManager",
"SingleDataManager",
"SingleBlockManager",
"SingleArrayManager",
"concatenate_managers",
# those two are preserved here for downstream compatibility (GH-33892)
"create_block_manager_from_arrays",
Expand Down
Loading

0 comments on commit f466f0f

Please sign in to comment.