Skip to content

Commit

Permalink
API: pseudo-public internals API for downstream libraries (#40182)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Mar 5, 2021
1 parent 04a0b86 commit 6423561
Show file tree
Hide file tree
Showing 9 changed files with 169 additions and 47 deletions.
2 changes: 1 addition & 1 deletion pandas/core/internals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pandas.core.internals.api import make_block # pseudo-public version
from pandas.core.internals.array_manager import (
ArrayManager,
SingleArrayManager,
Expand All @@ -16,7 +17,6 @@
NumericBlock,
ObjectBlock,
TimeDeltaBlock,
make_block,
)
from pandas.core.internals.concat import concatenate_managers
from pandas.core.internals.managers import (
Expand Down
61 changes: 61 additions & 0 deletions pandas/core/internals/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
This is a pseudo-public API for downstream libraries. We ask that downstream
authors
1) Try to avoid using internals directly altogether, and failing that,
2) Use only functions exposed here (or in core.internals)
"""
from typing import Optional

import numpy as np

from pandas._typing import Dtype

from pandas.core.dtypes.common import is_datetime64tz_dtype
from pandas.core.dtypes.dtypes import PandasDtype
from pandas.core.dtypes.generic import ABCPandasArray

from pandas.core.arrays import DatetimeArray
from pandas.core.internals.blocks import (
Block,
DatetimeTZBlock,
get_block_type,
)


def make_block(
    values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None
) -> Block:
    """
    Build a Block; this is a pseudo-public analogue to blocks.new_block.

    We ask that downstream libraries use this rather than any fully-internal
    APIs, including but not limited to:

    - core.internals.blocks.make_block
    - Block.make_block
    - Block.make_block_same_class
    - Block.__init__

    Parameters
    ----------
    values
        Data to wrap. A PandasArray is unwrapped with ``to_numpy()`` (and
        promoted with ``np.atleast_2d`` when ``ndim > 1``) so that the block
        is backed by an ndarray.
    placement
        Forwarded unchanged to the block constructor.
    klass : type, optional
        Block class to instantiate. When omitted, it is selected by
        ``get_block_type(values, dtype)``.
    ndim : int, optional
        Forwarded to the block constructor.
    dtype : Dtype, optional
        A PandasDtype is replaced by its ``numpy_dtype``. When ``klass`` is
        omitted and ``dtype`` is None, ``values.dtype`` is used for block
        type selection. When ``klass`` is DatetimeTZBlock and ``values`` is
        not already tz-aware, ``dtype`` is passed to
        ``DatetimeArray._simple_new``.

    Returns
    -------
    Block
        The result of ``klass(values, ndim=ndim, placement=placement)``.
    """
    if isinstance(values, ABCPandasArray):
        # Ensure that we don't allow PandasArray / PandasDtype in internals.
        # For now, blocks should be backed by ndarrays when possible.
        values = values.to_numpy()
        if ndim and ndim > 1:
            # TODO(EA2D): special case not needed with 2D EAs
            values = np.atleast_2d(values)

    if isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype

    if klass is None:
        # Compare against None explicitly rather than relying on truthiness,
        # so that a caller-supplied dtype object that happens to evaluate as
        # falsy is not silently replaced by values.dtype.
        if dtype is None:
            dtype = values.dtype
        klass = get_block_type(values, dtype)

    elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
        # TODO: This is no longer hit internally; does it need to be retained
        #  for e.g. pyarrow?
        values = DatetimeArray._simple_new(values, dtype=dtype)

    return klass(values, ndim=ndim, placement=placement)
6 changes: 3 additions & 3 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
DataManager,
SingleDataManager,
)
from pandas.core.internals.blocks import make_block
from pandas.core.internals.blocks import new_block

if TYPE_CHECKING:
from pandas import Float64Index
Expand Down Expand Up @@ -466,9 +466,9 @@ def apply_with_block(self: T, f, align_keys=None, **kwargs) -> T:
if self.ndim == 2:
if isinstance(arr, np.ndarray):
arr = np.atleast_2d(arr)
block = make_block(arr, placement=slice(0, 1, 1), ndim=2)
block = new_block(arr, placement=slice(0, 1, 1), ndim=2)
else:
block = make_block(arr, placement=slice(0, len(self), 1), ndim=1)
block = new_block(arr, placement=slice(0, len(self), 1), ndim=1)

applied = getattr(block, f)(**kwargs)
if isinstance(applied, list):
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def make_block(self, values, placement=None) -> Block:
if self.is_extension:
values = ensure_block_shape(values, ndim=self.ndim)

return make_block(values, placement=placement, ndim=self.ndim)
return new_block(values, placement=placement, ndim=self.ndim)

@final
def make_block_same_class(self, values, placement=None) -> Block:
Expand Down Expand Up @@ -1431,7 +1431,7 @@ def _unstack(self, unstacker, fill_value, new_placement):
new_values = new_values.T[mask]
new_placement = new_placement[mask]

blocks = [make_block(new_values, placement=new_placement, ndim=2)]
blocks = [new_block(new_values, placement=new_placement, ndim=2)]
return blocks, mask

def quantile(
Expand Down Expand Up @@ -1460,7 +1460,7 @@ def quantile(

result = quantile_compat(self.values, qs, interpolation, axis)

return make_block(result, placement=self.mgr_locs, ndim=2)
return new_block(result, placement=self.mgr_locs, ndim=2)


class ExtensionBlock(Block):
Expand Down Expand Up @@ -2301,7 +2301,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None):
return cls


def make_block(
def new_block(
values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None
) -> Block:
# Ensure that we don't allow PandasArray / PandasDtype in internals.
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
ExtensionArray,
)
from pandas.core.internals.array_manager import ArrayManager
from pandas.core.internals.blocks import make_block
from pandas.core.internals.blocks import new_block
from pandas.core.internals.managers import BlockManager

if TYPE_CHECKING:
Expand Down Expand Up @@ -144,10 +144,10 @@ def concatenate_managers(
# Fast-path
b = blk.make_block_same_class(values, placement=placement)
else:
b = make_block(values, placement=placement, ndim=blk.ndim)
b = new_block(values, placement=placement, ndim=blk.ndim)
else:
new_values = _concatenate_join_units(join_units, concat_axis, copy=copy)
b = make_block(new_values, placement=placement, ndim=len(axes))
b = new_block(new_values, placement=placement, ndim=len(axes))
blocks.append(b)

return BlockManager(blocks, axes)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
from pandas.core.internals.array_manager import ArrayManager
from pandas.core.internals.blocks import (
ensure_block_shape,
make_block,
new_block,
)
from pandas.core.internals.managers import (
BlockManager,
Expand Down Expand Up @@ -300,7 +300,7 @@ def ndarray_to_mgr(
# TODO: What about re-joining object columns?
dvals_list = [maybe_squeeze_dt64tz(x) for x in dvals_list]
block_values = [
make_block(dvals_list[n], placement=[n], ndim=2)
new_block(dvals_list[n], placement=n, ndim=2)
for n in range(len(dvals_list))
]

Expand Down
32 changes: 16 additions & 16 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
ensure_block_shape,
extend_blocks,
get_block_type,
make_block,
new_block,
)
from pandas.core.internals.ops import (
blockwise_all,
Expand Down Expand Up @@ -322,7 +322,7 @@ def unpickle_block(values, mgr_locs, ndim: int):
# TODO(EA2D): ndim would be unnecessary with 2D EAs
# older pickles may store e.g. DatetimeIndex instead of DatetimeArray
values = extract_array(values, extract_numpy=True)
return make_block(values, placement=mgr_locs, ndim=ndim)
return new_block(values, placement=mgr_locs, ndim=ndim)

if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
state = state[3]["0.14.1"]
Expand Down Expand Up @@ -1148,7 +1148,7 @@ def value_getitem(placement):
# one item.
# TODO(EA2D): special casing unnecessary with 2D EAs
new_blocks.extend(
make_block(
new_block(
values=value,
ndim=self.ndim,
placement=slice(mgr_loc, mgr_loc + 1),
Expand All @@ -1164,7 +1164,7 @@ def value_getitem(placement):
unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:])

new_blocks.append(
make_block(
new_block(
values=value_getitem(unfit_val_items),
ndim=self.ndim,
placement=unfit_mgr_locs,
Expand Down Expand Up @@ -1209,7 +1209,7 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False
value = ensure_block_shape(value, ndim=2)

# TODO: type value as ArrayLike
block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))
block = new_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))

for blkno, count in _fast_count_smallints(self.blknos[loc:]):
blk = self.blocks[blkno]
Expand Down Expand Up @@ -1436,7 +1436,7 @@ def _make_na_block(self, placement, fill_value=None):
dtype, fill_value = infer_dtype_from_scalar(fill_value)
block_values = np.empty(block_shape, dtype=dtype)
block_values.fill(fill_value)
return make_block(block_values, placement=placement, ndim=block_values.ndim)
return new_block(block_values, placement=placement, ndim=block_values.ndim)

def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True):
"""
Expand Down Expand Up @@ -1562,7 +1562,7 @@ def from_array(cls, array: ArrayLike, index: Index) -> SingleBlockManager:
"""
Constructor for if we have an array that is not yet a Block.
"""
block = make_block(array, placement=slice(0, len(index)), ndim=1)
block = new_block(array, placement=slice(0, len(index)), ndim=1)
return cls(block, index)

def _post_setstate(self):
Expand Down Expand Up @@ -1669,7 +1669,7 @@ def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager:
# is basically "all items", but if there're many, don't bother
# converting, it's an error anyway.
blocks = [
make_block(
new_block(
values=blocks[0], placement=slice(0, len(axes[0])), ndim=2
)
]
Expand Down Expand Up @@ -1780,7 +1780,7 @@ def _form_blocks(

if len(items_dict["DatetimeTZBlock"]):
dttz_blocks = [
make_block(array, klass=DatetimeTZBlock, placement=i, ndim=2)
new_block(array, klass=DatetimeTZBlock, placement=i, ndim=2)
for i, array in items_dict["DatetimeTZBlock"]
]
blocks.extend(dttz_blocks)
Expand All @@ -1791,22 +1791,22 @@ def _form_blocks(

if len(items_dict["CategoricalBlock"]) > 0:
cat_blocks = [
make_block(array, klass=CategoricalBlock, placement=i, ndim=2)
new_block(array, klass=CategoricalBlock, placement=i, ndim=2)
for i, array in items_dict["CategoricalBlock"]
]
blocks.extend(cat_blocks)

if len(items_dict["ExtensionBlock"]):
external_blocks = [
make_block(array, klass=ExtensionBlock, placement=i, ndim=2)
new_block(array, klass=ExtensionBlock, placement=i, ndim=2)
for i, array in items_dict["ExtensionBlock"]
]

blocks.extend(external_blocks)

if len(items_dict["ObjectValuesExtensionBlock"]):
external_blocks = [
make_block(array, klass=ObjectValuesExtensionBlock, placement=i, ndim=2)
new_block(array, klass=ObjectValuesExtensionBlock, placement=i, ndim=2)
for i, array in items_dict["ObjectValuesExtensionBlock"]
]

Expand All @@ -1819,7 +1819,7 @@ def _form_blocks(
block_values = np.empty(shape, dtype=object)
block_values.fill(np.nan)

na_block = make_block(block_values, placement=extra_locs, ndim=2)
na_block = new_block(block_values, placement=extra_locs, ndim=2)
blocks.append(na_block)

return blocks
Expand All @@ -1836,7 +1836,7 @@ def _simple_blockify(tuples, dtype) -> List[Block]:
if dtype is not None and values.dtype != dtype: # pragma: no cover
values = values.astype(dtype)

block = make_block(values, placement=placement, ndim=2)
block = new_block(values, placement=placement, ndim=2)
return [block]


Expand All @@ -1850,7 +1850,7 @@ def _multi_blockify(tuples, dtype: Optional[Dtype] = None):

values, placement = _stack_arrays(list(tup_block), dtype)

block = make_block(values, placement=placement, ndim=2)
block = new_block(values, placement=placement, ndim=2)
new_blocks.append(block)

return new_blocks
Expand Down Expand Up @@ -1928,7 +1928,7 @@ def _merge_blocks(
new_values = new_values[argsort]
new_mgr_locs = new_mgr_locs[argsort]

return [make_block(new_values, placement=new_mgr_locs, ndim=2)]
return [new_block(new_values, placement=new_mgr_locs, ndim=2)]

# can't consolidate --> no merge
return blocks
Expand Down
50 changes: 50 additions & 0 deletions pandas/tests/internals/test_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
Tests for the pseudo-public API implemented in internals/api.py and exposed
in core.internals
"""

from pandas.core import internals
from pandas.core.internals import api


def test_internals_api():
    """core.internals must re-export the pseudo-public make_block from api."""
    assert api.make_block is internals.make_block


def test_namespace():
    """Pin the exact set of non-dunder names visible in core.internals."""
    # SUBJECT TO CHANGE

    # Submodules that show up as attributes of the package.
    submodules = {
        "blocks",
        "concat",
        "managers",
        "construction",
        "array_manager",
        "base",
        "api",
        "ops",
    }
    # Classes and functions imported into the package namespace.
    exported = {
        "Block",
        "CategoricalBlock",
        "NumericBlock",
        "DatetimeBlock",
        "DatetimeTZBlock",
        "ExtensionBlock",
        "FloatBlock",
        "ObjectBlock",
        "TimeDeltaBlock",
        "make_block",
        "DataManager",
        "ArrayManager",
        "BlockManager",
        "SingleDataManager",
        "SingleBlockManager",
        "SingleArrayManager",
        "concatenate_managers",
        "create_block_manager_from_arrays",
        "create_block_manager_from_blocks",
    }

    visible = {name for name in dir(internals) if not name.startswith("__")}
    assert visible == exported | submodules
Loading

0 comments on commit 6423561

Please sign in to comment.