Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: pseudo-public internals API for downstream libraries #40182

Merged
merged 9 commits into from
Mar 5, 2021
4 changes: 2 additions & 2 deletions pandas/core/internals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pandas.core.internals.api import make_block # pseudo-public version
from pandas.core.internals.array_manager import ArrayManager
from pandas.core.internals.base import DataManager
from pandas.core.internals.blocks import ( # io.pytables, io.packers
from pandas.core.internals.blocks import ( # io.pytables
Block,
CategoricalBlock,
DatetimeBlock,
Expand All @@ -10,7 +11,6 @@
NumericBlock,
ObjectBlock,
TimeDeltaBlock,
make_block,
)
from pandas.core.internals.concat import concatenate_managers
from pandas.core.internals.managers import (
Expand Down
61 changes: 61 additions & 0 deletions pandas/core/internals/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a test for this module that asserts the exact names that we expose, similar to https://github.com/pandas-dev/pandas/blob/master/pandas/tests/api/test_api.py

This is a pseudo-public API for downstream libraries. We ask that downstream
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have these import to pandas.api.internals i think (then you can really control the exports)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i know pyarrow accesses the pd.core.internals namespace. not sure about others. we can ask them to change, but for the foreseeable future we will need these in the namespace.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right what i mean is let's expose a wrapper api namespace and then we can change the downstream packages when we have released.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fair enough. keep it in this file though? im wary of adding it to the pd.api namespace lest new downstream packages adopt bad habits

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok sure

authors

1) Try to avoid using internals directly altogether, and failing that,
2) Use only functions exposed here (or in core.internals)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ideally we remove 2 as soon as possible.


"""
from typing import Optional

import numpy as np

from pandas._typing import Dtype

from pandas.core.dtypes.common import is_datetime64tz_dtype
from pandas.core.dtypes.dtypes import PandasDtype
from pandas.core.dtypes.generic import ABCPandasArray

from pandas.core.arrays import DatetimeArray
from pandas.core.internals.blocks import (
Block,
DatetimeTZBlock,
get_block_type,
)


def make_block(
    values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None
) -> Block:
    """
    Pseudo-public analogue to ``blocks.new_block``.

    Downstream libraries should call this instead of any fully-internal
    APIs, including but not limited to:

    - core.internals.blocks.make_block
    - Block.make_block
    - Block.make_block_same_class
    - Block.__init__
    """
    if isinstance(values, ABCPandasArray):
        # PandasArray / PandasDtype are not allowed inside the internals;
        # for now blocks should be backed by plain ndarrays when possible.
        values = values.to_numpy()
        if ndim is not None and ndim > 1:
            # TODO(EA2D): special case not needed with 2D EAs
            values = np.atleast_2d(values)

    if isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype

    if klass is None:
        inferred_dtype = dtype or values.dtype
        klass = get_block_type(values, inferred_dtype)
    elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
        # TODO: This is no longer hit internally; does it need to be retained
        #  for e.g. pyarrow?
        values = DatetimeArray._simple_new(values, dtype=dtype)

    return klass(values, ndim=ndim, placement=placement)
7 changes: 4 additions & 3 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
ensure_index,
)
from pandas.core.internals.base import DataManager
from pandas.core.internals.blocks import make_block
from pandas.core.internals.blocks import new_block

if TYPE_CHECKING:
from pandas.core.internals.managers import SingleBlockManager
Expand Down Expand Up @@ -422,7 +422,8 @@ def apply_with_block(self: T, f, align_keys=None, **kwargs) -> T:
arr = arr._data # type: ignore[union-attr]
if isinstance(arr, np.ndarray):
arr = np.atleast_2d(arr)
block = make_block(arr, placement=slice(0, 1, 1), ndim=2)

block = new_block(arr, placement=slice(0, 1, 1), ndim=2)
applied = getattr(block, f)(**kwargs)
if isinstance(applied, list):
applied = applied[0]
Expand Down Expand Up @@ -741,7 +742,7 @@ def iget(self, i: int) -> SingleBlockManager:
from pandas.core.internals.managers import SingleBlockManager

values = self.arrays[i]
block = make_block(values, placement=slice(0, len(values)), ndim=1)
block = new_block(values, placement=slice(0, len(values)), ndim=1)

return SingleBlockManager(block, self._axes[0])

Expand Down
10 changes: 5 additions & 5 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def make_block(self, values, placement=None) -> Block:
if self.is_extension:
values = ensure_block_shape(values, ndim=self.ndim)

return make_block(values, placement=placement, ndim=self.ndim)
return new_block(values, placement=placement, ndim=self.ndim)

@final
def make_block_same_class(self, values, placement=None) -> Block:
Expand Down Expand Up @@ -1434,7 +1434,7 @@ def _unstack(self, unstacker, fill_value, new_placement):
new_values = new_values.T[mask]
new_placement = new_placement[mask]

blocks = [make_block(new_values, placement=new_placement, ndim=2)]
blocks = [new_block(new_values, placement=new_placement, ndim=2)]
return blocks, mask

def quantile(
Expand Down Expand Up @@ -1467,7 +1467,7 @@ def quantile(

result = quantile_with_mask(values, mask, fill_value, qs, interpolation, axis)

return make_block(result, placement=self.mgr_locs, ndim=2)
return new_block(result, placement=self.mgr_locs, ndim=2)


class ExtensionBlock(Block):
Expand Down Expand Up @@ -1855,7 +1855,7 @@ def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block:
assert result.shape == (1, len(qs)), result.shape
result = type(self.values)._from_factorized(result[0], self.values)

return make_block(result, placement=self.mgr_locs, ndim=2)
return new_block(result, placement=self.mgr_locs, ndim=2)


class HybridMixin:
Expand Down Expand Up @@ -2322,7 +2322,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None):
return cls


def make_block(
def new_block(
values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None
) -> Block:
# Ensure that we don't allow PandasArray / PandasDtype in internals.
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
ExtensionArray,
)
from pandas.core.internals.array_manager import ArrayManager
from pandas.core.internals.blocks import make_block
from pandas.core.internals.blocks import new_block
from pandas.core.internals.managers import BlockManager

if TYPE_CHECKING:
Expand Down Expand Up @@ -144,10 +144,10 @@ def concatenate_managers(
# Fast-path
b = blk.make_block_same_class(values, placement=placement)
else:
b = make_block(values, placement=placement, ndim=blk.ndim)
b = new_block(values, placement=placement, ndim=blk.ndim)
else:
new_values = _concatenate_join_units(join_units, concat_axis, copy=copy)
b = make_block(new_values, placement=placement, ndim=len(axes))
b = new_block(new_values, placement=placement, ndim=len(axes))
blocks.append(b)

return BlockManager(blocks, axes)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
from pandas.core.internals.array_manager import ArrayManager
from pandas.core.internals.blocks import (
ensure_block_shape,
make_block,
new_block,
)
from pandas.core.internals.managers import (
BlockManager,
Expand Down Expand Up @@ -300,7 +300,7 @@ def ndarray_to_mgr(
# TODO: What about re-joining object columns?
dvals_list = [maybe_squeeze_dt64tz(x) for x in dvals_list]
block_values = [
make_block(dvals_list[n], placement=[n], ndim=2)
new_block(dvals_list[n], placement=n, ndim=2)
for n in range(len(dvals_list))
]

Expand Down
32 changes: 16 additions & 16 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
ensure_block_shape,
extend_blocks,
get_block_type,
make_block,
new_block,
)
from pandas.core.internals.ops import (
blockwise_all,
Expand Down Expand Up @@ -319,7 +319,7 @@ def unpickle_block(values, mgr_locs, ndim: int):
# TODO(EA2D): ndim would be unnecessary with 2D EAs
# older pickles may store e.g. DatetimeIndex instead of DatetimeArray
values = extract_array(values, extract_numpy=True)
return make_block(values, placement=mgr_locs, ndim=ndim)
return new_block(values, placement=mgr_locs, ndim=ndim)

if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
state = state[3]["0.14.1"]
Expand Down Expand Up @@ -1155,7 +1155,7 @@ def value_getitem(placement):
# one item.
# TODO(EA2D): special casing unnecessary with 2D EAs
new_blocks.extend(
make_block(
new_block(
values=value,
ndim=self.ndim,
placement=slice(mgr_loc, mgr_loc + 1),
Expand All @@ -1171,7 +1171,7 @@ def value_getitem(placement):
unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:])

new_blocks.append(
make_block(
new_block(
values=value_getitem(unfit_val_items),
ndim=self.ndim,
placement=unfit_mgr_locs,
Expand Down Expand Up @@ -1216,7 +1216,7 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False
value = ensure_block_shape(value, ndim=2)

# TODO: type value as ArrayLike
block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))
block = new_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))

for blkno, count in _fast_count_smallints(self.blknos[loc:]):
blk = self.blocks[blkno]
Expand Down Expand Up @@ -1443,7 +1443,7 @@ def _make_na_block(self, placement, fill_value=None):
dtype, fill_value = infer_dtype_from_scalar(fill_value)
block_values = np.empty(block_shape, dtype=dtype)
block_values.fill(fill_value)
return make_block(block_values, placement=placement, ndim=block_values.ndim)
return new_block(block_values, placement=placement, ndim=block_values.ndim)

def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True):
"""
Expand Down Expand Up @@ -1569,7 +1569,7 @@ def from_array(cls, array: ArrayLike, index: Index) -> SingleBlockManager:
"""
Constructor for if we have an array that is not yet a Block.
"""
block = make_block(array, placement=slice(0, len(index)), ndim=1)
block = new_block(array, placement=slice(0, len(index)), ndim=1)
return cls(block, index)

def _post_setstate(self):
Expand Down Expand Up @@ -1671,7 +1671,7 @@ def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager:
# is basically "all items", but if there're many, don't bother
# converting, it's an error anyway.
blocks = [
make_block(
new_block(
values=blocks[0], placement=slice(0, len(axes[0])), ndim=2
)
]
Expand Down Expand Up @@ -1782,7 +1782,7 @@ def _form_blocks(

if len(items_dict["DatetimeTZBlock"]):
dttz_blocks = [
make_block(array, klass=DatetimeTZBlock, placement=i, ndim=2)
new_block(array, klass=DatetimeTZBlock, placement=i, ndim=2)
for i, array in items_dict["DatetimeTZBlock"]
]
blocks.extend(dttz_blocks)
Expand All @@ -1793,22 +1793,22 @@ def _form_blocks(

if len(items_dict["CategoricalBlock"]) > 0:
cat_blocks = [
make_block(array, klass=CategoricalBlock, placement=i, ndim=2)
new_block(array, klass=CategoricalBlock, placement=i, ndim=2)
for i, array in items_dict["CategoricalBlock"]
]
blocks.extend(cat_blocks)

if len(items_dict["ExtensionBlock"]):
external_blocks = [
make_block(array, klass=ExtensionBlock, placement=i, ndim=2)
new_block(array, klass=ExtensionBlock, placement=i, ndim=2)
for i, array in items_dict["ExtensionBlock"]
]

blocks.extend(external_blocks)

if len(items_dict["ObjectValuesExtensionBlock"]):
external_blocks = [
make_block(array, klass=ObjectValuesExtensionBlock, placement=i, ndim=2)
new_block(array, klass=ObjectValuesExtensionBlock, placement=i, ndim=2)
for i, array in items_dict["ObjectValuesExtensionBlock"]
]

Expand All @@ -1821,7 +1821,7 @@ def _form_blocks(
block_values = np.empty(shape, dtype=object)
block_values.fill(np.nan)

na_block = make_block(block_values, placement=extra_locs, ndim=2)
na_block = new_block(block_values, placement=extra_locs, ndim=2)
blocks.append(na_block)

return blocks
Expand All @@ -1838,7 +1838,7 @@ def _simple_blockify(tuples, dtype) -> List[Block]:
if dtype is not None and values.dtype != dtype: # pragma: no cover
values = values.astype(dtype)

block = make_block(values, placement=placement, ndim=2)
block = new_block(values, placement=placement, ndim=2)
return [block]


Expand All @@ -1852,7 +1852,7 @@ def _multi_blockify(tuples, dtype: Optional[Dtype] = None):

values, placement = _stack_arrays(list(tup_block), dtype)

block = make_block(values, placement=placement, ndim=2)
block = new_block(values, placement=placement, ndim=2)
new_blocks.append(block)

return new_blocks
Expand Down Expand Up @@ -1930,7 +1930,7 @@ def _merge_blocks(
new_values = new_values[argsort]
new_mgr_locs = new_mgr_locs[argsort]

return [make_block(new_values, placement=new_mgr_locs, ndim=2)]
return [new_block(new_values, placement=new_mgr_locs, ndim=2)]

# can't consolidate --> no merge
return blocks
Expand Down
Loading