Skip to content

Commit

Permalink
CLN: unify logic for form_blocks and make_blocks (#19189)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Jan 18, 2018
1 parent d7a2e94 commit 4086e42
Showing 1 changed file with 69 additions and 75 deletions.
144 changes: 69 additions & 75 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2918,35 +2918,52 @@ def sparse_reindex(self, new_index):
placement=self.mgr_locs)


def get_block_type(values, dtype=None):
    """
    Find the appropriate Block subclass to use for the given values and dtype.

    Parameters
    ----------
    values : ndarray-like
        Data the block will hold. Its ``.dtype`` is used when ``dtype``
        is not given.
    dtype : numpy or pandas dtype, optional
        Dtype to classify by. Defaults to ``values.dtype`` when None.

    Returns
    -------
    cls : class, subclass of Block
    """
    # Compare against None explicitly rather than relying on the truthiness
    # of a dtype object, so an explicitly passed dtype is always honoured.
    if dtype is None:
        dtype = values.dtype
    vtype = dtype.type

    # NOTE: the order of the checks below is significant.
    #  * timedelta64 is tested before the generic integer check because the
    #    timedelta scalar type is itself an integer subclass (asserted below).
    #  * tz-aware datetimes are tested only after the plain datetime64 check,
    #    which asserts that ``values`` is not tz-aware.
    if is_sparse(values):
        cls = SparseBlock
    elif issubclass(vtype, np.floating):
        cls = FloatBlock
    elif issubclass(vtype, np.timedelta64):
        assert issubclass(vtype, np.integer)
        cls = TimeDeltaBlock
    elif issubclass(vtype, np.complexfloating):
        cls = ComplexBlock
    elif issubclass(vtype, np.datetime64):
        assert not is_datetimetz(values)
        cls = DatetimeBlock
    elif is_datetimetz(values):
        cls = DatetimeTZBlock
    elif issubclass(vtype, np.integer):
        cls = IntBlock
    elif dtype == np.bool_:
        cls = BoolBlock
    elif is_categorical(values):
        cls = CategoricalBlock
    else:
        # Anything not matched above is stored as generic Python objects.
        cls = ObjectBlock
    return cls


def make_block(values, placement, klass=None, ndim=None, dtype=None,
fastpath=False):
if klass is None:
dtype = dtype or values.dtype
vtype = dtype.type

if isinstance(values, SparseArray):
klass = SparseBlock
elif issubclass(vtype, np.floating):
klass = FloatBlock
elif (issubclass(vtype, np.integer) and
issubclass(vtype, np.timedelta64)):
klass = TimeDeltaBlock
elif (issubclass(vtype, np.integer) and
not issubclass(vtype, np.datetime64)):
klass = IntBlock
elif dtype == np.bool_:
klass = BoolBlock
elif issubclass(vtype, np.datetime64):
assert not hasattr(values, 'tz')
klass = DatetimeBlock
elif is_datetimetz(values):
klass = DatetimeTZBlock
elif issubclass(vtype, np.complexfloating):
klass = ComplexBlock
elif is_categorical(values):
klass = CategoricalBlock
else:
klass = ObjectBlock
klass = get_block_type(values, dtype)

elif klass is DatetimeTZBlock and not is_datetimetz(values):
return klass(values, ndim=ndim, fastpath=fastpath,
Expand Down Expand Up @@ -4662,15 +4679,7 @@ def create_block_manager_from_arrays(arrays, names, axes):
def form_blocks(arrays, names, axes):
# put "leftover" items in float bucket, where else?
# generalize?
float_items = []
complex_items = []
int_items = []
bool_items = []
object_items = []
sparse_items = []
datetime_items = []
datetime_tz_items = []
cat_items = []
items_dict = defaultdict(list)
extra_locs = []

names_idx = Index(names)
Expand All @@ -4688,70 +4697,55 @@ def form_blocks(arrays, names, axes):
k = names[name_idx]
v = arrays[name_idx]

if is_sparse(v):
sparse_items.append((i, k, v))
elif issubclass(v.dtype.type, np.floating):
float_items.append((i, k, v))
elif issubclass(v.dtype.type, np.complexfloating):
complex_items.append((i, k, v))
elif issubclass(v.dtype.type, np.datetime64):
if v.dtype != _NS_DTYPE:
v = conversion.ensure_datetime64ns(v)

assert not is_datetimetz(v)
datetime_items.append((i, k, v))
elif is_datetimetz(v):
datetime_tz_items.append((i, k, v))
elif issubclass(v.dtype.type, np.integer):
int_items.append((i, k, v))
elif v.dtype == np.bool_:
bool_items.append((i, k, v))
elif is_categorical(v):
cat_items.append((i, k, v))
else:
object_items.append((i, k, v))
block_type = get_block_type(v)
items_dict[block_type.__name__].append((i, k, v))

blocks = []
if len(float_items):
float_blocks = _multi_blockify(float_items)
if len(items_dict['FloatBlock']):
float_blocks = _multi_blockify(items_dict['FloatBlock'])
blocks.extend(float_blocks)

if len(complex_items):
complex_blocks = _multi_blockify(complex_items)
if len(items_dict['ComplexBlock']):
complex_blocks = _multi_blockify(items_dict['ComplexBlock'])
blocks.extend(complex_blocks)

if len(int_items):
int_blocks = _multi_blockify(int_items)
if len(items_dict['TimeDeltaBlock']):
timedelta_blocks = _multi_blockify(items_dict['TimeDeltaBlock'])
blocks.extend(timedelta_blocks)

if len(items_dict['IntBlock']):
int_blocks = _multi_blockify(items_dict['IntBlock'])
blocks.extend(int_blocks)

if len(datetime_items):
datetime_blocks = _simple_blockify(datetime_items, _NS_DTYPE)
if len(items_dict['DatetimeBlock']):
datetime_blocks = _simple_blockify(items_dict['DatetimeBlock'],
_NS_DTYPE)
blocks.extend(datetime_blocks)

if len(datetime_tz_items):
if len(items_dict['DatetimeTZBlock']):
dttz_blocks = [make_block(array,
klass=DatetimeTZBlock,
fastpath=True,
placement=[i], )
for i, _, array in datetime_tz_items]
placement=[i])
for i, _, array in items_dict['DatetimeTZBlock']]
blocks.extend(dttz_blocks)

if len(bool_items):
bool_blocks = _simple_blockify(bool_items, np.bool_)
if len(items_dict['BoolBlock']):
bool_blocks = _simple_blockify(items_dict['BoolBlock'], np.bool_)
blocks.extend(bool_blocks)

if len(object_items) > 0:
object_blocks = _simple_blockify(object_items, np.object_)
if len(items_dict['ObjectBlock']) > 0:
object_blocks = _simple_blockify(items_dict['ObjectBlock'], np.object_)
blocks.extend(object_blocks)

if len(sparse_items) > 0:
sparse_blocks = _sparse_blockify(sparse_items)
if len(items_dict['SparseBlock']) > 0:
sparse_blocks = _sparse_blockify(items_dict['SparseBlock'])
blocks.extend(sparse_blocks)

if len(cat_items) > 0:
if len(items_dict['CategoricalBlock']) > 0:
cat_blocks = [make_block(array, klass=CategoricalBlock, fastpath=True,
placement=[i])
for i, _, array in cat_items]
for i, _, array in items_dict['CategoricalBlock']]
blocks.extend(cat_blocks)

if len(extra_locs):
Expand Down

0 comments on commit 4086e42

Please sign in to comment.