-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
REF: move Block construction in groupby aggregation to internals #39997
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,6 +55,7 @@ | |
|
||
import pandas.core.algorithms as algos | ||
from pandas.core.arrays.sparse import SparseDtype | ||
from pandas.core.base import DataError | ||
from pandas.core.construction import extract_array | ||
from pandas.core.indexers import maybe_convert_indices | ||
from pandas.core.indexes.api import ( | ||
|
@@ -403,6 +404,43 @@ def reduce( | |
new_mgr = type(self).from_blocks(res_blocks, [self.items, index]) | ||
return new_mgr, indexer | ||
|
||
def grouped_reduce( | ||
self: T, func: Callable, ignore_failures: bool = False | ||
) -> Tuple[T, np.ndarray]: | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
Apply grouped reduction function blockwise, returning a new BlockManager. | ||
|
||
Parameters | ||
---------- | ||
func : grouped reduction function | ||
ignore_failures : bool, default False | ||
Whether to drop blocks where func raises TypeError. | ||
|
||
Returns | ||
------- | ||
BlockManager | ||
""" | ||
result_blocks: List[Block] = [] | ||
|
||
for blk in self.blocks: | ||
try: | ||
applied = blk.apply(func) | ||
except (TypeError, NotImplementedError): | ||
if not ignore_failures: | ||
raise | ||
continue | ||
result_blocks = extend_blocks(applied, result_blocks) | ||
|
||
if len(result_blocks) == 0: | ||
raise DataError("No numeric types to aggregate") | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
index = Index(range(result_blocks[0].values.shape[-1])) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we figure out this length before calling grouped_reduce? If so, I think that we can turn it into an argument, then refactor reduce, apply and possibly quantile to dispatch to this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The length can certainly be known beforehand for the groupby case ( |
||
|
||
if ignore_failures: | ||
return self._combine(result_blocks, index=index) | ||
|
||
return type(self).from_blocks(result_blocks, [self.axes[0], index]) | ||
|
||
def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager: | ||
""" | ||
Apply array_op blockwise with another (aligned) BlockManager. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed, this is an improvement.