From 175e7a303ac59427b0c6a15649d314ce6bbacb4d Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 27 Feb 2021 17:06:10 -0800 Subject: [PATCH 1/8] TYP: _get_names_from_index --- pandas/core/internals/construction.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 73ef006caa7d9..74ce77f6b6127 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -127,9 +127,8 @@ def masked_rec_array_to_mgr( fdata = ma.getdata(data) if index is None: index = _get_names_from_index(fdata) - if index is None: - index = ibase.default_index(len(data)) - index = ensure_index(index) + else: + index = ensure_index(index) if columns is not None: columns = ensure_index(columns) @@ -518,7 +517,7 @@ def reorder_arrays(arrays, arr_columns, columns): return arrays, arr_columns -def _get_names_from_index(data): +def _get_names_from_index(data) -> Index: has_some_name = any(getattr(s, "name", None) is not None for s in data) if not has_some_name: return ibase.default_index(len(data)) @@ -533,7 +532,7 @@ def _get_names_from_index(data): index[i] = f"Unnamed {count}" count += 1 - return index + return Index(index) def _get_axes( From 971da8875ce59a6466b273f7859c11dcca6e9359 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 27 Feb 2021 17:56:47 -0800 Subject: [PATCH 2/8] TYP: to_arrays --- pandas/core/internals/construction.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 74ce77f6b6127..84d9a5f7ad68d 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -340,7 +340,7 @@ def nested_data_to_arrays( columns: Optional[Index], index: Optional[Index], dtype: Optional[DtypeObj], -): +) -> Tuple[List[ArrayLike], Index, Index]: """ Convert a single sequence of arrays to multiple arrays. 
""" @@ -585,7 +585,9 @@ def dataclasses_to_dicts(data): # Conversion of Inputs to Arrays -def to_arrays(data, columns: Optional[Index], dtype: Optional[DtypeObj] = None): +def to_arrays( + data, columns: Optional[Index], dtype: Optional[DtypeObj] = None +) -> Tuple[List[ArrayLike], Index]: """ Return list of arrays, columns. """ @@ -606,8 +608,10 @@ def to_arrays(data, columns: Optional[Index], dtype: Optional[DtypeObj] = None): if isinstance(data, np.ndarray): columns = data.dtype.names if columns is not None: - return [[]] * len(columns), columns - return [], [] # columns if columns is not None else [] + arrays = [np.empty((0,), dtype=data.dtype) for _ in range(len(columns))] + return arrays, ensure_index(columns) + + return [], Index([]) elif isinstance(data[0], Categorical): if columns is None: From 071effc334502999f9ffbfe86b1c56a5734545b2 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 27 Feb 2021 18:18:23 -0800 Subject: [PATCH 3/8] revert --- pandas/core/internals/construction.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 84d9a5f7ad68d..73ef006caa7d9 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -127,8 +127,9 @@ def masked_rec_array_to_mgr( fdata = ma.getdata(data) if index is None: index = _get_names_from_index(fdata) - else: - index = ensure_index(index) + if index is None: + index = ibase.default_index(len(data)) + index = ensure_index(index) if columns is not None: columns = ensure_index(columns) @@ -340,7 +341,7 @@ def nested_data_to_arrays( columns: Optional[Index], index: Optional[Index], dtype: Optional[DtypeObj], -) -> Tuple[List[ArrayLike], Index, Index]: +): """ Convert a single sequence of arrays to multiple arrays. 
""" @@ -517,7 +518,7 @@ def reorder_arrays(arrays, arr_columns, columns): return arrays, arr_columns -def _get_names_from_index(data) -> Index: +def _get_names_from_index(data): has_some_name = any(getattr(s, "name", None) is not None for s in data) if not has_some_name: return ibase.default_index(len(data)) @@ -532,7 +533,7 @@ def _get_names_from_index(data) -> Index: index[i] = f"Unnamed {count}" count += 1 - return Index(index) + return index def _get_axes( @@ -585,9 +586,7 @@ def dataclasses_to_dicts(data): # Conversion of Inputs to Arrays -def to_arrays( - data, columns: Optional[Index], dtype: Optional[DtypeObj] = None -) -> Tuple[List[ArrayLike], Index]: +def to_arrays(data, columns: Optional[Index], dtype: Optional[DtypeObj] = None): """ Return list of arrays, columns. """ @@ -608,10 +607,8 @@ def to_arrays( if isinstance(data, np.ndarray): columns = data.dtype.names if columns is not None: - arrays = [np.empty((0,), dtype=data.dtype) for _ in range(len(columns))] - return arrays, ensure_index(columns) - - return [], Index([]) + return [[]] * len(columns), columns + return [], [] # columns if columns is not None else [] elif isinstance(data[0], Categorical): if columns is None: From 14b7d216bc68b9aa7f9fbf7664a028628fce8568 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 27 Feb 2021 18:19:45 -0800 Subject: [PATCH 4/8] TYP:_get_names_from_index --- pandas/core/internals/construction.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 73ef006caa7d9..74ce77f6b6127 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -127,9 +127,8 @@ def masked_rec_array_to_mgr( fdata = ma.getdata(data) if index is None: index = _get_names_from_index(fdata) - if index is None: - index = ibase.default_index(len(data)) - index = ensure_index(index) + else: + index = ensure_index(index) if columns is not None: columns = 
ensure_index(columns) @@ -518,7 +517,7 @@ def reorder_arrays(arrays, arr_columns, columns): return arrays, arr_columns -def _get_names_from_index(data): +def _get_names_from_index(data) -> Index: has_some_name = any(getattr(s, "name", None) is not None for s in data) if not has_some_name: return ibase.default_index(len(data)) @@ -533,7 +532,7 @@ def _get_names_from_index(data): index[i] = f"Unnamed {count}" count += 1 - return index + return Index(index) def _get_axes( From 26fe41ef14da04b870083cddbb54b3f6b08d88cf Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 27 Feb 2021 19:27:44 -0800 Subject: [PATCH 5/8] TYP: reorder_arrays --- pandas/core/frame.py | 3 ++- pandas/core/internals/construction.py | 14 +++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e8793c364586b..3638902ba6fe8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1918,7 +1918,8 @@ def from_records( arr_columns_list.append(k) arrays.append(v) - arrays, arr_columns = reorder_arrays(arrays, arr_columns_list, columns) + arr_columns = Index(arr_columns_list) + arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns) elif isinstance(data, (np.ndarray, DataFrame)): arrays, columns = to_arrays(data, columns) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 74ce77f6b6127..2140865432a73 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -147,6 +147,9 @@ def masked_rec_array_to_mgr( new_arrays.append(arr) # create the manager + # TODO: once to_arrays is annotated s.t. arr_columns is always an Index, + # this ensure_index will be unnecessary. 
+ arr_columns = ensure_index(arr_columns) arrays, arr_columns = reorder_arrays(new_arrays, arr_columns, columns) if columns is None: columns = arr_columns @@ -503,14 +506,11 @@ def extract_index(data) -> Index: return ensure_index(index) -def reorder_arrays(arrays, arr_columns, columns): +def reorder_arrays( + arrays: List[ArrayLike], arr_columns: Index, columns: Optional[Index] +) -> Tuple[List[ArrayLike], Index]: # reorder according to the columns - if ( - columns is not None - and len(columns) - and arr_columns is not None - and len(arr_columns) - ): + if columns is not None and len(columns) and len(arr_columns): indexer = ensure_index(arr_columns).get_indexer(columns) arr_columns = ensure_index([arr_columns[i] for i in indexer]) arrays = [arrays[i] for i in indexer] From 5131de7ae9ffc88455139e0353023d7da10efc94 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 1 Mar 2021 12:55:00 -0800 Subject: [PATCH 6/8] remove TODO --- pandas/core/internals/construction.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 6467e1caebda8..a24fd2dd5375c 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -145,9 +145,6 @@ def rec_array_to_mgr( new_arrays = arrays # create the manager - # TODO: once to_arrays is annotated s.t. arr_columns is always an Index, - # this ensure_index will be unnecessary. 
- arr_columns = ensure_index(arr_columns) arrays, arr_columns = reorder_arrays(new_arrays, arr_columns, columns) if columns is None: columns = arr_columns From ab0bef0f87c955b7f1d953cdede60a67a60d424c Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 1 Mar 2021 13:00:35 -0800 Subject: [PATCH 7/8] TYP: internals.construction --- pandas/core/frame.py | 6 +----- pandas/core/internals/construction.py | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b329f1073f824..cecf65c6da151 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1923,8 +1923,6 @@ def from_records( elif isinstance(data, (np.ndarray, DataFrame)): arrays, columns = to_arrays(data, columns) - if columns is not None: - columns = ensure_index(columns) arr_columns = columns else: arrays, arr_columns = to_arrays(data, columns) @@ -1934,9 +1932,7 @@ def from_records( arrays[i] = lib.maybe_convert_objects(arr, try_float=True) arr_columns = ensure_index(arr_columns) - if columns is not None: - columns = ensure_index(columns) - else: + if columns is None: columns = arr_columns if exclude is None: diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index a24fd2dd5375c..e2d7e1ec342bd 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -77,6 +77,8 @@ if TYPE_CHECKING: from numpy.ma.mrecords import MaskedRecords + from pandas.core.internals.managers import BlockManager + # --------------------------------------------------------------------- # BlockManager Interface @@ -88,7 +90,7 @@ def arrays_to_mgr( columns, dtype: Optional[DtypeObj] = None, verify_integrity: bool = True, -): +) -> BlockManager: """ Segregate Series based on type and coerce into matrices. 
@@ -106,11 +108,11 @@ def arrays_to_mgr( # don't force copy because getting jammed in an ndarray anyway arrays = _homogenize(arrays, index, dtype) - columns = ensure_index(columns) else: - columns = ensure_index(columns) index = ensure_index(index) + columns = ensure_index(columns) + # from BlockManager perspective axes = [columns, index] @@ -208,14 +210,16 @@ def mgr_to_mgr(mgr, typ: str): # DataFrame Constructor Interface -def ndarray_to_mgr(values, index, columns, dtype: Optional[DtypeObj], copy: bool): +def ndarray_to_mgr( + values, index, columns, dtype: Optional[DtypeObj], copy: bool +) -> BlockManager: # used in DataFrame.__init__ - # input must be a ndarray, list, Series, index + # input must be a ndarray, list, Series, Index, ExtensionArray if isinstance(values, ABCSeries): if columns is None: if values.name is not None: - columns = [values.name] + columns = Index([values.name]) if index is None: index = values.index else: @@ -302,7 +306,9 @@ def ndarray_to_mgr(values, index, columns, dtype: Optional[DtypeObj], copy: bool return create_block_manager_from_blocks(block_values, [columns, index]) -def dict_to_mgr(data: Dict, index, columns, dtype: Optional[DtypeObj] = None): +def dict_to_mgr( + data: Dict, index, columns, dtype: Optional[DtypeObj] = None +) -> BlockManager: """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. 
From 74be33c2390473fe4ea6ca9389ec50d8962eeb9c Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 3 Mar 2021 08:24:27 -0800 Subject: [PATCH 8/8] use Manager alias --- pandas/core/dtypes/cast.py | 2 ++ pandas/core/internals/construction.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 1850a0e22fd37..9f111282473c2 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1701,6 +1701,8 @@ def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj: dtype('<M8[ns]') >>> ensure_nanosecond_dtype(np.dtype("m8[ps]")) + Traceback (most recent call last): + ... TypeError: cannot convert timedeltalike to dtype [timedelta64[ps]] """ msg = ( diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index c3b3aa21a0c6a..9a7ae39b9f8eb 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -92,7 +92,7 @@ def arrays_to_mgr( dtype: Optional[DtypeObj] = None, verify_integrity: bool = True, typ: Optional[str] = None, -) -> Union[BlockManager, ArrayManager]: +) -> Manager: """ Segregate Series based on type and coerce into matrices. @@ -215,7 +215,7 @@ def mgr_to_mgr(mgr, typ: str): def ndarray_to_mgr( values, index, columns, dtype: Optional[DtypeObj], copy: bool, typ: str -) -> Union[BlockManager, ArrayManager]: +) -> Manager: # used in DataFrame.__init__ # input must be a ndarray, list, Series, Index, ExtensionArray @@ -311,7 +311,7 @@ def ndarray_to_mgr( def dict_to_mgr( data: Dict, index, columns, dtype: Optional[DtypeObj], typ: str -) -> Union[BlockManager, ArrayManager]: +) -> Manager: """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases.