Skip to content

Commit

Permalink
[ArrayManager] DataFrame constructor from ndarray (#40441)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored Apr 26, 2021
1 parent 5faa34c commit ead9404
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 6 deletions.
30 changes: 26 additions & 4 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from pandas.core.dtypes.common import (
is_1d_only_ea_dtype,
is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_integer_dtype,
Expand All @@ -60,6 +61,7 @@
TimedeltaArray,
)
from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
extract_array,
sanitize_array,
)
Expand Down Expand Up @@ -316,10 +318,30 @@ def ndarray_to_mgr(
index, columns = _get_axes(
values.shape[0], values.shape[1], index=index, columns=columns
)
values = values.T

_check_values_indices_shape_match(values, index, columns)

if typ == "array":

if issubclass(values.dtype.type, str):
values = np.array(values, dtype=object)

if dtype is None and is_object_dtype(values.dtype):
arrays = [
ensure_wrapped_if_datetimelike(
maybe_infer_to_datetimelike(values[:, i].copy())
)
for i in range(values.shape[1])
]
else:
if is_datetime_or_timedelta_dtype(values.dtype):
values = ensure_wrapped_if_datetimelike(values)
arrays = [values[:, i].copy() for i in range(values.shape[1])]

return ArrayManager(arrays, [index, columns], verify_integrity=False)

values = values.T

# if no dtype was specified, try to convert object-dtype values across
# the entire block; this converts datetimelike values that are
# embedded in an object-dtype array
Expand Down Expand Up @@ -358,13 +380,13 @@ def _check_values_indices_shape_match(
Check that the shape implied by our axes matches the actual shape of the
data.
"""
if values.shape[0] != len(columns):
if values.shape[1] != len(columns) or values.shape[0] != len(index):
# Could let this raise in Block constructor, but we get a more
# helpful exception message this way.
if values.shape[1] == 0:
if values.shape[0] == 0:
raise ValueError("Empty data passed with indices specified.")

passed = values.T.shape
passed = values.shape
implied = (len(index), len(columns))
raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")

Expand Down
20 changes: 18 additions & 2 deletions pandas/tests/frame/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,11 +428,27 @@ def test_astype_to_incorrect_datetimelike(self, unit):
other = f"m8[{unit}]"

df = DataFrame(np.array([[1, 2, 3]], dtype=dtype))
msg = fr"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]"
msg = "|".join(
[
# BlockManager path
fr"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]",
# ArrayManager path
"cannot astype a datetimelike from "
fr"\[datetime64\[ns\]\] to \[timedelta64\[{unit}\]\]",
]
)
with pytest.raises(TypeError, match=msg):
df.astype(other)

msg = fr"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]"
msg = "|".join(
[
# BlockManager path
fr"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]",
# ArrayManager path
"cannot astype a timedelta from "
fr"\[timedelta64\[ns\]\] to \[datetime64\[{unit}\]\]",
]
)
df = DataFrame(np.array([[1, 2, 3]], dtype=other))
with pytest.raises(TypeError, match=msg):
df.astype(dtype)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
)
import pandas._testing as tm
from pandas.arrays import (
DatetimeArray,
IntervalArray,
PeriodArray,
SparseArray,
Expand Down Expand Up @@ -2569,6 +2570,13 @@ def test_construction_from_set_raises(self, typ):
with pytest.raises(TypeError, match=msg):
Series(values)

def test_construction_from_ndarray_datetimelike(self):
    # Build a frame from a 2D (4, 3) datetime64[ns] ndarray and verify that
    # every array held by the frame's manager is wrapped as a DatetimeArray
    # (an ExtensionArray) rather than left as a raw ndarray
    values = np.arange(0, 12, dtype="datetime64[ns]").reshape(4, 3)
    frame = DataFrame(values)
    for stored in frame._mgr.arrays:
        assert isinstance(stored, DatetimeArray)


def get1(obj):
if isinstance(obj, Series):
Expand Down

0 comments on commit ead9404

Please sign in to comment.