Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: to_dict should return a native datetime object for NumPy backed dataframes #37571

Merged
merged 23 commits into from
Nov 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
af2f84d
BUG/REF: add native arg to maybe_box_datetimelike
arw2019 Nov 1, 2020
87265bc
BUG: pass native=True arg to maybe_box_datetimelike
arw2019 Nov 1, 2020
6b8221c
TST: add datetime column to test
arw2019 Nov 1, 2020
a28bfad
TST/CLN: test comments
arw2019 Nov 1, 2020
8821a8d
DOC: whatsnew
arw2019 Nov 1, 2020
717a6e3
CLN/TST: add bool test case & DRY the test
arw2019 Nov 1, 2020
ad78190
REF (feedback): move maybe_box_datetimelike common.py -> dtypes/cast.py
arw2019 Nov 3, 2020
c93fca9
feedback: remove native arg/accept Timestamp return type
arw2019 Nov 2, 2020
47fd80d
Merge remote-tracking branch 'upstream/master' into GH21256
arw2019 Nov 3, 2020
de86652
BUG: fix import in pd.core.indexes.interval.py
arw2019 Nov 3, 2020
b17a0db
BUG: fix import in pd.core.dtypes.cast.py
arw2019 Nov 3, 2020
1057c8a
BUG: revert accidental change tslib->tslibs
arw2019 Nov 3, 2020
5b2a84a
Merge remote-tracking branch 'upstream/master' into GH21256
arw2019 Nov 3, 2020
10119a7
REF (feedback): move dict_compat to core/dtypes/cast.py
arw2019 Nov 4, 2020
44d82c4
REF/TST: move dict_compat tests to dtypes/cast/test_dict_compat.py
arw2019 Nov 4, 2020
d737457
Merge remote-tracking branch 'upstream/master' into GH21256
arw2019 Nov 4, 2020
5d8d374
DOC: improve dict_compat docstring
arw2019 Nov 4, 2020
b9b8e31
DOC: add maybe_box_datetimelike docstring
arw2019 Nov 4, 2020
f1667f6
CLN: consolidate return in maybe_box_datetimelike
arw2019 Nov 4, 2020
5b984b8
TYP: maybe_box_datetimelike
arw2019 Nov 4, 2020
017fa58
TYP: maybe_box_datetimelike (return type)
arw2019 Nov 4, 2020
c76c09c
TYP: dict_compat
arw2019 Nov 4, 2020
2697b85
TST: remove dict_compat test from test_common.py
arw2019 Nov 4, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ Numeric
Conversion
^^^^^^^^^^

-
- Fixed bug in :meth:`DataFrame.to_dict` with ``orient='records'``: it now returns Python native datetime objects for datetimelike columns (:issue:`21256`)
-

Strings
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
construct_1d_arraylike_from_scalar,
find_common_type,
infer_dtype_from_scalar,
maybe_box_datetimelike,
)
from pandas.core.dtypes.common import (
is_array_like,
Expand Down Expand Up @@ -805,7 +806,7 @@ def _get_val_at(self, loc):
return self.fill_value
else:
val = self.sp_values[sp_loc]
val = com.maybe_box_datetimelike(val, self.sp_values.dtype)
val = maybe_box_datetimelike(val, self.sp_values.dtype)
return val

def take(self, indices, allow_fill=False, fill_value=None) -> "SparseArray":
Expand Down
35 changes: 1 addition & 34 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@

from collections import abc, defaultdict
import contextlib
from datetime import datetime, timedelta
from functools import partial
import inspect
from typing import Any, Collection, Iterable, Iterator, List, Union, cast
import warnings

import numpy as np

from pandas._libs import lib, tslibs
from pandas._libs import lib
from pandas._typing import AnyArrayLike, Scalar, T
from pandas.compat.numpy import np_version_under1p18

Expand Down Expand Up @@ -78,21 +77,6 @@ def consensus_name_attr(objs):
return name


def maybe_box_datetimelike(value, dtype=None):
    """
    Box a datetime-like scalar into a Timestamp/Timedelta as needed.

    Parameters
    ----------
    value : scalar
    dtype : optional
        When this compares equal to ``object`` the value is handed back
        unboxed.

    Returns
    -------
    scalar
        ``tslibs.Timestamp`` for ``np.datetime64``/``datetime`` input,
        ``tslibs.Timedelta`` for ``np.timedelta64``/``timedelta`` input,
        otherwise `value` itself.
    """
    # With object dtype (i.e. not datetime64/timedelta64) we don't want to
    # box datetimelike scalars.
    if dtype == object:
        return value

    if isinstance(value, (np.datetime64, datetime)):
        return tslibs.Timestamp(value)

    if isinstance(value, (np.timedelta64, timedelta)):
        return tslibs.Timedelta(value)

    return value


def is_bool_indexer(key: Any) -> bool:
"""
Check whether `key` is a valid boolean indexer.
Expand Down Expand Up @@ -347,23 +331,6 @@ def apply_if_callable(maybe_callable, obj, **kwargs):
return maybe_callable


def dict_compat(d):
    """
    Helper that re-keys a datetimelike-keyed dict as a Timestamp-keyed dict.

    Every key is passed through ``maybe_box_datetimelike``; values are
    carried over untouched.

    Parameters
    ----------
    d: dict like object

    Returns
    -------
    dict

    """
    boxed = {}
    for key, value in d.items():
        boxed[maybe_box_datetimelike(key)] = value
    return boxed


def standardize_mapping(into):
"""
Helper function to standardize a supplied mapping.
Expand Down
43 changes: 42 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import (
TYPE_CHECKING,
Any,
Dict,
List,
Optional,
Sequence,
Expand All @@ -19,7 +20,7 @@

import numpy as np

from pandas._libs import lib, tslib
from pandas._libs import lib, tslib, tslibs
from pandas._libs.tslibs import (
NaT,
OutOfBoundsDatetime,
Expand Down Expand Up @@ -134,6 +135,30 @@ def is_nested_object(obj) -> bool:
return False


def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scalar:
    """
    Box a datetime-like scalar as a Timestamp or Timedelta, unless the
    dtype is object.

    Parameters
    ----------
    value : scalar
    dtype : Dtype, optional

    Returns
    -------
    scalar
    """
    # (types to recognise, pandas scalar used to box them)
    boxers = (
        ((np.datetime64, datetime), tslibs.Timestamp),
        ((np.timedelta64, timedelta), tslibs.Timedelta),
    )

    # object dtype means the caller wants the raw scalar left alone
    if not (dtype == object):
        for kinds, boxer in boxers:
            if isinstance(value, kinds):
                value = boxer(value)
                break

    return value


def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]):
"""
try to cast to the specified dtype (e.g. convert back to bool/int
Expand Down Expand Up @@ -791,6 +816,22 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj,
return dtype, val


def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]:
    """
    Build a Timestamp-keyed dict from a datetimelike-keyed dict.

    Keys are run through ``maybe_box_datetimelike``; values are kept as-is.

    Parameters
    ----------
    d: dict-like object

    Returns
    -------
    dict
    """
    boxed_items = ((maybe_box_datetimelike(key), value) for key, value in d.items())
    return dict(boxed_items)


def infer_dtype_from_array(
arr, pandas_dtype: bool = False
) -> Tuple[DtypeObj, ArrayLike]:
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
find_common_type,
infer_dtype_from_scalar,
invalidate_string_dtypes,
maybe_box_datetimelike,
maybe_cast_to_datetime,
maybe_casted_values,
maybe_convert_platform,
Expand Down Expand Up @@ -1538,15 +1539,15 @@ def to_dict(self, orient="dict", into=dict):
(
"data",
[
list(map(com.maybe_box_datetimelike, t))
list(map(maybe_box_datetimelike, t))
for t in self.itertuples(index=False, name=None)
],
),
)
)

elif orient == "series":
return into_c((k, com.maybe_box_datetimelike(v)) for k, v in self.items())
return into_c((k, maybe_box_datetimelike(v)) for k, v in self.items())

elif orient == "records":
columns = self.columns.tolist()
Expand All @@ -1555,7 +1556,7 @@ def to_dict(self, orient="dict", into=dict):
for row in self.itertuples(index=False, name=None)
)
return [
into_c((k, com.maybe_box_datetimelike(v)) for k, v in row.items())
into_c((k, maybe_box_datetimelike(v)) for k, v in row.items())
for row in rows
]

Expand Down
5 changes: 3 additions & 2 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pandas.core.dtypes.cast import (
find_common_type,
infer_dtype_from_scalar,
maybe_box_datetimelike,
maybe_downcast_to_dtype,
)
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -1193,8 +1194,8 @@ def interval_range(
IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
closed='both', dtype='interval[int64]')
"""
start = com.maybe_box_datetimelike(start)
end = com.maybe_box_datetimelike(end)
start = maybe_box_datetimelike(start)
end = maybe_box_datetimelike(end)
endpoint = start if start is not None else end

if freq is None and com.any_none(periods, start, end):
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
find_common_type,
infer_dtype_from,
infer_dtype_from_scalar,
maybe_box_datetimelike,
maybe_downcast_numeric,
maybe_downcast_to_dtype,
maybe_infer_dtype_type,
Expand Down Expand Up @@ -843,7 +844,7 @@ def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray:
if isna(s):
return ~mask

s = com.maybe_box_datetimelike(s)
s = maybe_box_datetimelike(s)
return compare_or_regex_search(self.values, s, regex, mask)

# Calculate the mask once, prior to the call of comp
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pandas.core.dtypes.cast import (
construct_1d_arraylike_from_scalar,
construct_1d_ndarray_preserving_na,
dict_compat,
maybe_cast_to_datetime,
maybe_convert_platform,
maybe_infer_to_datetimelike,
Expand Down Expand Up @@ -346,7 +347,7 @@ def _homogenize(data, index, dtype: Optional[DtypeObj]):
oindex = index.astype("O")

if isinstance(index, (ABCDatetimeIndex, ABCTimedeltaIndex)):
val = com.dict_compat(val)
val = dict_compat(val)
else:
val = dict(val)
val = lib.fast_multiget(val, oindex._values, default=np.nan)
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/dtypes/cast/test_dict_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import numpy as np

from pandas.core.dtypes.cast import dict_compat

from pandas import Timestamp


def test_dict_compat():
    # np.datetime64 keys get boxed to Timestamp; already-boxed and
    # non-datetimelike keys come back unchanged.
    boxed = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2}
    raw = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2}
    plain = {1: 2, 3: 4, 5: 6}

    cases = [(raw, boxed), (boxed, boxed), (plain, plain)]
    for given, wanted in cases:
        assert dict_compat(given) == wanted
37 changes: 25 additions & 12 deletions pandas/tests/frame/methods/test_to_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,17 +257,30 @@ def test_to_dict_wide(self):
assert result == expected

def test_to_dict_orient_dtype(self):
# GH#22620
# Input Data
input_data = {"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["X", "Y", "Z"]}
df = DataFrame(input_data)
# Expected Dtypes
expected = {"a": int, "b": float, "c": str}
# Extracting dtypes out of to_dict operation
for df_dict in df.to_dict("records"):
result = {
"a": type(df_dict["a"]),
"b": type(df_dict["b"]),
"c": type(df_dict["c"]),
# GH22620 & GH21256

df = DataFrame(
{
"bool": [True, True, False],
"datetime": [
datetime(2018, 1, 1),
datetime(2019, 2, 2),
datetime(2020, 3, 3),
],
"float": [1.0, 2.0, 3.0],
"int": [1, 2, 3],
"str": ["X", "Y", "Z"],
}
)

expected = {
"int": int,
"float": float,
"str": str,
"datetime": Timestamp,
"bool": bool,
}

for df_dict in df.to_dict("records"):
result = {col: type(df_dict[col]) for col in list(df.columns)}
assert result == expected
11 changes: 1 addition & 10 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pandas.compat.numpy import np_version_under1p17

import pandas as pd
from pandas import Series, Timestamp
from pandas import Series
import pandas._testing as tm
from pandas.core import ops
import pandas.core.common as com
Expand Down Expand Up @@ -109,15 +109,6 @@ def test_maybe_match_name(left, right, expected):
assert ops.common._maybe_match_name(left, right) == expected


def test_dict_compat():
    # np.datetime64 keys get boxed to Timestamp; already-boxed and
    # non-datetimelike keys come back unchanged.
    boxed = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2}
    raw = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2}
    plain = {1: 2, 3: 4, 5: 6}

    for given, wanted in ((raw, boxed), (boxed, boxed), (plain, plain)):
        assert com.dict_compat(given) == wanted


def test_standardize_mapping():
# No uninitialized defaultdicts
msg = r"to_dict\(\) only accepts initialized defaultdicts"
Expand Down