diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 23b84bfbd69e6..5cceb2a9bce8c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -435,7 +435,7 @@ Numeric Conversion ^^^^^^^^^^ -- +- Bug in :meth:`DataFrame.to_dict` with ``orient='records'`` now returns boxed :class:`Timestamp`/:class:`Timedelta` objects for datetimelike columns (:issue:`21256`) - Strings diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 5f4cd4b269a2a..9152ce72d75aa 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -22,6 +22,7 @@ construct_1d_arraylike_from_scalar, find_common_type, infer_dtype_from_scalar, + maybe_box_datetimelike, ) from pandas.core.dtypes.common import ( is_array_like, @@ -805,7 +806,7 @@ def _get_val_at(self, loc): return self.fill_value else: val = self.sp_values[sp_loc] - val = com.maybe_box_datetimelike(val, self.sp_values.dtype) + val = maybe_box_datetimelike(val, self.sp_values.dtype) return val def take(self, indices, allow_fill=False, fill_value=None) -> "SparseArray": diff --git a/pandas/core/common.py b/pandas/core/common.py index b860c83f89cbc..9b6133d2f7627 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -6,7 +6,6 @@ from collections import abc, defaultdict import contextlib -from datetime import datetime, timedelta from functools import partial import inspect from typing import Any, Collection, Iterable, Iterator, List, Union, cast @@ -14,7 +13,7 @@ import numpy as np -from pandas._libs import lib, tslibs +from pandas._libs import lib from pandas._typing import AnyArrayLike, Scalar, T from pandas.compat.numpy import np_version_under1p18 @@ -78,21 +77,6 @@ def consensus_name_attr(objs): return name -def maybe_box_datetimelike(value, dtype=None): - # turn a datetime like into a Timestamp/timedelta as needed - if dtype == object: - # If we dont have datetime64/timedelta64 dtype, we dont want to - # box datetimelike scalars - 
return value - - if isinstance(value, (np.datetime64, datetime)): - value = tslibs.Timestamp(value) - elif isinstance(value, (np.timedelta64, timedelta)): - value = tslibs.Timedelta(value) - - return value - - def is_bool_indexer(key: Any) -> bool: """ Check whether `key` is a valid boolean indexer. @@ -347,23 +331,6 @@ def apply_if_callable(maybe_callable, obj, **kwargs): return maybe_callable -def dict_compat(d): - """ - Helper function to convert datetimelike-keyed dicts - to Timestamp-keyed dict. - - Parameters - ---------- - d: dict like object - - Returns - ------- - dict - - """ - return {maybe_box_datetimelike(key): value for key, value in d.items()} - - def standardize_mapping(into): """ Helper function to standardize a supplied mapping. diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index aded0af6aca0e..9758eae60c262 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -7,6 +7,7 @@ from typing import ( TYPE_CHECKING, Any, + Dict, List, Optional, Sequence, @@ -19,7 +20,7 @@ import numpy as np -from pandas._libs import lib, tslib +from pandas._libs import lib, tslib, tslibs from pandas._libs.tslibs import ( NaT, OutOfBoundsDatetime, @@ -134,6 +135,30 @@ def is_nested_object(obj) -> bool: return False +def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scalar: + """ + Cast scalar to Timestamp or Timedelta if scalar is datetime-like + and dtype is not object. + + Parameters + ---------- + value : scalar + dtype : Dtype, optional + + Returns + ------- + scalar + """ + if dtype == object: + pass + elif isinstance(value, (np.datetime64, datetime)): + value = tslibs.Timestamp(value) + elif isinstance(value, (np.timedelta64, timedelta)): + value = tslibs.Timedelta(value) + + return value + + def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]): """ try to cast to the specified dtype (e.g. 
convert back to bool/int @@ -791,6 +816,22 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, return dtype, val +def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]: + """ + Convert datetimelike-keyed dicts to a Timestamp-keyed dict. + + Parameters + ---------- + d: dict-like object + + Returns + ------- + dict + + """ + return {maybe_box_datetimelike(key): value for key, value in d.items()} + + def infer_dtype_from_array( arr, pandas_dtype: bool = False ) -> Tuple[DtypeObj, ArrayLike]: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3ec575a849abe..9d223ba2bab0c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -84,6 +84,7 @@ find_common_type, infer_dtype_from_scalar, invalidate_string_dtypes, + maybe_box_datetimelike, maybe_cast_to_datetime, maybe_casted_values, maybe_convert_platform, @@ -1538,7 +1539,7 @@ def to_dict(self, orient="dict", into=dict): ( "data", [ - list(map(com.maybe_box_datetimelike, t)) + list(map(maybe_box_datetimelike, t)) for t in self.itertuples(index=False, name=None) ], ), @@ -1546,7 +1547,7 @@ def to_dict(self, orient="dict", into=dict): ) elif orient == "series": - return into_c((k, com.maybe_box_datetimelike(v)) for k, v in self.items()) + return into_c((k, maybe_box_datetimelike(v)) for k, v in self.items()) elif orient == "records": columns = self.columns.tolist() @@ -1555,7 +1556,7 @@ def to_dict(self, orient="dict", into=dict): for row in self.itertuples(index=False, name=None) ) return [ - into_c((k, com.maybe_box_datetimelike(v)) for k, v in row.items()) + into_c((k, maybe_box_datetimelike(v)) for k, v in row.items()) for row in rows ] diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 2061e652a4c01..c700acc24f411 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -19,6 +19,7 @@ from pandas.core.dtypes.cast import ( find_common_type, infer_dtype_from_scalar, + maybe_box_datetimelike, 
maybe_downcast_to_dtype, ) from pandas.core.dtypes.common import ( @@ -1193,8 +1194,8 @@ def interval_range( IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], closed='both', dtype='interval[int64]') """ - start = com.maybe_box_datetimelike(start) - end = com.maybe_box_datetimelike(end) + start = maybe_box_datetimelike(start) + end = maybe_box_datetimelike(end) endpoint = start if start is not None else end if freq is None and com.any_none(periods, start, end): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ee630909cb990..1f34e91d71077 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -19,6 +19,7 @@ find_common_type, infer_dtype_from, infer_dtype_from_scalar, + maybe_box_datetimelike, maybe_downcast_numeric, maybe_downcast_to_dtype, maybe_infer_dtype_type, @@ -843,7 +844,7 @@ def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray: if isna(s): return ~mask - s = com.maybe_box_datetimelike(s) + s = maybe_box_datetimelike(s) return compare_or_regex_search(self.values, s, regex, mask) # Calculate the mask once, prior to the call of comp diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index bb8283604abb0..bcafa2c2fdca7 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -14,6 +14,7 @@ from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, construct_1d_ndarray_preserving_na, + dict_compat, maybe_cast_to_datetime, maybe_convert_platform, maybe_infer_to_datetimelike, @@ -346,7 +347,7 @@ def _homogenize(data, index, dtype: Optional[DtypeObj]): oindex = index.astype("O") if isinstance(index, (ABCDatetimeIndex, ABCTimedeltaIndex)): - val = com.dict_compat(val) + val = dict_compat(val) else: val = dict(val) val = lib.fast_multiget(val, oindex._values, default=np.nan) diff --git a/pandas/tests/dtypes/cast/test_dict_compat.py b/pandas/tests/dtypes/cast/test_dict_compat.py new file mode 
100644 index 0000000000000..13dc82d779f95 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_dict_compat.py @@ -0,0 +1,14 @@ +import numpy as np + +from pandas.core.dtypes.cast import dict_compat + +from pandas import Timestamp + + +def test_dict_compat(): + data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2} + data_unchanged = {1: 2, 3: 4, 5: 6} + expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2} + assert dict_compat(data_datetime64) == expected + assert dict_compat(expected) == expected + assert dict_compat(data_unchanged) == data_unchanged diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index f1656b46cf356..f8feef7a95eab 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -257,17 +257,30 @@ def test_to_dict_wide(self): assert result == expected def test_to_dict_orient_dtype(self): - # GH#22620 - # Input Data - input_data = {"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["X", "Y", "Z"]} - df = DataFrame(input_data) - # Expected Dtypes - expected = {"a": int, "b": float, "c": str} - # Extracting dtypes out of to_dict operation - for df_dict in df.to_dict("records"): - result = { - "a": type(df_dict["a"]), - "b": type(df_dict["b"]), - "c": type(df_dict["c"]), + # GH22620 & GH21256 + + df = DataFrame( + { + "bool": [True, True, False], + "datetime": [ + datetime(2018, 1, 1), + datetime(2019, 2, 2), + datetime(2020, 3, 3), + ], + "float": [1.0, 2.0, 3.0], + "int": [1, 2, 3], + "str": ["X", "Y", "Z"], } + ) + + expected = { + "int": int, + "float": float, + "str": str, + "datetime": Timestamp, + "bool": bool, + } + + for df_dict in df.to_dict("records"): + result = {col: type(df_dict[col]) for col in list(df.columns)} assert result == expected diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 366a1970f6f64..81d866ba63bc0 100644 --- a/pandas/tests/test_common.py +++ 
b/pandas/tests/test_common.py @@ -9,7 +9,7 @@ from pandas.compat.numpy import np_version_under1p17 import pandas as pd -from pandas import Series, Timestamp +from pandas import Series import pandas._testing as tm from pandas.core import ops import pandas.core.common as com @@ -109,15 +109,6 @@ def test_maybe_match_name(left, right, expected): assert ops.common._maybe_match_name(left, right) == expected -def test_dict_compat(): - data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2} - data_unchanged = {1: 2, 3: 4, 5: 6} - expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2} - assert com.dict_compat(data_datetime64) == expected - assert com.dict_compat(expected) == expected - assert com.dict_compat(data_unchanged) == data_unchanged - - def test_standardize_mapping(): # No uninitialized defaultdicts msg = r"to_dict\(\) only accepts initialized defaultdicts"