Skip to content

Commit 4c75674

Browse files
aileronajay authored and ShaharBental committed Dec 26, 2016
ENH: allowing datetime and timedelta datatype in pd cut bins
xref pandas-dev#14714, follow-on to pandas-dev#14737

Author: Ajay Saxena <aileronajay@gmail.com>

Closes pandas-dev#14798 from aileronajay/cut_timetype_bin and squashes the following commits:

82bffa1 [Ajay Saxena] added method for time type bins in pd cut and modified tests
ac919cf [Ajay Saxena] added test for datetime bin type
355e569 [Ajay Saxena] allowing datetime and timedelta datatype in pd cut bins
1 parent a102544 commit 4c75674

File tree

3 files changed

+37
-2
lines changed

3 files changed

+37
-2
lines changed
 

‎doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ Other enhancements
101101
unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack
102102
of sorting or an incorrect key. See :ref:`here <advanced.unsorted>`
103103

104-
- ``pd.cut`` and ``pd.qcut`` now support datetime64 and timedelta64 dtypes (:issue:`14714`)
104+
- ``pd.cut`` and ``pd.qcut`` now support datetime64 and timedelta64 dtypes (:issue:`14714`, :issue:`14798`)
105105
- ``Series`` provides a ``to_excel`` method to output Excel files (:issue:`8825`)
106106
- The ``usecols`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`14154`)
107107
- ``pd.DataFrame.plot`` now prints a title above each subplot if ``subplots=True`` and ``title`` is a list of strings (:issue:`14753`)

‎pandas/tools/tests/test_tile.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from pandas.core.algorithms import quantile
1313
from pandas.tools.tile import cut, qcut
1414
import pandas.tools.tile as tmod
15-
from pandas import to_datetime, DatetimeIndex
15+
from pandas import to_datetime, DatetimeIndex, Timestamp
1616

1717

1818
class TestCut(tm.TestCase):
@@ -313,6 +313,26 @@ def test_datetime_cut(self):
313313
result, bins = cut(data, 3, retbins=True)
314314
tm.assert_series_equal(Series(result), expected)
315315

316+
def test_datetime_bin(self):
    """
    Check that ``cut`` accepts datetime-like bin edges in several forms.

    The same three edges are supplied as ``Timestamp`` scalars,
    ``np.datetime64`` scalars, ``datetime.datetime`` objects and a
    ``DatetimeIndex``; every form must yield the same ordered categorical
    of interval labels.
    """
    data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
    bin_data = ['2012-12-12', '2012-12-14', '2012-12-16']
    expected = Series(['(2012-12-12 00:00:00, 2012-12-14 00:00:00]',
                       '(2012-12-14 00:00:00, 2012-12-16 00:00:00]'],
                      ).astype("category", ordered=True)

    # Scalar converters applied element-wise to the string edges.
    # (The original list named Timestamp twice; once is enough.)
    for conv in [Timestamp, np.datetime64]:
        bins = [conv(v) for v in bin_data]
        result = cut(data, bins=bins)
        tm.assert_series_equal(Series(result), expected)

    # Plain datetime.datetime edges.
    bin_pydatetime = [Timestamp(v).to_pydatetime() for v in bin_data]
    result = cut(data, bins=bin_pydatetime)
    tm.assert_series_equal(Series(result), expected)

    # DatetimeIndex edges. The original passed ``bin_pydatetime`` again
    # here, so the index built on the previous line was never tested.
    bins = to_datetime(bin_data)
    result = cut(data, bins=bins)
    tm.assert_series_equal(Series(result), expected)
335+
316336

317337
def curpath():
318338
pth, _ = os.path.split(os.path.abspath(__file__))

‎pandas/tools/tile.py

+15
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from pandas.compat import zip
1414
from pandas import to_timedelta, to_datetime
1515
from pandas.types.common import is_datetime64_dtype, is_timedelta64_dtype
16+
from pandas.lib import infer_dtype
1617

1718
import numpy as np
1819

@@ -116,6 +117,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
116117

117118
else:
118119
bins = np.asarray(bins)
120+
bins = _convert_bin_to_numeric_type(bins)
119121
if (np.diff(bins) < 0).any():
120122
raise ValueError('bins must increase monotonically.')
121123

@@ -327,6 +329,19 @@ def _coerce_to_type(x):
327329
return x, dtype
328330

329331

332+
def _convert_bin_to_numeric_type(x):
333+
"""
334+
if the passed bin is of datetime/timedelta type,
335+
this method converts it to integer
336+
"""
337+
dtype = infer_dtype(x)
338+
if dtype == 'timedelta' or dtype == 'timedelta64':
339+
x = to_timedelta(x).view(np.int64)
340+
elif dtype == 'datetime' or dtype == 'datetime64':
341+
x = to_datetime(x).view(np.int64)
342+
return x
343+
344+
330345
def _preprocess_for_cut(x):
331346
"""
332347
handles preprocessing for cut where we convert passed

0 commit comments

Comments
 (0)