Skip to content

Commit

Permalink
BUG: DataFrame.diff(axis=0) with DatetimeTZ data
Browse files Browse the repository at this point in the history
add whatsnew

clarify comment

Add additional tests

move diff into its own function in DatetimeTZBlock

Use correct placement

fix failing test

formatting
  • Loading branch information
mroeschke committed Feb 26, 2018
1 parent 92dbc78 commit 5aabc85
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,7 @@ Timezones
- Bug in the :class:`DataFrame` constructor, where tz-aware DatetimeIndex and a given column name will result in an empty ``DataFrame`` (:issue:`19157`)
- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`)
- Bug when iterating over :class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`)
- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`)

Offsets
^^^^^^^
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2905,6 +2905,18 @@ def shift(self, periods, axis=0, mgr=None):
return [self.make_block_same_class(new_values,
placement=self.mgr_locs)]

def diff(self, n, axis=0, mgr=None):
    """1st discrete difference

    Parameters
    ----------
    n : number of periods handed to ``shift`` before subtracting
    axis : axis to diff along, default 0. ``axis == 0`` is not supported.
    mgr : default None; not used in this method

    Returns
    -------
    A one-element list containing a new TimeDeltaBlock.

    Raises
    ------
    NotImplementedError
        If ``axis == 0``.
    """
    if axis == 0:
        # Cannot currently calculate diff across multiple blocks since this
        # function is invoked via apply
        raise NotImplementedError

    current = self.values
    shifted = self.shift(n, axis=axis)[0].values
    deltas = (current - shifted).asi8

    # Reshape to a single row, like how algos.diff does for timedelta data,
    # then reinterpret the values as timedelta64[ns]
    deltas = deltas.reshape(1, len(deltas))
    deltas = deltas.astype('timedelta64[ns]')

    return [TimeDeltaBlock(deltas, placement=self.mgr_locs.indexer)]

def concat_same_type(self, to_concat, placement=None):
"""
Concatenate list of single blocks of the same type.
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/frame/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,29 @@ def test_diff(self):
1), 'z': pd.Series(1)}).astype('float64')
assert_frame_equal(result, expected)

@pytest.mark.parametrize('axis', [0, 1])
@pytest.mark.parametrize('tz', [None, 'UTC'])
def test_diff_datetime(self, axis, tz):
    """DataFrame.diff on two datetime columns, naive and tz-aware."""
    # GH 18578
    frame = DataFrame({
        col: date_range('2010', freq='D', periods=2, tz=tz)
        for col in (0, 1)
    })

    # tz-aware data cannot be diffed along axis=1 yet
    if axis == 1 and tz is not None:
        with pytest.raises(NotImplementedError):
            frame.diff(axis=axis)
        return

    result = frame.diff(axis=axis)
    if axis == 1:
        # Both columns hold the same dates, so the difference is zero
        expected = DataFrame({
            0: pd.TimedeltaIndex(['NaT', 'NaT']),
            1: pd.TimedeltaIndex(['0 days', '0 days']),
        })
    else:
        # Consecutive rows are one day apart
        expected = DataFrame({
            0: pd.TimedeltaIndex(['NaT', '1 days']),
            1: pd.TimedeltaIndex(['NaT', '1 days']),
        })
    assert_frame_equal(result, expected)

def test_diff_timedelta(self):
# GH 4533
df = DataFrame(dict(time=[Timestamp('20130101 9:01'),
Expand Down

0 comments on commit 5aabc85

Please sign in to comment.