-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PERF: Datetime/Timestamp.normalize for timezone naive datetimes #23634
Changes from 9 commits
62827ef
ff171f7
efb281f
cc6eee0
b90abd9
35fac22
f09559a
0b3a664
9042aa1
3a23170
52a7eb2
6204d21
1f1d455
44a8808
e6c74d2
0ece208
bc5571a
243d73a
fb11dcf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,8 @@ from np_datetime cimport (check_dts_bounds, | |
npy_datetime, | ||
dt64_to_dtstruct, dtstruct_to_dt64, | ||
get_datetime64_unit, get_datetime64_value, | ||
pydatetime_to_dt64, NPY_DATETIMEUNIT, NPY_FR_ns) | ||
pydatetime_to_dt64, NPY_DATETIMEUNIT, NPY_FR_ns, | ||
DAY_S) | ||
from np_datetime import OutOfBoundsDatetime | ||
|
||
from util cimport (is_string_object, | ||
|
@@ -41,7 +42,6 @@ from nattype cimport NPY_NAT, checknull_with_nat | |
# ---------------------------------------------------------------------- | ||
# Constants | ||
|
||
cdef int64_t DAY_NS = 86400000000000LL | ||
cdef int64_t HOURS_NS = 3600000000000 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This one should probably be moved too (fine to do in a future PR). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you move this one as well? |
||
NS_DTYPE = np.dtype('M8[ns]') | ||
TD_DTYPE = np.dtype('m8[ns]') | ||
|
@@ -931,10 +931,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, | |
result_b[:] = NPY_NAT | ||
|
||
idx_shifted_left = (np.maximum(0, trans.searchsorted( | ||
vals - DAY_NS, side='right') - 1)).astype(np.int64) | ||
vals - DAY_S * 1000000000, side='right') - 1)).astype(np.int64) | ||
|
||
idx_shifted_right = (np.maximum(0, trans.searchsorted( | ||
vals + DAY_NS, side='right') - 1)).astype(np.int64) | ||
vals + DAY_S * 1000000000, side='right') - 1)).astype(np.int64) | ||
|
||
for i in range(n): | ||
val = vals[i] | ||
|
@@ -1116,9 +1116,9 @@ def normalize_date(dt: object) -> datetime: | |
@cython.boundscheck(False) | ||
def normalize_i8_timestamps(int64_t[:] stamps, object tz=None): | ||
""" | ||
Normalize each of the (nanosecond) timestamps in the given array by | ||
rounding down to the beginning of the day (i.e. midnight). If `tz` | ||
is not None, then this is midnight for this timezone. | ||
Normalize each of the (nanosecond) timezone aware timestamps in the given | ||
array by rounding down to the beginning of the day (i.e. midnight). | ||
This is midnight for timezone, `tz`. | ||
|
||
Parameters | ||
---------- | ||
|
@@ -1130,21 +1130,11 @@ def normalize_i8_timestamps(int64_t[:] stamps, object tz=None): | |
result : int64 ndarray of converted of normalized nanosecond timestamps | ||
""" | ||
cdef: | ||
Py_ssize_t i, n = len(stamps) | ||
npy_datetimestruct dts | ||
Py_ssize_t n = len(stamps) | ||
int64_t[:] result = np.empty(n, dtype=np.int64) | ||
|
||
if tz is not None: | ||
tz = maybe_get_tz(tz) | ||
result = _normalize_local(stamps, tz) | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this case never reached? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct. This case (the naive case) is handled in these two places now: https://github.com/pandas-dev/pandas/pull/23634/files#diff-231ac35d2116a12844a7cfed02730580R1289 |
||
with nogil: | ||
for i in range(n): | ||
if stamps[i] == NPY_NAT: | ||
result[i] = NPY_NAT | ||
continue | ||
dt64_to_dtstruct(stamps[i], &dts) | ||
result[i] = _normalized_stamp(&dts) | ||
tz = maybe_get_tz(tz) | ||
result = _normalize_local(stamps, tz) | ||
|
||
return result.base # .base to access underlying np.ndarray | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,6 +37,12 @@ cdef extern from "src/datetime/np_datetime_strings.h": | |
npy_datetimestruct *out, | ||
int *out_local, int *out_tzoffset) | ||
|
||
# ---------------------------------------------------------------------- | ||
# time constants | ||
|
||
cdef int64_t DAY_S = 86400 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's write this out to DAY_SECONDS There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the place for these may be ccalendar |
||
|
||
|
||
# ---------------------------------------------------------------------- | ||
# numpy object inspection | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,6 +40,7 @@ from timezones cimport ( | |
# Constants | ||
_zero_time = datetime_time(0, 0) | ||
_no_input = object() | ||
cdef int64_t DAY_NS = 86400000000000 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We have DAY_NS defined in lots of places; can you move it to one place?
It should probably be in np_datetime.pyx (and be imported from there). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you move it to the same place you have DAY_SECONDS? |
||
|
||
|
||
# ---------------------------------------------------------------------- | ||
|
@@ -1285,6 +1286,8 @@ class Timestamp(_Timestamp): | |
Normalize Timestamp to midnight, preserving | ||
tz information. | ||
""" | ||
if self.tz is None: | ||
return Timestamp(self.value - (self.value % DAY_NS)) | ||
normalized_value = normalize_i8_timestamps( | ||
np.array([self.value], dtype='i8'), tz=self.tz)[0] | ||
return Timestamp(normalized_value).tz_localize(self.tz) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -832,7 +832,14 @@ def normalize(self): | |
'2014-08-01 00:00:00+05:30'], | ||
dtype='datetime64[ns, Asia/Calcutta]', freq=None) | ||
""" | ||
new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz) | ||
if self.tz is None: | ||
not_null = self.notnull() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you use notna |
||
DAY_NS = 86400000000000 | ||
new_values = self.asi8.copy() | ||
adjustment = (new_values[not_null] % DAY_NS) | ||
new_values[not_null] = new_values[not_null] - adjustment | ||
else: | ||
new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz) | ||
return type(self)(new_values, freq='infer').tz_localize(self.tz) | ||
|
||
def to_period(self, freq=None): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -328,6 +328,17 @@ def test_replace_dst_border(self): | |
expected = Timestamp('2013-11-3 03:00:00', tz='America/Chicago') | ||
assert result == expected | ||
|
||
# -------------------------------------------------------------- | ||
# Timestamp.normalize | ||
|
||
@pytest.mark.parametrize('arg', ['2013-11-30', '2013-11-30 12:00:00']) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a normalize_nat test as well? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We don't define There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We could have one for Timestamp mirroring (another issue). Probably would just return |
||
def test_normalize(self, tz_naive_fixture, arg): | ||
tz = tz_naive_fixture | ||
ts = Timestamp(arg, tz=tz) | ||
result = ts.normalize() | ||
expected = Timestamp('2013-11-30', tz=tz) | ||
assert result == expected | ||
|
||
# -------------------------------------------------------------- | ||
|
||
@td.skip_if_windows | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is DAY_S? You mean DAY_NS, right? Let's write out these constants.