-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
implement libmissing; untangles _libs dependencies #18357
Changes from 4 commits
12a880e
528cec1
d6355fe
5fea502
876f848
9dcf97c
2cf1b21
44fbe90
f833419
d2184f1
5614df8
a688996
d1323fb
ce185e0
6d8308f
e94e01f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,3 @@ | ||
# prototypes for sharing | ||
|
||
# NOTE(review): a function with this name and signature is implemented in the new module hunk later on this page -- confirm whether this prototype is kept or removed here. | ||
cdef bint is_null_datetimelike(v) | ||
# NOTE(review): is_period is not implemented in the visible hunks -- confirm where it lives. | ||
cpdef bint is_period(val) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# -*- coding: utf-8 -*- | ||
# cython: profile=False | ||
|
||
# Null check for timedelta/datetime scalars (or their integer versions). | ||
cdef bint is_null_datetimelike(v) | ||
# Scalar null check; float/complex infinities are not treated as null. | ||
cpdef bint checknull(object val) | ||
# Scalar null check that additionally treats float/complex +/-inf as null. | ||
cpdef bint checknull_old(object val) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
# -*- coding: utf-8 -*- | ||
# cython: profile=False | ||
|
||
from cpython cimport PyFloat_Check, PyComplex_Check | ||
|
||
cimport cython | ||
from cython cimport Py_ssize_t | ||
|
||
import numpy as np | ||
cimport numpy as np | ||
from numpy cimport ndarray, int64_t, uint8_t | ||
np.import_array() | ||
|
||
cimport util | ||
|
||
from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value | ||
from tslibs.nattype import NaT, iNaT | ||
Review comment: instead of importing iNaT, just use NPY_NAT to avoid perf issues. |
||
|
||
# Module-level C constants used by the null / infinity checks in this module.
cdef:
    # Positive and negative infinity as C doubles.
    double INF = <double> np.inf
    double NEGINF = -INF

    # Sentinel obtained from util.get_nat(); the checks in this module
    # compare it against the values returned by get_datetime64_value /
    # get_timedelta64_value to detect NaT.
    int64_t NPY_NAT = util.get_nat()
|
||
|
||
cdef inline bint is_null_datetimelike(object v):
    """
    Determine if we have a null for a timedelta/datetime (or the integer
    version of one).

    Parameters
    ----------
    v : object

    Returns
    -------
    bint
        True if ``util._checknull(v)`` holds, if ``v`` is ``NaT``, if ``v``
        is a datetime64/timedelta64 scalar whose value equals ``NPY_NAT``,
        or if ``v`` is an integer equal to ``NPY_NAT``; False otherwise.
    """
    # TODO: rename to checknull_datetimelike for consistency with the other
    # checkers in this module.
    if util._checknull(v) or v is NaT:
        return True
    elif util.is_timedelta64_object(v):
        # Compare against the C-level NPY_NAT constant (as _check_all_nulls
        # and checknull do) instead of the Python-level iNaT object, which
        # avoids a Python method call and comparison per invocation.
        return get_timedelta64_value(v) == NPY_NAT
    elif util.is_datetime64_object(v):
        return get_datetime64_value(v) == NPY_NAT
    elif util.is_integer_object(v):
        return v == NPY_NAT
    return False
|
||
|
||
cdef inline bint _check_all_nulls(object val):
    """
    Utility to check if a value is any type of null.

    Floats and complex numbers are null when they compare unequal to
    themselves (NaN); ``NaT`` and ``None`` are always null;
    datetime64/timedelta64 scalars are null when their value equals
    ``NPY_NAT``.  Anything else is not null.
    """
    if PyFloat_Check(val) or PyComplex_Check(val):
        # self-inequality is the NaN test
        return val != val
    if val is NaT or val is None:
        return True
    if util.is_datetime64_object(val):
        return get_datetime64_value(val) == NPY_NAT
    if util.is_timedelta64_object(val):
        return get_timedelta64_value(val) == NPY_NAT
    return False
|
||
|
||
cpdef bint checknull(object val):
    """
    Return True if ``val`` is considered a null scalar.

    Parameters
    ----------
    val : object

    Returns
    -------
    bint
        - float / complex: True only when the value is NaN (infinities are
          not null here)
        - datetime64 / timedelta64 scalar: True when its value is NPY_NAT
        - ``NaT``: always True
        - objects for which ``util.is_array`` holds: always False
        - anything else: the result of ``util._checknull(val)``
    """
    if util.is_float_object(val) or util.is_complex_object(val):
        # self-inequality is the NaN test
        return val != val
    if util.is_datetime64_object(val):
        return get_datetime64_value(val) == NPY_NAT
    if val is NaT:
        return True
    if util.is_timedelta64_object(val):
        return get_timedelta64_value(val) == NPY_NAT
    if util.is_array(val):
        return False
    # NOTE(review): moving/renaming util._checknull into this module was
    # discussed and deferred, since it is used in a bunch of places.
    return util._checknull(val)
|
||
|
||
cpdef bint checknull_old(object val):
    """
    Return True if ``val`` is considered a null scalar, counting
    infinities as null.

    Parameters
    ----------
    val : object

    Returns
    -------
    bint
        - float / complex: True when the value is NaN, ``INF`` or ``NEGINF``
        - datetime64 / timedelta64 scalar: True when its value is NPY_NAT
        - ``NaT``: always True
        - objects for which ``util.is_array`` holds: always False
        - anything else: the result of ``util._checknull(val)``
    """
    if util.is_float_object(val) or util.is_complex_object(val):
        return val != val or val == INF or val == NEGINF
    if util.is_datetime64_object(val):
        return get_datetime64_value(val) == NPY_NAT
    if val is NaT:
        return True
    if util.is_timedelta64_object(val):
        return get_timedelta64_value(val) == NPY_NAT
    if util.is_array(val):
        return False
    return util._checknull(val)
|
||
|
||
@cython.wraparound(False)
@cython.boundscheck(False)
def isnaobj(ndarray arr):
    """
    Return a boolean mask flagging the null entries of a 1-D array.

    Parameters
    ----------
    arr : ndarray
        Must be 1-dimensional.

    Returns
    -------
    ndarray
        ``np.bool_`` view of a uint8 array with one entry per element of
        ``arr``; an entry is True where ``_check_all_nulls`` reports the
        corresponding element as null.
    """
    cdef:
        Py_ssize_t idx, length
        object item
        ndarray[uint8_t] mask

    assert arr.ndim == 1, "'arr' must be 1-D."

    length = len(arr)
    # every slot is written in the loop below, so no zero-fill is needed
    mask = np.empty(length, dtype=np.uint8)
    for idx in range(length):
        item = arr[idx]
        mask[idx] = _check_all_nulls(item)
    return mask.view(np.bool_)
|
||
|
||
@cython.wraparound(False)
@cython.boundscheck(False)
def isnaobj_old(ndarray arr):
    """
    Return a boolean mask flagging the null entries of a 1-D array, using
    the "old" null rules.

    Parameters
    ----------
    arr : ndarray
        Must be 1-dimensional.

    Returns
    -------
    ndarray
        ``np.bool_`` view of a uint8 array with one entry per element of
        ``arr``; an entry is True where the element is ``NaT`` or
        ``util._checknull_old`` reports it as null.
    """
    cdef:
        Py_ssize_t idx, length
        object item
        ndarray[uint8_t] mask

    assert arr.ndim == 1, "'arr' must be 1-D."

    length = len(arr)
    mask = np.zeros(length, dtype=np.uint8)
    for idx in range(length):
        item = arr[idx]
        mask[idx] = item is NaT or util._checknull_old(item)
    return mask.view(np.bool_)
|
||
|
||
@cython.wraparound(False)
@cython.boundscheck(False)
def isnaobj2d(ndarray arr):
    """
    Return a boolean mask flagging the null entries of a 2-D array.

    Parameters
    ----------
    arr : ndarray
        Must be 2-dimensional.

    Returns
    -------
    ndarray
        ``np.bool_`` view of a uint8 array with the same shape as ``arr``;
        an entry is True where ``checknull`` reports the corresponding
        element as null.
    """
    cdef:
        Py_ssize_t row, col, nrows, ncols
        object item
        ndarray[uint8_t, ndim=2] mask

    assert arr.ndim == 2, "'arr' must be 2-D."

    nrows, ncols = (<object> arr).shape
    # start from all-False and flip only the null positions
    mask = np.zeros((nrows, ncols), dtype=np.uint8)
    for row in range(nrows):
        for col in range(ncols):
            item = arr[row, col]
            if checknull(item):
                mask[row, col] = 1
    return mask.view(np.bool_)
|
||
|
||
@cython.wraparound(False)
@cython.boundscheck(False)
def isnaobj2d_old(ndarray arr):
    """
    Return a boolean mask flagging the null entries of a 2-D array, using
    the "old" null rules.

    Parameters
    ----------
    arr : ndarray
        Must be 2-dimensional.

    Returns
    -------
    ndarray
        ``np.bool_`` view of a uint8 array with the same shape as ``arr``;
        an entry is True where ``checknull_old`` reports the corresponding
        element as null.
    """
    cdef:
        Py_ssize_t row, col, nrows, ncols
        object item
        ndarray[uint8_t, ndim=2] mask

    assert arr.ndim == 2, "'arr' must be 2-D."

    nrows, ncols = (<object> arr).shape
    # start from all-False and flip only the null positions
    mask = np.zeros((nrows, ncols), dtype=np.uint8)
    for row in range(nrows):
        for col in range(ncols):
            item = arr[row, col]
            if checknull_old(item):
                mask[row, col] = 1
    return mask.view(np.bool_)
|
||
|
||
cpdef bint isposinf_scalar(object val):
    """
    Return True if ``val`` is a float object equal to ``INF``.
    """
    return util.is_float_object(val) and val == INF
|
||
|
||
cpdef bint isneginf_scalar(object val):
    """
    Return True if ``val`` is a float object equal to ``NEGINF``.
    """
    return util.is_float_object(val) and val == NEGINF
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these could be cimports instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At the moment these are not `cdef`.