From bd145c8d2b14e506609b251a11bd8268582ebc85 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 22 Nov 2017 14:52:28 -0800 Subject: [PATCH] implement libmissing; untangles _libs dependencies (#18357) --- pandas/_libs/algos.pyx | 3 +- pandas/_libs/algos_rank_helper.pxi.in | 4 +- pandas/_libs/hashtable.pyx | 2 +- pandas/_libs/hashtable_class_helper.pxi.in | 2 +- pandas/_libs/lib.pxd | 1 - pandas/_libs/lib.pyx | 124 +-------- pandas/_libs/missing.pxd | 6 + pandas/_libs/missing.pyx | 310 +++++++++++++++++++++ pandas/_libs/period.pyx | 2 +- pandas/_libs/src/inference.pyx | 16 -- pandas/_libs/src/util.pxd | 10 - pandas/_libs/tslib.pxd | 2 - pandas/_libs/tslib.pyx | 28 +- pandas/core/dtypes/missing.py | 13 +- pandas/io/formats/excel.py | 10 +- pandas/io/formats/format.py | 11 +- pandas/tests/dtypes/test_inference.py | 24 +- pandas/tests/dtypes/test_missing.py | 51 ++++ pandas/tests/test_lib.py | 52 +--- setup.py | 8 +- 20 files changed, 416 insertions(+), 263 deletions(-) create mode 100644 pandas/_libs/missing.pxd create mode 100644 pandas/_libs/missing.pyx diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index bb7f69f04b32d..a5aae6d6af656 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -32,8 +32,7 @@ from libc.math cimport sqrt, fabs # this is our util.pxd from util cimport numeric, get_nat -cimport lib -from pandas._libs import lib +import missing cdef int64_t iNaT = get_nat() diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index 0945aec638b1d..78a67d2e40be2 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -83,7 +83,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True, nan_value = {{neg_nan_value}} {{if dtype == 'object'}} - mask = lib.isnaobj(values) + mask = missing.isnaobj(values) {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} @@ -259,7 +259,7 @@ def rank_2d_{{dtype}}(object in_arr, 
axis=0, ties_method='average', nan_value = {{neg_nan_value}} {{if dtype == 'object'}} - mask = lib.isnaobj2d(values) + mask = missing.isnaobj2d(values) {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index b6b81055f89b2..4bbe8c654ea0e 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -42,7 +42,7 @@ cdef extern from "numpy/npy_math.h": cimport cython cimport numpy as cnp -from pandas._libs.lib import checknull +from missing cimport checknull cnp.import_array() cnp.import_ufunc() diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 3ef52c5c59c9d..6e1c4397810b7 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -4,7 +4,7 @@ Template for each `dtype` helper function for hashtable WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -from lib cimport is_null_datetimelike +from missing cimport is_null_datetimelike #---------------------------------------------------------------------- diff --git a/pandas/_libs/lib.pxd b/pandas/_libs/lib.pxd index 554b0248e97ea..b06c071c358c1 100644 --- a/pandas/_libs/lib.pxd +++ b/pandas/_libs/lib.pxd @@ -1,4 +1,3 @@ # prototypes for sharing -cdef bint is_null_datetimelike(v) cpdef bint is_period(val) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 6a92815ef84de..956aeaf39b021 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -53,14 +53,14 @@ PyDateTime_IMPORT from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value -from tslib cimport _check_all_nulls from tslib import NaT, Timestamp, Timedelta, array_to_datetime from interval import Interval +from missing cimport checknull cdef int64_t NPY_NAT = util.get_nat() cimport util -from util cimport is_array, _checknull, _checknan +from util cimport is_array, _checknull from libc.math cimport 
sqrt, fabs @@ -112,54 +112,6 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr): # ---------------------------------------------------------------------- -# isnull / notnull related - -cdef double INF = np.inf -cdef double NEGINF = -INF - - -cpdef bint checknull(object val): - if util.is_float_object(val) or util.is_complex_object(val): - return val != val # and val != INF and val != NEGINF - elif util.is_datetime64_object(val): - return get_datetime64_value(val) == NPY_NAT - elif val is NaT: - return True - elif util.is_timedelta64_object(val): - return get_timedelta64_value(val) == NPY_NAT - elif is_array(val): - return False - else: - return _checknull(val) - - -cpdef bint checknull_old(object val): - if util.is_float_object(val) or util.is_complex_object(val): - return val != val or val == INF or val == NEGINF - elif util.is_datetime64_object(val): - return get_datetime64_value(val) == NPY_NAT - elif val is NaT: - return True - elif util.is_timedelta64_object(val): - return get_timedelta64_value(val) == NPY_NAT - elif is_array(val): - return False - else: - return _checknull(val) - - -cpdef bint isposinf_scalar(object val): - if util.is_float_object(val) and val == INF: - return True - else: - return False - - -cpdef bint isneginf_scalar(object val): - if util.is_float_object(val) and val == NEGINF: - return True - else: - return False cpdef bint isscalar(object val): @@ -212,78 +164,6 @@ def item_from_zerodim(object val): return util.unbox_if_zerodim(val) -@cython.wraparound(False) -@cython.boundscheck(False) -def isnaobj(ndarray arr): - cdef Py_ssize_t i, n - cdef object val - cdef ndarray[uint8_t] result - - assert arr.ndim == 1, "'arr' must be 1-D." 
- - n = len(arr) - result = np.empty(n, dtype=np.uint8) - for i from 0 <= i < n: - val = arr[i] - result[i] = _check_all_nulls(val) - return result.view(np.bool_) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def isnaobj_old(ndarray arr): - cdef Py_ssize_t i, n - cdef object val - cdef ndarray[uint8_t] result - - assert arr.ndim == 1, "'arr' must be 1-D." - - n = len(arr) - result = np.zeros(n, dtype=np.uint8) - for i from 0 <= i < n: - val = arr[i] - result[i] = val is NaT or util._checknull_old(val) - return result.view(np.bool_) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def isnaobj2d(ndarray arr): - cdef Py_ssize_t i, j, n, m - cdef object val - cdef ndarray[uint8_t, ndim=2] result - - assert arr.ndim == 2, "'arr' must be 2-D." - - n, m = (<object> arr).shape - result = np.zeros((n, m), dtype=np.uint8) - for i from 0 <= i < n: - for j from 0 <= j < m: - val = arr[i, j] - if checknull(val): - result[i, j] = 1 - return result.view(np.bool_) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def isnaobj2d_old(ndarray arr): - cdef Py_ssize_t i, j, n, m - cdef object val - cdef ndarray[uint8_t, ndim=2] result - - assert arr.ndim == 2, "'arr' must be 2-D." 
- - n, m = (<object> arr).shape - result = np.zeros((n, m), dtype=np.uint8) - for i from 0 <= i < n: - for j from 0 <= j < m: - val = arr[i, j] - if checknull_old(val): - result[i, j] = 1 - return result.view(np.bool_) - - @cython.wraparound(False) @cython.boundscheck(False) cpdef ndarray[object] list_to_object_array(list obj): diff --git a/pandas/_libs/missing.pxd b/pandas/_libs/missing.pxd new file mode 100644 index 0000000000000..f3d990db1c3fd --- /dev/null +++ b/pandas/_libs/missing.pxd @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +cdef bint is_null_datetimelike(object val) +cpdef bint checknull(object val) +cpdef bint checknull_old(object val) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx new file mode 100644 index 0000000000000..0b60fc2c5b4d1 --- /dev/null +++ b/pandas/_libs/missing.pyx @@ -0,0 +1,310 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +from cpython cimport PyFloat_Check, PyComplex_Check + +cimport cython +from cython cimport Py_ssize_t + +import numpy as np +cimport numpy as np +from numpy cimport ndarray, int64_t, uint8_t +np.import_array() + +cimport util + +from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value +from tslibs.nattype import NaT + +cdef double INF = np.inf +cdef double NEGINF = -INF + +cdef int64_t NPY_NAT = util.get_nat() + + +cdef inline bint is_null_datetimelike(object val): + # determine if we have a null for a timedelta/datetime (or integer + # versions) + if util._checknull(val): + return True + elif val is NaT: + return True + elif util.is_timedelta64_object(val): + return val.view('int64') == NPY_NAT + elif util.is_datetime64_object(val): + return val.view('int64') == NPY_NAT + elif util.is_integer_object(val): + return val == NPY_NAT + return False + + +cdef inline bint _check_all_nulls(object val): + """ utility to check if a value is any type of null """ + cdef bint res + if PyFloat_Check(val) or PyComplex_Check(val): + res = val != val + elif val is NaT: + 
res = 1 + elif val is None: + res = 1 + elif util.is_datetime64_object(val): + res = get_datetime64_value(val) == NPY_NAT + elif util.is_timedelta64_object(val): + res = get_timedelta64_value(val) == NPY_NAT + else: + res = 0 + return res + + +cpdef bint checknull(object val): + """ + Return boolean describing if the input is NA-like, defined here as any + of: + - None + - nan + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + + Parameters + ---------- + val : object + + Returns + ------- + result : bool + + Notes + ----- + The difference between `checknull` and `checknull_old` is that `checknull` + does *not* consider INF or NEGINF to be NA. + """ + if util.is_float_object(val) or util.is_complex_object(val): + return val != val # and val != INF and val != NEGINF + elif util.is_datetime64_object(val): + return get_datetime64_value(val) == NPY_NAT + elif val is NaT: + return True + elif util.is_timedelta64_object(val): + return get_timedelta64_value(val) == NPY_NAT + elif util.is_array(val): + return False + else: + return util._checknull(val) + + +cpdef bint checknull_old(object val): + """ + Return boolean describing if the input is NA-like, defined here as any + of: + - None + - nan + - INF + - NEGINF + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + + Parameters + ---------- + val : object + + Returns + ------- + result : bool + + Notes + ----- + The difference between `checknull` and `checknull_old` is that `checknull` + does *not* consider INF or NEGINF to be NA. 
+ """ + if util.is_float_object(val) or util.is_complex_object(val): + return val != val or val == INF or val == NEGINF + elif util.is_datetime64_object(val): + return get_datetime64_value(val) == NPY_NAT + elif val is NaT: + return True + elif util.is_timedelta64_object(val): + return get_timedelta64_value(val) == NPY_NAT + elif util.is_array(val): + return False + else: + return util._checknull(val) + + +cdef inline bint _check_none_nan_inf_neginf(object val): + try: + return val is None or (PyFloat_Check(val) and + (val != val or val == INF or val == NEGINF)) + except ValueError: + return False + + +@cython.wraparound(False) +@cython.boundscheck(False) +def isnaobj(ndarray arr): + """ + Return boolean mask denoting which elements of a 1-D array are na-like, + according to the criteria defined in `_check_all_nulls`: + - None + - nan + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + + Parameters + ---------- + arr : ndarray + + Returns + ------- + result : ndarray (dtype=np.bool_) + """ + cdef: + Py_ssize_t i, n + object val + ndarray[uint8_t] result + + assert arr.ndim == 1, "'arr' must be 1-D." + + n = len(arr) + result = np.empty(n, dtype=np.uint8) + for i in range(n): + val = arr[i] + result[i] = _check_all_nulls(val) + return result.view(np.bool_) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def isnaobj_old(ndarray arr): + """ + Return boolean mask denoting which elements of a 1-D array are na-like, + defined as being any of: + - None + - nan + - INF + - NEGINF + - NaT + + Parameters + ---------- + arr : ndarray + + Returns + ------- + result : ndarray (dtype=np.bool_) + """ + cdef: + Py_ssize_t i, n + object val + ndarray[uint8_t] result + + assert arr.ndim == 1, "'arr' must be 1-D." 
+ + n = len(arr) + result = np.zeros(n, dtype=np.uint8) + for i in range(n): + val = arr[i] + result[i] = val is NaT or _check_none_nan_inf_neginf(val) + return result.view(np.bool_) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def isnaobj2d(ndarray arr): + """ + Return boolean mask denoting which elements of a 2-D array are na-like, + according to the criteria defined in `checknull`: + - None + - nan + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + + Parameters + ---------- + arr : ndarray + + Returns + ------- + result : ndarray (dtype=np.bool_) + + Notes + ----- + The difference between `isnaobj2d` and `isnaobj2d_old` is that `isnaobj2d` + does *not* consider INF or NEGINF to be NA. + """ + cdef: + Py_ssize_t i, j, n, m + object val + ndarray[uint8_t, ndim=2] result + + assert arr.ndim == 2, "'arr' must be 2-D." + + n, m = (<object> arr).shape + result = np.zeros((n, m), dtype=np.uint8) + for i in range(n): + for j in range(m): + val = arr[i, j] + if checknull(val): + result[i, j] = 1 + return result.view(np.bool_) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def isnaobj2d_old(ndarray arr): + """ + Return boolean mask denoting which elements of a 2-D array are na-like, + according to the criteria defined in `checknull_old`: + - None + - nan + - INF + - NEGINF + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + + Parameters + ---------- + arr : ndarray + + Returns + ------- + result : ndarray (dtype=np.bool_) + + Notes + ----- + The difference between `isnaobj2d` and `isnaobj2d_old` is that `isnaobj2d` + does *not* consider INF or NEGINF to be NA. + """ + cdef: + Py_ssize_t i, j, n, m + object val + ndarray[uint8_t, ndim=2] result + + assert arr.ndim == 2, "'arr' must be 2-D." 
+ + n, m = (<object> arr).shape + result = np.zeros((n, m), dtype=np.uint8) + for i in range(n): + for j in range(m): + val = arr[i, j] + if checknull_old(val): + result[i, j] = 1 + return result.view(np.bool_) + + +cpdef bint isposinf_scalar(object val): + if util.is_float_object(val) and val == INF: + return True + else: + return False + + +cpdef bint isneginf_scalar(object val): + if util.is_float_object(val) and val == NEGINF: + return True + else: + return False diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 0a1d4a241b795..d09459898321e 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -25,7 +25,7 @@ from tslibs.np_datetime cimport (pandas_datetimestruct, cimport util from util cimport is_period_object, is_string_object, INT32_MIN -from lib cimport is_null_datetimelike +from missing cimport is_null_datetimelike from pandas._libs.tslib import Timestamp from tslibs.timezones cimport ( is_utc, is_tzlocal, get_utcoffset, get_dst_info) diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index ad2defc7b362f..066beb29c24ce 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -533,22 +533,6 @@ cpdef object infer_datetimelike_array(object arr): return 'mixed' -cdef inline bint is_null_datetimelike(v): - # determine if we have a null for a timedelta/datetime (or integer - # versions) - if util._checknull(v): - return True - elif v is NaT: - return True - elif util.is_timedelta64_object(v): - return v.view('int64') == iNaT - elif util.is_datetime64_object(v): - return v.view('int64') == iNaT - elif util.is_integer_object(v): - return v == iNaT - return False - - cdef inline bint is_null_datetime64(v): # determine if we have a null for a datetime (or integer versions), # excluding np.timedelta64('nat') diff --git a/pandas/_libs/src/util.pxd b/pandas/_libs/src/util.pxd index 7361aa36144c5..61783ab47cb86 100644 --- a/pandas/_libs/src/util.pxd +++ b/pandas/_libs/src/util.pxd @@ -111,16 
+111,6 @@ cdef inline bint _checknull(object val): except ValueError: return False -cdef inline bint _checknull_old(object val): - import numpy as np - cdef double INF = np.inf - cdef double NEGINF = -INF - try: - return val is None or (cpython.PyFloat_Check(val) and - (val != val or val == INF or val == NEGINF)) - except ValueError: - return False - cdef inline bint _checknan(object val): return not cnp.PyArray_Check(val) and val != val diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd index c764f486c0b12..b74cf5b79c4cb 100644 --- a/pandas/_libs/tslib.pxd +++ b/pandas/_libs/tslib.pxd @@ -1,5 +1,3 @@ from numpy cimport ndarray, int64_t from tslibs.conversion cimport convert_to_tsobject - -cdef bint _check_all_nulls(obj) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a119e22b8e3ee..ea4f4728a0741 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -9,17 +9,13 @@ from numpy cimport int64_t, import_array, ndarray, float64_t import numpy as np -from cpython cimport ( - PyTypeObject, - PyFloat_Check, - PyComplex_Check) +from cpython cimport PyTypeObject, PyFloat_Check cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object) from util cimport (is_integer_object, is_float_object, is_string_object, - is_datetime64_object, is_timedelta64_object) -cimport util + is_datetime64_object) from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, PyDateTime_IMPORT, @@ -35,7 +31,6 @@ from tslibs.np_datetime cimport (check_dts_bounds, dt64_to_dtstruct, dtstruct_to_dt64, pydatetime_to_dt64, pydate_to_dt64, get_datetime64_value, - get_timedelta64_value, days_per_month_table, dayofweek, is_leapyear) from tslibs.np_datetime import OutOfBoundsDatetime @@ -58,7 +53,6 @@ from tslibs.timedeltas import Timedelta from tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_pytz, - get_timezone, get_dst_info) from tslibs.conversion cimport (tz_convert_single, _TSObject, convert_datetime_to_tsobject, @@ -213,24 +207,6 
@@ def ints_to_pytimedelta(ndarray[int64_t] arr, box=False): return result -cdef inline bint _check_all_nulls(object val): - """ utility to check if a value is any type of null """ - cdef bint res - if PyFloat_Check(val) or PyComplex_Check(val): - res = val != val - elif val is NaT: - res = 1 - elif val is None: - res = 1 - elif is_datetime64_object(val): - res = get_datetime64_value(val) == NPY_NAT - elif is_timedelta64_object(val): - res = get_timedelta64_value(val) == NPY_NAT - else: - res = 0 - return res - - cdef PyTypeObject* ts_type = <PyTypeObject*> Timestamp diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index d8973dd2eb27a..7cae536c5edd9 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -2,7 +2,7 @@ missing types & inference """ import numpy as np -from pandas._libs import lib +from pandas._libs import lib, missing as libmissing from pandas._libs.tslib import NaT, iNaT from .generic import (ABCMultiIndex, ABCSeries, ABCIndexClass, ABCGeneric) @@ -22,6 +22,9 @@ _NS_DTYPE) from .inference import is_list_like +isposinf_scalar = libmissing.isposinf_scalar +isneginf_scalar = libmissing.isneginf_scalar + def isna(obj): """Detect missing values (NaN in numeric arrays, None/NaN in object arrays) @@ -50,7 +53,7 @@ def isna(obj): def _isna_new(obj): if is_scalar(obj): - return lib.checknull(obj) + return libmissing.checknull(obj) # hack (for now) because MI registers as ndarray elif isinstance(obj, ABCMultiIndex): raise NotImplementedError("isna is not defined for MultiIndex") @@ -76,7 +79,7 @@ def _isna_old(obj): boolean ndarray or boolean """ if is_scalar(obj): - return lib.checknull_old(obj) + return libmissing.checknull_old(obj) # hack (for now) because MI registers as ndarray elif isinstance(obj, ABCMultiIndex): raise NotImplementedError("isna is not defined for MultiIndex") @@ -143,7 +146,7 @@ def _isna_ndarraylike(obj): result = np.zeros(values.shape, dtype=bool) else: result = np.empty(shape, dtype=bool) - vec = 
lib.isnaobj(values.ravel()) + vec = libmissing.isnaobj(values.ravel()) result[...] = vec.reshape(shape) elif needs_i8_conversion(obj): @@ -172,7 +175,7 @@ def _isna_ndarraylike_old(obj): result = np.zeros(values.shape, dtype=bool) else: result = np.empty(shape, dtype=bool) - vec = lib.isnaobj_old(values.ravel()) + vec = libmissing.isnaobj_old(values.ravel()) result[:] = vec.reshape(shape) elif is_datetime64_dtype(dtype): diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index af24537cabf90..a36e82edf6e57 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -11,8 +11,8 @@ from pandas.io.formats.css import CSSResolver, CSSWarning from pandas.io.formats.printing import pprint_thing from pandas.core.common import _any_not_none -from pandas.core.dtypes.common import is_float -import pandas._libs.lib as lib +from pandas.core.dtypes.common import is_float, is_scalar +from pandas.core.dtypes import missing from pandas import Index, MultiIndex, PeriodIndex from pandas.io.formats.common import get_level_lengths @@ -381,12 +381,12 @@ def __init__(self, df, na_rep='', float_format=None, cols=None, self.inf_rep = inf_rep def _format_value(self, val): - if lib.checknull(val): + if is_scalar(val) and missing.isna(val): val = self.na_rep elif is_float(val): - if lib.isposinf_scalar(val): + if missing.isposinf_scalar(val): val = self.inf_rep - elif lib.isneginf_scalar(val): + elif missing.isneginf_scalar(val): val = '-{inf}'.format(inf=self.inf_rep) elif self.float_format is not None: val = float(self.float_format % val) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ca3b1cfb18b18..e116635c99264 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -20,6 +20,7 @@ is_datetimetz, is_integer, is_float, + is_scalar, is_numeric_dtype, is_datetime64_dtype, is_timedelta64_dtype, @@ -37,7 +38,7 @@ _stringify_path) from pandas.io.formats.printing import adjoin, justify, pprint_thing from 
pandas.io.formats.common import get_level_lengths -import pandas._libs.lib as lib +from pandas._libs import lib from pandas._libs.tslib import (iNaT, Timestamp, Timedelta, format_array_from_datetime) from pandas.core.indexes.datetimes import DatetimeIndex @@ -1860,7 +1861,7 @@ def _format_strings(self): (lambda x: pprint_thing(x, escape_chars=('\t', '\r', '\n')))) def _format(x): - if self.na_rep is not None and lib.checknull(x): + if self.na_rep is not None and is_scalar(x) and isna(x): if x is None: return 'None' elif x is pd.NaT: @@ -2186,7 +2187,7 @@ def _is_dates_only(values): def _format_datetime64(x, tz=None, nat_rep='NaT'): - if x is None or lib.checknull(x): + if x is None or (is_scalar(x) and isna(x)): return nat_rep if tz is not None or not isinstance(x, Timestamp): @@ -2196,7 +2197,7 @@ def _format_datetime64(x, tz=None, nat_rep='NaT'): def _format_datetime64_dateonly(x, nat_rep='NaT', date_format=None): - if x is None or lib.checknull(x): + if x is None or (is_scalar(x) and isna(x)): return nat_rep if not isinstance(x, Timestamp): @@ -2281,7 +2282,7 @@ def _get_format_timedelta64(values, nat_rep='NaT', box=False): format = 'long' def _formatter(x): - if x is None or lib.checknull(x): + if x is None or (is_scalar(x) and isna(x)): return nat_rep if not isinstance(x, Timedelta): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 34ed9d3142923..a96dd3c232636 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -15,7 +15,7 @@ import pytest import pandas as pd -from pandas._libs import tslib, lib +from pandas._libs import tslib, lib, missing as libmissing from pandas import (Series, Index, DataFrame, Timedelta, DatetimeIndex, TimedeltaIndex, Timestamp, Panel, Period, Categorical, isna) @@ -260,17 +260,17 @@ def test_infer_dtype_bytes(self): def test_isinf_scalar(self): # GH 11352 - assert lib.isposinf_scalar(float('inf')) - assert lib.isposinf_scalar(np.inf) - assert 
not lib.isposinf_scalar(-np.inf) - assert not lib.isposinf_scalar(1) - assert not lib.isposinf_scalar('a') - - assert lib.isneginf_scalar(float('-inf')) - assert lib.isneginf_scalar(-np.inf) - assert not lib.isneginf_scalar(np.inf) - assert not lib.isneginf_scalar(1) - assert not lib.isneginf_scalar('a') + assert libmissing.isposinf_scalar(float('inf')) + assert libmissing.isposinf_scalar(np.inf) + assert not libmissing.isposinf_scalar(-np.inf) + assert not libmissing.isposinf_scalar(1) + assert not libmissing.isposinf_scalar('a') + + assert libmissing.isneginf_scalar(float('-inf')) + assert libmissing.isneginf_scalar(-np.inf) + assert not libmissing.isneginf_scalar(np.inf) + assert not libmissing.isneginf_scalar(1) + assert not libmissing.isneginf_scalar('a') def test_maybe_convert_numeric_infinities(self): # see gh-13274 diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index d3c9ca51af18f..fd2c63ef5b37e 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -9,6 +9,8 @@ import pandas as pd from pandas.core import config as cf from pandas.compat import u + +from pandas._libs import missing as libmissing from pandas._libs.tslib import iNaT from pandas import (NaT, Float64Index, Series, DatetimeIndex, TimedeltaIndex, date_range) @@ -333,3 +335,52 @@ def test_na_value_for_dtype(): for dtype in ['O']: assert np.isnan(na_value_for_dtype(np.dtype(dtype))) + + +class TestNAObj(object): + + _1d_methods = ['isnaobj', 'isnaobj_old'] + _2d_methods = ['isnaobj2d', 'isnaobj2d_old'] + + def _check_behavior(self, arr, expected): + for method in TestNAObj._1d_methods: + result = getattr(libmissing, method)(arr) + tm.assert_numpy_array_equal(result, expected) + + arr = np.atleast_2d(arr) + expected = np.atleast_2d(expected) + + for method in TestNAObj._2d_methods: + result = getattr(libmissing, method)(arr) + tm.assert_numpy_array_equal(result, expected) + + def test_basic(self): + arr = np.array([1, 
None, 'foo', -5.1, pd.NaT, np.nan]) + expected = np.array([False, True, False, False, True, True]) + + self._check_behavior(arr, expected) + + def test_non_obj_dtype(self): + arr = np.array([1, 3, np.nan, 5], dtype=float) + expected = np.array([False, False, True, False]) + + self._check_behavior(arr, expected) + + def test_empty_arr(self): + arr = np.array([]) + expected = np.array([], dtype=bool) + + self._check_behavior(arr, expected) + + def test_empty_str_inp(self): + arr = np.array([""]) # empty but not na + expected = np.array([False]) + + self._check_behavior(arr, expected) + + def test_empty_like(self): + # see gh-13717: no segfaults! + arr = np.empty_like([None]) + expected = np.array([True]) + + self._check_behavior(arr, expected) diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index 75aa9aa4e8198..10061204df42a 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -3,8 +3,7 @@ import pytest import numpy as np -import pandas as pd -import pandas._libs.lib as lib +from pandas._libs import lib import pandas.util.testing as tm @@ -199,52 +198,3 @@ def test_get_reverse_indexer(self): result = lib.get_reverse_indexer(indexer, 5) expected = np.array([4, 2, 3, 6, 7], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) - - -class TestNAObj(object): - - _1d_methods = ['isnaobj', 'isnaobj_old'] - _2d_methods = ['isnaobj2d', 'isnaobj2d_old'] - - def _check_behavior(self, arr, expected): - for method in TestNAObj._1d_methods: - result = getattr(lib, method)(arr) - tm.assert_numpy_array_equal(result, expected) - - arr = np.atleast_2d(arr) - expected = np.atleast_2d(expected) - - for method in TestNAObj._2d_methods: - result = getattr(lib, method)(arr) - tm.assert_numpy_array_equal(result, expected) - - def test_basic(self): - arr = np.array([1, None, 'foo', -5.1, pd.NaT, np.nan]) - expected = np.array([False, True, False, False, True, True]) - - self._check_behavior(arr, expected) - - def test_non_obj_dtype(self): - arr = 
np.array([1, 3, np.nan, 5], dtype=float) - expected = np.array([False, False, True, False]) - - self._check_behavior(arr, expected) - - def test_empty_arr(self): - arr = np.array([]) - expected = np.array([], dtype=bool) - - self._check_behavior(arr, expected) - - def test_empty_str_inp(self): - arr = np.array([""]) # empty but not na - expected = np.array([False]) - - self._check_behavior(arr, expected) - - def test_empty_like(self): - # see gh-13717: no segfaults! - arr = np.empty_like([None]) - expected = np.array([True]) - - self._check_behavior(arr, expected) diff --git a/setup.py b/setup.py index c5ec33b8d5064..7e56298d1b20b 100755 --- a/setup.py +++ b/setup.py @@ -338,6 +338,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/indexing.pyx', 'pandas/_libs/interval.pyx', 'pandas/_libs/hashing.pyx', + 'pandas/_libs/missing.pyx', 'pandas/_libs/testing.pyx', 'pandas/_libs/window.pyx', 'pandas/_libs/sparse.pyx', @@ -495,7 +496,7 @@ def pxd(name): 'pyxfile': '_libs/hashing'}, '_libs.hashtable': { 'pyxfile': '_libs/hashtable', - 'pxdfiles': ['_libs/hashtable'], + 'pxdfiles': ['_libs/hashtable', '_libs/missing'], 'depends': (['pandas/_libs/src/klib/khash_python.h'] + _pxi_dep['hashtable'])}, '_libs.index': { @@ -515,7 +516,12 @@ def pxd(name): 'depends': _pxi_dep['join']}, '_libs.lib': { 'pyxfile': '_libs/lib', + 'pxdfiles': ['_libs/src/util', '_libs/missing'], 'depends': lib_depends + tseries_depends}, + '_libs.missing': { + 'pyxfile': '_libs/missing', + 'pxdfiles': ['_libs/src/util'], + 'depends': tseries_depends}, '_libs.parsers': { 'pyxfile': '_libs/parsers', 'depends': ['pandas/_libs/src/parser/tokenizer.h',