Skip to content

Commit

Permalink
Merge branch 'master' into mask_pos_args_deprecation
Browse files Browse the repository at this point in the history
  • Loading branch information
ShreyDixit authored May 20, 2021
2 parents 6e72701 + a246270 commit 56e1757
Show file tree
Hide file tree
Showing 67 changed files with 1,127 additions and 886 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ Other enhancements
- Constructing a :class:`DataFrame` or :class:`Series` with the ``data`` argument being a Python iterable that is *not* a NumPy ``ndarray`` consisting of NumPy scalars will now result in a dtype whose precision is the maximum of the precisions of the NumPy scalars; this was already the case when ``data`` is a NumPy ``ndarray`` (:issue:`40908`)
- Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
- Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
-
- :meth:`Series.replace` will now cast results to ``PeriodDtype`` where possible instead of ``object`` dtype (:issue:`41526`)

.. ---------------------------------------------------------------------------
Expand Down Expand Up @@ -649,6 +649,7 @@ Deprecations
- Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`)
- Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
- Deprecated passing arguments (apart from ``cond``) as positional in :meth:`DataFrame.mask` (:issue:`41485`)
- Deprecated passing arguments as positional (except for ``method``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`)

.. ---------------------------------------------------------------------------
Expand Down
19 changes: 14 additions & 5 deletions pandas/_libs/lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def is_integer(val: object) -> bool: ...
def is_float(val: object) -> bool: ...

def is_interval_array(values: np.ndarray) -> bool: ...
def is_period_array(values: np.ndarray) -> bool: ...
def is_datetime64_array(values: np.ndarray) -> bool: ...
def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ...
def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ...
Expand All @@ -67,50 +66,60 @@ def map_infer(
@overload # convert_datetime, convert_period and convert_to_nullable_integer all False -> np.ndarray
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
*, # every option after `objects` is keyword-only
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[False] = ...,
convert_timedelta: bool = ...,
convert_period: Literal[False] = ...,
convert_to_nullable_integer: Literal[False] = ...,
) -> np.ndarray: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
*,
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[False] = False,
convert_datetime: bool = ...,
convert_timedelta: bool = ...,
convert_period: bool = ...,
convert_to_nullable_integer: Literal[True] = ...,
) -> ArrayLike: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
*,
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[True] = ...,
convert_timedelta: bool = ...,
convert_to_nullable_integer: Literal[False] = ...,
convert_period: bool = ...,
convert_to_nullable_integer: bool = ...,
) -> ArrayLike: ...

@overload
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
*,
try_float: bool = ...,
safe: bool = ...,
convert_datetime: Literal[True] = ...,
convert_datetime: bool = ...,
convert_timedelta: bool = ...,
convert_to_nullable_integer: Literal[True] = ...,
convert_period: Literal[True] = ...,
convert_to_nullable_integer: bool = ...,
) -> ArrayLike: ...

@overload # catch-all: every flag typed as plain bool -> ArrayLike
def maybe_convert_objects(
objects: np.ndarray, # np.ndarray[object]
*, # every option after `objects` is keyword-only
try_float: bool = ...,
safe: bool = ...,
convert_datetime: bool = ...,
convert_timedelta: bool = ...,
convert_period: bool = ...,
convert_to_nullable_integer: bool = ...,
) -> ArrayLike: ...

Expand Down
77 changes: 54 additions & 23 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1186,6 +1186,7 @@ cdef class Seen:
bint coerce_numeric # coerce data to numeric
bint timedelta_ # seen_timedelta
bint datetimetz_ # seen_datetimetz
bint period_ # seen_period

def __cinit__(self, bint coerce_numeric=False):
"""
Expand All @@ -1210,6 +1211,7 @@ cdef class Seen:
self.datetime_ = False
self.timedelta_ = False
self.datetimetz_ = False
self.period_ = False
self.coerce_numeric = coerce_numeric

cdef inline bint check_uint64_conflict(self) except -1:
Expand Down Expand Up @@ -1996,18 +1998,35 @@ cpdef bint is_time_array(ndarray values, bint skipna=False):
return validator.validate(values)


cdef class PeriodValidator(TemporalValidator):
cdef inline bint is_value_typed(self, object value) except -1:
return is_period_object(value)
cdef bint is_period_array(ndarray[object] values):
"""
Is this an ndarray of Period objects (or NaT) with a single `freq`?
"""
cdef:
Py_ssize_t i, n = len(values)
int dtype_code = -10000 # i.e. c_FreqGroup.FR_UND
object val

cdef inline bint is_valid_null(self, object value) except -1:
return checknull_with_nat(value)
if len(values) == 0:
return False

for val in values:
if is_period_object(val):
if dtype_code == -10000:
dtype_code = val._dtype._dtype_code
elif dtype_code != val._dtype._dtype_code:
# mismatched freqs
return False
elif checknull_with_nat(val):
pass
else:
# Not a Period or NaT-like
return False

cpdef bint is_period_array(ndarray values):
cdef:
PeriodValidator validator = PeriodValidator(len(values), skipna=True)
return validator.validate(values)
if dtype_code == -10000:
# we saw all-NaTs, no actual Periods
return False
return True


cdef class IntervalValidator(Validator):
Expand Down Expand Up @@ -2249,9 +2268,13 @@ def maybe_convert_numeric(

@cython.boundscheck(False)
@cython.wraparound(False)
def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
bint safe=False, bint convert_datetime=False,
def maybe_convert_objects(ndarray[object] objects,
*,
bint try_float=False,
bint safe=False,
bint convert_datetime=False,
bint convert_timedelta=False,
bint convert_period=False,
bint convert_to_nullable_integer=False) -> "ArrayLike":
"""
Type inference function-- convert object array to proper dtype
Expand All @@ -2272,6 +2295,9 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
convert_timedelta : bool, default False
If an array-like object containing only timedelta values or NaT is
encountered, whether to convert it and return an array of m8[ns] dtype.
convert_period : bool, default False
If an array-like object contains only (homogeneous-freq) Period values
or NaT, whether to convert and return a PeriodArray.
convert_to_nullable_integer : bool, default False
If an array-like object containing only integer values (and NaN) is
encountered, whether to convert it and return an IntegerArray.
Expand All @@ -2292,7 +2318,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
int64_t[:] itimedeltas
Seen seen = Seen()
object val
float64_t fval, fnan
float64_t fval, fnan = np.nan

n = len(objects)

Expand All @@ -2311,8 +2337,6 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
timedeltas = np.empty(n, dtype='m8[ns]')
itimedeltas = timedeltas.view(np.int64)

fnan = np.nan

for i in range(n):
val = objects[i]
if itemsize_max != -1:
Expand All @@ -2330,7 +2354,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
idatetimes[i] = NPY_NAT
if convert_timedelta:
itimedeltas[i] = NPY_NAT
if not (convert_datetime or convert_timedelta):
if not (convert_datetime or convert_timedelta or convert_period):
seen.object_ = True
break
elif val is np.nan:
Expand All @@ -2343,14 +2367,6 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
elif util.is_float_object(val):
floats[i] = complexes[i] = val
seen.float_ = True
elif util.is_datetime64_object(val):
if convert_datetime:
idatetimes[i] = convert_to_tsobject(
val, None, None, 0, 0).value
seen.datetime_ = True
else:
seen.object_ = True
break
elif is_timedelta(val):
if convert_timedelta:
itimedeltas[i] = convert_to_timedelta64(val, "ns").view("i8")
Expand Down Expand Up @@ -2396,6 +2412,13 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
else:
seen.object_ = True
break
elif is_period_object(val):
if convert_period:
seen.period_ = True
break
else:
seen.object_ = True
break
elif try_float and not isinstance(val, str):
# this will convert Decimal objects
try:
Expand All @@ -2419,6 +2442,14 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
return dti._data
seen.object_ = True

if seen.period_:
if is_period_array(objects):
from pandas import PeriodIndex
pi = PeriodIndex(objects)

# unbox to PeriodArray
return pi._data

if not seen.object_:
result = None
if not safe:
Expand Down
5 changes: 4 additions & 1 deletion pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,10 @@ def array_with_unit_to_datetime(
if issubclass(values.dtype.type, (np.integer, np.float_)):
result = values.astype("M8[ns]", copy=False)
else:
result, tz = array_to_datetime(values.astype(object), errors=errors)
result, tz = array_to_datetime(
values.astype(object, copy=False),
errors=errors,
)
return result, tz

m, p = precision_from_unit(unit)
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def _box_values(self, values) -> np.ndarray:
"""
apply box func to passed values
"""
return lib.map_infer(values, self._box_func)
return lib.map_infer(values, self._box_func, convert=False)

def __iter__(self):
if self.ndim > 1:
Expand Down Expand Up @@ -599,7 +599,9 @@ def _validate_shift_value(self, fill_value):
"will raise in a future version, pass "
f"{self._scalar_type.__name__} instead.",
FutureWarning,
stacklevel=8,
# There is no way to hard-code the level since this might be
# reached directly or called from the Index or Block method
stacklevel=find_stack_level(),
)
fill_value = new_fill

Expand Down
1 change: 1 addition & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1175,6 +1175,7 @@ def to_perioddelta(self, freq) -> TimedeltaArray:
"future version. "
"Use `dtindex - dtindex.to_period(freq).to_timestamp()` instead",
FutureWarning,
# stacklevel chosen to be correct for when called from DatetimeIndex
stacklevel=3,
)
from pandas.core.arrays.timedeltas import TimedeltaArray
Expand Down
1 change: 1 addition & 0 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ def _selected_obj(self):
else:
return self.obj[self._selection]

@final
@cache_readonly
def ndim(self) -> int:
return self._selected_obj.ndim
Expand Down
14 changes: 9 additions & 5 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,17 +682,21 @@ def _try_cast(
subarr = construct_1d_object_array_from_listlike(arr)
return subarr

if dtype is None and isinstance(arr, list):
# filter out cases that we _don't_ want to go through maybe_cast_to_datetime
varr = np.array(arr, copy=False)
if varr.dtype != object or varr.size == 0:
return varr
arr = varr

try:
# GH#15832: Check if we are requesting a numeric dtype and
# that we can convert the data to the requested dtype.
if is_integer_dtype(dtype):
# this will raise if we have e.g. floats

# error: Argument 2 to "maybe_cast_to_integer_array" has incompatible type
# "Union[dtype, ExtensionDtype, None]"; expected "Union[ExtensionDtype, str,
# dtype, Type[str], Type[float], Type[int], Type[complex], Type[bool],
# Type[object]]"
maybe_cast_to_integer_array(arr, dtype) # type: ignore[arg-type]
dtype = cast(np.dtype, dtype)
maybe_cast_to_integer_array(arr, dtype)
subarr = arr
else:
subarr = maybe_cast_to_datetime(arr, dtype)
Expand Down
Loading

0 comments on commit 56e1757

Please sign in to comment.