Skip to content

Commit

Permalink
Prevent passing invalid kwds to DateOffset constructors (pandas-dev#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Nov 25, 2017
1 parent 0bcd77e commit 06518b2
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 17 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ Other API Changes
- :func:`Dataframe.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)

- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`).

.. _whatsnew_0220.deprecations:

Expand Down
29 changes: 28 additions & 1 deletion pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def _validate_business_time(t_input):
# ---------------------------------------------------------------------
# Constructor Helpers

_rd_kwds = set([
relativedelta_kwds = set([
'years', 'months', 'weeks', 'days',
'year', 'month', 'week', 'day', 'weekday',
'hour', 'minute', 'second', 'microsecond',
Expand Down Expand Up @@ -406,6 +406,33 @@ class _BaseOffset(object):
# will raise NotImplementedError.
return get_day_of_month(other, self._day_opt)

def _validate_n(self, n):
"""
Require that `n` be a nonzero integer.
Parameters
----------
n : int
Returns
-------
nint : int
Raises
------
TypeError if `int(n)` raises
ValueError if n != int(n)
"""
try:
nint = int(n)
except (ValueError, TypeError):
raise TypeError('`n` argument must be an integer, '
'got {ntype}'.format(ntype=type(n)))
if n != nint:
raise ValueError('`n` argument must be an integer, '
'got {n}'.format(n=n))
return nint


class BaseOffset(_BaseOffset):
# Here we add __rfoo__ methods that don't play well with cdef classes
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/tseries/offsets/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@ def offset_types(request):
return request.param


@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__ if
issubclass(getattr(offsets, o), offsets.MonthOffset)
and o != 'MonthOffset'])
def month_classes(request):
return request.param


@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__ if
issubclass(getattr(offsets, o), offsets.Tick)])
def tick_classes(request):
return request.param


@pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', 'US/Eastern',
'dateutil/Asia/Tokyo', 'dateutil/US/Pacific'])
def tz(request):
Expand Down
37 changes: 37 additions & 0 deletions pandas/tests/tseries/offsets/test_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
get_offset, get_standard_freq)
from pandas.core.indexes.datetimes import (
_to_m8, DatetimeIndex, _daterange_cache)
import pandas._libs.tslibs.offsets as liboffsets
from pandas._libs.tslibs.offsets import WeekDay, CacheableOffset
from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd,
BusinessHour, WeekOfMonth, CBMonthEnd,
Expand Down Expand Up @@ -4682,9 +4683,45 @@ def test_all_offset_classes(self, tup):
assert first == second


# ---------------------------------------------------------------------
def test_get_offset_day_error():
# subclass of _BaseOffset must override _day_opt attribute, or we should
# get a NotImplementedError

with pytest.raises(NotImplementedError):
DateOffset()._get_offset_day(datetime.now())


@pytest.mark.parametrize('kwd', sorted(list(liboffsets.relativedelta_kwds)))
def test_valid_month_attributes(kwd, month_classes):
# GH#18226
cls = month_classes
# check that we cannot create e.g. MonthEnd(weeks=3)
with pytest.raises(TypeError):
cls(**{kwd: 3})


@pytest.mark.parametrize('kwd', sorted(list(liboffsets.relativedelta_kwds)))
def test_valid_tick_attributes(kwd, tick_classes):
# GH#18226
cls = tick_classes
# check that we cannot create e.g. Hour(weeks=3)
with pytest.raises(TypeError):
cls(**{kwd: 3})


def test_validate_n_error():
with pytest.raises(TypeError):
DateOffset(n='Doh!')

with pytest.raises(TypeError):
MonthBegin(n=timedelta(1))

with pytest.raises(TypeError):
BDay(n=np.array([1, 2], dtype=np.int64))


def test_require_integers(offset_types):
cls = offset_types
with pytest.raises(ValueError):
cls(n=1.5)
48 changes: 33 additions & 15 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
from datetime import date, datetime, timedelta
import functools
import operator

from datetime import date, datetime, timedelta
from pandas.compat import range
from pandas import compat
import numpy as np
Expand Down Expand Up @@ -166,7 +166,7 @@ def __add__(date):
normalize = False

def __init__(self, n=1, normalize=False, **kwds):
self.n = int(n)
self.n = self._validate_n(n)
self.normalize = normalize
self.kwds = kwds

Expand Down Expand Up @@ -473,7 +473,7 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset):
_adjust_dst = True

def __init__(self, n=1, normalize=False, offset=timedelta(0)):
self.n = int(n)
self.n = self._validate_n(n)
self.normalize = normalize
self.kwds = {'offset': offset}
self._offset = offset
Expand Down Expand Up @@ -782,7 +782,7 @@ class BusinessHour(BusinessHourMixin, SingleConstructorOffset):

def __init__(self, n=1, normalize=False, start='09:00',
end='17:00', offset=timedelta(0)):
self.n = int(n)
self.n = self._validate_n(n)
self.normalize = normalize
super(BusinessHour, self).__init__(start=start, end=end, offset=offset)

Expand Down Expand Up @@ -819,7 +819,7 @@ class CustomBusinessDay(BusinessDay):

def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
holidays=None, calendar=None, offset=timedelta(0)):
self.n = int(n)
self.n = self._validate_n(n)
self.normalize = normalize
self._offset = offset
self.kwds = {}
Expand Down Expand Up @@ -887,7 +887,7 @@ class CustomBusinessHour(BusinessHourMixin, SingleConstructorOffset):
def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
holidays=None, calendar=None,
start='09:00', end='17:00', offset=timedelta(0)):
self.n = int(n)
self.n = self._validate_n(n)
self.normalize = normalize
super(CustomBusinessHour, self).__init__(start=start,
end=end, offset=offset)
Expand Down Expand Up @@ -919,6 +919,11 @@ def next_bday(self):
class MonthOffset(SingleConstructorOffset):
_adjust_dst = True

def __init__(self, n=1, normalize=False):
self.n = self._validate_n(n)
self.normalize = normalize
self.kwds = {}

@property
def name(self):
if self.isAnchored:
Expand Down Expand Up @@ -994,7 +999,8 @@ def __init__(self, n=1, normalize=False, day_of_month=None):
msg = 'day_of_month must be {min}<=day_of_month<=27, got {day}'
raise ValueError(msg.format(min=self._min_day_of_month,
day=self.day_of_month))
self.n = int(n)

self.n = self._validate_n(n)
self.normalize = normalize
self.kwds = {'day_of_month': self.day_of_month}

Expand Down Expand Up @@ -1205,7 +1211,7 @@ class CustomBusinessMonthEnd(BusinessMixin, MonthOffset):

def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
holidays=None, calendar=None, offset=timedelta(0)):
self.n = int(n)
self.n = self._validate_n(n)
self.normalize = normalize
self._offset = offset
self.kwds = {}
Expand Down Expand Up @@ -1278,7 +1284,7 @@ class CustomBusinessMonthBegin(BusinessMixin, MonthOffset):

def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
holidays=None, calendar=None, offset=timedelta(0)):
self.n = int(n)
self.n = self._validate_n(n)
self.normalize = normalize
self._offset = offset
self.kwds = {}
Expand Down Expand Up @@ -1345,7 +1351,7 @@ class Week(EndMixin, DateOffset):
_prefix = 'W'

def __init__(self, n=1, normalize=False, weekday=None):
self.n = n
self.n = self._validate_n(n)
self.normalize = normalize
self.weekday = weekday

Expand Down Expand Up @@ -1424,7 +1430,7 @@ class WeekOfMonth(DateOffset):
_adjust_dst = True

def __init__(self, n=1, normalize=False, week=None, weekday=None):
self.n = n
self.n = self._validate_n(n)
self.normalize = normalize
self.weekday = weekday
self.week = week
Expand Down Expand Up @@ -1509,7 +1515,7 @@ class LastWeekOfMonth(DateOffset):
_prefix = 'LWOM'

def __init__(self, n=1, normalize=False, weekday=None):
self.n = n
self.n = self._validate_n(n)
self.normalize = normalize
self.weekday = weekday

Expand Down Expand Up @@ -1575,7 +1581,7 @@ class QuarterOffset(DateOffset):
# point

def __init__(self, n=1, normalize=False, startingMonth=None):
self.n = n
self.n = self._validate_n(n)
self.normalize = normalize
if startingMonth is None:
startingMonth = self._default_startingMonth
Expand Down Expand Up @@ -1820,7 +1826,7 @@ class FY5253(DateOffset):

def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1,
variation="nearest"):
self.n = n
self.n = self._validate_n(n)
self.normalize = normalize
self.startingMonth = startingMonth
self.weekday = weekday
Expand Down Expand Up @@ -2032,7 +2038,7 @@ class FY5253Quarter(DateOffset):

def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1,
qtr_with_extra_week=1, variation="nearest"):
self.n = n
self.n = self._validate_n(n)
self.normalize = normalize

self.weekday = weekday
Expand Down Expand Up @@ -2158,6 +2164,11 @@ class Easter(DateOffset):
"""
_adjust_dst = True

def __init__(self, n=1, normalize=False):
self.n = self._validate_n(n)
self.normalize = normalize
self.kwds = {}

@apply_wraps
def apply(self, other):
current_easter = easter(other.year)
Expand Down Expand Up @@ -2199,6 +2210,12 @@ class Tick(SingleConstructorOffset):
_inc = Timedelta(microseconds=1000)
_prefix = 'undefined'

def __init__(self, n=1, normalize=False):
# TODO: do Tick classes with normalize=True make sense?
self.n = self._validate_n(n)
self.normalize = normalize
self.kwds = {}

__gt__ = _tick_comp(operator.gt)
__ge__ = _tick_comp(operator.ge)
__lt__ = _tick_comp(operator.lt)
Expand Down Expand Up @@ -2257,6 +2274,7 @@ def delta(self):
def nanos(self):
return delta_to_nanoseconds(self.delta)

# TODO: Should Tick have its own apply_index?
def apply(self, other):
# Timestamp can handle tz and nano sec, thus no need to use apply_wraps
if isinstance(other, Timestamp):
Expand Down

0 comments on commit 06518b2

Please sign in to comment.