From 1b03b667727012347dd18696f6fc3184fd9a8e7d Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sun, 8 Oct 2017 21:25:50 +0100 Subject: [PATCH 1/3] ERR: Raise ValueError when week is passed in to_datetime format without day or year (#16774) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/tools/datetimes.py | 10 ++++++++++ pandas/tests/indexes/datetimes/test_tools.py | 13 +++++++++++++ 3 files changed, 24 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 1e9c402dac73e..033e428bcbbb0 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -701,6 +701,7 @@ Other API Changes - :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`) - :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). - :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) +- :func:`to_datetime` now raises a ``ValueError`` when format includes ``%W`` or ``%U`` without also including day of the week and calendar year (:issue:`16774`) - Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`) - Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`) - Restricted DateOffset keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`). diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index e335dfe3a4142..f4e68dc1bcc27 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -254,6 +254,16 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): require_iso8601 = not infer_datetime_format format = None + if format is not None: + if '%W' in format or '%U' in format: + if '%Y' not in format and '%y' not in format: + raise ValueError("Cannot use '%W' or '%U' without " + "day and year") + if ('%A' not in format and '%a' not in format and '%w' not + in format): + raise ValueError("Cannot use '%W' or '%U' without " + "day and year") + try: result = None diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index b8ce1f0af6ea8..1157d13f91e82 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -372,6 +372,19 @@ def test_datetime_invalid_datatype(self): with pytest.raises(TypeError): pd.to_datetime(pd.to_datetime) + @pytest.mark.parametrize('date, format', + [('2017-20', '%Y-%W'), + ('20 Sunday', '%W %A'), + ('20 Sun', '%W %a'), + ('2017-21', '%Y-%U'), + ('20 Sunday', '%U %A'), + ('20 Sun', '%U %a')]) + def test_week_without_day_and_calendar_year(self, date, format): + # GH16774 + + with pytest.raises(ValueError): + pd.to_datetime(date, format=format) + class TestToDatetimeUnit(object): From c5bf6b86a4d13744576fac9a491e4c929e1ad45a Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sun, 8 Oct 2017 23:29:45 +0100 Subject: [PATCH 2/3] Replace pytest.raises with tm.assert_raises_regex --- pandas/tests/indexes/datetimes/test_tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 1157d13f91e82..330ec9f357655 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -382,7 +382,8 @@ def test_datetime_invalid_datatype(self): def test_week_without_day_and_calendar_year(self, date, format): # GH16774 - with pytest.raises(ValueError): + msg = "Cannot use '%W' or '%U' without day and year" + with tm.assert_raises_regex(ValueError, msg): pd.to_datetime(date, format=format) From c2c8810f4c1ed9327285249552ee72830571a67c Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sat, 14 Oct 2017 16:36:49 +0100 Subject: [PATCH 3/3] Moving check to strptime.pyx --- pandas/_libs/tslibs/strptime.pyx | 10 ++++++++++ pandas/core/tools/datetimes.py | 10 ---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 20b24d6be9a58..59a7376280da0 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -83,6 +83,16 @@ def array_strptime(ndarray[object] values, object fmt, assert is_raise or is_ignore or is_coerce + if fmt is not None: + if '%W' in fmt or '%U' in fmt: + if '%Y' not in fmt and '%y' not in fmt: + raise ValueError("Cannot use '%W' or '%U' without " + "day and year") + if ('%A' not in fmt and '%a' not in fmt and '%w' not + in fmt): + raise ValueError("Cannot use '%W' or '%U' without " + "day and year") + global _TimeRE_cache, _regex_cache with _cache_lock: if _getlang() != _TimeRE_cache.locale_time.lang: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index f4e68dc1bcc27..e335dfe3a4142 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -254,16 +254,6 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): require_iso8601 = not infer_datetime_format format = None - if format is not None: - if '%W' in format or '%U' in format: - if '%Y' not in format and '%y' not in format: - raise ValueError("Cannot use '%W' or '%U' without " - "day and year") - if ('%A' not in format and '%a' not in format and '%w' not - in format): - raise ValueError("Cannot use '%W' or '%U' without " - "day and year") - try: result = None