Skip to content

Commit

Permalink
[3.11] [3.12] bpo-42663: Fix parsing TZ strings in zoneinfo module (G…
Browse files Browse the repository at this point in the history
…H-23825) (GH-110882) (GH-110889)

zoneinfo now supports the full range of values in the TZ string
determined by RFC 8536 and detects all invalid formats.
Both Python and C implementations now raise exceptions of the same
type on invalid data.
(cherry picked from commit ab08ff7)
(cherry picked from commit 72b0f0e)
  • Loading branch information
serhiy-storchaka authored Oct 15, 2023
1 parent 44558a9 commit 5c55f50
Show file tree
Hide file tree
Showing 4 changed files with 327 additions and 259 deletions.
125 changes: 118 additions & 7 deletions Lib/test/test_zoneinfo/test_zoneinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -988,23 +988,114 @@ def test_tzstr_from_utc(self):

self.assertEqual(dt_act, dt_utc)

def test_extreme_tzstr(self):
    """Check that TZ strings at the edge of the accepted ranges parse.

    Each string is handed to ``self.zone_from_tzstr`` inside its own
    subtest.  No assertion is made on the result; the test passes as long
    as constructing the zone does not raise.
    """
    boundary_tzstrs = (
        # Offset hours at the limit
        "AAA24",
        "AAA+24",
        "AAA-24",
        "AAA24BBB,J60/2,J300/2",
        "AAA+24BBB,J60/2,J300/2",
        "AAA-24BBB,J60/2,J300/2",
        "AAA4BBB24,J60/2,J300/2",
        "AAA4BBB+24,J60/2,J300/2",
        "AAA4BBB-24,J60/2,J300/2",
        # Offset minutes at the limit
        "AAA4:00BBB,J60/2,J300/2",
        "AAA4:59BBB,J60/2,J300/2",
        "AAA4BBB5:00,J60/2,J300/2",
        "AAA4BBB5:59,J60/2,J300/2",
        # Offset seconds at the limit
        "AAA4:00:00BBB,J60/2,J300/2",
        "AAA4:00:59BBB,J60/2,J300/2",
        "AAA4BBB5:00:00,J60/2,J300/2",
        "AAA4BBB5:00:59,J60/2,J300/2",
        # Largest total offset (hours, minutes and seconds together)
        "AAA24:59:59BBB5,J60/2,J300/2",
        "AAA-24:59:59BBB5,J60/2,J300/2",
        "AAA4BBB24:59:59,J60/2,J300/2",
        "AAA4BBB-24:59:59,J60/2,J300/2",
        # First and last month
        "AAA4BBB,M12.1.1/2,M1.1.1/2",
        "AAA4BBB,M1.1.1/2,M12.1.1/2",
        # First and last week
        "AAA4BBB,M1.5.1/2,M1.1.1/2",
        "AAA4BBB,M1.1.1/2,M1.5.1/2",
        # Last weekday
        "AAA4BBB,M1.1.6/2,M2.1.1/2",
        "AAA4BBB,M1.1.1/2,M2.1.6/2",
        # Plain numeric day (no "J" prefix) at the limits
        "AAA4BBB,0/2,20/2",
        "AAA4BBB,0/2,0/14",
        "AAA4BBB,20/2,365/2",
        "AAA4BBB,365/2,365/14",
        # Julian day ("J" prefix) at the limits
        "AAA4BBB,J1/2,J20/2",
        "AAA4BBB,J1/2,J1/14",
        "AAA4BBB,J20/2,J365/2",
        "AAA4BBB,J365/2,J365/14",
        # Transition hour at the limit
        "AAA4BBB,J60/167,J300/2",
        "AAA4BBB,J60/+167,J300/2",
        "AAA4BBB,J60/-167,J300/2",
        "AAA4BBB,J60/2,J300/167",
        "AAA4BBB,J60/2,J300/+167",
        "AAA4BBB,J60/2,J300/-167",
        # Transition minutes at the limit
        "AAA4BBB,J60/2:00,J300/2",
        "AAA4BBB,J60/2:59,J300/2",
        "AAA4BBB,J60/2,J300/2:00",
        "AAA4BBB,J60/2,J300/2:59",
        # Transition seconds at the limit
        "AAA4BBB,J60/2:00:00,J300/2",
        "AAA4BBB,J60/2:00:59,J300/2",
        "AAA4BBB,J60/2,J300/2:00:00",
        "AAA4BBB,J60/2,J300/2:00:59",
        # Largest total transition time
        "AAA4BBB,J60/167:59:59,J300/2",
        "AAA4BBB,J60/-167:59:59,J300/2",
        "AAA4BBB,J60/2,J300/167:59:59",
        "AAA4BBB,J60/2,J300/-167:59:59",
    )

    for boundary_tzstr in boundary_tzstrs:
        with self.subTest(tzstr=boundary_tzstr):
            self.zone_from_tzstr(boundary_tzstr)

def test_invalid_tzstr(self):
invalid_tzstrs = [
"PST8PDT", # DST but no transition specified
"+11", # Unquoted alphanumeric
"GMT,M3.2.0/2,M11.1.0/3", # Transition rule but no DST
"GMT0+11,M3.2.0/2,M11.1.0/3", # Unquoted alphanumeric in DST
"PST8PDT,M3.2.0/2", # Only one transition rule
# Invalid offsets
"STD+25",
"STD-25",
"STD+374",
"STD+374DST,M3.2.0/2,M11.1.0/3",
"STD+23DST+25,M3.2.0/2,M11.1.0/3",
"STD-23DST-25,M3.2.0/2,M11.1.0/3",
# Invalid offset hours
"AAA168",
"AAA+168",
"AAA-168",
"AAA168BBB,J60/2,J300/2",
"AAA+168BBB,J60/2,J300/2",
"AAA-168BBB,J60/2,J300/2",
"AAA4BBB168,J60/2,J300/2",
"AAA4BBB+168,J60/2,J300/2",
"AAA4BBB-168,J60/2,J300/2",
# Invalid offset minutes
"AAA4:0BBB,J60/2,J300/2",
"AAA4:100BBB,J60/2,J300/2",
"AAA4BBB5:0,J60/2,J300/2",
"AAA4BBB5:100,J60/2,J300/2",
# Invalid offset seconds
"AAA4:00:0BBB,J60/2,J300/2",
"AAA4:00:100BBB,J60/2,J300/2",
"AAA4BBB5:00:0,J60/2,J300/2",
"AAA4BBB5:00:100,J60/2,J300/2",
# Completely invalid dates
"AAA4BBB,M1443339,M11.1.0/3",
"AAA4BBB,M3.2.0/2,0349309483959c",
"AAA4BBB,,J300/2",
"AAA4BBB,z,J300/2",
"AAA4BBB,J60/2,",
"AAA4BBB,J60/2,z",
# Invalid months
"AAA4BBB,M13.1.1/2,M1.1.1/2",
"AAA4BBB,M1.1.1/2,M13.1.1/2",
Expand All @@ -1024,6 +1115,26 @@ def test_invalid_tzstr(self):
# Invalid julian offset
"AAA4BBB,J0/2,J20/2",
"AAA4BBB,J20/2,J366/2",
# Invalid transition time
"AAA4BBB,J60/2/3,J300/2",
"AAA4BBB,J60/2,J300/2/3",
# Invalid transition hour
"AAA4BBB,J60/168,J300/2",
"AAA4BBB,J60/+168,J300/2",
"AAA4BBB,J60/-168,J300/2",
"AAA4BBB,J60/2,J300/168",
"AAA4BBB,J60/2,J300/+168",
"AAA4BBB,J60/2,J300/-168",
# Invalid transition minutes
"AAA4BBB,J60/2:0,J300/2",
"AAA4BBB,J60/2:100,J300/2",
"AAA4BBB,J60/2,J300/2:0",
"AAA4BBB,J60/2,J300/2:100",
# Invalid transition seconds
"AAA4BBB,J60/2:00:0,J300/2",
"AAA4BBB,J60/2:00:100,J300/2",
"AAA4BBB,J60/2,J300/2:00:0",
"AAA4BBB,J60/2,J300/2:00:100",
]

for invalid_tzstr in invalid_tzstrs:
Expand Down
86 changes: 53 additions & 33 deletions Lib/zoneinfo/_zoneinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,8 +517,8 @@ class _DayOffset:
__slots__ = ["d", "julian", "hour", "minute", "second"]

def __init__(self, d, julian, hour=2, minute=0, second=0):
if not (0 + julian) <= d <= 365:
min_day = 0 + julian
min_day = 0 + julian # convert bool to int
if not min_day <= d <= 365:
raise ValueError(f"d must be in [{min_day}, 365], not: {d}")

self.d = d
Expand Down Expand Up @@ -560,11 +560,11 @@ class _CalendarOffset:
)

def __init__(self, m, w, d, hour=2, minute=0, second=0):
if not 0 < m <= 12:
raise ValueError("m must be in (0, 12]")
if not 1 <= m <= 12:
raise ValueError("m must be in [1, 12]")

if not 0 < w <= 5:
raise ValueError("w must be in (0, 5]")
if not 1 <= w <= 5:
raise ValueError("w must be in [1, 5]")

if not 0 <= d <= 6:
raise ValueError("d must be in [0, 6]")
Expand Down Expand Up @@ -634,18 +634,21 @@ def _parse_tz_str(tz_str):

offset_str, *start_end_str = tz_str.split(",", 1)

# fmt: off
parser_re = re.compile(
r"(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" +
r"((?P<stdoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?)" +
r"((?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" +
r"((?P<dstoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?))?" +
r")?" + # dst
r")?$" # stdoff
r"""
(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+-]+>)
(?:
(?P<stdoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)
(?:
(?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+-]+>)
(?P<dstoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)?
)? # dst
)? # stdoff
""",
re.ASCII|re.VERBOSE
)
# fmt: on

m = parser_re.match(offset_str)
m = parser_re.fullmatch(offset_str)

if m is None:
raise ValueError(f"{tz_str} is not a valid TZ string")
Expand Down Expand Up @@ -696,16 +699,17 @@ def _parse_tz_str(tz_str):


def _parse_dst_start_end(dststr):
date, *time = dststr.split("/")
if date[0] == "M":
date, *time = dststr.split("/", 1)
type = date[:1]
if type == "M":
n_is_julian = False
m = re.match(r"M(\d{1,2})\.(\d).(\d)$", date)
m = re.fullmatch(r"M(\d{1,2})\.(\d).(\d)", date, re.ASCII)
if m is None:
raise ValueError(f"Invalid dst start/end date: {dststr}")
date_offset = tuple(map(int, m.groups()))
offset = _CalendarOffset(*date_offset)
else:
if date[0] == "J":
if type == "J":
n_is_julian = True
date = date[1:]
else:
Expand All @@ -715,38 +719,54 @@ def _parse_dst_start_end(dststr):
offset = _DayOffset(doy, n_is_julian)

if time:
time_components = list(map(int, time[0].split(":")))
n_components = len(time_components)
if n_components < 3:
time_components.extend([0] * (3 - n_components))
offset.hour, offset.minute, offset.second = time_components
offset.hour, offset.minute, offset.second = _parse_transition_time(time[0])

return offset


def _parse_transition_time(time_str):
match = re.fullmatch(
r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
time_str,
re.ASCII
)
if match is None:
raise ValueError(f"Invalid time: {time_str}")

h, m, s = (int(v or 0) for v in match.group("h", "m", "s"))

if h > 167:
raise ValueError(
f"Hour must be in [0, 167]: {time_str}"
)

if match.group("sign") == "-":
h, m, s = -h, -m, -s

return h, m, s


def _parse_tz_delta(tz_delta):
match = re.match(
r"(?P<sign>[+-])?(?P<h>\d{1,2})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
match = re.fullmatch(
r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
tz_delta,
re.ASCII
)
# Anything passed to this function should already have hit an equivalent
# regular expression to find the section to parse.
assert match is not None, tz_delta

h, m, s = (
int(v) if v is not None else 0
for v in map(match.group, ("h", "m", "s"))
)
h, m, s = (int(v or 0) for v in match.group("h", "m", "s"))

total = h * 3600 + m * 60 + s

if not -86400 < total < 86400:
if h > 24:
raise ValueError(
f"Offset must be strictly between -24h and +24h: {tz_delta}"
f"Offset hours must be in [0, 24]: {tz_delta}"
)

# Yes, +5 maps to an offset of -5h
if match.group("sign") != "-":
total *= -1
total = -total

return total
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
:mod:`zoneinfo` now supports the full range of values in the TZ string
determined by RFC 8536 and detects all invalid formats.
Both Python and C implementations now raise exceptions of the same
type on invalid data.
Loading

0 comments on commit 5c55f50

Please sign in to comment.