Skip to content

Commit

Permalink
bpo-42663: Fix parsing TZ strings in zoneinfo module (pythonGH-23825)
Browse files Browse the repository at this point in the history
zoneinfo now supports the full range of values in the TZ string
determined by RFC 8536 and detects all invalid formats.
Both Python and C implementations now raise exceptions of the same
type on invalid data.
  • Loading branch information
serhiy-storchaka authored Oct 14, 2023
1 parent 12deda7 commit ab08ff7
Show file tree
Hide file tree
Showing 4 changed files with 326 additions and 258 deletions.
125 changes: 118 additions & 7 deletions Lib/test/test_zoneinfo/test_zoneinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -1001,23 +1001,114 @@ def test_tzstr_from_utc(self):

self.assertEqual(dt_act, dt_utc)

def test_extreme_tzstr(self):
    # Every string below sits on the edge of what the parser accepts;
    # building a zone from each of them must succeed without raising.
    boundary_cases = (
        # Largest accepted hour in the std / dst offset
        "AAA24",
        "AAA+24",
        "AAA-24",
        "AAA24BBB,J60/2,J300/2",
        "AAA+24BBB,J60/2,J300/2",
        "AAA-24BBB,J60/2,J300/2",
        "AAA4BBB24,J60/2,J300/2",
        "AAA4BBB+24,J60/2,J300/2",
        "AAA4BBB-24,J60/2,J300/2",
        # Smallest and largest minutes in an offset
        "AAA4:00BBB,J60/2,J300/2",
        "AAA4:59BBB,J60/2,J300/2",
        "AAA4BBB5:00,J60/2,J300/2",
        "AAA4BBB5:59,J60/2,J300/2",
        # Smallest and largest seconds in an offset
        "AAA4:00:00BBB,J60/2,J300/2",
        "AAA4:00:59BBB,J60/2,J300/2",
        "AAA4BBB5:00:00,J60/2,J300/2",
        "AAA4BBB5:00:59,J60/2,J300/2",
        # Largest total offset (hours, minutes and seconds combined)
        "AAA24:59:59BBB5,J60/2,J300/2",
        "AAA-24:59:59BBB5,J60/2,J300/2",
        "AAA4BBB24:59:59,J60/2,J300/2",
        "AAA4BBB-24:59:59,J60/2,J300/2",
        # First and last month in an M-rule
        "AAA4BBB,M12.1.1/2,M1.1.1/2",
        "AAA4BBB,M1.1.1/2,M12.1.1/2",
        # First and last week in an M-rule
        "AAA4BBB,M1.5.1/2,M1.1.1/2",
        "AAA4BBB,M1.1.1/2,M1.5.1/2",
        # Last weekday in an M-rule
        "AAA4BBB,M1.1.6/2,M2.1.1/2",
        "AAA4BBB,M1.1.1/2,M2.1.6/2",
        # First and last plain (non-J) day number
        "AAA4BBB,0/2,20/2",
        "AAA4BBB,0/2,0/14",
        "AAA4BBB,20/2,365/2",
        "AAA4BBB,365/2,365/14",
        # First and last J-prefixed day number
        "AAA4BBB,J1/2,J20/2",
        "AAA4BBB,J1/2,J1/14",
        "AAA4BBB,J20/2,J365/2",
        "AAA4BBB,J365/2,J365/14",
        # Largest accepted transition hour
        "AAA4BBB,J60/167,J300/2",
        "AAA4BBB,J60/+167,J300/2",
        "AAA4BBB,J60/-167,J300/2",
        "AAA4BBB,J60/2,J300/167",
        "AAA4BBB,J60/2,J300/+167",
        "AAA4BBB,J60/2,J300/-167",
        # Smallest and largest transition minutes
        "AAA4BBB,J60/2:00,J300/2",
        "AAA4BBB,J60/2:59,J300/2",
        "AAA4BBB,J60/2,J300/2:00",
        "AAA4BBB,J60/2,J300/2:59",
        # Smallest and largest transition seconds
        "AAA4BBB,J60/2:00:00,J300/2",
        "AAA4BBB,J60/2:00:59,J300/2",
        "AAA4BBB,J60/2,J300/2:00:00",
        "AAA4BBB,J60/2,J300/2:00:59",
        # Largest total transition time
        "AAA4BBB,J60/167:59:59,J300/2",
        "AAA4BBB,J60/-167:59:59,J300/2",
        "AAA4BBB,J60/2,J300/167:59:59",
        "AAA4BBB,J60/2,J300/-167:59:59",
    )

    for tzstr in boundary_cases:
        with self.subTest(tzstr=tzstr):
            self.zone_from_tzstr(tzstr)

def test_invalid_tzstr(self):
invalid_tzstrs = [
"PST8PDT", # DST but no transition specified
"+11", # Unquoted alphanumeric
"GMT,M3.2.0/2,M11.1.0/3", # Transition rule but no DST
"GMT0+11,M3.2.0/2,M11.1.0/3", # Unquoted alphanumeric in DST
"PST8PDT,M3.2.0/2", # Only one transition rule
# Invalid offsets
"STD+25",
"STD-25",
"STD+374",
"STD+374DST,M3.2.0/2,M11.1.0/3",
"STD+23DST+25,M3.2.0/2,M11.1.0/3",
"STD-23DST-25,M3.2.0/2,M11.1.0/3",
# Invalid offset hours
"AAA168",
"AAA+168",
"AAA-168",
"AAA168BBB,J60/2,J300/2",
"AAA+168BBB,J60/2,J300/2",
"AAA-168BBB,J60/2,J300/2",
"AAA4BBB168,J60/2,J300/2",
"AAA4BBB+168,J60/2,J300/2",
"AAA4BBB-168,J60/2,J300/2",
# Invalid offset minutes
"AAA4:0BBB,J60/2,J300/2",
"AAA4:100BBB,J60/2,J300/2",
"AAA4BBB5:0,J60/2,J300/2",
"AAA4BBB5:100,J60/2,J300/2",
# Invalid offset seconds
"AAA4:00:0BBB,J60/2,J300/2",
"AAA4:00:100BBB,J60/2,J300/2",
"AAA4BBB5:00:0,J60/2,J300/2",
"AAA4BBB5:00:100,J60/2,J300/2",
# Completely invalid dates
"AAA4BBB,M1443339,M11.1.0/3",
"AAA4BBB,M3.2.0/2,0349309483959c",
"AAA4BBB,,J300/2",
"AAA4BBB,z,J300/2",
"AAA4BBB,J60/2,",
"AAA4BBB,J60/2,z",
# Invalid months
"AAA4BBB,M13.1.1/2,M1.1.1/2",
"AAA4BBB,M1.1.1/2,M13.1.1/2",
Expand All @@ -1037,6 +1128,26 @@ def test_invalid_tzstr(self):
# Invalid julian offset
"AAA4BBB,J0/2,J20/2",
"AAA4BBB,J20/2,J366/2",
# Invalid transition time
"AAA4BBB,J60/2/3,J300/2",
"AAA4BBB,J60/2,J300/2/3",
# Invalid transition hour
"AAA4BBB,J60/168,J300/2",
"AAA4BBB,J60/+168,J300/2",
"AAA4BBB,J60/-168,J300/2",
"AAA4BBB,J60/2,J300/168",
"AAA4BBB,J60/2,J300/+168",
"AAA4BBB,J60/2,J300/-168",
# Invalid transition minutes
"AAA4BBB,J60/2:0,J300/2",
"AAA4BBB,J60/2:100,J300/2",
"AAA4BBB,J60/2,J300/2:0",
"AAA4BBB,J60/2,J300/2:100",
# Invalid transition seconds
"AAA4BBB,J60/2:00:0,J300/2",
"AAA4BBB,J60/2:00:100,J300/2",
"AAA4BBB,J60/2,J300/2:00:0",
"AAA4BBB,J60/2,J300/2:00:100",
]

for invalid_tzstr in invalid_tzstrs:
Expand Down
86 changes: 53 additions & 33 deletions Lib/zoneinfo/_zoneinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,8 +517,8 @@ class _DayOffset:
__slots__ = ["d", "julian", "hour", "minute", "second"]

def __init__(self, d, julian, hour=2, minute=0, second=0):
if not (0 + julian) <= d <= 365:
min_day = 0 + julian
min_day = 0 + julian # convert bool to int
if not min_day <= d <= 365:
raise ValueError(f"d must be in [{min_day}, 365], not: {d}")

self.d = d
Expand Down Expand Up @@ -560,11 +560,11 @@ class _CalendarOffset:
)

def __init__(self, m, w, d, hour=2, minute=0, second=0):
if not 0 < m <= 12:
raise ValueError("m must be in (0, 12]")
if not 1 <= m <= 12:
raise ValueError("m must be in [1, 12]")

if not 0 < w <= 5:
raise ValueError("w must be in (0, 5]")
if not 1 <= w <= 5:
raise ValueError("w must be in [1, 5]")

if not 0 <= d <= 6:
raise ValueError("d must be in [0, 6]")
Expand Down Expand Up @@ -634,18 +634,21 @@ def _parse_tz_str(tz_str):

offset_str, *start_end_str = tz_str.split(",", 1)

# fmt: off
parser_re = re.compile(
r"(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" +
r"((?P<stdoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?)" +
r"((?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" +
r"((?P<dstoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?))?" +
r")?" + # dst
r")?$" # stdoff
r"""
(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+-]+>)
(?:
(?P<stdoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)
(?:
(?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+-]+>)
(?P<dstoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)?
)? # dst
)? # stdoff
""",
re.ASCII|re.VERBOSE
)
# fmt: on

m = parser_re.match(offset_str)
m = parser_re.fullmatch(offset_str)

if m is None:
raise ValueError(f"{tz_str} is not a valid TZ string")
Expand Down Expand Up @@ -696,16 +699,17 @@ def _parse_tz_str(tz_str):


def _parse_dst_start_end(dststr):
date, *time = dststr.split("/")
if date[0] == "M":
date, *time = dststr.split("/", 1)
type = date[:1]
if type == "M":
n_is_julian = False
m = re.match(r"M(\d{1,2})\.(\d).(\d)$", date)
m = re.fullmatch(r"M(\d{1,2})\.(\d).(\d)", date, re.ASCII)
if m is None:
raise ValueError(f"Invalid dst start/end date: {dststr}")
date_offset = tuple(map(int, m.groups()))
offset = _CalendarOffset(*date_offset)
else:
if date[0] == "J":
if type == "J":
n_is_julian = True
date = date[1:]
else:
Expand All @@ -715,38 +719,54 @@ def _parse_dst_start_end(dststr):
offset = _DayOffset(doy, n_is_julian)

if time:
time_components = list(map(int, time[0].split(":")))
n_components = len(time_components)
if n_components < 3:
time_components.extend([0] * (3 - n_components))
offset.hour, offset.minute, offset.second = time_components
offset.hour, offset.minute, offset.second = _parse_transition_time(time[0])

return offset


def _parse_transition_time(time_str):
    """Parse the time part of a TZ-string transition rule.

    *time_str* is the text that follows the "/" in a rule such as
    "J60/2:30", written as ``[+-]h[h[h]][:mm[:ss]]``.

    Return a ``(hour, minute, second)`` tuple of ints.  Omitted minutes
    and seconds default to 0.  When the string starts with "-", all three
    components are negated.

    Raise ValueError if the string does not have that form, if the hour
    is greater than 167, or if the minutes or seconds are greater than 59.
    """
    match = re.fullmatch(
        r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
        time_str,
        re.ASCII
    )
    if match is None:
        raise ValueError(f"Invalid time: {time_str}")

    h, m, s = (int(v or 0) for v in match.group("h", "m", "s"))

    if h > 167:
        raise ValueError(
            f"Hour must be in [0, 167]: {time_str}"
        )

    # The pattern only fixes the width of these fields (two digits), so
    # values 60-99 get past it and have to be rejected explicitly.
    if m > 59 or s > 59:
        raise ValueError(
            f"Minutes and seconds must be in [0, 59]: {time_str}"
        )

    if match.group("sign") == "-":
        h, m, s = -h, -m, -s

    return h, m, s


def _parse_tz_delta(tz_delta):
    """Convert the offset field of a TZ string to a number of seconds.

    *tz_delta* has the form ``[+-]h[h[h]][:mm[:ss]]``.

    Return the offset in seconds as an int, with the sign of the TZ string
    inverted: "5" and "+5" both give -18000, while "-5" gives 18000.

    Raise ValueError if the string does not have that form, if the hours
    are greater than 24, or if the minutes or seconds are greater than 59.
    """
    match = re.fullmatch(
        r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
        tz_delta,
        re.ASCII
    )
    # Anything passed to this function should already have hit an equivalent
    # regular expression to find the section to parse.  Raise explicitly
    # rather than assert, so the check is not stripped under -O.
    if match is None:
        raise ValueError(f"Invalid offset: {tz_delta}")

    h, m, s = (int(v or 0) for v in match.group("h", "m", "s"))

    if h > 24:
        raise ValueError(
            f"Offset hours must be in [0, 24]: {tz_delta}"
        )

    # The pattern only fixes the width of these fields (two digits), so
    # values 60-99 get past it and have to be rejected explicitly.
    if m > 59 or s > 59:
        raise ValueError(
            f"Offset minutes and seconds must be in [0, 59]: {tz_delta}"
        )

    total = h * 3600 + m * 60 + s

    # Yes, +5 maps to an offset of -5h
    if match.group("sign") != "-":
        total = -total

    return total
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
:mod:`zoneinfo` now supports the full range of values in the TZ string
determined by RFC 8536 and detects all invalid formats.
Both Python and C implementations now raise exceptions of the same
type on invalid data.
Loading

0 comments on commit ab08ff7

Please sign in to comment.