From a2142a7210999854d48a9aade87bf477d5b7f2af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 23 Jul 2024 15:30:34 +0200 Subject: [PATCH 1/8] fix linecache --- Lib/linecache.py | 16 ++++++++++++++++ Lib/test/test_linecache.py | 22 ++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/Lib/linecache.py b/Lib/linecache.py index 3462f1c451ba29..a0cdadd4c28141 100644 --- a/Lib/linecache.py +++ b/Lib/linecache.py @@ -73,6 +73,15 @@ def checkcache(filename=None): except OSError: cache.pop(filename, None) continue + except ValueError: + # ValueError may happen on Windows platforms for long paths. + # In this case, we assume that we could not just read the file. + # + # See: https://github.com/python/cpython/issues/122170. + if os.name == 'nt': + cache.pop(filename, None) + continue + raise # this should not happen on other platforms if size != stat.st_size or mtime != stat.st_mtime: cache.pop(filename, None) @@ -137,8 +146,15 @@ def updatecache(filename, module_globals=None): break except OSError: pass + except ValueError: + if os.name != 'nt': + raise # this should not happen on other platforms else: return [] + except ValueError: + if os.name != 'nt': + raise # this should not happen on other platforms + return [] try: with tokenize.open(fullname) as fp: lines = fp.readlines() diff --git a/Lib/test/test_linecache.py b/Lib/test/test_linecache.py index 8ac521d72ef13e..0259fd97563992 100644 --- a/Lib/test/test_linecache.py +++ b/Lib/test/test_linecache.py @@ -280,6 +280,28 @@ def test_loader(self): self.assertEqual(linecache.getlines(filename, module_globals), ['source for x.y.z\n']) + def test_long_filename(self): + # For POSIX platforms, an OSError will be raised and will take + # the usual path handling. For Windows platforms, a ValueError + # is raised instead but linecache will handle it as if it were + # an OSError in this case. + # + # See: https://github.com/python/cpython/issues/122170 + + linecache.clearcache() + lines = linecache.updatecache('a' * 9999) + self.assertListEqual(lines, []) + self.assertNotIn('a' * 9999, linecache.cache) + + # hack into the cache (it shouldn't be allowed + # but we never know what people do...) + linecache.cache['smallname'] = (0, 1234, [], 'a' * 9999) + linecache.checkcache('smallname') + self.assertNotIn('smallname', linecache.cache) + + # just to be sure that we did not mess with cache + linecache.clearcache() + class LineCacheInvalidationTests(unittest.TestCase): def setUp(self): From ca9a493cdb2149c02af460a19cf1fb7bed6d497c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 23 Jul 2024 15:30:26 +0200 Subject: [PATCH 2/8] blurb --- .../next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst diff --git a/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst b/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst new file mode 100644 index 00000000000000..d15a316b138291 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst @@ -0,0 +1,2 @@ +Handle long filenames in :mod:`linecache` on Windows platforms. +Patch by Bénédikt Tran. From 0b57bb45372b7cf4a5fda3b989ea49f8f991d2e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 24 Jul 2024 12:35:57 +0200 Subject: [PATCH 3/8] handle NUL bytes --- Lib/linecache.py | 27 +++++++++++---------------- Lib/test/test_linecache.py | 23 +++++++++++++++++++++++ 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/Lib/linecache.py b/Lib/linecache.py index a0cdadd4c28141..8d8214b5368e5d 100644 --- a/Lib/linecache.py +++ b/Lib/linecache.py @@ -70,18 +70,13 @@ def checkcache(filename=None): return try: stat = os.stat(fullname) - except OSError: - cache.pop(filename, None) - continue - except ValueError: - # ValueError may happen on Windows platforms for long paths. - # In this case, we assume that we could not just read the file. + except (OSError, ValueError): + # ValueError may happen on Windows platforms for long paths or + # on any platform when the filename has embedded null bytes. # # See: https://github.com/python/cpython/issues/122170. - if os.name == 'nt': - cache.pop(filename, None) - continue - raise # this should not happen on other platforms + cache.pop(filename, None) + continue if size != stat.st_size or mtime != stat.st_mtime: cache.pop(filename, None) @@ -144,16 +139,16 @@ def updatecache(filename, module_globals=None): try: stat = os.stat(fullname) break - except OSError: + except (OSError, ValueError): pass - except ValueError: - if os.name != 'nt': - raise # this should not happen on other platforms else: return [] except ValueError: - if os.name != 'nt': - raise # this should not happen on other platforms + # ValueError may happen on Windows platforms for long paths or + # on any platform when the filename has embedded null bytes. + # + # In this case, we will not even try to find the path using lazy + # loading or alternative techniques. return [] try: with tokenize.open(fullname) as fp: diff --git a/Lib/test/test_linecache.py b/Lib/test/test_linecache.py index 0259fd97563992..2f8e9a9e384628 100644 --- a/Lib/test/test_linecache.py +++ b/Lib/test/test_linecache.py @@ -280,6 +280,28 @@ def test_loader(self): self.assertEqual(linecache.getlines(filename, module_globals), ['source for x.y.z\n']) + def test_embedded_null_bytes(self): + NUL = '\x00' + linecache.clearcache() + lines = linecache.updatecache(NUL) + self.assertListEqual(lines, []) + self.assertNotIn(NUL, linecache.cache) + + # hack into the cache (it shouldn't be allowed + # but we never know what people do...) + linecache.clearcache() + linecache.cache[NUL] = (0, 1234, [], 'FULLNAME') + linecache.checkcache(NUL) + self.assertNotIn(NUL, linecache.cache) + + linecache.clearcache() + linecache.cache[NUL] = (0, 1234, [], NUL) + linecache.checkcache(NUL) + self.assertNotIn(NUL, linecache.cache) + + # just to be sure that we did not mess with cache + linecache.clearcache() + def test_long_filename(self): # For POSIX platforms, an OSError will be raised and will take # the usual path handling. For Windows platforms, a ValueError @@ -295,6 +317,7 @@ def test_long_filename(self): # hack into the cache (it shouldn't be allowed # but we never know what people do...) + linecache.clearcache() linecache.cache['smallname'] = (0, 1234, [], 'a' * 9999) linecache.checkcache('smallname') self.assertNotIn('smallname', linecache.cache) From 7baccdd8f057ceff4f365691c1c7448dadb3a7a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 24 Jul 2024 12:36:43 +0200 Subject: [PATCH 4/8] update NEWS --- .../next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst b/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst index d15a316b138291..ac83a886351631 100644 --- a/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst +++ b/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst @@ -1,2 +1,2 @@ -Handle long filenames in :mod:`linecache` on Windows platforms. +Handle embedded NUL bytes in paths passed to :mod:`linecache`. Patch by Bénédikt Tran. From ea7e0f4ecf2b3dbb303bce185d98972f610eb855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 24 Jul 2024 16:28:05 +0200 Subject: [PATCH 5/8] Update NEWS --- .../next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst b/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst index ac83a886351631..2e615140b657fe 100644 --- a/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst +++ b/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst @@ -1,2 +1,2 @@ -Handle embedded NUL bytes in paths passed to :mod:`linecache`. +Handle :exc:`ValueError`s raised by :func:`os.stat` in :mod:`linecache`. Patch by Bénédikt Tran. From 722946f554e81eb493edf61b39d9c07eda39cce4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 24 Jul 2024 17:02:25 +0200 Subject: [PATCH 6/8] fixup! sphinx! --- .../next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst b/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst index 2e615140b657fe..7eeb9f67ad4b3a 100644 --- a/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst +++ b/Misc/NEWS.d/next/Library/2024-07-23-15-30-23.gh-issue-122170.Z9gi3Y.rst @@ -1,2 +1,2 @@ -Handle :exc:`ValueError`s raised by :func:`os.stat` in :mod:`linecache`. +Handle :exc:`ValueError`\s raised by :func:`os.stat` in :mod:`linecache`. Patch by Bénédikt Tran. From 27eaa98db0b6d136fe6f9688ee695299cc391f6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 26 Jul 2024 18:38:15 +0200 Subject: [PATCH 7/8] address review --- Lib/linecache.py | 11 +----- Lib/test/test_linecache.py | 72 +++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 46 deletions(-) diff --git a/Lib/linecache.py b/Lib/linecache.py index 8d8214b5368e5d..4b38a0464d8747 100644 --- a/Lib/linecache.py +++ b/Lib/linecache.py @@ -71,10 +71,6 @@ def checkcache(filename=None): try: stat = os.stat(fullname) except (OSError, ValueError): - # ValueError may happen on Windows platforms for long paths or - # on any platform when the filename has embedded null bytes. - # - # See: https://github.com/python/cpython/issues/122170. cache.pop(filename, None) continue if size != stat.st_size or mtime != stat.st_mtime: @@ -143,12 +139,7 @@ def updatecache(filename, module_globals=None): pass else: return [] - except ValueError: - # ValueError may happen on Windows platforms for long paths or - # on any platform when the filename has embedded null bytes. - # - # In this case, we will not even try to find the path using lazy - # loading or alternative techniques. + except ValueError: # may be raised by os.stat() return [] try: with tokenize.open(fullname) as fp: diff --git a/Lib/test/test_linecache.py b/Lib/test/test_linecache.py index 2f8e9a9e384628..1ef89b7cb1cecf 100644 --- a/Lib/test/test_linecache.py +++ b/Lib/test/test_linecache.py @@ -281,46 +281,46 @@ def test_loader(self): ['source for x.y.z\n']) def test_embedded_null_bytes(self): - NUL = '\x00' - linecache.clearcache() - lines = linecache.updatecache(NUL) - self.assertListEqual(lines, []) - self.assertNotIn(NUL, linecache.cache) - - # hack into the cache (it shouldn't be allowed - # but we never know what people do...) - linecache.clearcache() - linecache.cache[NUL] = (0, 1234, [], 'FULLNAME') - linecache.checkcache(NUL) - self.assertNotIn(NUL, linecache.cache) - - linecache.clearcache() - linecache.cache[NUL] = (0, 1234, [], NUL) - linecache.checkcache(NUL) - self.assertNotIn(NUL, linecache.cache) + for name in ['\x00', __file__ + '\x00']: + with self.subTest('updatecache', badname=name): + linecache.clearcache() + lines = linecache.updatecache(name) + self.assertListEqual(lines, []) + self.assertNotIn(name, linecache.cache) + + # hack into the cache (it shouldn't be allowed + # but we never know what people do...) + for key, fullname in [(name, 'ok'), ('key', name), (name, name)]: + with self.subTest('checkcache', key=key, fullname=fullname): + linecache.clearcache() + linecache.cache[key] = (0, 1234, [], fullname) + linecache.checkcache(key) + self.assertNotIn(key, linecache.cache) # just to be sure that we did not mess with cache linecache.clearcache() - def test_long_filename(self): - # For POSIX platforms, an OSError will be raised and will take - # the usual path handling. For Windows platforms, a ValueError - # is raised instead but linecache will handle it as if it were - # an OSError in this case. - # - # See: https://github.com/python/cpython/issues/122170 - - linecache.clearcache() - lines = linecache.updatecache('a' * 9999) - self.assertListEqual(lines, []) - self.assertNotIn('a' * 9999, linecache.cache) - - # hack into the cache (it shouldn't be allowed - # but we never know what people do...) - linecache.clearcache() - linecache.cache['smallname'] = (0, 1234, [], 'a' * 9999) - linecache.checkcache('smallname') - self.assertNotIn('smallname', linecache.cache) + def test_invalid_names(self): + for name, desc in [ + # A filename with surrogate codes. A UnicodeEncodeError is raised + # by os.stat() upon querying, which is a subclass of ValueError. + ("\uD834\uDD1E.py", 'surrogate codes (MUSICAL SYMBOL G CLEF)'), + # For POSIX platforms, an OSError will be raised but for Windows + # platforms, a ValueError is raised due to the path_t converter. + # See: https://github.com/python/cpython/issues/122170 + ('a' * 1_000_000, 'very long filename'), + ]: + with self.subTest(f'updatecache: {desc}'): + linecache.clearcache() + lines = linecache.updatecache(name) + self.assertListEqual(lines, []) + self.assertNotIn(name, linecache.cache) + + with self.subTest(f'checkcache: {desc}'): + linecache.clearcache() + linecache.cache['key'] = (0, 1234, [], name) + linecache.checkcache('key') + self.assertNotIn('key', linecache.cache) # just to be sure that we did not mess with cache linecache.clearcache() From 106fcdb2e021f7188e3f4f626f709cfd00623682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 27 Jul 2024 09:23:48 +0200 Subject: [PATCH 8/8] merge tests --- Lib/test/test_linecache.py | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/Lib/test/test_linecache.py b/Lib/test/test_linecache.py index 1ef89b7cb1cecf..6f5955791407ea 100644 --- a/Lib/test/test_linecache.py +++ b/Lib/test/test_linecache.py @@ -280,28 +280,10 @@ def test_loader(self): self.assertEqual(linecache.getlines(filename, module_globals), ['source for x.y.z\n']) - def test_embedded_null_bytes(self): - for name in ['\x00', __file__ + '\x00']: - with self.subTest('updatecache', badname=name): - linecache.clearcache() - lines = linecache.updatecache(name) - self.assertListEqual(lines, []) - self.assertNotIn(name, linecache.cache) - - # hack into the cache (it shouldn't be allowed - # but we never know what people do...) - for key, fullname in [(name, 'ok'), ('key', name), (name, name)]: - with self.subTest('checkcache', key=key, fullname=fullname): - linecache.clearcache() - linecache.cache[key] = (0, 1234, [], fullname) - linecache.checkcache(key) - self.assertNotIn(key, linecache.cache) - - # just to be sure that we did not mess with cache - linecache.clearcache() - def test_invalid_names(self): for name, desc in [ + ('\x00', 'NUL bytes filename'), + (__file__ + '\x00', 'filename with embedded NUL bytes'), # A filename with surrogate codes. A UnicodeEncodeError is raised # by os.stat() upon querying, which is a subclass of ValueError. ("\uD834\uDD1E.py", 'surrogate codes (MUSICAL SYMBOL G CLEF)'), @@ -316,11 +298,15 @@ def test_invalid_names(self): self.assertListEqual(lines, []) self.assertNotIn(name, linecache.cache) - with self.subTest(f'checkcache: {desc}'): - linecache.clearcache() - linecache.cache['key'] = (0, 1234, [], name) - linecache.checkcache('key') - self.assertNotIn('key', linecache.cache) + # hack into the cache (it shouldn't be allowed + # but we never know what people do...) + for key, fullname in [(name, 'ok'), ('key', name), (name, name)]: + with self.subTest(f'checkcache: {desc}', + key=key, fullname=fullname): + linecache.clearcache() + linecache.cache[key] = (0, 1234, [], fullname) + linecache.checkcache(key) + self.assertNotIn(key, linecache.cache) # just to be sure that we did not mess with cache linecache.clearcache()