From e2e0a4940ce4c56a1ef8818f2f06433a6b7ce237 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 9 Apr 2023 22:18:02 +0100 Subject: [PATCH] GH-65238: Fix stripping of trailing slash in pathlib This brings pathlib in line with *IEEE Std 1003.1-2017*, where trailing slashes are meaningful to path resolution and should not be discarded. See https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 --- Lib/importlib/metadata/__init__.py | 5 +- Lib/pathlib.py | 13 ++-- Lib/test/test_pathlib.py | 66 +++++++++++++++---- ...3-04-17-21-25-08.gh-issue-65238.gNfhyT.rst | 1 + 4 files changed, 68 insertions(+), 17 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-04-17-21-25-08.gh-issue-65238.gNfhyT.rst diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index 40ab1a1aaac328..7e79b28754fe3b 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -748,7 +748,10 @@ def read_text(self, filename): NotADirectoryError, PermissionError, ): - return self._path.joinpath(filename).read_text(encoding='utf-8') + path = self._path + if filename: + path /= filename + return path.read_text(encoding='utf-8') read_text.__doc__ = Distribution.read_text.__doc__ diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 4ae1fae6f4b358..867d73d18242ee 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -326,6 +326,9 @@ def _parse_path(cls, path): # pathlib assumes that UNC paths always have a root. root = sep parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.'] + if parsed and not rel.endswith(parsed[-1]): + # Preserve trailing slash + parsed.append('') return drv, root, parsed def _load_parts(self): @@ -578,6 +581,9 @@ def relative_to(self, other, /, *_deprecated, walk_up=False): remove=(3, 14)) path_cls = type(self) other = path_cls(other, *_deprecated) + if not other.name: + # Ignore trailing slash. + other = other.parent for step, path in enumerate([other] + list(other.parents)): if self.is_relative_to(path): break @@ -598,6 +604,9 @@ def is_relative_to(self, other, /, *_deprecated): warnings._deprecated("pathlib.PurePath.is_relative_to(*args)", msg, remove=(3, 14)) other = type(self)(other, *_deprecated) + if not other.name: + # Ignore trailing slash. + other = other.parent return other == self or other in self.parents @property @@ -825,8 +834,6 @@ def glob(self, pattern): drv, root, pattern_parts = self._parse_path(pattern) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") - if pattern[-1] in (self._flavour.sep, self._flavour.altsep): - pattern_parts.append('') selector = _make_selector(tuple(pattern_parts), self._flavour) for p in selector.select_from(self): yield p @@ -840,8 +847,6 @@ def rglob(self, pattern): drv, root, pattern_parts = self._parse_path(pattern) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") - if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): - pattern_parts.append('') selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour) for p in selector.select_from(self): yield p diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 3c6da94d094610..e1a6bfc9d5f38b 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -42,11 +42,14 @@ class _BasePurePathTest(object): # supposed to produce equal paths. equivalences = { 'a/b': [ - ('a', 'b'), ('a/', 'b'), ('a', 'b/'), ('a/', 'b/'), - ('a/b/',), ('a//b',), ('a//b//',), + ('a', 'b'), ('a/', 'b'), ('a//b',), # Empty components get removed. - ('', 'a', 'b'), ('a', '', 'b'), ('a', 'b', ''), + ('', 'a', 'b'), ('a', '', 'b'), ], + 'a/b/': [ + ('a', 'b/'), ('a/', 'b/'), ('a/b/',), + ('a//b//',), ('a', 'b', ''), + ], '/b/c/d': [ ('a', '/b/c', 'd'), ('/a', '/b/c', 'd'), # Empty components get removed. @@ -154,11 +157,11 @@ def test_drive_root_parts_common(self): # Unanchored parts. check((), '', '', ()) check(('a',), '', '', ('a',)) - check(('a/',), '', '', ('a',)) + check(('a/',), '', '', ('a', '')) check(('a', 'b'), '', '', ('a', 'b')) # Expansion. check(('a/b',), '', '', ('a', 'b')) - check(('a/b/',), '', '', ('a', 'b')) + check(('a/b/',), '', '', ('a', 'b', '')) check(('a', 'b/c', 'd'), '', '', ('a', 'b', 'c', 'd')) # Collapsing and stripping excess slashes. check(('a', 'b//c', 'd'), '', '', ('a', 'b', 'c', 'd')) @@ -167,7 +170,7 @@ def test_drive_root_parts_common(self): check(('.',), '', '', ()) check(('.', '.', 'b'), '', '', ('b',)) check(('a', '.', 'b'), '', '', ('a', 'b')) - check(('a', '.', '.'), '', '', ('a',)) + check(('a', '.', '.'), '', '', ('a', '')) # The first part is anchored. check(('/a/b',), '', sep, (sep, 'a', 'b')) check(('/a', 'b'), '', sep, (sep, 'a', 'b')) @@ -188,6 +191,24 @@ def test_join_common(self): self.assertEqual(pp, P('a/b/c')) pp = p.joinpath('/c') self.assertEqual(pp, P('/c')) + pp = p.joinpath('.') + self.assertEqual(pp, P('a/b/')) + pp = p.joinpath('') + self.assertEqual(pp, P('a/b/')) + p = P('a/b/') + pp = p.joinpath('c') + self.assertEqual(pp, P('a/b/c')) + self.assertIs(type(pp), type(p)) + pp = p.joinpath('c', 'd') + self.assertEqual(pp, P('a/b/c/d')) + pp = p.joinpath(P('c')) + self.assertEqual(pp, P('a/b/c')) + pp = p.joinpath('/c') + self.assertEqual(pp, P('/c')) + pp = p.joinpath('.') + self.assertEqual(pp, P('a/b/')) + pp = p.joinpath('') + self.assertEqual(pp, P('a/b/')) def test_div_common(self): # Basically the same as joinpath(). @@ -389,6 +410,12 @@ def test_parent_common(self): self.assertEqual(p.parent.parent, P('/a')) self.assertEqual(p.parent.parent.parent, P('/')) self.assertEqual(p.parent.parent.parent.parent, P('/')) + # Trailing slash + p = P('/a/b/') + self.assertEqual(p.parent, P('/a/b')) + self.assertEqual(p.parent.parent, P('/a')) + self.assertEqual(p.parent.parent.parent, P('/')) + self.assertEqual(p.parent.parent.parent.parent, P('/')) def test_parents_common(self): # Relative @@ -436,6 +463,9 @@ def test_parents_common(self): par[-4] with self.assertRaises(IndexError): par[3] + # Trailing slash + self.assertEqual(P('a/b/').parents[:], (P('a/b'), P('a'), P())) + self.assertEqual(P('/a/b/').parents[:], (P('/a/b'), P('/a'), P('/'))) def test_drive_common(self): P = self.cls @@ -466,7 +496,7 @@ def test_name_common(self): self.assertEqual(P('/').name, '') self.assertEqual(P('a/b').name, 'b') self.assertEqual(P('/a/b').name, 'b') - self.assertEqual(P('/a/b/.').name, 'b') + self.assertEqual(P('/a/b/.').name, '') self.assertEqual(P('a/b.py').name, 'b.py') self.assertEqual(P('/a/b.py').name, 'b.py') @@ -534,6 +564,7 @@ def test_with_name_common(self): self.assertRaises(ValueError, P('').with_name, 'd.xml') self.assertRaises(ValueError, P('.').with_name, 'd.xml') self.assertRaises(ValueError, P('/').with_name, 'd.xml') + self.assertRaises(ValueError, P('a/').with_name, 'd.xml') self.assertRaises(ValueError, P('a/b').with_name, '') self.assertRaises(ValueError, P('a/b').with_name, '/c') self.assertRaises(ValueError, P('a/b').with_name, 'c/') @@ -551,6 +582,7 @@ def test_with_stem_common(self): self.assertRaises(ValueError, P('').with_stem, 'd') self.assertRaises(ValueError, P('.').with_stem, 'd') self.assertRaises(ValueError, P('/').with_stem, 'd') + self.assertRaises(ValueError, P('a/').with_stem, 'd') self.assertRaises(ValueError, P('a/b').with_stem, '') self.assertRaises(ValueError, P('a/b').with_stem, '/c') self.assertRaises(ValueError, P('a/b').with_stem, 'c/') @@ -569,6 +601,7 @@ def test_with_suffix_common(self): self.assertRaises(ValueError, P('').with_suffix, '.gz') self.assertRaises(ValueError, P('.').with_suffix, '.gz') self.assertRaises(ValueError, P('/').with_suffix, '.gz') + self.assertRaises(ValueError, P('a/').with_suffix, '.gz') # Invalid suffix. self.assertRaises(ValueError, P('a/b').with_suffix, 'gz') self.assertRaises(ValueError, P('a/b').with_suffix, '/') @@ -789,7 +822,8 @@ class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase): equivalences = _BasePurePathTest.equivalences.copy() equivalences.update({ './a:b': [ ('./a:b',) ], - 'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('.', 'c:', 'a') ], + 'c:a': [ ('c:', 'a'), ('.', 'c:', 'a') ], + 'c:a/': [ ('c:', 'a/') ], 'c:/a': [ ('c:/', 'a'), ('c:', '/', 'a'), ('c:', '/a'), ('/z', 'c:/', 'a'), ('//x/y', 'c:/', 'a'), @@ -819,7 +853,7 @@ def test_drive_root_parts(self): # UNC paths. check(('a', '//b/c', 'd'), '\\\\b\\c', '\\', ('\\\\b\\c\\', 'd')) # Collapsing and stripping excess slashes. - check(('a', 'Z://b//c/', 'd/'), 'Z:', '\\', ('Z:\\', 'b', 'c', 'd')) + check(('a', 'Z://b//c/', 'd/'), 'Z:', '\\', ('Z:\\', 'b', 'c', 'd', '')) # UNC paths. check(('a', '//b/c//', 'd'), '\\\\b\\c', '\\', ('\\\\b\\c\\', 'd')) # Extended paths. @@ -970,11 +1004,15 @@ def test_parent(self): self.assertEqual(p.parent, P('//a/b/c')) self.assertEqual(p.parent.parent, P('//a/b')) self.assertEqual(p.parent.parent.parent, P('//a/b')) + # Trailing slash + self.assertEqual(P('z:a/b/').parent, P('z:a/b')) + self.assertEqual(P('z:/a/b/').parent, P('z:/a/b')) + self.assertEqual(P('//a/b/c/d/').parent, P('//a/b/c/d')) def test_parents(self): # Anchored P = self.cls - p = P('z:a/b/') + p = P('z:a/b') par = p.parents self.assertEqual(len(par), 2) self.assertEqual(par[0], P('z:a')) @@ -988,7 +1026,7 @@ def test_parents(self): self.assertEqual(list(par), [P('z:a'), P('z:')]) with self.assertRaises(IndexError): par[2] - p = P('z:/a/b/') + p = P('z:/a/b') par = p.parents self.assertEqual(len(par), 2) self.assertEqual(par[0], P('z:/a')) @@ -1016,6 +1054,10 @@ def test_parents(self): self.assertEqual(list(par), [P('//a/b/c'), P('//a/b')]) with self.assertRaises(IndexError): par[2] + # Trailing slash + self.assertEqual(P('z:a/b/').parents[:], (P('z:a/b'), P('z:a'), P('z:'))) + self.assertEqual(P('z:/a/b/').parents[:], (P('z:/a/b'), P('z:/a'), P('z:/'))) + self.assertEqual(P('//a/b/c/d/').parents[:], (P('//a/b/c/d'), P('//a/b/c'), P('//a/b/'))) def test_drive(self): P = self.cls @@ -1790,7 +1832,7 @@ def _check(glob, expected): def test_rglob_common(self): def _check(glob, expected): - self.assertEqual(set(glob), { P(BASE, q) for q in expected }) + self.assertEqual(set(glob), { P(BASE, q) if q else P(BASE) for q in expected }) P = self.cls p = P(BASE) it = p.rglob("fileA") diff --git a/Misc/NEWS.d/next/Library/2023-04-17-21-25-08.gh-issue-65238.gNfhyT.rst b/Misc/NEWS.d/next/Library/2023-04-17-21-25-08.gh-issue-65238.gNfhyT.rst new file mode 100644 index 00000000000000..54e99457ab08e4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-04-17-21-25-08.gh-issue-65238.gNfhyT.rst @@ -0,0 +1 @@ +Fix issue where :mod:`pathlib` did not preserve trailing slashes.