From 3655d5ef6ae3923929a9ba6d3185962ea55b1f35 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 14 May 2021 20:17:55 +0100 Subject: [PATCH 01/14] Remove `flavour.is_reserved()` --- Lib/pathlib.py | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 94e079fffef600..0bf0c48647efd7 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -123,12 +123,6 @@ class _WindowsFlavour(_Flavour): drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') ext_namespace_prefix = '\\\\?\\' - reserved_names = ( - {'CON', 'PRN', 'AUX', 'NUL'} | - {'COM%d' % i for i in range(1, 10)} | - {'LPT%d' % i for i in range(1, 10)} - ) - # Interesting findings about extended paths: # - '\\?\c:\a', '//?/c:\a' and '//?/c:/a' are all supported # but '\\?\c:/a' is not @@ -193,18 +187,6 @@ def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix): s = '\\' + s[3:] return prefix, s - def is_reserved(self, parts): - # NOTE: the rules for reserved names seem somewhat complicated - # (e.g. r"..\NUL" is reserved but not r"foo\NUL"). - # We err on the side of caution and return True for paths which are - # not considered reserved by Windows. - if not parts: - return False - if parts[0].startswith('\\\\'): - # UNC paths are never reserved - return False - return parts[-1].partition('.')[0].upper() in self.reserved_names - def make_uri(self, path): # Under Windows, file URIs use the UTF-8 encoding. drive = path.drive @@ -250,9 +232,6 @@ def casefold_parts(self, parts): def compile_pattern(self, pattern): return re.compile(fnmatch.translate(pattern)).fullmatch - def is_reserved(self, parts): - return False - def make_uri(self, path): # We represent the path using the local filesystem encoding, # for portability to other applications. @@ -875,7 +854,7 @@ def is_absolute(self): def is_reserved(self): """Return True if the path contains one of the special names reserved by the system, if any.""" - return self._flavour.is_reserved(self._parts) + raise NotImplementedError def match(self, path_pattern): """ @@ -916,6 +895,9 @@ class PurePosixPath(PurePath): _flavour = _posix_flavour __slots__ = () + def is_reserved(self): + return False + class PureWindowsPath(PurePath): """PurePath subclass for Windows systems. @@ -924,8 +906,25 @@ class PureWindowsPath(PurePath): However, you can also instantiate it directly on any system. """ _flavour = _windows_flavour + _reserved_names = ( + {'CON', 'PRN', 'AUX', 'NUL'} | + {'COM%d' % i for i in range(1, 10)} | + {'LPT%d' % i for i in range(1, 10)} + ) __slots__ = () + def is_reserved(self): + # NOTE: the rules for reserved names seem somewhat complicated + # (e.g. r"..\NUL" is reserved but not r"foo\NUL"). + # We err on the side of caution and return True for paths which are + # not considered reserved by Windows. + if not self._parts: + return False + if self._parts[0].startswith('\\\\'): + # UNC paths are never reserved + return False + return self._parts[-1].partition('.')[0].upper() in self._reserved_names + # Filesystem-accessing classes From 0faa6d0df4862d5b6024ea66101fa50e49f8c1fe Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 14 May 2021 20:24:12 +0100 Subject: [PATCH 02/14] Remove `flavour.make_uri()` --- Lib/pathlib.py | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 0bf0c48647efd7..8bb3629cbdd229 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -187,18 +187,6 @@ def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix): s = '\\' + s[3:] return prefix, s - def make_uri(self, path): - # Under Windows, file URIs use the UTF-8 encoding. - drive = path.drive - if len(drive) == 2 and drive[1] == ':': - # It's a path on a local drive => 'file:///c:/a/b' - rest = path.as_posix()[2:].lstrip('/') - return 'file:///%s/%s' % ( - drive, urlquote_from_bytes(rest.encode('utf-8'))) - else: - # It's a path on a network drive => 'file://host/share/a/b' - return 'file:' + urlquote_from_bytes(path.as_posix().encode('utf-8')) - class _PosixFlavour(_Flavour): sep = '/' @@ -232,12 +220,6 @@ def casefold_parts(self, parts): def compile_pattern(self, pattern): return re.compile(fnmatch.translate(pattern)).fullmatch - def make_uri(self, path): - # We represent the path using the local filesystem encoding, - # for portability to other applications. - bpath = bytes(path) - return 'file://' + urlquote_from_bytes(bpath) - _windows_flavour = _WindowsFlavour() _posix_flavour = _PosixFlavour() @@ -614,9 +596,7 @@ def __repr__(self): def as_uri(self): """Return the path as a 'file' URI.""" - if not self.is_absolute(): - raise ValueError("relative path can't be expressed as a file URI") - return self._flavour.make_uri(self) + raise NotImplementedError @property def _cparts(self): @@ -898,6 +878,14 @@ class PurePosixPath(PurePath): def is_reserved(self): return False + def as_uri(self): + # We represent the path using the local filesystem encoding, + # for portability to other applications. + if not self.is_absolute(): + raise ValueError("relative path can't be expressed as a file URI") + bpath = bytes(self) + return 'file://' + urlquote_from_bytes(bpath) + class PureWindowsPath(PurePath): """PurePath subclass for Windows systems. @@ -925,6 +913,19 @@ def is_reserved(self): return False return self._parts[-1].partition('.')[0].upper() in self._reserved_names + def as_uri(self): + if not self.is_absolute(): + raise ValueError("relative path can't be expressed as a file URI") + # Under Windows, file URIs use the UTF-8 encoding. + drive = self.drive + if len(drive) == 2 and drive[1] == ':': + # It's a path on a local drive => 'file:///c:/a/b' + rest = self.as_posix()[2:].lstrip('/') + return 'file:///%s/%s' % ( + drive, urlquote_from_bytes(rest.encode('utf-8'))) + else: + # It's a path on a network drive => 'file://host/share/a/b' + return 'file:' + urlquote_from_bytes(self.as_posix().encode('utf-8')) # Filesystem-accessing classes From 3094398b4aad5e0e0424fb23c8dce96d35958956 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 14 May 2021 20:50:16 +0100 Subject: [PATCH 03/14] Remove `flavour.join()`, `flavour.sep` and `flavour.altsep` --- Lib/pathlib.py | 30 +++++++++++++----------------- Lib/test/test_pathlib.py | 12 ++++++------ 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8bb3629cbdd229..7f71a19f237f34 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -50,13 +50,10 @@ class _Flavour(object): """A flavour implements a particular (platform-specific) set of path semantics.""" - def __init__(self): - self.join = self.sep.join - def parse_parts(self, parts): parsed = [] - sep = self.sep - altsep = self.altsep + sep = self.pathmod.sep + altsep = self.pathmod.altsep drv = root = '' it = reversed(parts) for part in it: @@ -113,8 +110,6 @@ class _WindowsFlavour(_Flavour): # Reference for Windows paths can be found at # http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx - sep = '\\' - altsep = '/' has_drv = True pathmod = ntpath @@ -129,7 +124,8 @@ class _WindowsFlavour(_Flavour): # - extended paths are always absolute; "relative" extended paths will # fail. - def splitroot(self, part, sep=sep): + def splitroot(self, part): + sep = self.pathmod.sep first = part[0:1] second = part[1:2] if (second == sep and first == sep): @@ -189,14 +185,13 @@ def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix): class _PosixFlavour(_Flavour): - sep = '/' - altsep = '' has_drv = False pathmod = posixpath is_supported = (os.name != 'nt') - def splitroot(self, part, sep=sep): + def splitroot(self, part): + sep = self.pathmod.sep if part and part[0] == sep: stripped_part = part.lstrip(sep) # According to POSIX path resolution: @@ -557,9 +552,9 @@ def _from_parsed_parts(cls, drv, root, parts): @classmethod def _format_parsed_parts(cls, drv, root, parts): if drv or root: - return drv + root + cls._flavour.join(parts[1:]) + return drv + root + cls._flavour.pathmod.sep.join(parts[1:]) else: - return cls._flavour.join(parts) + return cls._flavour.pathmod.sep.join(parts) def _make_child(self, args): drv, root, parts = self._parse_args(args) @@ -584,7 +579,7 @@ def as_posix(self): """Return the string representation of the path with forward (/) slashes.""" f = self._flavour - return str(self).replace(f.sep, '/') + return str(self).replace(f.pathmod.sep, '/') def __bytes__(self): """Return the bytes representation of the path. This is only @@ -704,7 +699,8 @@ def with_name(self, name): if not self.name: raise ValueError("%r has an empty name" % (self,)) drv, root, parts = self._flavour.parse_parts((name,)) - if (not name or name[-1] in [self._flavour.sep, self._flavour.altsep] + m = self._flavour.pathmod + if (not name or name[-1] in [m.sep, m.altsep] or drv or root or len(parts) != 1): raise ValueError("Invalid name %r" % (name)) return self._from_parsed_parts(self._drv, self._root, @@ -719,8 +715,8 @@ def with_suffix(self, suffix): has no suffix, add given suffix. If the given suffix is an empty string, remove the suffix from the path. """ - f = self._flavour - if f.sep in suffix or f.altsep and f.altsep in suffix: + m = self._flavour.pathmod + if m.sep in suffix or m.altsep and m.altsep in suffix: raise ValueError("Invalid suffix %r" % (suffix,)) if suffix and not suffix.startswith('.') or suffix == '.': raise ValueError("Invalid suffix %r" % (suffix)) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 6ed08f7e70ce3d..9ef2e9e6fe07a4 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -25,8 +25,8 @@ class _BaseFlavourTest(object): def _check_parse_parts(self, arg, expected): f = self.flavour.parse_parts - sep = self.flavour.sep - altsep = self.flavour.altsep + sep = self.flavour.pathmod.sep + altsep = self.flavour.pathmod.altsep actual = f([x.replace('/', sep) for x in arg]) self.assertEqual(actual, expected) if altsep: @@ -35,7 +35,7 @@ def _check_parse_parts(self, arg, expected): def test_parse_parts_common(self): check = self._check_parse_parts - sep = self.flavour.sep + sep = self.flavour.pathmod.sep # Unanchored parts. check([], ('', '', [])) check(['a'], ('', '', ['a'])) @@ -186,8 +186,8 @@ class _BasePurePathTest(object): def setUp(self): p = self.cls('a') self.flavour = p._flavour - self.sep = self.flavour.sep - self.altsep = self.flavour.altsep + self.sep = self.flavour.pathmod.sep + self.altsep = self.flavour.pathmod.altsep def test_constructor_common(self): P = self.cls @@ -615,7 +615,7 @@ def test_with_suffix_common(self): self.assertRaises(ValueError, P('a/b').with_suffix, './.d') self.assertRaises(ValueError, P('a/b').with_suffix, '.d/.') self.assertRaises(ValueError, P('a/b').with_suffix, - (self.flavour.sep, 'd')) + (self.sep, 'd')) def test_relative_to_common(self): P = self.cls From 00f6126807c29c197a7726d0ec9d8819c55af2b1 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 14 May 2021 21:26:26 +0100 Subject: [PATCH 04/14] Remove `flavour.join_parsed_parts()` --- Lib/pathlib.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 7f71a19f237f34..01fc4f2c0d9119 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -88,23 +88,6 @@ def parse_parts(self, parts): parsed.reverse() return drv, root, parsed - def join_parsed_parts(self, drv, root, parts, drv2, root2, parts2): - """ - Join the two paths represented by the respective - (drive, root, parts) tuples. Return a new (drive, root, parts) tuple. - """ - if root2: - if not drv2 and drv: - return drv, root2, [drv + root2] + parts2[1:] - elif drv2: - if drv2 == drv or self.casefold(drv2) == self.casefold(drv): - # Same drive => second path is relative to the first - return drv, root, parts + parts2[1:] - else: - # Second path is non-anchored (common case) - return drv, root, parts + parts2 - return drv2, root2, parts2 - class _WindowsFlavour(_Flavour): # Reference for Windows paths can be found at @@ -558,10 +541,27 @@ def _format_parsed_parts(cls, drv, root, parts): def _make_child(self, args): drv, root, parts = self._parse_args(args) - drv, root, parts = self._flavour.join_parsed_parts( + drv, root, parts = self._join_parsed_parts( self._drv, self._root, self._parts, drv, root, parts) return self._from_parsed_parts(drv, root, parts) + def _join_parsed_parts(self, drv, root, parts, drv2, root2, parts2): + """ + Join the two paths represented by the respective + (drive, root, parts) tuples. Return a new (drive, root, parts) tuple. + """ + if root2: + if not drv2 and drv: + return drv, root2, [drv + root2] + parts2[1:] + elif drv2: + if drv2 == drv or self._flavour.casefold(drv2) == self._flavour.casefold(drv): + # Same drive => second path is relative to the first + return drv, root, parts + parts2[1:] + else: + # Second path is non-anchored (common case) + return drv, root, parts + parts2 + return drv2, root2, parts2 + def __str__(self): """Return the string representation of the path, suitable for passing to system calls.""" From 6ea7eaed57cf38550e63b1c922176ccec5be4b91 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 15 May 2021 09:45:35 +0100 Subject: [PATCH 05/14] Remove `flavour.parse_parts()`, `flavour.splitroot()` and `flavour.pathmod` --- Lib/pathlib.py | 254 +++++++++++++++++++------------------ Lib/test/test_pathlib.py | 268 +++++++++++++++++++-------------------- 2 files changed, 255 insertions(+), 267 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 01fc4f2c0d9119..3ba3ac6fda321c 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -50,103 +50,15 @@ class _Flavour(object): """A flavour implements a particular (platform-specific) set of path semantics.""" - def parse_parts(self, parts): - parsed = [] - sep = self.pathmod.sep - altsep = self.pathmod.altsep - drv = root = '' - it = reversed(parts) - for part in it: - if not part: - continue - if altsep: - part = part.replace(altsep, sep) - drv, root, rel = self.splitroot(part) - if sep in rel: - for x in reversed(rel.split(sep)): - if x and x != '.': - parsed.append(sys.intern(x)) - else: - if rel and rel != '.': - parsed.append(sys.intern(rel)) - if drv or root: - if not drv: - # If no drive is present, try to find one in the previous - # parts. This makes the result of parsing e.g. - # ("C:", "/", "a") reasonably intuitive. - for part in it: - if not part: - continue - if altsep: - part = part.replace(altsep, sep) - drv = self.splitroot(part)[0] - if drv: - break - break - if drv or root: - parsed.append(drv + root) - parsed.reverse() - return drv, root, parsed - class _WindowsFlavour(_Flavour): # Reference for Windows paths can be found at # http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx has_drv = True - pathmod = ntpath is_supported = (os.name == 'nt') - drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') - ext_namespace_prefix = '\\\\?\\' - - # Interesting findings about extended paths: - # - '\\?\c:\a', '//?/c:\a' and '//?/c:/a' are all supported - # but '\\?\c:/a' is not - # - extended paths are always absolute; "relative" extended paths will - # fail. - - def splitroot(self, part): - sep = self.pathmod.sep - first = part[0:1] - second = part[1:2] - if (second == sep and first == sep): - # XXX extended paths should also disable the collapsing of "." - # components (according to MSDN docs). - prefix, part = self._split_extended_path(part) - first = part[0:1] - second = part[1:2] - else: - prefix = '' - third = part[2:3] - if (second == sep and first == sep and third != sep): - # is a UNC path: - # vvvvvvvvvvvvvvvvvvvvv root - # \\machine\mountpoint\directory\etc\... - # directory ^^^^^^^^^^^^^^ - index = part.find(sep, 2) - if index != -1: - index2 = part.find(sep, index + 1) - # a UNC path can't have two slashes in a row - # (after the initial two) - if index2 != index + 1: - if index2 == -1: - index2 = len(part) - if prefix: - return prefix + part[1:index2], sep, part[index2+1:] - else: - return part[:index2], sep, part[index2+1:] - drv = root = '' - if second == ':' and first in self.drive_letters: - drv = part[:2] - part = part[2:] - first = third - if first == sep: - root = first - part = part.lstrip(sep) - return prefix + drv, root, part - def casefold(self, s): return s.lower() @@ -156,39 +68,12 @@ def casefold_parts(self, parts): def compile_pattern(self, pattern): return re.compile(fnmatch.translate(pattern), re.IGNORECASE).fullmatch - def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix): - prefix = '' - if s.startswith(ext_prefix): - prefix = s[:4] - s = s[4:] - if s.startswith('UNC\\'): - prefix += s[:3] - s = '\\' + s[3:] - return prefix, s - class _PosixFlavour(_Flavour): has_drv = False - pathmod = posixpath is_supported = (os.name != 'nt') - def splitroot(self, part): - sep = self.pathmod.sep - if part and part[0] == sep: - stripped_part = part.lstrip(sep) - # According to POSIX path resolution: - # http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap04.html#tag_04_11 - # "A pathname that begins with two successive slashes may be - # interpreted in an implementation-defined manner, although more - # than two leading slashes shall be treated as a single slash". - if len(part) - len(stripped_part) == 2: - return '', sep * 2, stripped_part - else: - return '', sep, stripped_part - else: - return '', '', part - def casefold(self, s): return s @@ -511,7 +396,46 @@ def _parse_args(cls, args): "argument should be a str object or an os.PathLike " "object returning str, not %r" % type(a)) - return cls._flavour.parse_parts(parts) + return cls._parse_parts(parts) + + @classmethod + def _parse_parts(cls, parts): + parsed = [] + sep = cls._pathmod.sep + altsep = cls._pathmod.altsep + drv = root = '' + it = reversed(parts) + for part in it: + if not part: + continue + if altsep: + part = part.replace(altsep, sep) + drv, root, rel = cls._splitroot(part) + if sep in rel: + for x in reversed(rel.split(sep)): + if x and x != '.': + parsed.append(sys.intern(x)) + else: + if rel and rel != '.': + parsed.append(sys.intern(rel)) + if drv or root: + if not drv: + # If no drive is present, try to find one in the previous + # parts. This makes the result of parsing e.g. + # ("C:", "/", "a") reasonably intuitive. + for part in it: + if not part: + continue + if altsep: + part = part.replace(altsep, sep) + drv = cls._splitroot(part)[0] + if drv: + break + break + if drv or root: + parsed.append(drv + root) + parsed.reverse() + return drv, root, parsed @classmethod def _from_parts(cls, args): @@ -535,9 +459,9 @@ def _from_parsed_parts(cls, drv, root, parts): @classmethod def _format_parsed_parts(cls, drv, root, parts): if drv or root: - return drv + root + cls._flavour.pathmod.sep.join(parts[1:]) + return drv + root + cls._pathmod.sep.join(parts[1:]) else: - return cls._flavour.pathmod.sep.join(parts) + return cls._pathmod.sep.join(parts) def _make_child(self, args): drv, root, parts = self._parse_args(args) @@ -578,8 +502,7 @@ def __fspath__(self): def as_posix(self): """Return the string representation of the path with forward (/) slashes.""" - f = self._flavour - return str(self).replace(f.pathmod.sep, '/') + return str(self).replace(self._pathmod.sep, '/') def __bytes__(self): """Return the bytes representation of the path. This is only @@ -698,8 +621,8 @@ def with_name(self, name): """Return a new path with the file name changed.""" if not self.name: raise ValueError("%r has an empty name" % (self,)) - drv, root, parts = self._flavour.parse_parts((name,)) - m = self._flavour.pathmod + drv, root, parts = self._parse_parts((name,)) + m = self._pathmod if (not name or name[-1] in [m.sep, m.altsep] or drv or root or len(parts) != 1): raise ValueError("Invalid name %r" % (name)) @@ -715,7 +638,7 @@ def with_suffix(self, suffix): has no suffix, add given suffix. If the given suffix is an empty string, remove the suffix from the path. """ - m = self._flavour.pathmod + m = self._pathmod if m.sep in suffix or m.altsep and m.altsep in suffix: raise ValueError("Invalid suffix %r" % (suffix,)) if suffix and not suffix.startswith('.') or suffix == '.': @@ -838,7 +761,7 @@ def match(self, path_pattern): """ cf = self._flavour.casefold path_pattern = cf(path_pattern) - drv, root, pat_parts = self._flavour.parse_parts((path_pattern,)) + drv, root, pat_parts = self._parse_parts((path_pattern,)) if not pat_parts: raise ValueError("empty pattern") if drv and drv != cf(self._drv): @@ -869,8 +792,26 @@ class PurePosixPath(PurePath): However, you can also instantiate it directly on any system. """ _flavour = _posix_flavour + _pathmod = posixpath __slots__ = () + @classmethod + def _splitroot(cls, part): + sep = cls._pathmod.sep + if part and part[0] == sep: + stripped_part = part.lstrip(sep) + # According to POSIX path resolution: + # http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap04.html#tag_04_11 + # "A pathname that begins with two successive slashes may be + # interpreted in an implementation-defined manner, although more + # than two leading slashes shall be treated as a single slash". + if len(part) - len(stripped_part) == 2: + return '', sep * 2, stripped_part + else: + return '', sep, stripped_part + else: + return '', '', part + def is_reserved(self): return False @@ -890,6 +831,9 @@ class PureWindowsPath(PurePath): However, you can also instantiate it directly on any system. """ _flavour = _windows_flavour + _pathmod = ntpath + _drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') + _ext_namespace_prefix = '\\\\?\\' _reserved_names = ( {'CON', 'PRN', 'AUX', 'NUL'} | {'COM%d' % i for i in range(1, 10)} | @@ -897,6 +841,64 @@ class PureWindowsPath(PurePath): ) __slots__ = () + # Interesting findings about extended paths: + # - '\\?\c:\a', '//?/c:\a' and '//?/c:/a' are all supported + # but '\\?\c:/a' is not + # - extended paths are always absolute; "relative" extended paths will + # fail. + + @classmethod + def _splitroot(cls, part): + sep = cls._pathmod.sep + first = part[0:1] + second = part[1:2] + if (second == sep and first == sep): + # XXX extended paths should also disable the collapsing of "." + # components (according to MSDN docs). + prefix, part = cls._split_extended_path(part) + first = part[0:1] + second = part[1:2] + else: + prefix = '' + third = part[2:3] + if (second == sep and first == sep and third != sep): + # is a UNC path: + # vvvvvvvvvvvvvvvvvvvvv root + # \\machine\mountpoint\directory\etc\... + # directory ^^^^^^^^^^^^^^ + index = part.find(sep, 2) + if index != -1: + index2 = part.find(sep, index + 1) + # a UNC path can't have two slashes in a row + # (after the initial two) + if index2 != index + 1: + if index2 == -1: + index2 = len(part) + if prefix: + return prefix + part[1:index2], sep, part[index2+1:] + else: + return part[:index2], sep, part[index2+1:] + drv = root = '' + if second == ':' and first in cls._drive_letters: + drv = part[:2] + part = part[2:] + first = third + if first == sep: + root = first + part = part.lstrip(sep) + return prefix + drv, root, part + + @classmethod + def _split_extended_path(cls, s, ext_prefix=_ext_namespace_prefix): + prefix = '' + if s.startswith(ext_prefix): + prefix = s[:4] + s = s[4:] + if s.startswith('UNC\\'): + prefix += s[:3] + s = '\\' + s[3:] + return prefix, s + def is_reserved(self): # NOTE: the rules for reserved names seem somewhat complicated # (e.g. r"..\NUL" is reserved but not r"foo\NUL"). @@ -1011,7 +1013,7 @@ def glob(self, pattern): sys.audit("pathlib.Path.glob", self, pattern) if not pattern: raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - drv, root, pattern_parts = self._flavour.parse_parts((pattern,)) + drv, root, pattern_parts = self._parse_parts((pattern,)) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") selector = _make_selector(tuple(pattern_parts), self._flavour) @@ -1024,7 +1026,7 @@ def rglob(self, pattern): this subtree. """ sys.audit("pathlib.Path.rglob", self, pattern) - drv, root, pattern_parts = self._flavour.parse_parts((pattern,)) + drv, root, pattern_parts = self._parse_parts((pattern,)) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 9ef2e9e6fe07a4..80cc921bfbeb2f 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -20,22 +20,45 @@ except ImportError: grp = pwd = None +# +# Tests for the pure classes. +# + +class _BasePurePathTest(object): + + # Keys are canonical paths, values are list of tuples of arguments + # supposed to produce equal paths. + equivalences = { + 'a/b': [ + ('a', 'b'), ('a/', 'b'), ('a', 'b/'), ('a/', 'b/'), + ('a/b/',), ('a//b',), ('a//b//',), + # Empty components get removed. + ('', 'a', 'b'), ('a', '', 'b'), ('a', 'b', ''), + ], + '/b/c/d': [ + ('a', '/b/c', 'd'), ('a', '///b//c', 'd/'), + ('/a', '/b/c', 'd'), + # Empty components get removed. + ('/', 'b', '', 'c/d'), ('/', '', 'b/c/d'), ('', '/b/c/d'), + ], + } -class _BaseFlavourTest(object): + def setUp(self): + p = self.cls('a') + self.flavour = p._flavour + self.sep = p._pathmod.sep + self.altsep = p._pathmod.altsep def _check_parse_parts(self, arg, expected): - f = self.flavour.parse_parts - sep = self.flavour.pathmod.sep - altsep = self.flavour.pathmod.altsep - actual = f([x.replace('/', sep) for x in arg]) + f = self.cls()._parse_parts + actual = f([x.replace('/', self.sep) for x in arg]) self.assertEqual(actual, expected) - if altsep: - actual = f([x.replace('/', altsep) for x in arg]) + if self.altsep: + actual = f([x.replace('/', self.altsep) for x in arg]) self.assertEqual(actual, expected) def test_parse_parts_common(self): check = self._check_parse_parts - sep = self.flavour.pathmod.sep # Unanchored parts. check([], ('', '', [])) check(['a'], ('', '', ['a'])) @@ -54,140 +77,12 @@ def test_parse_parts_common(self): check(['a', '.', 'b'], ('', '', ['a', 'b'])) check(['a', '.', '.'], ('', '', ['a'])) # The first part is anchored. - check(['/a/b'], ('', sep, [sep, 'a', 'b'])) - check(['/a', 'b'], ('', sep, [sep, 'a', 'b'])) - check(['/a/', 'b'], ('', sep, [sep, 'a', 'b'])) + check(['/a/b'], ('', self.sep, [self.sep, 'a', 'b'])) + check(['/a', 'b'], ('', self.sep, [self.sep, 'a', 'b'])) + check(['/a/', 'b'], ('', self.sep, [self.sep, 'a', 'b'])) # Ignoring parts before an anchored part. - check(['a', '/b', 'c'], ('', sep, [sep, 'b', 'c'])) - check(['a', '/b', '/c'], ('', sep, [sep, 'c'])) - - -class PosixFlavourTest(_BaseFlavourTest, unittest.TestCase): - flavour = pathlib._posix_flavour - - def test_parse_parts(self): - check = self._check_parse_parts - # Collapsing of excess leading slashes, except for the double-slash - # special case. - check(['//a', 'b'], ('', '//', ['//', 'a', 'b'])) - check(['///a', 'b'], ('', '/', ['/', 'a', 'b'])) - check(['////a', 'b'], ('', '/', ['/', 'a', 'b'])) - # Paths which look like NT paths aren't treated specially. - check(['c:a'], ('', '', ['c:a'])) - check(['c:\\a'], ('', '', ['c:\\a'])) - check(['\\a'], ('', '', ['\\a'])) - - def test_splitroot(self): - f = self.flavour.splitroot - self.assertEqual(f(''), ('', '', '')) - self.assertEqual(f('a'), ('', '', 'a')) - self.assertEqual(f('a/b'), ('', '', 'a/b')) - self.assertEqual(f('a/b/'), ('', '', 'a/b/')) - self.assertEqual(f('/a'), ('', '/', 'a')) - self.assertEqual(f('/a/b'), ('', '/', 'a/b')) - self.assertEqual(f('/a/b/'), ('', '/', 'a/b/')) - # The root is collapsed when there are redundant slashes - # except when there are exactly two leading slashes, which - # is a special case in POSIX. - self.assertEqual(f('//a'), ('', '//', 'a')) - self.assertEqual(f('///a'), ('', '/', 'a')) - self.assertEqual(f('///a/b'), ('', '/', 'a/b')) - # Paths which look like NT paths aren't treated specially. - self.assertEqual(f('c:/a/b'), ('', '', 'c:/a/b')) - self.assertEqual(f('\\/a/b'), ('', '', '\\/a/b')) - self.assertEqual(f('\\a\\b'), ('', '', '\\a\\b')) - - -class NTFlavourTest(_BaseFlavourTest, unittest.TestCase): - flavour = pathlib._windows_flavour - - def test_parse_parts(self): - check = self._check_parse_parts - # First part is anchored. - check(['c:'], ('c:', '', ['c:'])) - check(['c:/'], ('c:', '\\', ['c:\\'])) - check(['/'], ('', '\\', ['\\'])) - check(['c:a'], ('c:', '', ['c:', 'a'])) - check(['c:/a'], ('c:', '\\', ['c:\\', 'a'])) - check(['/a'], ('', '\\', ['\\', 'a'])) - # UNC paths. - check(['//a/b'], ('\\\\a\\b', '\\', ['\\\\a\\b\\'])) - check(['//a/b/'], ('\\\\a\\b', '\\', ['\\\\a\\b\\'])) - check(['//a/b/c'], ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c'])) - # Second part is anchored, so that the first part is ignored. - check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'b', 'c'])) - check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) - # UNC paths. - check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd'])) - # Collapsing and stripping excess slashes. - check(['a', 'Z://b//c/', 'd/'], ('Z:', '\\', ['Z:\\', 'b', 'c', 'd'])) - # UNC paths. - check(['a', '//b/c//', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd'])) - # Extended paths. - check(['//?/c:/'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\'])) - check(['//?/c:/a'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'a'])) - check(['//?/c:/a', '/b'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'b'])) - # Extended UNC paths (format is "\\?\UNC\server\share"). - check(['//?/UNC/b/c'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\'])) - check(['//?/UNC/b/c/d'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\', 'd'])) - # Second part has a root but not drive. - check(['a', '/b', 'c'], ('', '\\', ['\\', 'b', 'c'])) - check(['Z:/a', '/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) - check(['//?/Z:/a', '/b', 'c'], ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c'])) - - def test_splitroot(self): - f = self.flavour.splitroot - self.assertEqual(f(''), ('', '', '')) - self.assertEqual(f('a'), ('', '', 'a')) - self.assertEqual(f('a\\b'), ('', '', 'a\\b')) - self.assertEqual(f('\\a'), ('', '\\', 'a')) - self.assertEqual(f('\\a\\b'), ('', '\\', 'a\\b')) - self.assertEqual(f('c:a\\b'), ('c:', '', 'a\\b')) - self.assertEqual(f('c:\\a\\b'), ('c:', '\\', 'a\\b')) - # Redundant slashes in the root are collapsed. - self.assertEqual(f('\\\\a'), ('', '\\', 'a')) - self.assertEqual(f('\\\\\\a/b'), ('', '\\', 'a/b')) - self.assertEqual(f('c:\\\\a'), ('c:', '\\', 'a')) - self.assertEqual(f('c:\\\\\\a/b'), ('c:', '\\', 'a/b')) - # Valid UNC paths. - self.assertEqual(f('\\\\a\\b'), ('\\\\a\\b', '\\', '')) - self.assertEqual(f('\\\\a\\b\\'), ('\\\\a\\b', '\\', '')) - self.assertEqual(f('\\\\a\\b\\c\\d'), ('\\\\a\\b', '\\', 'c\\d')) - # These are non-UNC paths (according to ntpath.py and test_ntpath). - # However, command.com says such paths are invalid, so it's - # difficult to know what the right semantics are. - self.assertEqual(f('\\\\\\a\\b'), ('', '\\', 'a\\b')) - self.assertEqual(f('\\\\a'), ('', '\\', 'a')) - - -# -# Tests for the pure classes. -# - -class _BasePurePathTest(object): - - # Keys are canonical paths, values are list of tuples of arguments - # supposed to produce equal paths. - equivalences = { - 'a/b': [ - ('a', 'b'), ('a/', 'b'), ('a', 'b/'), ('a/', 'b/'), - ('a/b/',), ('a//b',), ('a//b//',), - # Empty components get removed. - ('', 'a', 'b'), ('a', '', 'b'), ('a', 'b', ''), - ], - '/b/c/d': [ - ('a', '/b/c', 'd'), ('a', '///b//c', 'd/'), - ('/a', '/b/c', 'd'), - # Empty components get removed. - ('/', 'b', '', 'c/d'), ('/', '', 'b/c/d'), ('', '/b/c/d'), - ], - } - - def setUp(self): - p = self.cls('a') - self.flavour = p._flavour - self.sep = self.flavour.pathmod.sep - self.altsep = self.flavour.pathmod.altsep + check(['a', '/b', 'c'], ('', self.sep, [self.sep, 'b', 'c'])) + check(['a', '/b', '/c'], ('', self.sep, [self.sep, 'c'])) def test_constructor_common(self): P = self.cls @@ -701,6 +596,38 @@ def test_pickling_common(self): class PurePosixPathTest(_BasePurePathTest, unittest.TestCase): cls = pathlib.PurePosixPath + def test_parse_parts(self): + check = self._check_parse_parts + # Collapsing of excess leading slashes, except for the double-slash + # special case. + check(['//a', 'b'], ('', '//', ['//', 'a', 'b'])) + check(['///a', 'b'], ('', '/', ['/', 'a', 'b'])) + check(['////a', 'b'], ('', '/', ['/', 'a', 'b'])) + # Paths which look like NT paths aren't treated specially. + check(['c:a'], ('', '', ['c:a'])) + check(['c:\\a'], ('', '', ['c:\\a'])) + check(['\\a'], ('', '', ['\\a'])) + + def test_splitroot(self): + f = self.cls._splitroot + self.assertEqual(f(''), ('', '', '')) + self.assertEqual(f('a'), ('', '', 'a')) + self.assertEqual(f('a/b'), ('', '', 'a/b')) + self.assertEqual(f('a/b/'), ('', '', 'a/b/')) + self.assertEqual(f('/a'), ('', '/', 'a')) + self.assertEqual(f('/a/b'), ('', '/', 'a/b')) + self.assertEqual(f('/a/b/'), ('', '/', 'a/b/')) + # The root is collapsed when there are redundant slashes + # except when there are exactly two leading slashes, which + # is a special case in POSIX. + self.assertEqual(f('//a'), ('', '//', 'a')) + self.assertEqual(f('///a'), ('', '/', 'a')) + self.assertEqual(f('///a/b'), ('', '/', 'a/b')) + # Paths which look like NT paths aren't treated specially. + self.assertEqual(f('c:/a/b'), ('', '', 'c:/a/b')) + self.assertEqual(f('\\/a/b'), ('', '', '\\/a/b')) + self.assertEqual(f('\\a\\b'), ('', '', '\\a\\b')) + def test_root(self): P = self.cls self.assertEqual(P('/a/b').root, '/') @@ -790,6 +717,65 @@ class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase): ], }) + + def test_parse_parts(self): + check = self._check_parse_parts + # First part is anchored. + check(['c:'], ('c:', '', ['c:'])) + check(['c:/'], ('c:', '\\', ['c:\\'])) + check(['/'], ('', '\\', ['\\'])) + check(['c:a'], ('c:', '', ['c:', 'a'])) + check(['c:/a'], ('c:', '\\', ['c:\\', 'a'])) + check(['/a'], ('', '\\', ['\\', 'a'])) + # UNC paths. + check(['//a/b'], ('\\\\a\\b', '\\', ['\\\\a\\b\\'])) + check(['//a/b/'], ('\\\\a\\b', '\\', ['\\\\a\\b\\'])) + check(['//a/b/c'], ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c'])) + # Second part is anchored, so that the first part is ignored. + check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'b', 'c'])) + check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) + # UNC paths. + check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd'])) + # Collapsing and stripping excess slashes. + check(['a', 'Z://b//c/', 'd/'], ('Z:', '\\', ['Z:\\', 'b', 'c', 'd'])) + # UNC paths. + check(['a', '//b/c//', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd'])) + # Extended paths. + check(['//?/c:/'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\'])) + check(['//?/c:/a'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'a'])) + check(['//?/c:/a', '/b'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'b'])) + # Extended UNC paths (format is "\\?\UNC\server\share"). + check(['//?/UNC/b/c'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\'])) + check(['//?/UNC/b/c/d'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\', 'd'])) + # Second part has a root but not drive. + check(['a', '/b', 'c'], ('', '\\', ['\\', 'b', 'c'])) + check(['Z:/a', '/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) + check(['//?/Z:/a', '/b', 'c'], ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c'])) + + def test_splitroot(self): + f = self.cls._splitroot + self.assertEqual(f(''), ('', '', '')) + self.assertEqual(f('a'), ('', '', 'a')) + self.assertEqual(f('a\\b'), ('', '', 'a\\b')) + self.assertEqual(f('\\a'), ('', '\\', 'a')) + self.assertEqual(f('\\a\\b'), ('', '\\', 'a\\b')) + self.assertEqual(f('c:a\\b'), ('c:', '', 'a\\b')) + self.assertEqual(f('c:\\a\\b'), ('c:', '\\', 'a\\b')) + # Redundant slashes in the root are collapsed. + self.assertEqual(f('\\\\a'), ('', '\\', 'a')) + self.assertEqual(f('\\\\\\a/b'), ('', '\\', 'a/b')) + self.assertEqual(f('c:\\\\a'), ('c:', '\\', 'a')) + self.assertEqual(f('c:\\\\\\a/b'), ('c:', '\\', 'a/b')) + # Valid UNC paths. + self.assertEqual(f('\\\\a\\b'), ('\\\\a\\b', '\\', '')) + self.assertEqual(f('\\\\a\\b\\'), ('\\\\a\\b', '\\', '')) + self.assertEqual(f('\\\\a\\b\\c\\d'), ('\\\\a\\b', '\\', 'c\\d')) + # These are non-UNC paths (according to ntpath.py and test_ntpath). + # However, command.com says such paths are invalid, so it's + # difficult to know what the right semantics are. + self.assertEqual(f('\\\\\\a\\b'), ('', '\\', 'a\\b')) + self.assertEqual(f('\\\\a'), ('', '\\', 'a')) + def test_str(self): p = self.cls('a/b/c') self.assertEqual(str(p), 'a\\b\\c') From 66f5453f8eb80bad4406858c48dd5662936a0070 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 15 May 2021 09:54:00 +0100 Subject: [PATCH 06/14] Remove `flavour.compile_pattern()` --- Lib/pathlib.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 3ba3ac6fda321c..aff77e5cc12df4 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -65,9 +65,6 @@ def casefold(self, s): def casefold_parts(self, parts): return [p.lower() for p in parts] - def compile_pattern(self, pattern): - return re.compile(fnmatch.translate(pattern), re.IGNORECASE).fullmatch - class _PosixFlavour(_Flavour): has_drv = False @@ -80,9 +77,6 @@ def casefold(self, s): def casefold_parts(self, parts): return parts - def compile_pattern(self, pattern): - return re.compile(fnmatch.translate(pattern)).fullmatch - _windows_flavour = _WindowsFlavour() _posix_flavour = _PosixFlavour() @@ -179,7 +173,7 @@ def group(self, path): # Globbing helpers # -def _make_selector(pattern_parts, flavour): +def _make_selector(pattern_parts, case_insensitive): pat = pattern_parts[0] child_parts = pattern_parts[1:] if pat == '**': @@ -190,7 +184,7 @@ def _make_selector(pattern_parts, flavour): cls = _WildcardSelector else: cls = _PreciseSelector - return cls(pat, child_parts, flavour) + return cls(pat, child_parts, case_insensitive) if hasattr(functools, "lru_cache"): _make_selector = functools.lru_cache()(_make_selector) @@ -200,10 +194,10 @@ class _Selector: """A selector matches a specific glob pattern part against the children of a given path.""" - def __init__(self, child_parts, flavour): + def __init__(self, child_parts, case_insensitive): self.child_parts = child_parts if child_parts: - self.successor = _make_selector(child_parts, flavour) + self.successor = _make_selector(child_parts, case_insensitive) self.dironly = True else: self.successor = _TerminatingSelector() @@ -229,9 +223,9 @@ def _select_from(self, parent_path, is_dir, exists, scandir): class _PreciseSelector(_Selector): - def __init__(self, name, child_parts, flavour): + def __init__(self, name, child_parts, case_insensitive): self.name = name - _Selector.__init__(self, child_parts, flavour) + _Selector.__init__(self, child_parts, case_insensitive) def _select_from(self, parent_path, is_dir, exists, scandir): try: @@ -245,9 +239,10 @@ def _select_from(self, parent_path, is_dir, exists, scandir): class _WildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour): - self.match = flavour.compile_pattern(pat) - _Selector.__init__(self, child_parts, flavour) + def __init__(self, pat, child_parts, case_insensitive): + flags = re.IGNORECASE if case_insensitive else 0 + self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch + _Selector.__init__(self, child_parts, case_insensitive) def _select_from(self, parent_path, is_dir, exists, scandir): try: @@ -276,8 +271,8 @@ def _select_from(self, parent_path, is_dir, exists, scandir): class _RecursiveWildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour): - _Selector.__init__(self, child_parts, flavour) + def __init__(self, pat, child_parts, case_insensitive): + _Selector.__init__(self, child_parts, case_insensitive) def _iterate_directories(self, parent_path, is_dir, scandir): yield parent_path @@ -793,6 +788,7 @@ class PurePosixPath(PurePath): """ _flavour = _posix_flavour _pathmod = posixpath + _case_insensitive = False __slots__ = () @classmethod @@ -832,6 +828,7 @@ class PureWindowsPath(PurePath): """ _flavour = _windows_flavour _pathmod = ntpath + _case_insensitive = True _drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') _ext_namespace_prefix = '\\\\?\\' _reserved_names = ( @@ -1016,7 +1013,7 @@ def glob(self, pattern): drv, root, pattern_parts = self._parse_parts((pattern,)) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") - selector = _make_selector(tuple(pattern_parts), self._flavour) + selector = _make_selector(tuple(pattern_parts), self._case_insensitive) for p in selector.select_from(self): yield p @@ -1029,7 +1026,8 @@ def rglob(self, pattern): drv, root, pattern_parts = self._parse_parts((pattern,)) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") - selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour) + selector = _make_selector(("**",) + tuple(pattern_parts), + self._case_insensitive) for p in selector.select_from(self): yield p From 3d277071ec5ecf19e2b12fe9d521cf01c4f0abfc Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 15 May 2021 09:58:27 +0100 Subject: [PATCH 07/14] Remove `flavour.is_supported` --- Lib/pathlib.py | 11 ++++------- Lib/test/test_pathlib.py | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index aff77e5cc12df4..93f049e3502b4a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -57,8 +57,6 @@ class _WindowsFlavour(_Flavour): has_drv = True - is_supported = (os.name == 'nt') - def casefold(self, s): return s.lower() @@ -69,8 +67,6 @@ def casefold_parts(self, parts): class _PosixFlavour(_Flavour): has_drv = False - is_supported = (os.name != 'nt') - def casefold(self, s): return s @@ -788,6 +784,7 @@ class PurePosixPath(PurePath): """ _flavour = _posix_flavour _pathmod = posixpath + _supported = (os.name != 'nt') _case_insensitive = False __slots__ = () @@ -828,6 +825,7 @@ class PureWindowsPath(PurePath): """ _flavour = _windows_flavour _pathmod = ntpath + _supported = (os.name == 'nt') _case_insensitive = True _drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') _ext_namespace_prefix = '\\\\?\\' @@ -940,11 +938,10 @@ class Path(PurePath): def __new__(cls, *args, **kwargs): if cls is Path: cls = WindowsPath if os.name == 'nt' else PosixPath - self = cls._from_parts(args) - if not self._flavour.is_supported: + elif not cls._supported: raise NotImplementedError("cannot instantiate %r on your system" % (cls.__name__,)) - return self + return cls._from_parts(args) def _make_child_relpath(self, part): # This is an optimization used for dir walking. `part` must be diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 80cc921bfbeb2f..f601a8adf46468 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2400,7 +2400,7 @@ def test_concrete_class(self): self.assertIs(type(p), pathlib.WindowsPath if os.name == 'nt' else pathlib.PosixPath) - def test_unsupported_flavour(self): + def test_unsupported_type(self): if os.name == 'nt': self.assertRaises(NotImplementedError, pathlib.PosixPath) else: From b4beecc71e1bb5c117e51e6022982b6c1bac56d8 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 15 May 2021 10:02:53 +0100 Subject: [PATCH 08/14] Remove `flavour.has_drv`. --- Lib/pathlib.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 93f049e3502b4a..7a2ea4694a3d89 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -55,8 +55,6 @@ class _WindowsFlavour(_Flavour): # Reference for Windows paths can be found at # http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx - has_drv = True - def casefold(self, s): return s.lower() @@ -65,8 +63,6 @@ def casefold_parts(self, parts): class _PosixFlavour(_Flavour): - has_drv = False - def casefold(self, s): return s @@ -737,9 +733,7 @@ def parents(self): def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, a drive).""" - if not self._root: - return False - return not self._flavour.has_drv or bool(self._drv) + raise NotImplementedError def is_reserved(self): """Return True if the path contains one of the special names reserved @@ -805,6 +799,9 @@ def _splitroot(cls, part): else: return '', '', part + def is_absolute(self): + return bool(self._root) + def is_reserved(self): return False @@ -894,6 +891,9 @@ def _split_extended_path(cls, s, ext_prefix=_ext_namespace_prefix): s = '\\' + s[3:] return prefix, s + def is_absolute(self): + return bool(self._root) and bool(self._drv) + def is_reserved(self): # NOTE: the rules for reserved names seem somewhat complicated # (e.g. r"..\NUL" is reserved but not r"foo\NUL"). From 3cf15c68acba6683a9052736d1f4d8c05db1b7db Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 15 May 2021 10:20:10 +0100 Subject: [PATCH 09/14] Remove `flavour.casefold()` and `flavour.casefold_parts()` --- Lib/pathlib.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 7a2ea4694a3d89..ee2b2bf455d41d 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -54,20 +54,11 @@ class _Flavour(object): class _WindowsFlavour(_Flavour): # Reference for Windows paths can be found at # http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx - - def casefold(self, s): - return s.lower() - - def casefold_parts(self, parts): - return [p.lower() for p in parts] + pass class _PosixFlavour(_Flavour): - def casefold(self, s): - return s - - def casefold_parts(self, parts): - return parts + pass _windows_flavour = _WindowsFlavour() @@ -465,7 +456,7 @@ def _join_parsed_parts(self, drv, root, parts, drv2, root2, parts2): if not drv2 and drv: return drv, root2, [drv + root2] + parts2[1:] elif drv2: - if drv2 == drv or self._flavour.casefold(drv2) == self._flavour.casefold(drv): + if drv2 == drv or self._casefold(drv2) == self._casefold(drv): # Same drive => second path is relative to the first return drv, root, parts + parts2[1:] else: @@ -473,6 +464,18 @@ def _join_parsed_parts(self, drv, root, parts, drv2, root2, parts2): return drv, root, parts + parts2 return drv2, root2, parts2 + @classmethod + def _casefold(cls, s): + if cls._case_insensitive: + return s.lower() + return s + + @classmethod + def _casefold_parts(cls, parts): + if cls._case_insensitive: + return [p.lower() for p in parts] + return parts + def __str__(self): """Return the string representation of the path, suitable for passing to system calls.""" @@ -509,7 +512,7 @@ def _cparts(self): try: return self._cached_cparts except AttributeError: - self._cached_cparts = self._flavour.casefold_parts(self._parts) + self._cached_cparts = self._casefold_parts(self._parts) return self._cached_cparts def __eq__(self, other): @@ -665,7 +668,7 @@ def relative_to(self, *other): else: to_abs_parts = to_parts n = len(to_abs_parts) - cf = self._flavour.casefold_parts + cf = self._casefold_parts if (root or drv) if n == 0 else cf(abs_parts[:n]) != cf(to_abs_parts): formatted = self._format_parsed_parts(to_drv, to_root, to_parts) raise ValueError("{!r} is not in the subpath of {!r}" @@ -744,7 +747,7 @@ def match(self, path_pattern): """ Return True if this path matches the given pattern. """ - cf = self._flavour.casefold + cf = self._casefold path_pattern = cf(path_pattern) drv, root, pat_parts = self._parse_parts((path_pattern,)) if not pat_parts: From 0897d9d2b9f79394e63911f39b66158b3e8ddaa1 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 15 May 2021 11:00:13 +0100 Subject: [PATCH 10/14] Remove `_Flavour`, `_WindowsFlavour` and `_PosixFlavour` --- Lib/pathlib.py | 35 ++++++++--------------------------- Lib/test/test_pathlib.py | 5 ++--- 2 files changed, 10 insertions(+), 30 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index ee2b2bf455d41d..492e71017ba55e 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -46,25 +46,6 @@ def _is_wildcard_pattern(pat): return "*" in pat or "?" in pat or "[" in pat -class _Flavour(object): - """A flavour implements a particular (platform-specific) set of path - semantics.""" - - -class _WindowsFlavour(_Flavour): - # Reference for Windows paths can be found at - # http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx - pass - - -class _PosixFlavour(_Flavour): - pass - - -_windows_flavour = _WindowsFlavour() -_posix_flavour = _PosixFlavour() - - class _Accessor: """An accessor implements a particular (system-specific or not) way of accessing paths on the filesystem.""" @@ -516,9 +497,9 @@ def _cparts(self): return self._cached_cparts def __eq__(self, other): - if not isinstance(other, PurePath): + if not isinstance(other, type(self)): return NotImplemented - return self._cparts == other._cparts and self._flavour is other._flavour + return self._cparts == other._cparts def __hash__(self): try: @@ -528,22 +509,22 @@ def __hash__(self): return self._hash def __lt__(self, other): - if not isinstance(other, PurePath) or self._flavour is not other._flavour: + if not isinstance(other, type(self)): return NotImplemented return self._cparts < other._cparts def __le__(self, other): - if not isinstance(other, PurePath) or self._flavour is not other._flavour: + if not isinstance(other, type(self)): return NotImplemented return self._cparts <= other._cparts def __gt__(self, other): - if not isinstance(other, PurePath) or self._flavour is not other._flavour: + if not isinstance(other, type(self)): return NotImplemented return self._cparts > other._cparts def __ge__(self, other): - if not isinstance(other, PurePath) or self._flavour is not other._flavour: + if not isinstance(other, type(self)): return NotImplemented return self._cparts >= other._cparts @@ -779,7 +760,6 @@ class PurePosixPath(PurePath): On a POSIX system, instantiating a PurePath should return this object. However, you can also instantiate it directly on any system. """ - _flavour = _posix_flavour _pathmod = posixpath _supported = (os.name != 'nt') _case_insensitive = False @@ -823,7 +803,8 @@ class PureWindowsPath(PurePath): On a Windows system, instantiating a PurePath should return this object. However, you can also instantiate it directly on any system. """ - _flavour = _windows_flavour + # Reference for Windows paths can be found at + # http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx _pathmod = ntpath _supported = (os.name == 'nt') _case_insensitive = True diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index f601a8adf46468..e563f2dcc4816b 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -45,7 +45,6 @@ class _BasePurePathTest(object): def setUp(self): p = self.cls('a') - self.flavour = p._flavour self.sep = p._pathmod.sep self.altsep = p._pathmod.altsep @@ -1290,12 +1289,12 @@ def test_concrete_class(self): self.assertIs(type(p), pathlib.PureWindowsPath if os.name == 'nt' else pathlib.PurePosixPath) - def test_different_flavours_unequal(self): + def test_different_types_unequal(self): p = pathlib.PurePosixPath('a') q = pathlib.PureWindowsPath('a') self.assertNotEqual(p, q) - def test_different_flavours_unordered(self): + def test_different_types_unordered(self): p = pathlib.PurePosixPath('a') q = pathlib.PureWindowsPath('a') with self.assertRaises(TypeError): From 4e918e4c4abd57fa3c3da624472b8cbf35935c91 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 15 May 2021 12:01:42 +0100 Subject: [PATCH 11/14] Simplify Path construction. --- Lib/pathlib.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 492e71017ba55e..8a945caf32d7c4 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -335,7 +335,7 @@ def __new__(cls, *args): def __reduce__(self): # Using the parts tuple helps share interned path parts # when pickling related paths. - return (self.__class__, tuple(self._parts)) + return (type(self), tuple(self._parts)) @classmethod def _parse_args(cls, args): @@ -398,10 +398,8 @@ def _parse_parts(cls, parts): @classmethod def _from_parts(cls, args): - # We need to call _parse_args on the instance, so as to get the - # right flavour. + drv, root, parts = cls._parse_args(args) self = object.__new__(cls) - drv, root, parts = self._parse_args(args) self._drv = drv self._root = root self._parts = parts @@ -428,7 +426,8 @@ def _make_child(self, args): self._drv, self._root, self._parts, drv, root, parts) return self._from_parsed_parts(drv, root, parts) - def _join_parsed_parts(self, drv, root, parts, drv2, root2, parts2): + @classmethod + def _join_parsed_parts(cls, drv, root, parts, drv2, root2, parts2): """ Join the two paths represented by the respective (drive, root, parts) tuples. Return a new (drive, root, parts) tuple. @@ -437,7 +436,7 @@ def _join_parsed_parts(self, drv, root, parts, drv2, root2, parts2): if not drv2 and drv: return drv, root2, [drv + root2] + parts2[1:] elif drv2: - if drv2 == drv or self._casefold(drv2) == self._casefold(drv): + if drv2 == drv or cls._casefold(drv2) == cls._casefold(drv): # Same drive => second path is relative to the first return drv, root, parts + parts2[1:] else: @@ -481,7 +480,7 @@ def __bytes__(self): return os.fsencode(self) def __repr__(self): - return "{}({!r})".format(self.__class__.__name__, self.as_posix()) + return "{}({!r})".format(type(self).__name__, self.as_posix()) def as_uri(self): """Return the path as a 'file' URI.""" @@ -1114,7 +1113,7 @@ def write_text(self, data, encoding=None, errors=None, newline=None): """ if not isinstance(data, str): raise TypeError('data must be str, not %s' % - data.__class__.__name__) + type(data).__name__) encoding = io.text_encoding(encoding) with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: return f.write(data) @@ -1197,7 +1196,7 @@ def rename(self, target): Returns the new Path instance pointing to the target path. """ self._accessor.rename(self, target) - return self.__class__(target) + return self._from_parts((target,)) def replace(self, target): """ @@ -1210,7 +1209,7 @@ def replace(self, target): Returns the new Path instance pointing to the target path. """ self._accessor.replace(self, target) - return self.__class__(target) + return self._from_parts((target,)) def symlink_to(self, target, target_is_directory=False): """ From f0beb661aea0bb0e9464f741409a77c4acb256cf Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 15 May 2021 12:10:21 +0100 Subject: [PATCH 12/14] Move Windows constants to module scope. --- Lib/pathlib.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8a945caf32d7c4..be9edb9865a493 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -35,6 +35,14 @@ _WINERROR_INVALID_NAME, _WINERROR_CANT_RESOLVE_FILENAME) +_win_drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') +_win_ext_namespace_prefix = '\\\\?\\' +_win_reserved_names = ( + {'CON', 'PRN', 'AUX', 'NUL'} | + {'COM%d' % i for i in range(1, 10)} | + {'LPT%d' % i for i in range(1, 10)} +) + def _ignore_error(exception): return (getattr(exception, 'errno', None) in _IGNORED_ERROS or getattr(exception, 'winerror', None) in _IGNORED_WINERRORS) @@ -807,13 +815,6 @@ class PureWindowsPath(PurePath): _pathmod = ntpath _supported = (os.name == 'nt') _case_insensitive = True - _drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') - _ext_namespace_prefix = '\\\\?\\' - _reserved_names = ( - {'CON', 'PRN', 'AUX', 'NUL'} | - {'COM%d' % i for i in range(1, 10)} | - {'LPT%d' % i for i in range(1, 10)} - ) __slots__ = () # Interesting findings about extended paths: @@ -854,7 +855,7 @@ def _splitroot(cls, part): else: return part[:index2], sep, part[index2+1:] drv = root = '' - if second == ':' and first in cls._drive_letters: + if second == ':' and first in _win_drive_letters: drv = part[:2] part = part[2:] first = third @@ -864,7 +865,7 @@ def _splitroot(cls, part): return prefix + drv, root, part @classmethod - def _split_extended_path(cls, s, ext_prefix=_ext_namespace_prefix): + def _split_extended_path(cls, s, ext_prefix=_win_ext_namespace_prefix): prefix = '' if s.startswith(ext_prefix): prefix = s[:4] @@ -887,7 +888,7 @@ def is_reserved(self): if self._parts[0].startswith('\\\\'): # UNC paths are never reserved return False - return self._parts[-1].partition('.')[0].upper() in self._reserved_names + return self._parts[-1].partition('.')[0].upper() in _win_reserved_names def as_uri(self): if not self.is_absolute(): From 22942e40a22c399658e5925b0211c942107833c0 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 15 May 2021 12:13:29 +0100 Subject: [PATCH 13/14] Remove `Path._supported` --- Lib/pathlib.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index be9edb9865a493..5162a26596573e 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -768,7 +768,6 @@ class PurePosixPath(PurePath): However, you can also instantiate it directly on any system. """ _pathmod = posixpath - _supported = (os.name != 'nt') _case_insensitive = False __slots__ = () @@ -813,7 +812,6 @@ class PureWindowsPath(PurePath): # Reference for Windows paths can be found at # http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx _pathmod = ntpath - _supported = (os.name == 'nt') _case_insensitive = True __slots__ = () @@ -922,7 +920,8 @@ class Path(PurePath): def __new__(cls, *args, **kwargs): if cls is Path: cls = WindowsPath if os.name == 'nt' else PosixPath - elif not cls._supported: + elif (issubclass(cls, PosixPath) and os.name == 'nt') or \ + (issubclass(cls, WindowsPath) and os.name != 'nt'): raise NotImplementedError("cannot instantiate %r on your system" % (cls.__name__,)) return cls._from_parts(args) From 6f9fd60d1ce94392ff288e0930150bc37cea7b3d Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 15 May 2021 19:51:13 +0100 Subject: [PATCH 14/14] Move `PurePosixPath._splitroot()`, `is_absolute()` and `is_reserved()` into `PurePath` These are all reasonable default implementations for `AbstractPath` --- Lib/pathlib.py | 46 ++++++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 5162a26596573e..22b24c8524b6a7 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -329,6 +329,7 @@ class PurePath(object): '_drv', '_root', '_parts', '_str', '_hash', '_pparts', '_cached_cparts', ) + _case_insensitive = False def __new__(cls, *args): """Construct a PurePath from one or several strings and or existing @@ -345,6 +346,23 @@ def __reduce__(self): # when pickling related paths. return (type(self), tuple(self._parts)) + @classmethod + def _splitroot(cls, part): + sep = cls._pathmod.sep + if part and part[0] == sep: + stripped_part = part.lstrip(sep) + # According to POSIX path resolution: + # http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap04.html#tag_04_11 + # "A pathname that begins with two successive slashes may be + # interpreted in an implementation-defined manner, although more + # than two leading slashes shall be treated as a single slash". + if len(part) - len(stripped_part) == 2: + return '', sep * 2, stripped_part + else: + return '', sep, stripped_part + else: + return '', '', part + @classmethod def _parse_args(cls, args): # This is useful when you don't want to create an instance, just @@ -724,12 +742,12 @@ def parents(self): def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, a drive).""" - raise NotImplementedError + return bool(self._root) def is_reserved(self): """Return True if the path contains one of the special names reserved by the system, if any.""" - raise NotImplementedError + return False def match(self, path_pattern): """ @@ -768,32 +786,8 @@ class PurePosixPath(PurePath): However, you can also instantiate it directly on any system. """ _pathmod = posixpath - _case_insensitive = False __slots__ = () - @classmethod - def _splitroot(cls, part): - sep = cls._pathmod.sep - if part and part[0] == sep: - stripped_part = part.lstrip(sep) - # According to POSIX path resolution: - # http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap04.html#tag_04_11 - # "A pathname that begins with two successive slashes may be - # interpreted in an implementation-defined manner, although more - # than two leading slashes shall be treated as a single slash". - if len(part) - len(stripped_part) == 2: - return '', sep * 2, stripped_part - else: - return '', sep, stripped_part - else: - return '', '', part - - def is_absolute(self): - return bool(self._root) - - def is_reserved(self): - return False - def as_uri(self): # We represent the path using the local filesystem encoding, # for portability to other applications.