diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 94e079fffef600..22b24c8524b6a7 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -35,6 +35,14 @@ _WINERROR_INVALID_NAME, _WINERROR_CANT_RESOLVE_FILENAME) +_win_drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') +_win_ext_namespace_prefix = '\\\\?\\' +_win_reserved_names = ( + {'CON', 'PRN', 'AUX', 'NUL'} | + {'COM%d' % i for i in range(1, 10)} | + {'LPT%d' % i for i in range(1, 10)} +) + def _ignore_error(exception): return (getattr(exception, 'errno', None) in _IGNORED_ERROS or getattr(exception, 'winerror', None) in _IGNORED_WINERRORS) @@ -46,224 +54,6 @@ def _is_wildcard_pattern(pat): return "*" in pat or "?" in pat or "[" in pat -class _Flavour(object): - """A flavour implements a particular (platform-specific) set of path - semantics.""" - - def __init__(self): - self.join = self.sep.join - - def parse_parts(self, parts): - parsed = [] - sep = self.sep - altsep = self.altsep - drv = root = '' - it = reversed(parts) - for part in it: - if not part: - continue - if altsep: - part = part.replace(altsep, sep) - drv, root, rel = self.splitroot(part) - if sep in rel: - for x in reversed(rel.split(sep)): - if x and x != '.': - parsed.append(sys.intern(x)) - else: - if rel and rel != '.': - parsed.append(sys.intern(rel)) - if drv or root: - if not drv: - # If no drive is present, try to find one in the previous - # parts. This makes the result of parsing e.g. - # ("C:", "/", "a") reasonably intuitive. - for part in it: - if not part: - continue - if altsep: - part = part.replace(altsep, sep) - drv = self.splitroot(part)[0] - if drv: - break - break - if drv or root: - parsed.append(drv + root) - parsed.reverse() - return drv, root, parsed - - def join_parsed_parts(self, drv, root, parts, drv2, root2, parts2): - """ - Join the two paths represented by the respective - (drive, root, parts) tuples. Return a new (drive, root, parts) tuple. - """ - if root2: - if not drv2 and drv: - return drv, root2, [drv + root2] + parts2[1:] - elif drv2: - if drv2 == drv or self.casefold(drv2) == self.casefold(drv): - # Same drive => second path is relative to the first - return drv, root, parts + parts2[1:] - else: - # Second path is non-anchored (common case) - return drv, root, parts + parts2 - return drv2, root2, parts2 - - -class _WindowsFlavour(_Flavour): - # Reference for Windows paths can be found at - # http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx - - sep = '\\' - altsep = '/' - has_drv = True - pathmod = ntpath - - is_supported = (os.name == 'nt') - - drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') - ext_namespace_prefix = '\\\\?\\' - - reserved_names = ( - {'CON', 'PRN', 'AUX', 'NUL'} | - {'COM%d' % i for i in range(1, 10)} | - {'LPT%d' % i for i in range(1, 10)} - ) - - # Interesting findings about extended paths: - # - '\\?\c:\a', '//?/c:\a' and '//?/c:/a' are all supported - # but '\\?\c:/a' is not - # - extended paths are always absolute; "relative" extended paths will - # fail. - - def splitroot(self, part, sep=sep): - first = part[0:1] - second = part[1:2] - if (second == sep and first == sep): - # XXX extended paths should also disable the collapsing of "." - # components (according to MSDN docs). - prefix, part = self._split_extended_path(part) - first = part[0:1] - second = part[1:2] - else: - prefix = '' - third = part[2:3] - if (second == sep and first == sep and third != sep): - # is a UNC path: - # vvvvvvvvvvvvvvvvvvvvv root - # \\machine\mountpoint\directory\etc\... - # directory ^^^^^^^^^^^^^^ - index = part.find(sep, 2) - if index != -1: - index2 = part.find(sep, index + 1) - # a UNC path can't have two slashes in a row - # (after the initial two) - if index2 != index + 1: - if index2 == -1: - index2 = len(part) - if prefix: - return prefix + part[1:index2], sep, part[index2+1:] - else: - return part[:index2], sep, part[index2+1:] - drv = root = '' - if second == ':' and first in self.drive_letters: - drv = part[:2] - part = part[2:] - first = third - if first == sep: - root = first - part = part.lstrip(sep) - return prefix + drv, root, part - - def casefold(self, s): - return s.lower() - - def casefold_parts(self, parts): - return [p.lower() for p in parts] - - def compile_pattern(self, pattern): - return re.compile(fnmatch.translate(pattern), re.IGNORECASE).fullmatch - - def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix): - prefix = '' - if s.startswith(ext_prefix): - prefix = s[:4] - s = s[4:] - if s.startswith('UNC\\'): - prefix += s[:3] - s = '\\' + s[3:] - return prefix, s - - def is_reserved(self, parts): - # NOTE: the rules for reserved names seem somewhat complicated - # (e.g. r"..\NUL" is reserved but not r"foo\NUL"). - # We err on the side of caution and return True for paths which are - # not considered reserved by Windows. - if not parts: - return False - if parts[0].startswith('\\\\'): - # UNC paths are never reserved - return False - return parts[-1].partition('.')[0].upper() in self.reserved_names - - def make_uri(self, path): - # Under Windows, file URIs use the UTF-8 encoding. - drive = path.drive - if len(drive) == 2 and drive[1] == ':': - # It's a path on a local drive => 'file:///c:/a/b' - rest = path.as_posix()[2:].lstrip('/') - return 'file:///%s/%s' % ( - drive, urlquote_from_bytes(rest.encode('utf-8'))) - else: - # It's a path on a network drive => 'file://host/share/a/b' - return 'file:' + urlquote_from_bytes(path.as_posix().encode('utf-8')) - - -class _PosixFlavour(_Flavour): - sep = '/' - altsep = '' - has_drv = False - pathmod = posixpath - - is_supported = (os.name != 'nt') - - def splitroot(self, part, sep=sep): - if part and part[0] == sep: - stripped_part = part.lstrip(sep) - # According to POSIX path resolution: - # http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap04.html#tag_04_11 - # "A pathname that begins with two successive slashes may be - # interpreted in an implementation-defined manner, although more - # than two leading slashes shall be treated as a single slash". - if len(part) - len(stripped_part) == 2: - return '', sep * 2, stripped_part - else: - return '', sep, stripped_part - else: - return '', '', part - - def casefold(self, s): - return s - - def casefold_parts(self, parts): - return parts - - def compile_pattern(self, pattern): - return re.compile(fnmatch.translate(pattern)).fullmatch - - def is_reserved(self, parts): - return False - - def make_uri(self, path): - # We represent the path using the local filesystem encoding, - # for portability to other applications. - bpath = bytes(path) - return 'file://' + urlquote_from_bytes(bpath) - - -_windows_flavour = _WindowsFlavour() -_posix_flavour = _PosixFlavour() - - class _Accessor: """An accessor implements a particular (system-specific or not) way of accessing paths on the filesystem.""" @@ -355,7 +145,7 @@ def group(self, path): # Globbing helpers # -def _make_selector(pattern_parts, flavour): +def _make_selector(pattern_parts, case_insensitive): pat = pattern_parts[0] child_parts = pattern_parts[1:] if pat == '**': @@ -366,7 +156,7 @@ def _make_selector(pattern_parts, flavour): cls = _WildcardSelector else: cls = _PreciseSelector - return cls(pat, child_parts, flavour) + return cls(pat, child_parts, case_insensitive) if hasattr(functools, "lru_cache"): _make_selector = functools.lru_cache()(_make_selector) @@ -376,10 +166,10 @@ class _Selector: """A selector matches a specific glob pattern part against the children of a given path.""" - def __init__(self, child_parts, flavour): + def __init__(self, child_parts, case_insensitive): self.child_parts = child_parts if child_parts: - self.successor = _make_selector(child_parts, flavour) + self.successor = _make_selector(child_parts, case_insensitive) self.dironly = True else: self.successor = _TerminatingSelector() @@ -405,9 +195,9 @@ def _select_from(self, parent_path, is_dir, exists, scandir): class _PreciseSelector(_Selector): - def __init__(self, name, child_parts, flavour): + def __init__(self, name, child_parts, case_insensitive): self.name = name - _Selector.__init__(self, child_parts, flavour) + _Selector.__init__(self, child_parts, case_insensitive) def _select_from(self, parent_path, is_dir, exists, scandir): try: @@ -421,9 +211,10 @@ def _select_from(self, parent_path, is_dir, exists, scandir): class _WildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour): - self.match = flavour.compile_pattern(pat) - _Selector.__init__(self, child_parts, flavour) + def __init__(self, pat, child_parts, case_insensitive): + flags = re.IGNORECASE if case_insensitive else 0 + self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch + _Selector.__init__(self, child_parts, case_insensitive) def _select_from(self, parent_path, is_dir, exists, scandir): try: @@ -452,8 +243,8 @@ def _select_from(self, parent_path, is_dir, exists, scandir): class _RecursiveWildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour): - _Selector.__init__(self, child_parts, flavour) + def __init__(self, pat, child_parts, case_insensitive): + _Selector.__init__(self, child_parts, case_insensitive) def _iterate_directories(self, parent_path, is_dir, scandir): yield parent_path @@ -538,6 +329,7 @@ class PurePath(object): '_drv', '_root', '_parts', '_str', '_hash', '_pparts', '_cached_cparts', ) + _case_insensitive = False def __new__(cls, *args): """Construct a PurePath from one or several strings and or existing @@ -552,7 +344,24 @@ def __new__(cls, *args): def __reduce__(self): # Using the parts tuple helps share interned path parts # when pickling related paths. - return (self.__class__, tuple(self._parts)) + return (type(self), tuple(self._parts)) + + @classmethod + def _splitroot(cls, part): + sep = cls._pathmod.sep + if part and part[0] == sep: + stripped_part = part.lstrip(sep) + # According to POSIX path resolution: + # http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap04.html#tag_04_11 + # "A pathname that begins with two successive slashes may be + # interpreted in an implementation-defined manner, although more + # than two leading slashes shall be treated as a single slash". + if len(part) - len(stripped_part) == 2: + return '', sep * 2, stripped_part + else: + return '', sep, stripped_part + else: + return '', '', part @classmethod def _parse_args(cls, args): @@ -572,14 +381,51 @@ def _parse_args(cls, args): "argument should be a str object or an os.PathLike " "object returning str, not %r" % type(a)) - return cls._flavour.parse_parts(parts) + return cls._parse_parts(parts) + + @classmethod + def _parse_parts(cls, parts): + parsed = [] + sep = cls._pathmod.sep + altsep = cls._pathmod.altsep + drv = root = '' + it = reversed(parts) + for part in it: + if not part: + continue + if altsep: + part = part.replace(altsep, sep) + drv, root, rel = cls._splitroot(part) + if sep in rel: + for x in reversed(rel.split(sep)): + if x and x != '.': + parsed.append(sys.intern(x)) + else: + if rel and rel != '.': + parsed.append(sys.intern(rel)) + if drv or root: + if not drv: + # If no drive is present, try to find one in the previous + # parts. This makes the result of parsing e.g. + # ("C:", "/", "a") reasonably intuitive. + for part in it: + if not part: + continue + if altsep: + part = part.replace(altsep, sep) + drv = cls._splitroot(part)[0] + if drv: + break + break + if drv or root: + parsed.append(drv + root) + parsed.reverse() + return drv, root, parsed @classmethod def _from_parts(cls, args): - # We need to call _parse_args on the instance, so as to get the - # right flavour. + drv, root, parts = cls._parse_args(args) self = object.__new__(cls) - drv, root, parts = self._parse_args(args) self._drv = drv self._root = root self._parts = parts @@ -596,16 +442,46 @@ def _from_parsed_parts(cls, drv, root, parts): @classmethod def _format_parsed_parts(cls, drv, root, parts): if drv or root: - return drv + root + cls._flavour.join(parts[1:]) + return drv + root + cls._pathmod.sep.join(parts[1:]) else: - return cls._flavour.join(parts) + return cls._pathmod.sep.join(parts) def _make_child(self, args): drv, root, parts = self._parse_args(args) - drv, root, parts = self._flavour.join_parsed_parts( + drv, root, parts = self._join_parsed_parts( self._drv, self._root, self._parts, drv, root, parts) return self._from_parsed_parts(drv, root, parts) + @classmethod + def _join_parsed_parts(cls, drv, root, parts, drv2, root2, parts2): + """ + Join the two paths represented by the respective + (drive, root, parts) tuples. Return a new (drive, root, parts) tuple. + """ + if root2: + if not drv2 and drv: + return drv, root2, [drv + root2] + parts2[1:] + elif drv2: + if drv2 == drv or cls._casefold(drv2) == cls._casefold(drv): + # Same drive => second path is relative to the first + return drv, root, parts + parts2[1:] + else: + # Second path is non-anchored (common case) + return drv, root, parts + parts2 + return drv2, root2, parts2 + + @classmethod + def _casefold(cls, s): + if cls._case_insensitive: + return s.lower() + return s + + @classmethod + def _casefold_parts(cls, parts): + if cls._case_insensitive: + return [p.lower() for p in parts] + return parts + def __str__(self): """Return the string representation of the path, suitable for passing to system calls.""" @@ -622,8 +498,7 @@ def __fspath__(self): def as_posix(self): """Return the string representation of the path with forward (/) slashes.""" - f = self._flavour - return str(self).replace(f.sep, '/') + return str(self).replace(self._pathmod.sep, '/') def __bytes__(self): """Return the bytes representation of the path. This is only @@ -631,13 +506,11 @@ def __bytes__(self): return os.fsencode(self) def __repr__(self): - return "{}({!r})".format(self.__class__.__name__, self.as_posix()) + return "{}({!r})".format(type(self).__name__, self.as_posix()) def as_uri(self): """Return the path as a 'file' URI.""" - if not self.is_absolute(): - raise ValueError("relative path can't be expressed as a file URI") - return self._flavour.make_uri(self) + raise NotImplementedError @property def _cparts(self): @@ -645,13 +518,13 @@ def _cparts(self): try: return self._cached_cparts except AttributeError: - self._cached_cparts = self._flavour.casefold_parts(self._parts) + self._cached_cparts = self._casefold_parts(self._parts) return self._cached_cparts def __eq__(self, other): - if not isinstance(other, PurePath): + if not isinstance(other, type(self)): return NotImplemented - return self._cparts == other._cparts and self._flavour is other._flavour + return self._cparts == other._cparts def __hash__(self): try: @@ -661,22 +534,22 @@ def __hash__(self): return self._hash def __lt__(self, other): - if not isinstance(other, PurePath) or self._flavour is not other._flavour: + if not isinstance(other, type(self)): return NotImplemented return self._cparts < other._cparts def __le__(self, other): - if not isinstance(other, PurePath) or self._flavour is not other._flavour: + if not isinstance(other, type(self)): return NotImplemented return self._cparts <= other._cparts def __gt__(self, other): - if not isinstance(other, PurePath) or self._flavour is not other._flavour: + if not isinstance(other, type(self)): return NotImplemented return self._cparts > other._cparts def __ge__(self, other): - if not isinstance(other, PurePath) or self._flavour is not other._flavour: + if not isinstance(other, type(self)): return NotImplemented return self._cparts >= other._cparts @@ -744,8 +617,9 @@ def with_name(self, name): """Return a new path with the file name changed.""" if not self.name: raise ValueError("%r has an empty name" % (self,)) - drv, root, parts = self._flavour.parse_parts((name,)) - if (not name or name[-1] in [self._flavour.sep, self._flavour.altsep] + drv, root, parts = self._parse_parts((name,)) + m = self._pathmod + if (not name or name[-1] in [m.sep, m.altsep] or drv or root or len(parts) != 1): raise ValueError("Invalid name %r" % (name)) return self._from_parsed_parts(self._drv, self._root, @@ -760,8 +634,8 @@ def with_suffix(self, suffix): has no suffix, add given suffix. If the given suffix is an empty string, remove the suffix from the path. """ - f = self._flavour - if f.sep in suffix or f.altsep and f.altsep in suffix: + m = self._pathmod + if m.sep in suffix or m.altsep and m.altsep in suffix: raise ValueError("Invalid suffix %r" % (suffix,)) if suffix and not suffix.startswith('.') or suffix == '.': raise ValueError("Invalid suffix %r" % (suffix)) @@ -800,7 +674,7 @@ def relative_to(self, *other): else: to_abs_parts = to_parts n = len(to_abs_parts) - cf = self._flavour.casefold_parts + cf = self._casefold_parts if (root or drv) if n == 0 else cf(abs_parts[:n]) != cf(to_abs_parts): formatted = self._format_parsed_parts(to_drv, to_root, to_parts) raise ValueError("{!r} is not in the subpath of {!r}" @@ -868,22 +742,20 @@ def parents(self): def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, a drive).""" - if not self._root: - return False - return not self._flavour.has_drv or bool(self._drv) + return bool(self._root) def is_reserved(self): """Return True if the path contains one of the special names reserved by the system, if any.""" - return self._flavour.is_reserved(self._parts) + return False def match(self, path_pattern): """ Return True if this path matches the given pattern. """ - cf = self._flavour.casefold + cf = self._casefold path_pattern = cf(path_pattern) - drv, root, pat_parts = self._flavour.parse_parts((path_pattern,)) + drv, root, pat_parts = self._parse_parts((path_pattern,)) if not pat_parts: raise ValueError("empty pattern") if drv and drv != cf(self._drv): @@ -913,9 +785,17 @@ class PurePosixPath(PurePath): On a POSIX system, instantiating a PurePath should return this object. However, you can also instantiate it directly on any system. """ - _flavour = _posix_flavour + _pathmod = posixpath __slots__ = () + def as_uri(self): + # We represent the path using the local filesystem encoding, + # for portability to other applications. + if not self.is_absolute(): + raise ValueError("relative path can't be expressed as a file URI") + bpath = bytes(self) + return 'file://' + urlquote_from_bytes(bpath) + class PureWindowsPath(PurePath): """PurePath subclass for Windows systems. @@ -923,9 +803,98 @@ class PureWindowsPath(PurePath): On a Windows system, instantiating a PurePath should return this object. However, you can also instantiate it directly on any system. """ - _flavour = _windows_flavour + # Reference for Windows paths can be found at + # http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx + _pathmod = ntpath + _case_insensitive = True __slots__ = () + # Interesting findings about extended paths: + # - '\\?\c:\a', '//?/c:\a' and '//?/c:/a' are all supported + # but '\\?\c:/a' is not + # - extended paths are always absolute; "relative" extended paths will + # fail. + + @classmethod + def _splitroot(cls, part): + sep = cls._pathmod.sep + first = part[0:1] + second = part[1:2] + if (second == sep and first == sep): + # XXX extended paths should also disable the collapsing of "." + # components (according to MSDN docs). + prefix, part = cls._split_extended_path(part) + first = part[0:1] + second = part[1:2] + else: + prefix = '' + third = part[2:3] + if (second == sep and first == sep and third != sep): + # is a UNC path: + # vvvvvvvvvvvvvvvvvvvvv root + # \\machine\mountpoint\directory\etc\... + # directory ^^^^^^^^^^^^^^ + index = part.find(sep, 2) + if index != -1: + index2 = part.find(sep, index + 1) + # a UNC path can't have two slashes in a row + # (after the initial two) + if index2 != index + 1: + if index2 == -1: + index2 = len(part) + if prefix: + return prefix + part[1:index2], sep, part[index2+1:] + else: + return part[:index2], sep, part[index2+1:] + drv = root = '' + if second == ':' and first in _win_drive_letters: + drv = part[:2] + part = part[2:] + first = third + if first == sep: + root = first + part = part.lstrip(sep) + return prefix + drv, root, part + + @classmethod + def _split_extended_path(cls, s, ext_prefix=_win_ext_namespace_prefix): + prefix = '' + if s.startswith(ext_prefix): + prefix = s[:4] + s = s[4:] + if s.startswith('UNC\\'): + prefix += s[:3] + s = '\\' + s[3:] + return prefix, s + + def is_absolute(self): + return bool(self._root) and bool(self._drv) + + def is_reserved(self): + # NOTE: the rules for reserved names seem somewhat complicated + # (e.g. r"..\NUL" is reserved but not r"foo\NUL"). + # We err on the side of caution and return True for paths which are + # not considered reserved by Windows. + if not self._parts: + return False + if self._parts[0].startswith('\\\\'): + # UNC paths are never reserved + return False + return self._parts[-1].partition('.')[0].upper() in _win_reserved_names + + def as_uri(self): + if not self.is_absolute(): + raise ValueError("relative path can't be expressed as a file URI") + # Under Windows, file URIs use the UTF-8 encoding. + drive = self.drive + if len(drive) == 2 and drive[1] == ':': + # It's a path on a local drive => 'file:///c:/a/b' + rest = self.as_posix()[2:].lstrip('/') + return 'file:///%s/%s' % ( + drive, urlquote_from_bytes(rest.encode('utf-8'))) + else: + # It's a path on a network drive => 'file://host/share/a/b' + return 'file:' + urlquote_from_bytes(self.as_posix().encode('utf-8')) # Filesystem-accessing classes @@ -945,11 +914,11 @@ class Path(PurePath): def __new__(cls, *args, **kwargs): if cls is Path: cls = WindowsPath if os.name == 'nt' else PosixPath - self = cls._from_parts(args) - if not self._flavour.is_supported: + elif (issubclass(cls, PosixPath) and os.name == 'nt') or \ + (issubclass(cls, WindowsPath) and os.name != 'nt'): raise NotImplementedError("cannot instantiate %r on your system" % (cls.__name__,)) - return self + return cls._from_parts(args) def _make_child_relpath(self, part): # This is an optimization used for dir walking. `part` must be @@ -1015,10 +984,10 @@ def glob(self, pattern): sys.audit("pathlib.Path.glob", self, pattern) if not pattern: raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - drv, root, pattern_parts = self._flavour.parse_parts((pattern,)) + drv, root, pattern_parts = self._parse_parts((pattern,)) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") - selector = _make_selector(tuple(pattern_parts), self._flavour) + selector = _make_selector(tuple(pattern_parts), self._case_insensitive) for p in selector.select_from(self): yield p @@ -1028,10 +997,11 @@ def rglob(self, pattern): this subtree. """ sys.audit("pathlib.Path.rglob", self, pattern) - drv, root, pattern_parts = self._flavour.parse_parts((pattern,)) + drv, root, pattern_parts = self._parse_parts((pattern,)) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") - selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour) + selector = _make_selector(("**",) + tuple(pattern_parts), + self._case_insensitive) for p in selector.select_from(self): yield p @@ -1137,7 +1107,7 @@ def write_text(self, data, encoding=None, errors=None, newline=None): """ if not isinstance(data, str): raise TypeError('data must be str, not %s' % - data.__class__.__name__) + type(data).__name__) encoding = io.text_encoding(encoding) with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: return f.write(data) @@ -1220,7 +1190,7 @@ def rename(self, target): Returns the new Path instance pointing to the target path. """ self._accessor.rename(self, target) - return self.__class__(target) + return self._from_parts((target,)) def replace(self, target): """ @@ -1233,7 +1203,7 @@ def replace(self, target): Returns the new Path instance pointing to the target path. """ self._accessor.replace(self, target) - return self.__class__(target) + return self._from_parts((target,)) def symlink_to(self, target, target_is_directory=False): """ diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 6ed08f7e70ce3d..e563f2dcc4816b 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -20,22 +20,44 @@ except ImportError: grp = pwd = None +# +# Tests for the pure classes. +# -class _BaseFlavourTest(object): +class _BasePurePathTest(object): + + # Keys are canonical paths, values are list of tuples of arguments + # supposed to produce equal paths. + equivalences = { + 'a/b': [ + ('a', 'b'), ('a/', 'b'), ('a', 'b/'), ('a/', 'b/'), + ('a/b/',), ('a//b',), ('a//b//',), + # Empty components get removed. + ('', 'a', 'b'), ('a', '', 'b'), ('a', 'b', ''), + ], + '/b/c/d': [ + ('a', '/b/c', 'd'), ('a', '///b//c', 'd/'), + ('/a', '/b/c', 'd'), + # Empty components get removed. + ('/', 'b', '', 'c/d'), ('/', '', 'b/c/d'), ('', '/b/c/d'), + ], + } + + def setUp(self): + p = self.cls('a') + self.sep = p._pathmod.sep + self.altsep = p._pathmod.altsep def _check_parse_parts(self, arg, expected): - f = self.flavour.parse_parts - sep = self.flavour.sep - altsep = self.flavour.altsep - actual = f([x.replace('/', sep) for x in arg]) + f = self.cls()._parse_parts + actual = f([x.replace('/', self.sep) for x in arg]) self.assertEqual(actual, expected) - if altsep: - actual = f([x.replace('/', altsep) for x in arg]) + if self.altsep: + actual = f([x.replace('/', self.altsep) for x in arg]) self.assertEqual(actual, expected) def test_parse_parts_common(self): check = self._check_parse_parts - sep = self.flavour.sep # Unanchored parts. check([], ('', '', [])) check(['a'], ('', '', ['a'])) @@ -54,140 +76,12 @@ def test_parse_parts_common(self): check(['a', '.', 'b'], ('', '', ['a', 'b'])) check(['a', '.', '.'], ('', '', ['a'])) # The first part is anchored. - check(['/a/b'], ('', sep, [sep, 'a', 'b'])) - check(['/a', 'b'], ('', sep, [sep, 'a', 'b'])) - check(['/a/', 'b'], ('', sep, [sep, 'a', 'b'])) + check(['/a/b'], ('', self.sep, [self.sep, 'a', 'b'])) + check(['/a', 'b'], ('', self.sep, [self.sep, 'a', 'b'])) + check(['/a/', 'b'], ('', self.sep, [self.sep, 'a', 'b'])) # Ignoring parts before an anchored part. - check(['a', '/b', 'c'], ('', sep, [sep, 'b', 'c'])) - check(['a', '/b', '/c'], ('', sep, [sep, 'c'])) - - -class PosixFlavourTest(_BaseFlavourTest, unittest.TestCase): - flavour = pathlib._posix_flavour - - def test_parse_parts(self): - check = self._check_parse_parts - # Collapsing of excess leading slashes, except for the double-slash - # special case. - check(['//a', 'b'], ('', '//', ['//', 'a', 'b'])) - check(['///a', 'b'], ('', '/', ['/', 'a', 'b'])) - check(['////a', 'b'], ('', '/', ['/', 'a', 'b'])) - # Paths which look like NT paths aren't treated specially. - check(['c:a'], ('', '', ['c:a'])) - check(['c:\\a'], ('', '', ['c:\\a'])) - check(['\\a'], ('', '', ['\\a'])) - - def test_splitroot(self): - f = self.flavour.splitroot - self.assertEqual(f(''), ('', '', '')) - self.assertEqual(f('a'), ('', '', 'a')) - self.assertEqual(f('a/b'), ('', '', 'a/b')) - self.assertEqual(f('a/b/'), ('', '', 'a/b/')) - self.assertEqual(f('/a'), ('', '/', 'a')) - self.assertEqual(f('/a/b'), ('', '/', 'a/b')) - self.assertEqual(f('/a/b/'), ('', '/', 'a/b/')) - # The root is collapsed when there are redundant slashes - # except when there are exactly two leading slashes, which - # is a special case in POSIX. - self.assertEqual(f('//a'), ('', '//', 'a')) - self.assertEqual(f('///a'), ('', '/', 'a')) - self.assertEqual(f('///a/b'), ('', '/', 'a/b')) - # Paths which look like NT paths aren't treated specially. - self.assertEqual(f('c:/a/b'), ('', '', 'c:/a/b')) - self.assertEqual(f('\\/a/b'), ('', '', '\\/a/b')) - self.assertEqual(f('\\a\\b'), ('', '', '\\a\\b')) - - -class NTFlavourTest(_BaseFlavourTest, unittest.TestCase): - flavour = pathlib._windows_flavour - - def test_parse_parts(self): - check = self._check_parse_parts - # First part is anchored. - check(['c:'], ('c:', '', ['c:'])) - check(['c:/'], ('c:', '\\', ['c:\\'])) - check(['/'], ('', '\\', ['\\'])) - check(['c:a'], ('c:', '', ['c:', 'a'])) - check(['c:/a'], ('c:', '\\', ['c:\\', 'a'])) - check(['/a'], ('', '\\', ['\\', 'a'])) - # UNC paths. - check(['//a/b'], ('\\\\a\\b', '\\', ['\\\\a\\b\\'])) - check(['//a/b/'], ('\\\\a\\b', '\\', ['\\\\a\\b\\'])) - check(['//a/b/c'], ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c'])) - # Second part is anchored, so that the first part is ignored. - check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'b', 'c'])) - check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) - # UNC paths. - check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd'])) - # Collapsing and stripping excess slashes. - check(['a', 'Z://b//c/', 'd/'], ('Z:', '\\', ['Z:\\', 'b', 'c', 'd'])) - # UNC paths. - check(['a', '//b/c//', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd'])) - # Extended paths. - check(['//?/c:/'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\'])) - check(['//?/c:/a'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'a'])) - check(['//?/c:/a', '/b'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'b'])) - # Extended UNC paths (format is "\\?\UNC\server\share"). - check(['//?/UNC/b/c'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\'])) - check(['//?/UNC/b/c/d'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\', 'd'])) - # Second part has a root but not drive. - check(['a', '/b', 'c'], ('', '\\', ['\\', 'b', 'c'])) - check(['Z:/a', '/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) - check(['//?/Z:/a', '/b', 'c'], ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c'])) - - def test_splitroot(self): - f = self.flavour.splitroot - self.assertEqual(f(''), ('', '', '')) - self.assertEqual(f('a'), ('', '', 'a')) - self.assertEqual(f('a\\b'), ('', '', 'a\\b')) - self.assertEqual(f('\\a'), ('', '\\', 'a')) - self.assertEqual(f('\\a\\b'), ('', '\\', 'a\\b')) - self.assertEqual(f('c:a\\b'), ('c:', '', 'a\\b')) - self.assertEqual(f('c:\\a\\b'), ('c:', '\\', 'a\\b')) - # Redundant slashes in the root are collapsed. - self.assertEqual(f('\\\\a'), ('', '\\', 'a')) - self.assertEqual(f('\\\\\\a/b'), ('', '\\', 'a/b')) - self.assertEqual(f('c:\\\\a'), ('c:', '\\', 'a')) - self.assertEqual(f('c:\\\\\\a/b'), ('c:', '\\', 'a/b')) - # Valid UNC paths. - self.assertEqual(f('\\\\a\\b'), ('\\\\a\\b', '\\', '')) - self.assertEqual(f('\\\\a\\b\\'), ('\\\\a\\b', '\\', '')) - self.assertEqual(f('\\\\a\\b\\c\\d'), ('\\\\a\\b', '\\', 'c\\d')) - # These are non-UNC paths (according to ntpath.py and test_ntpath). - # However, command.com says such paths are invalid, so it's - # difficult to know what the right semantics are. - self.assertEqual(f('\\\\\\a\\b'), ('', '\\', 'a\\b')) - self.assertEqual(f('\\\\a'), ('', '\\', 'a')) - - -# -# Tests for the pure classes. -# - -class _BasePurePathTest(object): - - # Keys are canonical paths, values are list of tuples of arguments - # supposed to produce equal paths. - equivalences = { - 'a/b': [ - ('a', 'b'), ('a/', 'b'), ('a', 'b/'), ('a/', 'b/'), - ('a/b/',), ('a//b',), ('a//b//',), - # Empty components get removed. - ('', 'a', 'b'), ('a', '', 'b'), ('a', 'b', ''), - ], - '/b/c/d': [ - ('a', '/b/c', 'd'), ('a', '///b//c', 'd/'), - ('/a', '/b/c', 'd'), - # Empty components get removed. - ('/', 'b', '', 'c/d'), ('/', '', 'b/c/d'), ('', '/b/c/d'), - ], - } - - def setUp(self): - p = self.cls('a') - self.flavour = p._flavour - self.sep = self.flavour.sep - self.altsep = self.flavour.altsep + check(['a', '/b', 'c'], ('', self.sep, [self.sep, 'b', 'c'])) + check(['a', '/b', '/c'], ('', self.sep, [self.sep, 'c'])) def test_constructor_common(self): P = self.cls @@ -615,7 +509,7 @@ def test_with_suffix_common(self): self.assertRaises(ValueError, P('a/b').with_suffix, './.d') self.assertRaises(ValueError, P('a/b').with_suffix, '.d/.') self.assertRaises(ValueError, P('a/b').with_suffix, - (self.flavour.sep, 'd')) + (self.sep, 'd')) def test_relative_to_common(self): P = self.cls @@ -701,6 +595,38 @@ def test_pickling_common(self): class PurePosixPathTest(_BasePurePathTest, unittest.TestCase): cls = pathlib.PurePosixPath + def test_parse_parts(self): + check = self._check_parse_parts + # Collapsing of excess leading slashes, except for the double-slash + # special case. + check(['//a', 'b'], ('', '//', ['//', 'a', 'b'])) + check(['///a', 'b'], ('', '/', ['/', 'a', 'b'])) + check(['////a', 'b'], ('', '/', ['/', 'a', 'b'])) + # Paths which look like NT paths aren't treated specially. + check(['c:a'], ('', '', ['c:a'])) + check(['c:\\a'], ('', '', ['c:\\a'])) + check(['\\a'], ('', '', ['\\a'])) + + def test_splitroot(self): + f = self.cls._splitroot + self.assertEqual(f(''), ('', '', '')) + self.assertEqual(f('a'), ('', '', 'a')) + self.assertEqual(f('a/b'), ('', '', 'a/b')) + self.assertEqual(f('a/b/'), ('', '', 'a/b/')) + self.assertEqual(f('/a'), ('', '/', 'a')) + self.assertEqual(f('/a/b'), ('', '/', 'a/b')) + self.assertEqual(f('/a/b/'), ('', '/', 'a/b/')) + # The root is collapsed when there are redundant slashes + # except when there are exactly two leading slashes, which + # is a special case in POSIX. + self.assertEqual(f('//a'), ('', '//', 'a')) + self.assertEqual(f('///a'), ('', '/', 'a')) + self.assertEqual(f('///a/b'), ('', '/', 'a/b')) + # Paths which look like NT paths aren't treated specially. + self.assertEqual(f('c:/a/b'), ('', '', 'c:/a/b')) + self.assertEqual(f('\\/a/b'), ('', '', '\\/a/b')) + self.assertEqual(f('\\a\\b'), ('', '', '\\a\\b')) + def test_root(self): P = self.cls self.assertEqual(P('/a/b').root, '/') @@ -790,6 +716,65 @@ class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase): ], }) + + def test_parse_parts(self): + check = self._check_parse_parts + # First part is anchored. + check(['c:'], ('c:', '', ['c:'])) + check(['c:/'], ('c:', '\\', ['c:\\'])) + check(['/'], ('', '\\', ['\\'])) + check(['c:a'], ('c:', '', ['c:', 'a'])) + check(['c:/a'], ('c:', '\\', ['c:\\', 'a'])) + check(['/a'], ('', '\\', ['\\', 'a'])) + # UNC paths. + check(['//a/b'], ('\\\\a\\b', '\\', ['\\\\a\\b\\'])) + check(['//a/b/'], ('\\\\a\\b', '\\', ['\\\\a\\b\\'])) + check(['//a/b/c'], ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c'])) + # Second part is anchored, so that the first part is ignored. + check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'b', 'c'])) + check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) + # UNC paths. + check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd'])) + # Collapsing and stripping excess slashes. + check(['a', 'Z://b//c/', 'd/'], ('Z:', '\\', ['Z:\\', 'b', 'c', 'd'])) + # UNC paths. + check(['a', '//b/c//', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd'])) + # Extended paths. + check(['//?/c:/'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\'])) + check(['//?/c:/a'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'a'])) + check(['//?/c:/a', '/b'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'b'])) + # Extended UNC paths (format is "\\?\UNC\server\share"). + check(['//?/UNC/b/c'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\'])) + check(['//?/UNC/b/c/d'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\', 'd'])) + # Second part has a root but not drive. + check(['a', '/b', 'c'], ('', '\\', ['\\', 'b', 'c'])) + check(['Z:/a', '/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) + check(['//?/Z:/a', '/b', 'c'], ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c'])) + + def test_splitroot(self): + f = self.cls._splitroot + self.assertEqual(f(''), ('', '', '')) + self.assertEqual(f('a'), ('', '', 'a')) + self.assertEqual(f('a\\b'), ('', '', 'a\\b')) + self.assertEqual(f('\\a'), ('', '\\', 'a')) + self.assertEqual(f('\\a\\b'), ('', '\\', 'a\\b')) + self.assertEqual(f('c:a\\b'), ('c:', '', 'a\\b')) + self.assertEqual(f('c:\\a\\b'), ('c:', '\\', 'a\\b')) + # Redundant slashes in the root are collapsed. + self.assertEqual(f('\\\\a'), ('', '\\', 'a')) + self.assertEqual(f('\\\\\\a/b'), ('', '\\', 'a/b')) + self.assertEqual(f('c:\\\\a'), ('c:', '\\', 'a')) + self.assertEqual(f('c:\\\\\\a/b'), ('c:', '\\', 'a/b')) + # Valid UNC paths. + self.assertEqual(f('\\\\a\\b'), ('\\\\a\\b', '\\', '')) + self.assertEqual(f('\\\\a\\b\\'), ('\\\\a\\b', '\\', '')) + self.assertEqual(f('\\\\a\\b\\c\\d'), ('\\\\a\\b', '\\', 'c\\d')) + # These are non-UNC paths (according to ntpath.py and test_ntpath). + # However, command.com says such paths are invalid, so it's + # difficult to know what the right semantics are. + self.assertEqual(f('\\\\\\a\\b'), ('', '\\', 'a\\b')) + self.assertEqual(f('\\\\a'), ('', '\\', 'a')) + def test_str(self): p = self.cls('a/b/c') self.assertEqual(str(p), 'a\\b\\c') @@ -1304,12 +1289,12 @@ def test_concrete_class(self): self.assertIs(type(p), pathlib.PureWindowsPath if os.name == 'nt' else pathlib.PurePosixPath) - def test_different_flavours_unequal(self): + def test_different_types_unequal(self): p = pathlib.PurePosixPath('a') q = pathlib.PureWindowsPath('a') self.assertNotEqual(p, q) - def test_different_flavours_unordered(self): + def test_different_types_unordered(self): p = pathlib.PurePosixPath('a') q = pathlib.PureWindowsPath('a') with self.assertRaises(TypeError): @@ -2414,7 +2399,7 @@ def test_concrete_class(self): self.assertIs(type(p), pathlib.WindowsPath if os.name == 'nt' else pathlib.PosixPath) - def test_unsupported_flavour(self): + def test_unsupported_type(self): if os.name == 'nt': self.assertRaises(NotImplementedError, pathlib.PosixPath) else: