diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 55c44f12e5a2fb..d45b122a0fa305 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -16,7 +16,6 @@ import warnings from _collections_abc import Sequence from errno import ENOENT, ENOTDIR, EBADF, ELOOP -from operator import attrgetter from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from urllib.parse import quote_from_bytes as urlquote_from_bytes @@ -216,8 +215,8 @@ class _PathParents(Sequence): def __init__(self, path): # We don't store the instance to avoid reference cycles self._pathcls = type(path) - self._drv = path._drv - self._root = path._root + self._drv = path.drive + self._root = path.root self._parts = path._parts def __len__(self): @@ -251,7 +250,7 @@ class PurePath(object): directly, regardless of your system. """ __slots__ = ( - '_drv', '_root', '_parts', + '_fspath', '_drv', '_root', '_parts_cached', '_str', '_hash', '_parts_tuple', '_parts_normcase_cached', ) _flavour = os.path @@ -269,18 +268,16 @@ def __new__(cls, *args): def __reduce__(self): # Using the parts tuple helps share interned path parts # when pickling related paths. - return (self.__class__, tuple(self._parts)) + return (self.__class__, self.parts) @classmethod - def _parse_parts(cls, parts): + def _from_parts(cls, parts): if not parts: - return '', '', [] + path = '' elif len(parts) == 1: path = os.fspath(parts[0]) else: path = cls._flavour.join(*parts) - sep = cls._flavour.sep - altsep = cls._flavour.altsep if isinstance(path, str): # Force-cast str subclasses to str (issue #21127) path = str(path) @@ -289,6 +286,16 @@ def _parse_parts(cls, parts): "argument should be a str or an os.PathLike " "object where __fspath__ returns a str, " f"not {type(path).__name__!r}") + self = object.__new__(cls) + self._fspath = path + return self + + @classmethod + def _parse_path(cls, path): + if not path: + return '', '', [] + sep = cls._flavour.sep + altsep = cls._flavour.altsep if altsep: path = path.replace(altsep, sep) drv, root, rel = cls._flavour.splitroot(path) @@ -299,21 +306,18 @@ def _parse_parts(cls, parts): parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.'] return drv, root, parsed - @classmethod - def _from_parts(cls, args): - self = object.__new__(cls) - drv, root, parts = self._parse_parts(args) + def _load_parts(self): + drv, root, parts = self._parse_path(self._fspath) self._drv = drv self._root = root - self._parts = parts - return self + self._parts_cached = parts @classmethod def _from_parsed_parts(cls, drv, root, parts): self = object.__new__(cls) self._drv = drv self._root = root - self._parts = parts + self._parts_cached = parts return self @classmethod @@ -330,7 +334,7 @@ def __str__(self): try: return self._str except AttributeError: - self._str = self._format_parsed_parts(self._drv, self._root, + self._str = self._format_parsed_parts(self.drive, self.root, self._parts) or '.' return self._str @@ -356,7 +360,7 @@ def as_uri(self): if not self.is_absolute(): raise ValueError("relative path can't be expressed as a file URI") - drive = self._drv + drive = self.drive if len(drive) == 2 and drive[1] == ':': # It's a path on a local drive => 'file:///c:/a/b' prefix = 'file:///' + drive @@ -412,23 +416,43 @@ def __ge__(self, other): return NotImplemented return self._parts_normcase >= other._parts_normcase - drive = property(attrgetter('_drv'), - doc="""The drive prefix (letter or UNC path), if any.""") + @property + def drive(self): + """The drive prefix (letter or UNC path), if any.""" + try: + return self._drv + except AttributeError: + self._load_parts() + return self._drv - root = property(attrgetter('_root'), - doc="""The root of the path, if any.""") + @property + def root(self): + """The root of the path, if any.""" + try: + return self._root + except AttributeError: + self._load_parts() + return self._root + + @property + def _parts(self): + try: + return self._parts_cached + except AttributeError: + self._load_parts() + return self._parts_cached @property def anchor(self): """The concatenation of the drive and root, or ''.""" - anchor = self._drv + self._root + anchor = self.drive + self.root return anchor @property def name(self): """The final path component, if any.""" parts = self._parts - if len(parts) == (1 if (self._drv or self._root) else 0): + if len(parts) == (1 if (self.drive or self.root) else 0): return '' return parts[-1] @@ -477,7 +501,7 @@ def with_name(self, name): drv, root, tail = f.splitroot(name) if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail): raise ValueError("Invalid name %r" % (name)) - return self._from_parsed_parts(self._drv, self._root, + return self._from_parsed_parts(self.drive, self.root, self._parts[:-1] + [name]) def with_stem(self, stem): @@ -502,7 +526,7 @@ def with_suffix(self, suffix): name = name + suffix else: name = name[:-len(old_suffix)] + suffix - return self._from_parsed_parts(self._drv, self._root, + return self._from_parsed_parts(self.drive, self.root, self._parts[:-1] + [name]) def relative_to(self, other, /, *_deprecated, walk_up=False): @@ -593,8 +617,8 @@ def __rtruediv__(self, key): @property def parent(self): """The logical parent of the path.""" - drv = self._drv - root = self._root + drv = self.drive + root = self.root parts = self._parts if len(parts) == 1 and (drv or root): return self @@ -610,7 +634,7 @@ def is_absolute(self): a drive).""" # ntpath.isabs() is defective - see GH-44626 . if self._flavour is ntpath: - return bool(self._drv and self._root) + return bool(self.drive and self.root) return self._flavour.isabs(self) def is_reserved(self): @@ -634,7 +658,7 @@ def match(self, path_pattern): Return True if this path matches the given pattern. """ path_pattern = self._flavour.normcase(path_pattern) - drv, root, pat_parts = self._parse_parts((path_pattern,)) + drv, root, pat_parts = self._parse_path(path_pattern) if not pat_parts: raise ValueError("empty pattern") parts = self._parts_normcase @@ -700,7 +724,7 @@ def _make_child_relpath(self, part): # This is an optimization used for dir walking. `part` must be # a single part relative to this path. parts = self._parts + [part] - return self._from_parsed_parts(self._drv, self._root, parts) + return self._from_parsed_parts(self.drive, self.root, parts) def __enter__(self): # In previous versions of pathlib, __exit__() marked this path as @@ -770,7 +794,7 @@ def glob(self, pattern): sys.audit("pathlib.Path.glob", self, pattern) if not pattern: raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - drv, root, pattern_parts = self._parse_parts((pattern,)) + drv, root, pattern_parts = self._parse_path(pattern) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") if pattern[-1] in (self._flavour.sep, self._flavour.altsep): @@ -785,7 +809,7 @@ def rglob(self, pattern): this subtree. """ sys.audit("pathlib.Path.rglob", self, pattern) - drv, root, pattern_parts = self._parse_parts((pattern,)) + drv, root, pattern_parts = self._parse_path(pattern) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): @@ -802,9 +826,9 @@ def absolute(self): """ if self.is_absolute(): return self - elif self._drv: + elif self.drive: # There is a CWD on each drive-letter drive. - cwd = self._flavour.abspath(self._drv) + cwd = self._flavour.abspath(self.drive) else: cwd = os.getcwd() return self._from_parts([cwd] + self._parts) @@ -1184,12 +1208,12 @@ def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) """ - if (not (self._drv or self._root) and + if (not (self.drive or self.root) and self._parts and self._parts[0][:1] == '~'): homedir = self._flavour.expanduser(self._parts[0]) if homedir[:1] == "~": raise RuntimeError("Could not determine home directory.") - drv, root, parts = self._parse_parts((homedir,)) + drv, root, parts = self._parse_path(homedir) return self._from_parsed_parts(drv, root, parts + self._parts[1:]) return self diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index f05dead5886743..5a7e66ce0eaedc 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -26,7 +26,9 @@ class _BaseFlavourTest(object): def _check_parse_parts(self, arg, expected): - f = self.cls._parse_parts + def f(parts): + path = self.flavour.join(*parts) if parts else '' + return self.cls._parse_path(path) sep = self.flavour.sep altsep = self.flavour.altsep actual = f([x.replace('/', sep) for x in arg]) diff --git a/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst b/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst new file mode 100644 index 00000000000000..cbcab09342d592 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst @@ -0,0 +1,2 @@ +Speed up construction of :class:`pathlib.PurePath` objects by deferring +path parsing and normalization.