From ecd532857de380efaf9b80d5692042324d464b0e Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Mon, 3 Apr 2023 19:57:11 +0100 Subject: [PATCH] GH-76846, GH-85281: Call `__new__()` and `__init__()` on pathlib subclasses (GH-102789) Fix an issue where `__new__()` and `__init__()` were not called on subclasses of `pathlib.PurePath` and `Path` in some circumstances. Paths are now normalized on-demand. This speeds up path construction, `p.joinpath(q)`, and `p / q`. Co-authored-by: Steve Dower --- Lib/pathlib.py | 145 ++++++++++-------- Lib/test/test_pathlib.py | 27 +++- ...3-03-17-19-14-26.gh-issue-76846.KEamjK.rst | 3 + 3 files changed, 107 insertions(+), 68 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-03-17-19-14-26.gh-issue-76846.KEamjK.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index a126bf2fe5570a7..490f89f39d26d1b 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -16,7 +16,6 @@ import warnings from _collections_abc import Sequence from errno import ENOENT, ENOTDIR, EBADF, ELOOP -from operator import attrgetter from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from urllib.parse import quote_from_bytes as urlquote_from_bytes @@ -216,8 +215,8 @@ class _PathParents(Sequence): def __init__(self, path): # We don't store the instance to avoid reference cycles self._pathcls = type(path) - self._drv = path._drv - self._root = path._root + self._drv = path.drive + self._root = path.root self._parts = path._parts def __len__(self): @@ -251,12 +250,12 @@ class PurePath(object): directly, regardless of your system. """ __slots__ = ( - '_drv', '_root', '_parts', + '_raw_path', '_drv', '_root', '_parts_cached', '_str', '_hash', '_parts_tuple', '_parts_normcase_cached', ) _flavour = os.path - def __new__(cls, *args): + def __new__(cls, *args, **kwargs): """Construct a PurePath from one or several strings and or existing PurePath objects. The strings and path objects are combined so as to yield a canonicalized path, which is incorporated into the @@ -264,23 +263,20 @@ def __new__(cls, *args): """ if cls is PurePath: cls = PureWindowsPath if os.name == 'nt' else PurePosixPath - return cls._from_parts(args) + return object.__new__(cls) def __reduce__(self): # Using the parts tuple helps share interned path parts # when pickling related paths. - return (self.__class__, tuple(self._parts)) + return (self.__class__, self.parts) - @classmethod - def _parse_parts(cls, parts): - if not parts: - return '', '', [] - elif len(parts) == 1: - path = os.fspath(parts[0]) + def __init__(self, *args): + if not args: + path = '' + elif len(args) == 1: + path = os.fspath(args[0]) else: - path = cls._flavour.join(*parts) - sep = cls._flavour.sep - altsep = cls._flavour.altsep + path = self._flavour.join(*args) if isinstance(path, str): # Force-cast str subclasses to str (issue #21127) path = str(path) @@ -289,6 +285,14 @@ def _parse_parts(cls, parts): "argument should be a str or an os.PathLike " "object where __fspath__ returns a str, " f"not {type(path).__name__!r}") + self._raw_path = path + + @classmethod + def _parse_path(cls, path): + if not path: + return '', '', [] + sep = cls._flavour.sep + altsep = cls._flavour.altsep if altsep: path = path.replace(altsep, sep) drv, root, rel = cls._flavour.splitroot(path) @@ -299,21 +303,20 @@ def _parse_parts(cls, parts): parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.'] return drv, root, parsed - @classmethod - def _from_parts(cls, args): - self = object.__new__(cls) - drv, root, parts = self._parse_parts(args) + def _load_parts(self): + drv, root, parts = self._parse_path(self._raw_path) self._drv = drv self._root = root - self._parts = parts - return self + self._parts_cached = parts @classmethod def _from_parsed_parts(cls, drv, root, parts): - self = object.__new__(cls) + path = cls._format_parsed_parts(drv, root, parts) + self = cls(path) + self._str = path or '.' self._drv = drv self._root = root - self._parts = parts + self._parts_cached = parts return self @classmethod @@ -330,7 +333,7 @@ def __str__(self): try: return self._str except AttributeError: - self._str = self._format_parsed_parts(self._drv, self._root, + self._str = self._format_parsed_parts(self.drive, self.root, self._parts) or '.' return self._str @@ -356,7 +359,7 @@ def as_uri(self): if not self.is_absolute(): raise ValueError("relative path can't be expressed as a file URI") - drive = self._drv + drive = self.drive if len(drive) == 2 and drive[1] == ':': # It's a path on a local drive => 'file:///c:/a/b' prefix = 'file:///' + drive @@ -412,23 +415,43 @@ def __ge__(self, other): return NotImplemented return self._parts_normcase >= other._parts_normcase - drive = property(attrgetter('_drv'), - doc="""The drive prefix (letter or UNC path), if any.""") + @property + def drive(self): + """The drive prefix (letter or UNC path), if any.""" + try: + return self._drv + except AttributeError: + self._load_parts() + return self._drv + + @property + def root(self): + """The root of the path, if any.""" + try: + return self._root + except AttributeError: + self._load_parts() + return self._root - root = property(attrgetter('_root'), - doc="""The root of the path, if any.""") + @property + def _parts(self): + try: + return self._parts_cached + except AttributeError: + self._load_parts() + return self._parts_cached @property def anchor(self): """The concatenation of the drive and root, or ''.""" - anchor = self._drv + self._root + anchor = self.drive + self.root return anchor @property def name(self): """The final path component, if any.""" parts = self._parts - if len(parts) == (1 if (self._drv or self._root) else 0): + if len(parts) == (1 if (self.drive or self.root) else 0): return '' return parts[-1] @@ -477,7 +500,7 @@ def with_name(self, name): drv, root, tail = f.splitroot(name) if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail): raise ValueError("Invalid name %r" % (name)) - return self._from_parsed_parts(self._drv, self._root, + return self._from_parsed_parts(self.drive, self.root, self._parts[:-1] + [name]) def with_stem(self, stem): @@ -502,7 +525,7 @@ def with_suffix(self, suffix): name = name + suffix else: name = name[:-len(old_suffix)] + suffix - return self._from_parsed_parts(self._drv, self._root, + return self._from_parsed_parts(self.drive, self.root, self._parts[:-1] + [name]) def relative_to(self, other, /, *_deprecated, walk_up=False): @@ -561,22 +584,7 @@ def joinpath(self, *args): paths) or a totally different path (if one of the arguments is anchored). """ - drv1, root1, parts1 = self._drv, self._root, self._parts - drv2, root2, parts2 = self._parse_parts(args) - if root2: - if not drv2 and drv1: - return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:]) - else: - return self._from_parsed_parts(drv2, root2, parts2) - elif drv2: - if drv2 == drv1 or self._flavour.normcase(drv2) == self._flavour.normcase(drv1): - # Same drive => second path is relative to the first. - return self._from_parsed_parts(drv1, root1, parts1 + parts2[1:]) - else: - return self._from_parsed_parts(drv2, root2, parts2) - else: - # Second path is non-anchored (common case). - return self._from_parsed_parts(drv1, root1, parts1 + parts2) + return self.__class__(self._raw_path, *args) def __truediv__(self, key): try: @@ -586,15 +594,15 @@ def __truediv__(self, key): def __rtruediv__(self, key): try: - return self._from_parts([key] + self._parts) + return type(self)(key, self._raw_path) except TypeError: return NotImplemented @property def parent(self): """The logical parent of the path.""" - drv = self._drv - root = self._root + drv = self.drive + root = self.root parts = self._parts if len(parts) == 1 and (drv or root): return self @@ -610,7 +618,7 @@ def is_absolute(self): a drive).""" # ntpath.isabs() is defective - see GH-44626 . if self._flavour is ntpath: - return bool(self._drv and self._root) + return bool(self.drive and self.root) return self._flavour.isabs(self) def is_reserved(self): @@ -634,7 +642,7 @@ def match(self, path_pattern): Return True if this path matches the given pattern. """ path_pattern = self._flavour.normcase(path_pattern) - drv, root, pat_parts = self._parse_parts((path_pattern,)) + drv, root, pat_parts = self._parse_path(path_pattern) if not pat_parts: raise ValueError("empty pattern") parts = self._parts_normcase @@ -687,20 +695,23 @@ class Path(PurePath): """ __slots__ = () - def __new__(cls, *args, **kwargs): + def __init__(self, *args, **kwargs): if kwargs: msg = ("support for supplying keyword arguments to pathlib.PurePath " "is deprecated and scheduled for removal in Python {remove}") warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14)) + super().__init__(*args) + + def __new__(cls, *args, **kwargs): if cls is Path: cls = WindowsPath if os.name == 'nt' else PosixPath - return cls._from_parts(args) + return object.__new__(cls) def _make_child_relpath(self, part): # This is an optimization used for dir walking. `part` must be # a single part relative to this path. parts = self._parts + [part] - return self._from_parsed_parts(self._drv, self._root, parts) + return self._from_parsed_parts(self.drive, self.root, parts) def __enter__(self): # In previous versions of pathlib, __exit__() marked this path as @@ -770,7 +781,7 @@ def glob(self, pattern): sys.audit("pathlib.Path.glob", self, pattern) if not pattern: raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - drv, root, pattern_parts = self._parse_parts((pattern,)) + drv, root, pattern_parts = self._parse_path(pattern) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") if pattern[-1] in (self._flavour.sep, self._flavour.altsep): @@ -785,7 +796,7 @@ def rglob(self, pattern): this subtree. """ sys.audit("pathlib.Path.rglob", self, pattern) - drv, root, pattern_parts = self._parse_parts((pattern,)) + drv, root, pattern_parts = self._parse_path(pattern) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): @@ -802,12 +813,12 @@ def absolute(self): """ if self.is_absolute(): return self - elif self._drv: + elif self.drive: # There is a CWD on each drive-letter drive. - cwd = self._flavour.abspath(self._drv) + cwd = self._flavour.abspath(self.drive) else: cwd = os.getcwd() - return self._from_parts([cwd] + self._parts) + return type(self)(cwd, self._raw_path) def resolve(self, strict=False): """ @@ -825,7 +836,7 @@ def check_eloop(e): except OSError as e: check_eloop(e) raise - p = self._from_parts((s,)) + p = type(self)(s) # In non-strict mode, realpath() doesn't raise on symlink loops. # Ensure we get an exception by calling stat() @@ -915,7 +926,7 @@ def readlink(self): """ if not hasattr(os, "readlink"): raise NotImplementedError("os.readlink() not available on this system") - return self._from_parts((os.readlink(self),)) + return type(self)(os.readlink(self)) def touch(self, mode=0o666, exist_ok=True): """ @@ -1184,12 +1195,12 @@ def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) """ - if (not (self._drv or self._root) and + if (not (self.drive or self.root) and self._parts and self._parts[0][:1] == '~'): homedir = self._flavour.expanduser(self._parts[0]) if homedir[:1] == "~": raise RuntimeError("Could not determine home directory.") - drv, root, parts = self._parse_parts((homedir,)) + drv, root, parts = self._parse_path(homedir) return self._from_parsed_parts(drv, root, parts + self._parts[1:]) return self diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 3041630da678998..8b6e012b730d752 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -27,7 +27,9 @@ class _BaseFlavourTest(object): def _check_parse_parts(self, arg, expected): - f = self.cls._parse_parts + def f(parts): + path = self.cls(*parts)._raw_path + return self.cls._parse_path(path) sep = self.flavour.sep altsep = self.flavour.altsep actual = f([x.replace('/', sep) for x in arg]) @@ -136,6 +138,14 @@ def test_parse_parts(self): # Tests for the pure classes. # +class _BasePurePathSubclass(object): + init_called = False + + def __init__(self, *args): + super().__init__(*args) + self.init_called = True + + class _BasePurePathTest(object): # Keys are canonical paths, values are list of tuples of arguments @@ -221,6 +231,21 @@ def test_str_subclass_common(self): self._check_str_subclass('a/b.txt') self._check_str_subclass('/a/b.txt') + def test_init_called_common(self): + class P(_BasePurePathSubclass, self.cls): + pass + p = P('foo', 'bar') + self.assertTrue((p / 'foo').init_called) + self.assertTrue(('foo' / p).init_called) + self.assertTrue(p.joinpath('foo').init_called) + self.assertTrue(p.with_name('foo').init_called) + self.assertTrue(p.with_stem('foo').init_called) + self.assertTrue(p.with_suffix('.foo').init_called) + self.assertTrue(p.relative_to('foo').init_called) + self.assertTrue(p.parent.init_called) + for parent in p.parents: + self.assertTrue(parent.init_called) + def test_join_common(self): P = self.cls p = P('a/b') diff --git a/Misc/NEWS.d/next/Library/2023-03-17-19-14-26.gh-issue-76846.KEamjK.rst b/Misc/NEWS.d/next/Library/2023-03-17-19-14-26.gh-issue-76846.KEamjK.rst new file mode 100644 index 000000000000000..9fba11f074ee1cd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-17-19-14-26.gh-issue-76846.KEamjK.rst @@ -0,0 +1,3 @@ +Fix issue where ``__new__()`` and ``__init__()`` methods of +:class:`pathlib.PurePath` and :class:`~pathlib.Path` subclasses were not +called in some circumstances.