From abf8f93b6166de04c7956ac4cc30a01b6ae8d67e Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 13 Jan 2024 08:03:21 +0000 Subject: [PATCH] pathlib ABCs: add `_raw_path` property (#113976) It's wrong for the `PurePathBase` methods to rely so much on `__str__()`. Instead, they should treat the raw path(s) as opaque objects and leave the details to `pathmod`. This commit adds a `PurePathBase._raw_path` property and uses it through many of the other ABC methods. These methods are all redefined in `PurePath` and `Path`, so this has no effect on the public classes. --- Lib/pathlib/__init__.py | 12 +++++++----- Lib/pathlib/_abc.py | 39 ++++++++++++++++++++++++--------------- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 9d3fcd894164e52..e70cfe91d322bc3 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -257,7 +257,9 @@ def _parse_path(cls, path): parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.'] return drv, root, parsed - def _load_parts(self): + @property + def _raw_path(self): + """The joined but unnormalized path.""" paths = self._raw_paths if len(paths) == 0: path = '' @@ -265,7 +267,7 @@ def _load_parts(self): path = paths[0] else: path = self.pathmod.join(*paths) - self._drv, self._root, self._tail_cached = self._parse_path(path) + return path @property def drive(self): @@ -273,7 +275,7 @@ def drive(self): try: return self._drv except AttributeError: - self._load_parts() + self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) return self._drv @property @@ -282,7 +284,7 @@ def root(self): try: return self._root except AttributeError: - self._load_parts() + self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) return self._root @property @@ -290,7 +292,7 @@ def _tail(self): try: return self._tail_cached except AttributeError: - self._load_parts() + self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) return self._tail_cached @property diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index d2a31ed643979af..e53921edaa2cae8 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -163,10 +163,15 @@ def with_segments(self, *pathsegments): """ return type(self)(*pathsegments) + @property + def _raw_path(self): + """The joined but unnormalized path.""" + return self.pathmod.join(*self._raw_paths) + def __str__(self): """Return the string representation of the path, suitable for passing to system calls.""" - return self.pathmod.join(*self._raw_paths) + return self._raw_path def as_posix(self): """Return the string representation of the path with forward (/) @@ -176,23 +181,23 @@ def as_posix(self): @property def drive(self): """The drive prefix (letter or UNC path), if any.""" - return self.pathmod.splitdrive(str(self))[0] + return self.pathmod.splitdrive(self._raw_path)[0] @property def root(self): """The root of the path, if any.""" - return self.pathmod.splitroot(str(self))[1] + return self.pathmod.splitroot(self._raw_path)[1] @property def anchor(self): """The concatenation of the drive and root, or ''.""" - drive, root, _ = self.pathmod.splitroot(str(self)) + drive, root, _ = self.pathmod.splitroot(self._raw_path) return drive + root @property def name(self): """The final path component, if any.""" - return self.pathmod.basename(str(self)) + return self.pathmod.basename(self._raw_path) @property def suffix(self): @@ -236,7 +241,7 @@ def with_name(self, name): dirname = self.pathmod.dirname if dirname(name): raise ValueError(f"Invalid name {name!r}") - return self.with_segments(dirname(str(self)), name) + return self.with_segments(dirname(self._raw_path), name) def with_stem(self, stem): """Return a new path with the stem changed.""" @@ -266,8 +271,10 @@ def relative_to(self, other, *, walk_up=False): other = self.with_segments(other) anchor0, parts0 = self._stack anchor1, parts1 = other._stack + if isinstance(anchor0, str) != isinstance(anchor1, str): + raise TypeError(f"{self._raw_path!r} and {other._raw_path!r} have different types") if anchor0 != anchor1: - raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors") + raise ValueError(f"{self._raw_path!r} and {other._raw_path!r} have different anchors") while parts0 and parts1 and parts0[-1] == parts1[-1]: parts0.pop() parts1.pop() @@ -275,9 +282,9 @@ def relative_to(self, other, *, walk_up=False): if not part or part == '.': pass elif not walk_up: - raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}") + raise ValueError(f"{self._raw_path!r} is not in the subpath of {other._raw_path!r}") elif part == '..': - raise ValueError(f"'..' segment in {str(other)!r} cannot be walked") + raise ValueError(f"'..' segment in {other._raw_path!r} cannot be walked") else: parts0.append('..') return self.with_segments('', *reversed(parts0)) @@ -289,6 +296,8 @@ def is_relative_to(self, other): other = self.with_segments(other) anchor0, parts0 = self._stack anchor1, parts1 = other._stack + if isinstance(anchor0, str) != isinstance(anchor1, str): + raise TypeError(f"{self._raw_path!r} and {other._raw_path!r} have different types") if anchor0 != anchor1: return False while parts0 and parts1 and parts0[-1] == parts1[-1]: @@ -336,7 +345,7 @@ def _stack(self): *parts* is a reversed list of parts following the anchor. """ split = self.pathmod.split - path = str(self) + path = self._raw_path parent, name = split(path) names = [] while path != parent: @@ -348,7 +357,7 @@ def _stack(self): @property def parent(self): """The logical parent of the path.""" - path = str(self) + path = self._raw_path parent = self.pathmod.dirname(path) if path != parent: parent = self.with_segments(parent) @@ -360,7 +369,7 @@ def parent(self): def parents(self): """A sequence of this path's logical parents.""" dirname = self.pathmod.dirname - path = str(self) + path = self._raw_path parent = dirname(path) parents = [] while path != parent: @@ -379,7 +388,7 @@ def is_absolute(self): return True return False else: - return self.pathmod.isabs(str(self)) + return self.pathmod.isabs(self._raw_path) def is_reserved(self): """Return True if the path contains one of the special names reserved @@ -894,7 +903,7 @@ def resolve(self, strict=False): # encountered during resolution. link_count += 1 if link_count >= self._max_symlinks: - raise OSError(ELOOP, "Too many symbolic links in path", str(self)) + raise OSError(ELOOP, "Too many symbolic links in path", self._raw_path) target_root, target_parts = path.readlink()._stack # If the symlink target is absolute (like '/etc/hosts'), set the current # path to its uppermost parent (like '/'). @@ -908,7 +917,7 @@ def resolve(self, strict=False): parts.extend(target_parts) continue elif parts and not S_ISDIR(st.st_mode): - raise NotADirectoryError(ENOTDIR, "Not a directory", str(self)) + raise NotADirectoryError(ENOTDIR, "Not a directory", self._raw_path) except OSError: if strict: raise