From 12e2551a7dd80dcb5d5e998d4a450caf0ddbb12d Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 24 Nov 2023 09:16:31 +0000 Subject: [PATCH 1/2] GH-112361: Speed up pathlib by removing some temporary objects. Construct only one new list object (using `list.copy()`) when creating a new path object with a modified tail. This slightly speeds up `with_name()`, `with_suffix()`, `_make_child_relpath()` (used in walking and globbing), and `glob()`. --- Lib/pathlib.py | 37 ++++++++----------- Lib/test/test_pathlib.py | 2 - ...-11-24-09-27-01.gh-issue-112361.kYtnHW.rst | 2 + 3 files changed, 17 insertions(+), 24 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-11-24-09-27-01.gh-issue-112361.kYtnHW.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 9bce5320ef68e9..87b61edceb632d 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -400,13 +400,14 @@ def stem(self): def with_name(self, name): """Return a new path with the file name changed.""" - if not self.name: - raise ValueError("%r has an empty name" % (self,)) m = self.pathmod if not name or m.sep in name or (m.altsep and m.altsep in name) or name == '.': - raise ValueError("Invalid name %r" % (name)) - return self._from_parsed_parts(self.drive, self.root, - self._tail[:-1] + [name]) + raise ValueError(f"Invalid name {name!r}") + tail = self._tail.copy() + if not tail: + raise ValueError(f"{self!r} has an empty name") + tail[-1] = name + return self._from_parsed_parts(self.drive, self.root, tail) def with_stem(self, stem): """Return a new path with the stem changed.""" @@ -417,21 +418,12 @@ def with_suffix(self, suffix): has no suffix, add given suffix. If the given suffix is an empty string, remove the suffix from the path. """ - m = self.pathmod - if m.sep in suffix or m.altsep and m.altsep in suffix: - raise ValueError("Invalid suffix %r" % (suffix,)) - if suffix and not suffix.startswith('.') or suffix == '.': - raise ValueError("Invalid suffix %r" % (suffix)) - name = self.name - if not name: - raise ValueError("%r has an empty name" % (self,)) - old_suffix = self.suffix - if not old_suffix: - name = name + suffix + if not suffix: + return self.with_name(self.stem) + elif suffix.startswith('.') and len(suffix) > 1: + return self.with_name(self.stem + suffix) else: - name = name[:-len(old_suffix)] + suffix - return self._from_parsed_parts(self.drive, self.root, - self._tail[:-1] + [name]) + raise ValueError(f"Invalid suffix {suffix!r}") def relative_to(self, other, /, *_deprecated, walk_up=False): """Return the relative path to another path identified by the passed @@ -993,18 +985,19 @@ def _scandir(self): def _make_child_relpath(self, name): path_str = str(self) - tail = self._tail + tail = self._tail.copy() if tail: path_str = f'{path_str}{self.pathmod.sep}{name}' elif path_str != '.': path_str = f'{path_str}{name}' else: path_str = name + tail.append(name) path = self.with_segments(path_str) path._str = path_str path._drv = self.drive path._root = self.root - path._tail_cached = tail + [name] + path._tail_cached = tail return path def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): @@ -1029,7 +1022,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): elif not path_pattern._tail: raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - pattern_parts = list(path_pattern._tail) + pattern_parts = path_pattern._tail.copy() if pattern[-1] in (self.pathmod.sep, self.pathmod.altsep): # GH-65238: pathlib doesn't preserve trailing slash. Add it back. pattern_parts.append('') diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index e1121a9d76c040..427e082f3e16cb 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -575,8 +575,6 @@ def test_with_suffix_common(self): self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d') self.assertRaises(ValueError, P('a/b').with_suffix, './.d') self.assertRaises(ValueError, P('a/b').with_suffix, '.d/.') - self.assertRaises(ValueError, P('a/b').with_suffix, - (self.pathmod.sep, 'd')) def test_relative_to_common(self): P = self.cls diff --git a/Misc/NEWS.d/next/Library/2023-11-24-09-27-01.gh-issue-112361.kYtnHW.rst b/Misc/NEWS.d/next/Library/2023-11-24-09-27-01.gh-issue-112361.kYtnHW.rst new file mode 100644 index 00000000000000..5a83f93f9fbec8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-11-24-09-27-01.gh-issue-112361.kYtnHW.rst @@ -0,0 +1,2 @@ +Speed up a small handful of :mod:`pathlib` methods by removing some +temporary objects. From 92a58f4522385ebb3ddc86dbc74c422004b21bd6 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 25 Nov 2023 09:08:45 +0000 Subject: [PATCH 2/2] Undo change to `_make_child_relpath()` --- Lib/pathlib.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 87b61edceb632d..32ccf818b157c5 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -985,19 +985,18 @@ def _scandir(self): def _make_child_relpath(self, name): path_str = str(self) - tail = self._tail.copy() + tail = self._tail if tail: path_str = f'{path_str}{self.pathmod.sep}{name}' elif path_str != '.': path_str = f'{path_str}{name}' else: path_str = name - tail.append(name) path = self.with_segments(path_str) path._str = path_str path._drv = self.drive path._root = self.root - path._tail_cached = tail + path._tail_cached = tail + [name] return path def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):