Skip to content

Commit

Permalink
GH-79634: Accept path-like objects as pathlib glob patterns.
Browse files Browse the repository at this point in the history
Allow `os.PathLike` objects to be passed as patterns to
`pathlib.Path.glob()` and `rglob()`.
  • Loading branch information
barneygale committed Jan 13, 2024
1 parent f20b151 commit 4e80705
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 30 deletions.
6 changes: 6 additions & 0 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1020,6 +1020,9 @@ call fails (for example because the path doesn't exist).
future Python release, patterns with this ending will match both files
and directories. Add a trailing slash to match only directories.

.. versionchanged:: 3.13
The *pattern* parameter accepts a :term:`path-like object`.

.. method:: Path.group(*, follow_symlinks=True)

Return the name of the group owning the file. :exc:`KeyError` is raised
Expand Down Expand Up @@ -1482,6 +1485,9 @@ call fails (for example because the path doesn't exist).
.. versionchanged:: 3.13
The *follow_symlinks* parameter was added.

.. versionchanged:: 3.13
The *pattern* parameter accepts a :term:`path-like object`.

.. method:: Path.rmdir()

Remove this directory. The directory must be empty.
Expand Down
39 changes: 22 additions & 17 deletions Lib/pathlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,23 @@ def as_uri(self):
from urllib.parse import quote_from_bytes
return prefix + quote_from_bytes(os.fsencode(path))

@property
def _pattern_parts(self):
    """List of path components, to be used with patterns in glob().

    Returns a copy of ``self._tail``. An empty string is appended when the
    raw pattern text ends with ``**`` (emitting a FutureWarning) or with a
    path separator, so that the matcher still sees the trailing slash.
    An empty raw path yields the plain copy rather than raising IndexError.
    """
    parts = self._tail.copy()
    raw_path = self._raw_path
    # altsep may be None; keep only real separator strings so the tuple
    # can be passed to str.endswith(), which also copes with an empty
    # raw path (indexing raw_path[-1] would raise IndexError there).
    seps = tuple(
        sep for sep in (self.pathmod.sep, self.pathmod.altsep) if sep)
    if raw_path.endswith('**'):
        # GH-70303: '**' only matches directories. Add trailing slash.
        warnings.warn(
            "Pattern ending '**' will match files and directories in a "
            "future Python release. Add a trailing slash to match only "
            "directories and remove this warning.",
            FutureWarning, 4)
        parts.append('')
    elif raw_path.endswith(seps):
        # GH-65238: pathlib doesn't preserve trailing slash. Add it back.
        parts.append('')
    return parts


# Subclassing os.PathLike makes isinstance() checks slower,
# which in turn makes Path construction slower. Register instead!
Expand Down Expand Up @@ -551,14 +568,8 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
kind, including directories) matching the given relative pattern.
"""
sys.audit("pathlib.Path.glob", self, pattern)
if pattern.endswith('**'):
# GH-70303: '**' only matches directories. Add trailing slash.
warnings.warn(
"Pattern ending '**' will match files and directories in a "
"future Python release. Add a trailing slash to match only "
"directories and remove this warning.",
FutureWarning, 2)
pattern = f'{pattern}/'
if not isinstance(pattern, PurePath):
pattern = self.with_segments(pattern)
return _abc.PathBase.glob(
self, pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)

Expand All @@ -568,15 +579,9 @@ def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
this subtree.
"""
sys.audit("pathlib.Path.rglob", self, pattern)
if pattern.endswith('**'):
# GH-70303: '**' only matches directories. Add trailing slash.
warnings.warn(
"Pattern ending '**' will match files and directories in a "
"future Python release. Add a trailing slash to match only "
"directories and remove this warning.",
FutureWarning, 2)
pattern = f'{pattern}/'
pattern = f'**/{pattern}'
if not isinstance(pattern, PurePath):
pattern = self.with_segments(pattern)
pattern = '**' / pattern
return _abc.PathBase.glob(
self, pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)

Expand Down
30 changes: 17 additions & 13 deletions Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,11 @@ def is_reserved(self):
name = self.name.partition('.')[0].partition(':')[0].rstrip(' ')
return name.upper() in _WIN_RESERVED_NAMES

@property
def _pattern_parts(self):
    """Return this path's components as a new list, for use as a glob()
    pattern."""
    return [*self.parts]

def match(self, path_pattern, *, case_sensitive=None):
"""
Return True if this path matches the given pattern.
Expand All @@ -415,11 +420,10 @@ def match(self, path_pattern, *, case_sensitive=None):
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self.pathmod)
sep = path_pattern.pathmod.sep
pattern_str = str(path_pattern)
if path_pattern.anchor:
pass
pattern_str = str(path_pattern)
elif path_pattern.parts:
pattern_str = f'**{sep}{pattern_str}'
pattern_str = str('**' / path_pattern)
else:
raise ValueError("empty pattern")
match = _compile_pattern(pattern_str, sep, case_sensitive)
Expand Down Expand Up @@ -706,16 +710,14 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
path_pattern = self.with_segments(pattern)
if path_pattern.anchor:
if not isinstance(pattern, PurePathBase):
pattern = self.with_segments(pattern)
if pattern.anchor:
raise NotImplementedError("Non-relative patterns are unsupported")
elif not path_pattern.parts:
elif not pattern.parts:
raise ValueError("Unacceptable pattern: {!r}".format(pattern))

pattern_parts = list(path_pattern.parts)
if not self.pathmod.basename(pattern):
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
pattern_parts.append('')
pattern_parts = pattern._pattern_parts

if case_sensitive is None:
# TODO: evaluate case-sensitivity of each directory in _select_children().
Expand Down Expand Up @@ -752,7 +754,7 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):

# Filter out paths that don't match pattern.
prefix_len = len(str(self._make_child_relpath('_'))) - 1
match = _compile_pattern(str(path_pattern), sep, case_sensitive)
match = _compile_pattern(str(pattern), sep, case_sensitive)
paths = (path for path in paths if match(str(path), prefix_len))
return paths

Expand All @@ -775,8 +777,10 @@ def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
directories) matching the given relative pattern, anywhere in
this subtree.
"""
return self.glob(
f'**/{pattern}', case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)
if not isinstance(pattern, PurePathBase):
pattern = self.with_segments(pattern)
pattern = '**' / pattern
return self.glob(pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)

def walk(self, top_down=True, on_error=None, follow_symlinks=False):
"""Walk the directory tree from this directory, similar to os.walk()."""
Expand Down
16 changes: 16 additions & 0 deletions Lib/test/test_pathlib/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1859,6 +1859,22 @@ def test_glob_recursive_no_trailing_slash(self):
with self.assertWarns(FutureWarning):
p.rglob('*/**')

def test_glob_pathlike(self):
    """glob() gives the same results whether the pattern is passed as a
    path object of the class under test or wrapped in FakePath."""
    base = self.cls(self.base)
    pattern = "dir*/file*"
    expected = {base / "dirB/fileB", base / "dirC/fileC"}
    # Same order as before: the path-object pattern first, then FakePath.
    for wrap in (self.cls, FakePath):
        found = set(base.glob(wrap(pattern)))
        self.assertEqual(expected, found)

def test_rglob_pathlike(self):
    """rglob() gives the same results whether the pattern is passed as a
    path object of the class under test or wrapped in FakePath."""
    root = self.cls(self.base, "dirC")
    pattern = "**/file*"
    expected = {root / "fileC", root / "dirD/fileD"}
    # Same order as before: the path-object pattern first, then FakePath.
    for wrap in (self.cls, FakePath):
        found = set(root.rglob(wrap(pattern)))
        self.assertEqual(expected, found)


@only_posix
class PosixPathTest(PathTest, PurePosixPathTest):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Accept :term:`path-like objects <path-like object>` as patterns in
:meth:`pathlib.Path.glob` and :meth:`~pathlib.Path.rglob`.

0 comments on commit 4e80705

Please sign in to comment.