From c65d645564974c0df27c2a7f36f5902ea1d1df6d Mon Sep 17 00:00:00 2001 From: John Sirois Date: Wed, 27 Nov 2024 14:42:21 -0800 Subject: [PATCH 1/2] Fix `pip cache remove` pattern matching. Previously, glob patterns were not properly accounted for, which could lead to `pip cache remove wheel-0.45.1-py3-none-any.whl` failing, for example, when exactly that file was shown to exist in the cache via `pip cache list`. Additionally, non-glob patterns previously only literally matched wheel project names; now they match the normalized name. For example, `pip cache remove pyfftw` will now remove a cached `pyFFTW-0.15.0-cp313-cp313-linux_x86_64.whl` wheel since the `pyfftw` pattern matches the normalized project name. Fixes #13086 --- news/13086.bugfix.rst | 1 + src/pip/_internal/commands/cache.py | 55 ++++++++++++++---- tests/functional/test_cache.py | 88 ++++++++++++++++++++++------- 3 files changed, 115 insertions(+), 29 deletions(-) create mode 100644 news/13086.bugfix.rst diff --git a/news/13086.bugfix.rst b/news/13086.bugfix.rst new file mode 100644 index 00000000000..a7c371d2449 --- /dev/null +++ b/news/13086.bugfix.rst @@ -0,0 +1 @@ +Fix ``pip cache remove`` pattern matching. diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py index 328336152cc..80d2ae097a1 100644 --- a/src/pip/_internal/commands/cache.py +++ b/src/pip/_internal/commands/cache.py @@ -1,4 +1,6 @@ +import fnmatch import os +import re import textwrap from optparse import Values from typing import Any, List @@ -204,7 +206,7 @@ def _find_http_files(self, options: Values) -> List[str]: def _find_wheels(self, options: Values, pattern: str) -> List[str]: wheel_dir = self._cache_dir(options, "wheels") - # The wheel filename format, as specified in PEP 427, is: + # The wheel filename format, as originally specified in PEP 427, is: # {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl # # Additionally, non-alphanumeric values in the distribution are @@ -212,14 +214,47 @@ def _find_wheels(self, options: Values, pattern: str) -> List[str]: # before `-{version}`. # # Given that information: - # - If the pattern we're given contains a hyphen (-), the user is - # providing at least the version. Thus, we can just append `*.whl` - # to match the rest of it. - # - If the pattern we're given doesn't contain a hyphen (-), the - # user is only providing the name. Thus, we append `-*.whl` to - # match the hyphen before the version, followed by anything else. + # - If the pattern we're given is not a glob: + # + We attempt project name normalization such that pyyaml matches PyYAML, + # etc. as outlined here: + # https://packaging.python.org/specifications/name-normalization. + # The one difference is we normalize non-alphanumeric to `_` instead of `-` + # since that is how the wheel filename components are normalized + # (facepalm). + # + If the pattern we're given contains a hyphen (-), the user is providing + # at least the version. Thus, we can just append `*.whl` to match the rest + # of it if it doesn't already glob to the end of the wheel file name. + # + If the pattern we're given is not a glob and doesn't contain a + # hyphen (-), the user is only providing the name. Thus, we append `-*.whl` + # to match the hyphen before the version, followed by anything else. + # - If the pattern is a glob, we ensure `*.whl` is appended if needed but + # cowardly do not attempt glob parsing / project name normalization. The user + # is on their own at that point. # # PEP 427: https://www.python.org/dev/peps/pep-0427/ - pattern = pattern + ("*.whl" if "-" in pattern else "-*.whl") - - return filesystem.find_files(wheel_dir, pattern) + # And: https://packaging.python.org/specifications/binary-distribution-format/ + uses_glob_patterns = any( + glob_pattern_char in pattern for glob_pattern_char in ("[", "*", "?") + ) + if not uses_glob_patterns: + project_name, sep, rest = pattern.partition("-") + # N.B.: This only normalizes non-alphanumeric characters in the name, which + # is 1/2 the job. Stripping case sensitivity is the other 1/2 and is + # handled below in the conversion from a glob to a regex executed with the + # IGNORECASE flag active. + partially_normalized_project_name = re.sub(r"[^\w.]+", "_", project_name) + if not sep: + sep = "-" + pattern = f"{partially_normalized_project_name}{sep}{rest}" + + if not pattern.endswith((".whl", "*")): + pattern = f"{pattern}*.whl" + + regex = fnmatch.translate(pattern) + + return [ + os.path.join(root, filename) + for root, _, files in os.walk(wheel_dir) + for filename in files + if re.match(regex, filename, flags=re.IGNORECASE) + ] diff --git a/tests/functional/test_cache.py b/tests/functional/test_cache.py index 5b7e585260d..c2f74959d1f 100644 --- a/tests/functional/test_cache.py +++ b/tests/functional/test_cache.py @@ -68,23 +68,26 @@ def populate_http_cache(http_cache_dir: str) -> List[Tuple[str, str]]: return files -@pytest.fixture -def populate_wheel_cache(wheel_cache_dir: str) -> List[Tuple[str, str]]: +def _populate_wheel_cache( + wheel_cache_dir: str, *name_and_versions: str +) -> List[Tuple[str, str]]: destination = os.path.join(wheel_cache_dir, "arbitrary", "pathname") os.makedirs(destination) - files = [ - ("yyy-1.2.3", os.path.join(destination, "yyy-1.2.3-py3-none-any.whl")), - ("zzz-4.5.6", os.path.join(destination, "zzz-4.5.6-py3-none-any.whl")), - ("zzz-4.5.7", os.path.join(destination, "zzz-4.5.7-py3-none-any.whl")), - ("zzz-7.8.9", os.path.join(destination, "zzz-7.8.9-py3-none-any.whl")), - ] - - for _name, filename in files: + wheel_info = [] + for name_and_version in name_and_versions: + filename = os.path.join(destination, f"{name_and_version}-py3-none-any.whl") with open(filename, "w"): pass + wheel_info.append((name_and_version, filename)) + return wheel_info - return files + +@pytest.fixture +def populate_wheel_cache(wheel_cache_dir: str) -> List[Tuple[str, str]]: + return _populate_wheel_cache( + wheel_cache_dir, "yyy-1.2.3", "zzz-4.5.6", "zzz-4.5.7", "zzz-7.8.9" + ) @pytest.fixture @@ -332,13 +335,26 @@ def test_cache_remove_too_many_args(script: PipTestEnvironment) -> None: script.pip("cache", "remove", "aaa", "bbb", expect_error=True) +@pytest.mark.parametrize( + "pattern", + [ + "zzz", + "ZZZ", + "zZz-*-py3-none-any.whl", + "zZz-*.whl", + "zzz-*", + "zzz*", + "zz[a-zA-Z]?", + "[!y]", + ], +) @pytest.mark.usefixtures("populate_wheel_cache") def test_cache_remove_name_match( - script: PipTestEnvironment, remove_matches_wheel: RemoveMatches + script: PipTestEnvironment, pattern: str, remove_matches_wheel: RemoveMatches ) -> None: - """Running `pip cache remove zzz` should remove zzz-4.5.6 and zzz-7.8.9, - but nothing else.""" - result = script.pip("cache", "remove", "zzz", "--verbose") + """Running `pip cache remove ` should remove zzz-4.5.6, + zzz-4.5.7 and zzz-7.8.9, but nothing else.""" + result = script.pip("cache", "remove", pattern, "--verbose") assert not remove_matches_wheel("yyy-1.2.3", result) assert remove_matches_wheel("zzz-4.5.6", result) @@ -346,13 +362,23 @@ def test_cache_remove_name_match( assert remove_matches_wheel("zzz-7.8.9", result) +@pytest.mark.parametrize( + "pattern", + [ + "zzz-4.5.6", + "ZZZ-4.5.6", + "zZz-4.5.6-py3-none-any.whl", + "zZz-4.5.6-*.whl", + "zZz-4.5.[0-6]", + ], +) @pytest.mark.usefixtures("populate_wheel_cache") def test_cache_remove_name_and_version_match( - script: PipTestEnvironment, remove_matches_wheel: RemoveMatches + script: PipTestEnvironment, pattern: str, remove_matches_wheel: RemoveMatches ) -> None: - """Running `pip cache remove zzz-4.5.6` should remove zzz-4.5.6, but - nothing else.""" - result = script.pip("cache", "remove", "zzz-4.5.6", "--verbose") + """Running `pip cache remove ` should remove zzz-4.5.6, + but nothing else.""" + result = script.pip("cache", "remove", pattern, "--verbose") assert not remove_matches_wheel("yyy-1.2.3", result) assert remove_matches_wheel("zzz-4.5.6", result) @@ -360,6 +386,30 @@ def test_cache_remove_name_and_version_match( assert not remove_matches_wheel("zzz-7.8.9", result) +@pytest.mark.parametrize( + "pattern", + [ + "pyfftw", + "pyfftw-0.15", + "pyfftw-0.15.0", + "pyFFTW", + "pyFFTW-0.15", + "pyFFTW-0.15.0", + ], +) +def test_issue_13086_op_case( + script: PipTestEnvironment, + wheel_cache_dir: str, + pattern: str, + remove_matches_wheel: RemoveMatches, +) -> None: + _populate_wheel_cache(wheel_cache_dir, "pyFFTW-0.15.0", "foo-1.0") + result = script.pip("cache", "remove", pattern, "--verbose") + + assert remove_matches_wheel("pyFFTW-0.15.0", result) + assert not remove_matches_wheel("foo-1.0", result) + + @pytest.mark.usefixtures("populate_http_cache", "populate_wheel_cache") def test_cache_purge( script: PipTestEnvironment, From f2c5bcff22dc974c266af0862028a7c79a12265b Mon Sep 17 00:00:00 2001 From: John Sirois Date: Wed, 27 Nov 2024 15:06:14 -0800 Subject: [PATCH 2/2] Add a test for the other #13086 glob case. --- tests/functional/test_cache.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/functional/test_cache.py b/tests/functional/test_cache.py index c2f74959d1f..074c56bad6f 100644 --- a/tests/functional/test_cache.py +++ b/tests/functional/test_cache.py @@ -410,6 +410,22 @@ def test_issue_13086_op_case( assert not remove_matches_wheel("foo-1.0", result) +def test_issue_13086_glob_case( + script: PipTestEnvironment, + wheel_cache_dir: str, + remove_matches_wheel: RemoveMatches, +) -> None: + _populate_wheel_cache( + wheel_cache_dir, "foo0-1.0", "foo1-1.0", "foo2-1.0", "foo1bob-1.0" + ) + result = script.pip("cache", "remove", "foo[0-2]", "--verbose") + + assert remove_matches_wheel("foo0-1.0", result) + assert remove_matches_wheel("foo1-1.0", result) + assert remove_matches_wheel("foo2-1.0", result) + assert not remove_matches_wheel("foo1bob", result) + + @pytest.mark.usefixtures("populate_http_cache", "populate_wheel_cache") def test_cache_purge( script: PipTestEnvironment,