From 36b0fa84b942fac5b50d5c95832eda39660fb86f Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 9 Aug 2023 18:39:53 -0400 Subject: [PATCH] make LinkMetadataCache - catch an exception when parsing metadata which only occurs in CI - handle --no-cache-dir - call os.makedirs() before writing to cache too - catch InvalidSchema when attempting git urls with BatchDownloader - fix other test failures - reuse should_cache(req) logic - gzip compress link metadata for a slight reduction in disk space - only cache built sdists - don't check should_cache() when fetching - cache lazy wheel dists - add news - turn debug logs in fetching from cache into exceptions - use scandir over listdir when searching normal wheel cache - handle metadata email parsing errors - correctly handle mutable cached requirement - use bz2 over gzip for an extremely slight improvement in disk usage --- news/12256.feature.rst | 1 + src/pip/_internal/cache.py | 118 ++++++++++++-- src/pip/_internal/cli/cmdoptions.py | 1 + src/pip/_internal/cli/req_command.py | 13 +- src/pip/_internal/exceptions.py | 19 +++ src/pip/_internal/metadata/__init__.py | 10 +- src/pip/_internal/metadata/base.py | 15 ++ src/pip/_internal/network/download.py | 2 +- src/pip/_internal/operations/prepare.py | 188 +++++++++++++++++++--- src/pip/_internal/req/constructors.py | 2 +- src/pip/_internal/req/req_install.py | 25 ++- src/pip/_internal/req/req_set.py | 2 +- src/pip/_internal/wheel_builder.py | 62 +------ tests/functional/test_check.py | 20 +-- tests/functional/test_install.py | 8 +- tests/functional/test_install_check.py | 2 +- tests/functional/test_install_metadata.py | 5 +- tests/functional/test_install_reqs.py | 2 +- tests/functional/test_wheel.py | 2 +- tests/unit/test_cache.py | 21 ++- tests/unit/test_wheel_builder.py | 26 +-- 21 files changed, 391 insertions(+), 153 deletions(-) create mode 100644 news/12256.feature.rst diff --git a/news/12256.feature.rst b/news/12256.feature.rst new file mode 100644 index 00000000000..0b5bb356188 --- /dev/null +++ b/news/12256.feature.rst @@ -0,0 +1 @@ +Cache computed metadata from sdists and lazy wheels in ``~/.cache/pip/link-metadata`` when ``--use-feature=metadata-cache`` is enabled. diff --git a/src/pip/_internal/cache.py b/src/pip/_internal/cache.py index f45ac23e95a..bb6b866476e 100644 --- a/src/pip/_internal/cache.py +++ b/src/pip/_internal/cache.py @@ -1,12 +1,14 @@ """Cache Management """ +import abc import hashlib import json import logging import os +import re from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Dict, Iterator, List, Optional, Tuple from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version from pip._vendor.packaging.utils import canonicalize_name @@ -15,21 +17,71 @@ from pip._internal.models.direct_url import DirectUrl from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel +from pip._internal.req.req_install import InstallRequirement from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds from pip._internal.utils.urls import path_to_url +from pip._internal.vcs import vcs logger = logging.getLogger(__name__) +_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE) + ORIGIN_JSON_NAME = "origin.json" +def _contains_egg_info(s: str) -> bool: + """Determine whether the string looks like an egg_info. + + :param s: The string to parse. E.g. 
foo-2.1 + """ + return bool(_egg_info_re.search(s)) + + +def should_cache( + req: InstallRequirement, +) -> bool: + """ + Return whether a built InstallRequirement can be stored in the persistent + wheel cache, assuming the wheel cache is available, and _should_build() + has determined a wheel needs to be built. + """ + if not req.link: + return False + + if req.link.is_wheel: + return False + + if req.editable or not req.source_dir: + # never cache editable requirements + return False + + if req.link and req.link.is_vcs: + # VCS checkout. Do not cache + # unless it points to an immutable commit hash. + assert not req.editable + assert req.source_dir + vcs_backend = vcs.get_backend_for_scheme(req.link.scheme) + assert vcs_backend + if vcs_backend.is_immutable_rev_checkout(req.link.url, req.source_dir): + return True + return False + + assert req.link + base, ext = req.link.splitext() + if _contains_egg_info(base): + return True + + # Otherwise, do not cache. + return False + + def _hash_dict(d: Dict[str, str]) -> str: """Return a stable sha224 of a dictionary.""" s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True) return hashlib.sha224(s.encode("ascii")).hexdigest() -class Cache: +class Cache(abc.ABC): """An abstract class - provides cache directories for data from links :param cache_dir: The root of the cache. @@ -73,20 +125,28 @@ def _get_cache_path_parts(self, link: Link) -> List[str]: return parts - def _get_candidates(self, link: Link, canonical_package_name: str) -> List[Any]: - can_not_cache = not self.cache_dir or not canonical_package_name or not link - if can_not_cache: - return [] + @abc.abstractmethod + def get_path_for_link(self, link: Link) -> str: + """Return a directory to store cached items in for link.""" + ... + + def cache_path(self, link: Link) -> Path: + return Path(self.get_path_for_link(link)) - path = self.get_path_for_link(link) - if os.path.isdir(path): - return [(candidate, path) for candidate in os.listdir(path)] - return [] + +class LinkMetadataCache(Cache): + """Persistently store the metadata of dists found at each link.""" def get_path_for_link(self, link: Link) -> str: - """Return a directory to store cached items in for link.""" - raise NotImplementedError() + parts = self._get_cache_path_parts(link) + assert self.cache_dir + return os.path.join(self.cache_dir, "link-metadata", *parts) + +class WheelCacheBase(Cache): + """Specializations to the cache concept for wheels.""" + + @abc.abstractmethod def get( self, link: Link, @@ -96,10 +156,27 @@ def get( """Returns a link to a cached item if it exists, otherwise returns the passed link. """ - raise NotImplementedError() + ... 
+ + def _can_cache(self, link: Link, canonical_package_name: str) -> bool: + return bool(self.cache_dir and canonical_package_name and link) + def _get_candidates( + self, link: Link, canonical_package_name: str + ) -> Iterator[Tuple[str, str]]: + if not self._can_cache(link, canonical_package_name): + return + + path = self.get_path_for_link(link) + if not os.path.isdir(path): + return -class SimpleWheelCache(Cache): + for candidate in os.scandir(path): + if candidate.is_file(): + yield (candidate.name, path) + + +class SimpleWheelCache(WheelCacheBase): """A cache of wheels for future installs.""" def __init__(self, cache_dir: str) -> None: @@ -131,7 +208,7 @@ def get( package_name: Optional[str], supported_tags: List[Tag], ) -> Link: - candidates = [] + candidates: List[Tuple[int, str, str]] = [] if not package_name: return link @@ -205,7 +282,7 @@ def __init__( ) -class WheelCache(Cache): +class WheelCache(WheelCacheBase): """Wraps EphemWheelCache and SimpleWheelCache into a single Cache This Cache allows for gracefully degradation, using the ephem wheel cache @@ -223,6 +300,15 @@ def get_path_for_link(self, link: Link) -> str: def get_ephem_path_for_link(self, link: Link) -> str: return self._ephem_cache.get_path_for_link(link) + def resolve_cache_dir(self, req: InstallRequirement) -> str: + """Return the persistent or temporary cache directory where the built or + downloaded wheel should be stored.""" + cache_available = bool(self.cache_dir) + assert req.link, req + if cache_available and should_cache(req): + return self.get_path_for_link(req.link) + return self.get_ephem_path_for_link(req.link) + def get( self, link: Link, diff --git a/src/pip/_internal/cli/cmdoptions.py b/src/pip/_internal/cli/cmdoptions.py index d05e502f908..8a5caf7349c 100644 --- a/src/pip/_internal/cli/cmdoptions.py +++ b/src/pip/_internal/cli/cmdoptions.py @@ -1008,6 +1008,7 @@ def check_list_path_option(options: Values) -> None: default=[], choices=[ "fast-deps", + "metadata-cache", "truststore", ] + ALWAYS_ENABLED_FEATURES, diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py index 6f2f79c6b3f..52de7e7a298 100644 --- a/src/pip/_internal/cli/req_command.py +++ b/src/pip/_internal/cli/req_command.py @@ -12,7 +12,7 @@ from optparse import Values from typing import TYPE_CHECKING, Any, List, Optional, Tuple -from pip._internal.cache import WheelCache +from pip._internal.cache import LinkMetadataCache, WheelCache from pip._internal.cli import cmdoptions from pip._internal.cli.base_command import Command from pip._internal.cli.command_context import CommandContextMixIn @@ -305,6 +305,16 @@ def make_requirement_preparer( "fast-deps has no effect when used with the legacy resolver." ) + if options.cache_dir and "metadata-cache" in options.features_enabled: + logger.warning( + "pip is using a local cache for metadata information. " + "This experimental feature is enabled through " + "--use-feature=metadata-cache and it is not ready for " + "production." 
+ ) + metadata_cache = LinkMetadataCache(options.cache_dir) + else: + metadata_cache = None return RequirementPreparer( build_dir=temp_build_dir_path, src_dir=options.src_dir, @@ -320,6 +330,7 @@ def make_requirement_preparer( lazy_wheel=lazy_wheel, verbosity=verbosity, legacy_resolver=legacy_resolver, + metadata_cache=metadata_cache, ) @classmethod diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 5007a622d82..f4a7693a151 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -250,6 +250,25 @@ def __str__(self) -> str: return f"None {self.metadata_name} metadata found for distribution: {self.dist}" +class CacheMetadataError(PipError): + """Raised when de/serializing a requirement into the metadata cache.""" + + def __init__( + self, + req: "InstallRequirement", + reason: str, + ) -> None: + """ + :param req: The requirement we attempted to cache. + :param reason: Context about the precise error that occurred. + """ + self.req = req + self.reason = reason + + def __str__(self) -> str: + return f"{self.reason} for {self.req} from {self.req.link}" + + class UserInstallationInvalid(InstallationError): """A --user install is requested on an environment without user site.""" diff --git a/src/pip/_internal/metadata/__init__.py b/src/pip/_internal/metadata/__init__.py index aa232b6cabd..2ed1fe8d64a 100644 --- a/src/pip/_internal/metadata/__init__.py +++ b/src/pip/_internal/metadata/__init__.py @@ -6,7 +6,14 @@ from pip._internal.utils.misc import strtobool -from .base import BaseDistribution, BaseEnvironment, FilesystemWheel, MemoryWheel, Wheel +from .base import ( + BaseDistribution, + BaseEnvironment, + FilesystemWheel, + MemoryWheel, + Wheel, + serialize_metadata, +) if TYPE_CHECKING: from typing import Literal, Protocol @@ -23,6 +30,7 @@ "get_environment", "get_wheel_distribution", "select_backend", + "serialize_metadata", ] diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index dea3d025d61..6a024542ee6 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -1,6 +1,9 @@ import csv +import email.generator import email.message +import email.policy import functools +import io import json import logging import pathlib @@ -97,6 +100,18 @@ def _convert_installed_files_path( return str(pathlib.Path(*info, *entry)) +def serialize_metadata(msg: email.message.Message) -> str: + """Write a dist's metadata to a string. + + Calling ``str(dist.metadata)`` may raise an error by misinterpreting RST directives + as email headers. 
This method uses the more robust ``email.policy.EmailPolicy`` to + avoid those parsing errors.""" + out = io.StringIO() + g = email.generator.Generator(out, policy=email.policy.EmailPolicy()) + g.flatten(msg) + return out.getvalue() + + class RequiresEntry(NamedTuple): requirement: str extra: str diff --git a/src/pip/_internal/network/download.py b/src/pip/_internal/network/download.py index d1d43541e6b..f629234e0f8 100644 --- a/src/pip/_internal/network/download.py +++ b/src/pip/_internal/network/download.py @@ -113,7 +113,7 @@ def _get_http_response_filename(resp: Response, link: Link) -> str: def _http_get_download(session: PipSession, link: Link) -> Response: - target_url = link.url.split("#", 1)[0] + target_url = link.url_without_fragment resp = session.get(target_url, headers=HEADERS, stream=True) raise_for_status(resp) return resp diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index 8f7485115aa..9c33d2547de 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -4,16 +4,22 @@ # The following comment should be removed at some point in the future. # mypy: strict-optional=False +import bz2 +import json import mimetypes import os import shutil +from dataclasses import dataclass from pathlib import Path -from typing import Dict, Iterable, List, Optional +from typing import Dict, Iterable, List, Optional, Tuple from pip._vendor.packaging.utils import canonicalize_name +from pip._vendor.requests.exceptions import InvalidSchema +from pip._internal.cache import LinkMetadataCache, should_cache from pip._internal.distributions import make_distribution_for_install_requirement from pip._internal.exceptions import ( + CacheMetadataError, DirectoryUrlHashUnsupported, HashMismatch, HashUnpinned, @@ -26,6 +32,7 @@ from pip._internal.metadata import ( BaseDistribution, get_metadata_distribution, + serialize_metadata, ) from pip._internal.models.direct_url import ArchiveInfo from pip._internal.models.link import Link @@ -64,16 +71,17 @@ def _get_prepared_distribution( finder: PackageFinder, build_isolation: bool, check_build_deps: bool, -) -> BaseDistribution: +) -> Tuple[bool, BaseDistribution]: """Prepare a distribution for installation.""" abstract_dist = make_distribution_for_install_requirement(req) tracker_id = abstract_dist.build_tracker_id - if tracker_id is not None: + builds_metadata = tracker_id is not None + if builds_metadata: with build_tracker.track(req, tracker_id): abstract_dist.prepare_distribution_metadata( finder, build_isolation, check_build_deps ) - return abstract_dist.get_metadata_distribution() + return (builds_metadata, abstract_dist.get_metadata_distribution()) def unpack_vcs_link(link: Link, location: str, verbosity: int) -> None: @@ -214,10 +222,49 @@ def _check_download_dir( return download_path +@dataclass(frozen=True) +class CacheableDist: + metadata: str + filename: Path + canonical_name: str + + @classmethod + def from_dist(cls, link: Link, dist: BaseDistribution) -> "CacheableDist": + """Extract the serializable data necessary to generate a metadata-only dist.""" + return cls( + metadata=serialize_metadata(dist.metadata), + filename=Path(link.filename), + canonical_name=dist.canonical_name, + ) + + def to_dist(self) -> BaseDistribution: + """Return a metadata-only dist from the deserialized cache entry.""" + return get_metadata_distribution( + metadata_contents=self.metadata.encode("utf-8"), + filename=str(self.filename), + canonical_name=self.canonical_name, + ) + + def 
to_json(self) -> Dict[str, str]: + return { + "metadata": self.metadata, + "filename": str(self.filename), + "canonical_name": self.canonical_name, + } + + @classmethod + def from_json(cls, args: Dict[str, str]) -> "CacheableDist": + return cls( + metadata=args["metadata"], + filename=Path(args["filename"]), + canonical_name=args["canonical_name"], + ) + + class RequirementPreparer: """Prepares a Requirement""" - def __init__( + def __init__( # noqa: PLR0913 self, build_dir: str, download_dir: Optional[str], @@ -233,6 +280,7 @@ def __init__( lazy_wheel: bool, verbosity: int, legacy_resolver: bool, + metadata_cache: Optional[LinkMetadataCache] = None, ) -> None: super().__init__() @@ -275,6 +323,8 @@ def __init__( # Previous "header" printed for a link-based InstallRequirement self._previous_requirement_header = ("", "") + self._metadata_cache = metadata_cache + def _log_preparing_link(self, req: InstallRequirement) -> None: """Provide context for the requirement being prepared.""" if req.link.is_file and not req.is_wheel_from_cache: @@ -367,14 +417,81 @@ def _fetch_metadata_only( ) return None if self.require_hashes: + # Hash checking also means hashes are provided for all reqs, so no resolve + # is necessary and metadata-only fetching provides no speedup. logger.debug( "Metadata-only fetching is not used as hash checking is required", ) return None - # Try PEP 658 metadata first, then fall back to lazy wheel if unavailable. - return self._fetch_metadata_using_link_data_attr( - req - ) or self._fetch_metadata_using_lazy_wheel(req.link) + + return ( + self._fetch_cached_metadata(req) + or self._fetch_metadata_using_link_data_attr(req) + or self._fetch_metadata_using_lazy_wheel(req) + ) + + def _locate_metadata_cache_entry(self, link: Link) -> Optional[Path]: + """If the metadata cache is active, generate a filesystem path from the hash of + the given Link.""" + if self._metadata_cache is None: + return None + + return self._metadata_cache.cache_path(link) + + def _fetch_cached_metadata( + self, req: InstallRequirement + ) -> Optional[BaseDistribution]: + cached_path = self._locate_metadata_cache_entry(req.link) + if cached_path is None: + return None + + # Quietly continue if the cache entry does not exist. + if not os.path.isfile(cached_path): + logger.debug( + "no cached metadata for link %s at %s", + req.link, + cached_path, + ) + return None + + try: + with bz2.open(cached_path, mode="rt", encoding="utf-8") as f: + logger.debug( + "found cached metadata for link %s at %s", req.link, cached_path + ) + args = json.load(f) + cached_dist = CacheableDist.from_json(args) + return cached_dist.to_dist() + except Exception: + raise CacheMetadataError(req, "error reading cached metadata") + + def _cache_metadata( + self, + req: InstallRequirement, + metadata_dist: BaseDistribution, + ) -> None: + cached_path = self._locate_metadata_cache_entry(req.link) + if cached_path is None: + return + + # The cache file exists already, so we have nothing to do. + if os.path.isfile(cached_path): + logger.debug( + "metadata for link %s is already cached at %s", req.link, cached_path + ) + return + + # The metadata cache is split across several subdirectories, so ensure the + # containing directory for the cache file exists before writing. 
+ os.makedirs(str(cached_path.parent), exist_ok=True) + try: + cacheable_dist = CacheableDist.from_dist(req.link, metadata_dist) + args = cacheable_dist.to_json() + logger.debug("caching metadata for link %s at %s", req.link, cached_path) + with bz2.open(cached_path, mode="wt", encoding="utf-8") as f: + json.dump(args, f) + except Exception: + raise CacheMetadataError(req, "failed to serialize metadata") def _fetch_metadata_using_link_data_attr( self, @@ -392,6 +509,9 @@ def _fetch_metadata_using_link_data_attr( metadata_link, ) # (2) Download the contents of the METADATA file, separate from the dist itself. + # NB: this request will hit the CacheControl HTTP cache, which will be very + # quick since the METADATA file is very small. Therefore, we can rely on + # HTTP caching instead of LinkMetadataCache. metadata_file = get_http_url( metadata_link, self._download, @@ -419,33 +539,45 @@ def _fetch_metadata_using_link_data_attr( def _fetch_metadata_using_lazy_wheel( self, - link: Link, + req: InstallRequirement, ) -> Optional[BaseDistribution]: """Fetch metadata using lazy wheel, if possible.""" # --use-feature=fast-deps must be provided. if not self.use_lazy_wheel: return None - if link.is_file or not link.is_wheel: + if req.link.is_file or not req.link.is_wheel: logger.debug( "Lazy wheel is not used as %r does not point to a remote wheel", - link, + req.link, ) return None - wheel = Wheel(link.filename) + wheel = Wheel(req.link.filename) name = canonicalize_name(wheel.name) logger.info( "Obtaining dependency information from %s %s", name, wheel.version, ) - url = link.url.split("#", 1)[0] + try: - return dist_from_wheel_url(name, url, self._session) + lazy_wheel_dist = dist_from_wheel_url( + name, req.link.url_without_fragment, self._session + ) except HTTPRangeRequestUnsupported: - logger.debug("%s does not support range requests", url) + logger.debug("%s does not support range requests", req.link) return None + # If we've used the lazy wheel approach, then PEP 658 metadata is not available. + # If the wheel is very large (>1GB), then retrieving it from the CacheControl + # HTTP cache may take multiple seconds, even on a fast computer, and the + # preparer will unnecessarily copy the cached response to disk before deleting + # it at the end of the run. Caching the dist metadata in LinkMetadataCache means + # later pip executions can retrieve metadata within milliseconds and avoid + # thrashing the disk. + self._cache_metadata(req, lazy_wheel_dist) + return lazy_wheel_dist + def _complete_partial_requirements( self, partially_downloaded_reqs: Iterable[InstallRequirement], @@ -462,7 +594,21 @@ def _complete_partial_requirements( links_to_fully_download: Dict[Link, InstallRequirement] = {} for req in partially_downloaded_reqs: assert req.link - links_to_fully_download[req.link] = req + + # (1) File URLs don't need to be downloaded, so skip them. + if req.link.scheme == "file": + continue + # (2) If this is e.g. a git url, we don't know how to handle that in the + # BatchDownloader, so leave it for self._prepare_linked_requirement() at + # the end of this method, which knows how to handle any URL. + can_simply_download = True + try: + # This will raise InvalidSchema if our Session can't download it. 
+ self._session.get_adapter(req.link.url) + except InvalidSchema: + can_simply_download = False + if can_simply_download: + links_to_fully_download[req.link] = req batch_download = self._batch_download( links_to_fully_download.keys(), @@ -677,13 +823,15 @@ def _prepare_linked_requirement( self._populate_download_info(req) - dist = _get_prepared_distribution( + (builds_metadata, dist) = _get_prepared_distribution( req, self.build_tracker, self.finder, self.build_isolation, self.check_build_deps, ) + if builds_metadata and should_cache(req): + self._cache_metadata(req, dist) return dist def _populate_download_info(self, req: InstallRequirement) -> None: @@ -757,7 +905,7 @@ def prepare_editable_requirement( assert req.source_dir req.download_info = direct_url_for_editable(req.unpacked_source_directory) - dist = _get_prepared_distribution( + (_, dist) = _get_prepared_distribution( req, self.build_tracker, self.finder, @@ -793,7 +941,7 @@ def prepare_installed_requirement( "completely repeatable environment, install into an " "empty virtualenv." ) - dist = _get_prepared_distribution( + (_, dist) = _get_prepared_distribution( req, self.build_tracker, self.finder, diff --git a/src/pip/_internal/req/constructors.py b/src/pip/_internal/req/constructors.py index 7e2d0e5b879..2a751a1aa4f 100644 --- a/src/pip/_internal/req/constructors.py +++ b/src/pip/_internal/req/constructors.py @@ -568,7 +568,7 @@ def install_req_extend_extras( """ result = copy.copy(ireq) result.extras = {*ireq.extras, *extras} - result.req = ( + result._req = ( _set_requirement_extras(ireq.req, result.extras) if ireq.req is not None else None diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index bb36497ad12..bbbdf18acd7 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -85,7 +85,7 @@ def __init__( permit_editable_wheels: bool = False, ) -> None: assert req is None or isinstance(req, Requirement), req - self.req = req + self._req = req self.comes_from = comes_from self.constraint = constraint self.editable = editable @@ -188,6 +188,23 @@ def __init__( # This requirement needs to be unpacked before it can be installed. self._archive_source: Optional[Path] = None + @property + def req(self) -> Optional[Requirement]: + """Calculate a requirement from the cached dist, if populated. 
+ + The cached dist can be populated by either + ``self.cache_virtual_metadata_only_dist()`` or + ``self.cache_concrete_dist()`` and can also be retrieved with + ``self.get_dist()``.""" + if self._req is not None: + return self._req + if self._dist is not None: + name = self._dist.canonical_name + version = str(self._dist.version) + self._req = Requirement(f"{name}=={version}") + return self._req + return None + def __str__(self) -> str: if self.req: s = redact_auth_from_requirement(self.req) @@ -381,7 +398,7 @@ def ensure_build_location( def _set_requirement(self) -> None: """Set requirement after generating metadata.""" - assert self.req is None + assert self._req is None assert self.metadata is not None assert self.source_dir is not None @@ -391,7 +408,7 @@ def _set_requirement(self) -> None: else: op = "===" - self.req = Requirement( + self._req = Requirement( "".join( [ self.metadata["Name"], @@ -417,7 +434,7 @@ def warn_on_mismatching_name(self) -> None: metadata_name, self.name, ) - self.req = Requirement(metadata_name) + self._req = Requirement(metadata_name) def check_if_exists(self, use_user_site: bool) -> None: """Find an installed distribution that satisfies or conflicts diff --git a/src/pip/_internal/req/req_set.py b/src/pip/_internal/req/req_set.py index 1bf73d595f6..f89c5fd3412 100644 --- a/src/pip/_internal/req/req_set.py +++ b/src/pip/_internal/req/req_set.py @@ -46,7 +46,7 @@ def add_unnamed_requirement(self, install_req: InstallRequirement) -> None: self.unnamed_requirements.append(install_req) def add_named_requirement(self, install_req: InstallRequirement) -> None: - assert install_req.name + assert install_req.name, install_req project_name = canonicalize_name(install_req.name) self.requirements[project_name] = install_req diff --git a/src/pip/_internal/wheel_builder.py b/src/pip/_internal/wheel_builder.py index b1debe3496c..06be9af0320 100644 --- a/src/pip/_internal/wheel_builder.py +++ b/src/pip/_internal/wheel_builder.py @@ -3,7 +3,6 @@ import logging import os.path -import re import shutil from typing import Iterable, List, Optional, Tuple @@ -25,23 +24,12 @@ from pip._internal.utils.subprocess import call_subprocess from pip._internal.utils.temp_dir import TempDirectory from pip._internal.utils.urls import path_to_url -from pip._internal.vcs import vcs logger = logging.getLogger(__name__) -_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE) - BuildResult = Tuple[List[InstallRequirement], List[InstallRequirement]] -def _contains_egg_info(s: str) -> bool: - """Determine whether the string looks like an egg_info. - - :param s: The string to parse. E.g. foo-2.1 - """ - return bool(_egg_info_re.search(s)) - - def _should_build( req: InstallRequirement, need_wheel: bool, @@ -87,54 +75,6 @@ def should_build_for_install_command( return _should_build(req, need_wheel=False) -def _should_cache( - req: InstallRequirement, -) -> Optional[bool]: - """ - Return whether a built InstallRequirement can be stored in the persistent - wheel cache, assuming the wheel cache is available, and _should_build() - has determined a wheel needs to be built. - """ - if req.editable or not req.source_dir: - # never cache editable requirements - return False - - if req.link and req.link.is_vcs: - # VCS checkout. Do not cache - # unless it points to an immutable commit hash. 
- assert not req.editable - assert req.source_dir - vcs_backend = vcs.get_backend_for_scheme(req.link.scheme) - assert vcs_backend - if vcs_backend.is_immutable_rev_checkout(req.link.url, req.source_dir): - return True - return False - - assert req.link - base, ext = req.link.splitext() - if _contains_egg_info(base): - return True - - # Otherwise, do not cache. - return False - - -def _get_cache_dir( - req: InstallRequirement, - wheel_cache: WheelCache, -) -> str: - """Return the persistent or temporary cache directory where the built - wheel need to be stored. - """ - cache_available = bool(wheel_cache.cache_dir) - assert req.link - if cache_available and _should_cache(req): - cache_dir = wheel_cache.get_path_for_link(req.link) - else: - cache_dir = wheel_cache.get_ephem_path_for_link(req.link) - return cache_dir - - def _verify_one(req: InstallRequirement, wheel_path: str) -> None: canonical_name = canonicalize_name(req.name or "") w = Wheel(os.path.basename(wheel_path)) @@ -315,7 +255,7 @@ def build( build_successes, build_failures = [], [] for req in requirements: assert req.name - cache_dir = _get_cache_dir(req, wheel_cache) + cache_dir = wheel_cache.resolve_cache_dir(req) wheel_file = _build_one( req, cache_dir, diff --git a/tests/functional/test_check.py b/tests/functional/test_check.py index 79b6df39c19..a5715bb8cba 100644 --- a/tests/functional/test_check.py +++ b/tests/functional/test_check.py @@ -119,10 +119,7 @@ def test_check_complicated_name_missing(script: PipTestEnvironment) -> None: # Without dependency result = script.pip("install", "--no-index", package_a_path, "--no-deps") - assert ( - "Successfully installed package_A-1.0" in result.stdout - or "Successfully installed package-A-1.0" in result.stdout - ), str(result) + assert "Successfully installed package-a-1.0" in result.stdout, str(result) result = script.pip("check", expect_error=True) expected_lines = ("package-a 1.0 requires dependency-b, which is not installed.",) @@ -145,10 +142,7 @@ def test_check_complicated_name_broken(script: PipTestEnvironment) -> None: # With broken dependency result = script.pip("install", "--no-index", package_a_path, "--no-deps") - assert ( - "Successfully installed package_A-1.0" in result.stdout - or "Successfully installed package-A-1.0" in result.stdout - ), str(result) + assert "Successfully installed package-a-1.0" in result.stdout, str(result) result = script.pip( "install", @@ -181,10 +175,7 @@ def test_check_complicated_name_clean(script: PipTestEnvironment) -> None: ) result = script.pip("install", "--no-index", package_a_path, "--no-deps") - assert ( - "Successfully installed package_A-1.0" in result.stdout - or "Successfully installed package-A-1.0" in result.stdout - ), str(result) + assert "Successfully installed package-a-1.0" in result.stdout, str(result) result = script.pip( "install", @@ -212,10 +203,7 @@ def test_check_considers_conditional_reqs(script: PipTestEnvironment) -> None: ) result = script.pip("install", "--no-index", package_a_path, "--no-deps") - assert ( - "Successfully installed package_A-1.0" in result.stdout - or "Successfully installed package-A-1.0" in result.stdout - ), str(result) + assert "Successfully installed package-a-1.0" in result.stdout, str(result) result = script.pip("check", expect_error=True) expected_lines = ("package-a 1.0 requires dependency-b, which is not installed.",) diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index 3411f66d801..7fe2ed3b0ab 100644 --- a/tests/functional/test_install.py +++ 
b/tests/functional/test_install.py @@ -2071,7 +2071,7 @@ def test_install_conflict_results_in_warning( # Install pkgA without its dependency result1 = script.pip("install", "--no-index", pkgA_path, "--no-deps") - assert "Successfully installed pkgA-1.0" in result1.stdout, str(result1) + assert "Successfully installed pkga-1.0" in result1.stdout, str(result1) # Then install an incorrect version of the dependency result2 = script.pip( @@ -2081,7 +2081,7 @@ def test_install_conflict_results_in_warning( allow_stderr_error=True, ) assert "pkga 1.0 requires pkgb==1.0" in result2.stderr, str(result2) - assert "Successfully installed pkgB-2.0" in result2.stdout, str(result2) + assert "Successfully installed pkgb-2.0" in result2.stdout, str(result2) def test_install_conflict_warning_can_be_suppressed( @@ -2101,11 +2101,11 @@ def test_install_conflict_warning_can_be_suppressed( # Install pkgA without its dependency result1 = script.pip("install", "--no-index", pkgA_path, "--no-deps") - assert "Successfully installed pkgA-1.0" in result1.stdout, str(result1) + assert "Successfully installed pkga-1.0" in result1.stdout, str(result1) # Then install an incorrect version of the dependency; suppressing warning result2 = script.pip("install", "--no-index", pkgB_path, "--no-warn-conflicts") - assert "Successfully installed pkgB-2.0" in result2.stdout, str(result2) + assert "Successfully installed pkgb-2.0" in result2.stdout, str(result2) def test_target_install_ignores_distutils_config_install_prefix( diff --git a/tests/functional/test_install_check.py b/tests/functional/test_install_check.py index 8a8a7c93a80..d16165e03f4 100644 --- a/tests/functional/test_install_check.py +++ b/tests/functional/test_install_check.py @@ -28,7 +28,7 @@ def test_check_install_canonicalization(script: PipTestEnvironment) -> None: # Let's install pkgA without its dependency result = script.pip("install", "--no-index", pkga_path, "--no-deps") - assert "Successfully installed pkgA-1.0" in result.stdout, str(result) + assert "Successfully installed pkga-1.0" in result.stdout, str(result) # Install the first missing dependency. Only an error for the # second dependency should remain. diff --git a/tests/functional/test_install_metadata.py b/tests/functional/test_install_metadata.py index ba0a4996acb..dc63adcdea1 100644 --- a/tests/functional/test_install_metadata.py +++ b/tests/functional/test_install_metadata.py @@ -210,7 +210,10 @@ def test_canonicalizes_package_name_before_verifying_metadata( ( # It's important that we verify pip won't even attempt to fetch the file, so we # construct an input that will cause it to error if it tries at all. 
- ("complex-dist==0.1", "404 Client Error: FileNotFoundError"), + ( + "complex-dist==0.1", + "Could not install packages due to an OSError: [Errno 2] No such file or directory", # noqa: E501 + ), ("corruptwheel==1.0", ".whl is invalid."), ), ) diff --git a/tests/functional/test_install_reqs.py b/tests/functional/test_install_reqs.py index 993f25a2abf..39d1d0cec36 100644 --- a/tests/functional/test_install_reqs.py +++ b/tests/functional/test_install_reqs.py @@ -620,7 +620,7 @@ def test_install_distribution_full_union( result = script.pip_install_local( to_install, f"{to_install}[bar]", f"{to_install}[baz]" ) - assert "Building wheel for LocalExtras" in result.stdout + assert "Building wheel for localextras" in result.stdout result.did_create(script.site_packages / "simple") result.did_create(script.site_packages / "singlemodule.py") diff --git a/tests/functional/test_wheel.py b/tests/functional/test_wheel.py index b1183fc830b..3f56b88fd4c 100644 --- a/tests/functional/test_wheel.py +++ b/tests/functional/test_wheel.py @@ -284,7 +284,7 @@ def test_wheel_package_with_latin1_setup( pkg_to_wheel = data.packages.joinpath("SetupPyLatin1") result = script.pip("wheel", pkg_to_wheel) - assert "Successfully built SetupPyUTF8" in result.stdout + assert "Successfully built setuppyutf8" in result.stdout def test_pip_wheel_with_pep518_build_reqs( diff --git a/tests/unit/test_cache.py b/tests/unit/test_cache.py index d0fee69c39b..ff989a07b7e 100644 --- a/tests/unit/test_cache.py +++ b/tests/unit/test_cache.py @@ -1,13 +1,32 @@ import os from pathlib import Path +import pytest from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version -from pip._internal.cache import WheelCache, _hash_dict +from pip._internal.cache import WheelCache, _contains_egg_info, _hash_dict from pip._internal.models.link import Link from pip._internal.utils.misc import ensure_dir +@pytest.mark.parametrize( + "s, expected", + [ + # Trivial. + ("pip-18.0", True), + # Ambiguous. + ("foo-2-2", True), + ("im-valid", True), + # Invalid. + ("invalid", False), + ("im_invalid", False), + ], +) +def test_contains_egg_info(s: str, expected: bool) -> None: + result = _contains_egg_info(s) + assert result == expected + + def test_falsey_path_none() -> None: wc = WheelCache("") assert wc.cache_dir is None diff --git a/tests/unit/test_wheel_builder.py b/tests/unit/test_wheel_builder.py index 9044f945307..cd4b915cf62 100644 --- a/tests/unit/test_wheel_builder.py +++ b/tests/unit/test_wheel_builder.py @@ -5,7 +5,7 @@ import pytest -from pip._internal import wheel_builder +from pip._internal import cache, wheel_builder from pip._internal.models.link import Link from pip._internal.operations.build.wheel_legacy import format_command_result from pip._internal.req.req_install import InstallRequirement @@ -13,24 +13,6 @@ from tests.lib import _create_test_package -@pytest.mark.parametrize( - "s, expected", - [ - # Trivial. - ("pip-18.0", True), - # Ambiguous. - ("foo-2-2", True), - ("im-valid", True), - # Invalid. 
- ("invalid", False), - ("im_invalid", False), - ], -) -def test_contains_egg_info(s: str, expected: bool) -> None: - result = wheel_builder._contains_egg_info(s) - assert result == expected - - class ReqMock: def __init__( self, @@ -128,7 +110,7 @@ def test_should_build_for_wheel_command(req: ReqMock, expected: bool) -> None: ], ) def test_should_cache(req: ReqMock, expected: bool) -> None: - assert wheel_builder._should_cache(cast(InstallRequirement, req)) is expected + assert cache.should_cache(cast(InstallRequirement, req)) is expected def test_should_cache_git_sha(tmpdir: Path) -> None: @@ -138,12 +120,12 @@ def test_should_cache_git_sha(tmpdir: Path) -> None: # a link referencing a sha should be cached url = "git+https://g.c/o/r@" + commit + "#egg=mypkg" req = ReqMock(link=Link(url), source_dir=repo_path) - assert wheel_builder._should_cache(cast(InstallRequirement, req)) + assert cache.should_cache(cast(InstallRequirement, req)) # a link not referencing a sha should not be cached url = "git+https://g.c/o/r@master#egg=mypkg" req = ReqMock(link=Link(url), source_dir=repo_path) - assert not wheel_builder._should_cache(cast(InstallRequirement, req)) + assert not cache.should_cache(cast(InstallRequirement, req)) def test_format_command_result__INFO(caplog: pytest.LogCaptureFixture) -> None: