Skip to content

Commit

Permalink
refactor: extract cache utilities (#7621)
Browse files Browse the repository at this point in the history
Co-authored-by: Randy Döring <30527984+radoering@users.noreply.github.com>
  • Loading branch information
ralbertazzi and radoering authored Mar 19, 2023
1 parent b8e912d commit 36ca327
Show file tree
Hide file tree
Showing 9 changed files with 419 additions and 379 deletions.
6 changes: 5 additions & 1 deletion src/poetry/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,11 @@ def _get_environment_repositories() -> dict[str, dict[str, str]]:

@property
def repository_cache_directory(self) -> Path:
    """Return the directory that holds the repository caches.

    The configured ``cache-dir`` value is run through ``expanduser()`` so that
    a leading ``~`` is resolved to the user's home directory, and
    ``cache/repositories`` is appended to it.
    """
    # A single return: the earlier variant without expanduser() shadowed the
    # corrected one and left "~" unexpanded in the resulting path.
    return Path(self.get("cache-dir")).expanduser() / "cache" / "repositories"

@property
def artifacts_cache_directory(self) -> Path:
    """Return the ``artifacts`` directory below the configured ``cache-dir``.

    The ``cache-dir`` setting is user-expanded before ``artifacts`` is joined.
    """
    cache_root = Path(self.get("cache-dir")).expanduser()
    return cache_root.joinpath("artifacts")

@property
def virtualenvs_path(self) -> Path:
Expand Down
89 changes: 8 additions & 81 deletions src/poetry/installation/chef.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

import hashlib
import json
import tarfile
import tempfile
import zipfile
Expand All @@ -19,18 +17,14 @@
from poetry.core.utils.helpers import temporary_directory
from pyproject_hooks import quiet_subprocess_runner # type: ignore[import]

from poetry.installation.chooser import InvalidWheelName
from poetry.installation.chooser import Wheel
from poetry.utils.env import ephemeral_environment


if TYPE_CHECKING:
from contextlib import AbstractContextManager

from poetry.core.packages.utils.link import Link

from poetry.config.config import Config
from poetry.repositories import RepositoryPool
from poetry.utils.cache import ArtifactCache
from poetry.utils.env import Env


Expand Down Expand Up @@ -86,12 +80,12 @@ def install(self, requirements: Collection[str]) -> None:


class Chef:
def __init__(self, config: Config, env: Env, pool: RepositoryPool) -> None:
def __init__(
self, artifact_cache: ArtifactCache, env: Env, pool: RepositoryPool
) -> None:
self._env = env
self._pool = pool
self._cache_dir = (
Path(config.get("cache-dir")).expanduser().joinpath("artifacts")
)
self._artifact_cache = artifact_cache

def prepare(
self, archive: Path, output_dir: Path | None = None, *, editable: bool = False
Expand Down Expand Up @@ -181,7 +175,9 @@ def _prepare_sdist(self, archive: Path, destination: Path | None = None) -> Path
sdist_dir = archive_dir

if destination is None:
destination = self.get_cache_directory_for_link(Link(archive.as_uri()))
destination = self._artifact_cache.get_cache_directory_for_link(
Link(archive.as_uri())
)

destination.mkdir(parents=True, exist_ok=True)

Expand All @@ -196,72 +192,3 @@ def _should_prepare(self, archive: Path) -> bool:
@classmethod
def _is_wheel(cls, archive: Path) -> bool:
    """Tell whether ``archive`` carries the ``.whl`` file suffix."""
    wheel_suffix = ".whl"
    return archive.suffix == wheel_suffix

def get_cached_archive_for_link(self, link: Link, *, strict: bool) -> Path | None:
    """Pick one cached archive for ``link``, or ``None`` if nothing fits.

    In strict mode only the cached file whose name equals ``link.filename``
    is returned. Otherwise every cached archive is ranked: a wheel supported
    by ``self._env`` gets its minimum supported tag index, any non-wheel
    archive gets ``inf``, and the lowest-ranked entry is returned.
    """
    cached = self.get_cached_archives_for_link(link)
    if not cached:
        return None

    if strict:
        # Strict mode wants the original cached archive, not the prioritized
        # archive type.
        for path in cached:
            if link.filename == path.name:
                return path
        return None

    ranked: list[tuple[float | None, Path]] = []
    for path in cached:
        if path.suffix != ".whl":
            # Non-wheel archives sort after every supported wheel.
            ranked.append((float("inf"), path))
            continue

        try:
            wheel = Wheel(path.name)
        except InvalidWheelName:
            continue

        if wheel.is_supported_by_environment(self._env):
            priority = wheel.get_minimum_supported_index(self._env.supported_tags)
            ranked.append((priority, path))

    if not ranked:
        return None

    _, best = min(ranked)
    return best

def get_cached_archives_for_link(self, link: Link) -> list[Path]:
    """List the archives stored in the cache directory of ``link``.

    Files are collected per archive type in the order ``whl``, ``tar.gz``,
    ``tar.bz2``, ``bz2``, ``zip``. Each file is reported once.
    """
    cache_dir = self.get_cache_directory_for_link(link)

    archive_types = ["whl", "tar.gz", "tar.bz2", "bz2", "zip"]
    # "*.bz2" also matches every "*.tar.bz2" file; keying a dict on the path
    # drops the second hit while keeping the first-seen order.
    unique_paths: dict[Path, None] = {}
    for archive_type in archive_types:
        for archive in cache_dir.glob(f"*.{archive_type}"):
            unique_paths.setdefault(archive, None)

    return list(unique_paths)

def get_cache_directory_for_link(self, link: Link) -> Path:
    """Compute the cache directory used for artifacts of ``link``.

    A SHA-256 digest is taken over a compact, key-sorted JSON document made
    of the link URL without fragment, the link hash (when both hash name and
    value are set), the subdirectory fragment (when present) and the
    interpreter name and major/minor version from ``self._env.marker_env``.
    The hex digest is split into 2/2/2/remaining characters, which become
    nested directory names under ``self._cache_dir``.
    """
    key_parts = {"url": link.url_without_fragment}

    has_hash = not (link.hash_name is None or link.hash is None)
    if has_hash:
        key_parts[link.hash_name] = link.hash

    if link.subdirectory_fragment:
        key_parts["subdirectory"] = link.subdirectory_fragment

    marker_env = self._env.marker_env
    key_parts["interpreter_name"] = marker_env["interpreter_name"]
    # Only the first two version components take part in the key.
    major_minor = marker_env["interpreter_version"].split(".")[:2]
    key_parts["interpreter_version"] = "".join(major_minor)

    payload = json.dumps(
        key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
    )
    digest = hashlib.sha256(payload.encode("ascii")).hexdigest()

    return self._cache_dir.joinpath(
        digest[:2], digest[2:4], digest[4:6], digest[6:]
    )
35 changes: 1 addition & 34 deletions src/poetry/installation/chooser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
from typing import TYPE_CHECKING
from typing import Any

from packaging.tags import Tag

from poetry.config.config import Config
from poetry.config.config import PackageFilterPolicy
from poetry.utils.patterns import wheel_file_re
from poetry.utils.wheel import Wheel


if TYPE_CHECKING:
Expand All @@ -25,37 +23,6 @@
logger = logging.getLogger(__name__)


class InvalidWheelName(Exception):
    """Error signalling that a filename is not a valid wheel filename."""


class Wheel:
    """Wheel metadata parsed from a wheel filename.

    The filename is matched against ``wheel_file_re``; its name, version,
    build tag, Python versions, ABIs and platforms are stored, and ``tags``
    holds every (python, abi, platform) combination as a ``Tag``.
    """

    def __init__(self, filename: str) -> None:
        """Parse ``filename``.

        Raises:
            InvalidWheelName: if ``filename`` does not match ``wheel_file_re``.
        """
        wheel_info = wheel_file_re.match(filename)
        if not wheel_info:
            raise InvalidWheelName(f"{filename} is not a valid wheel filename.")

        self.filename = filename
        self.name = wheel_info.group("name").replace("_", "-")
        self.version = wheel_info.group("ver").replace("_", "-")
        self.build_tag = wheel_info.group("build")
        self.pyversions = wheel_info.group("pyver").split(".")
        self.abis = wheel_info.group("abi").split(".")
        self.plats = wheel_info.group("plat").split(".")

        self.tags = {
            Tag(x, y, z) for x in self.pyversions for y in self.abis for z in self.plats
        }

    def get_minimum_supported_index(self, tags: list[Tag]) -> int | None:
        """Return the lowest index in ``tags`` holding one of this wheel's tags.

        Returns ``None`` when none of the wheel's tags occurs in ``tags``.
        """
        # One pass with set membership: the first hit is the minimum index,
        # which avoids a list scan plus list.index() per wheel tag.
        return next(
            (index for index, tag in enumerate(tags) if tag in self.tags),
            None,
        )

    def is_supported_by_environment(self, env: Env) -> bool:
        """Tell whether ``env.supported_tags`` shares a tag with this wheel."""
        # isdisjoint() stops at the first common tag and needs no temporary set.
        return not self.tags.isdisjoint(env.supported_tags)


class Chooser:
"""
A Chooser chooses an appropriate release archive for packages.
Expand Down
24 changes: 18 additions & 6 deletions src/poetry/installation/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from poetry.puzzle.exceptions import SolverProblemError
from poetry.utils._compat import decode
from poetry.utils.authenticator import Authenticator
from poetry.utils.cache import ArtifactCache
from poetry.utils.env import EnvCommandError
from poetry.utils.helpers import atomic_open
from poetry.utils.helpers import get_file_hash
Expand Down Expand Up @@ -77,10 +78,11 @@ def __init__(
else:
self._max_workers = 1

self._artifact_cache = ArtifactCache(cache_dir=config.artifacts_cache_directory)
self._authenticator = Authenticator(
config, self._io, disable_cache=disable_cache, pool_size=self._max_workers
)
self._chef = Chef(config, self._env, pool)
self._chef = Chef(self._artifact_cache, self._env, pool)
self._chooser = Chooser(pool, self._env, config)

self._executor = ThreadPoolExecutor(max_workers=self._max_workers)
Expand Down Expand Up @@ -709,15 +711,19 @@ def _download(self, operation: Install | Update) -> Path:
def _download_link(self, operation: Install | Update, link: Link) -> Path:
package = operation.package

output_dir = self._chef.get_cache_directory_for_link(link)
output_dir = self._artifact_cache.get_cache_directory_for_link(link)
# Try to get cached original package for the link provided
original_archive = self._chef.get_cached_archive_for_link(link, strict=True)
original_archive = self._artifact_cache.get_cached_archive_for_link(
link, strict=True
)
if original_archive is None:
# No cached original distribution was found, so we download and prepare it
try:
original_archive = self._download_archive(operation, link)
except BaseException:
cache_directory = self._chef.get_cache_directory_for_link(link)
cache_directory = self._artifact_cache.get_cache_directory_for_link(
link
)
cached_file = cache_directory.joinpath(link.filename)
# We can't use unlink(missing_ok=True) because it's not available
# prior to Python 3.8
Expand All @@ -728,7 +734,11 @@ def _download_link(self, operation: Install | Update, link: Link) -> Path:

# Get potential higher prioritized cached archive, otherwise it will fall back
# to the original archive.
archive = self._chef.get_cached_archive_for_link(link, strict=False)
archive = self._artifact_cache.get_cached_archive_for_link(
link,
strict=False,
env=self._env,
)
# 'archive' can at this point never be None. Since we previously downloaded
# an archive, we now should have something cached that we can use here
assert archive is not None
Expand Down Expand Up @@ -792,7 +802,9 @@ def _download_archive(self, operation: Install | Update, link: Link) -> Path:
progress.start()

done = 0
archive = self._chef.get_cache_directory_for_link(link) / link.filename
archive = (
self._artifact_cache.get_cache_directory_for_link(link) / link.filename
)
archive.parent.mkdir(parents=True, exist_ok=True)
with atomic_open(archive) as f:
for chunk in response.iter_content(chunk_size=4096):
Expand Down
90 changes: 90 additions & 0 deletions src/poetry/utils/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,21 @@
import time

from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Generic
from typing import TypeVar

from poetry.utils.wheel import InvalidWheelName
from poetry.utils.wheel import Wheel


if TYPE_CHECKING:
from poetry.core.packages.utils.link import Link

from poetry.utils.env import Env


# Used by Cachy for items that do not expire.
MAX_DATE = 9999999999
Expand Down Expand Up @@ -196,3 +206,83 @@ def _deserialize(self, data_raw: bytes) -> CacheItem[T]:
data = json.loads(data_str[10:])
expires = int(data_str[:10])
return CacheItem(data, expires)


class ArtifactCache:
    """Lookup of cached distribution archives below one cache directory.

    Every link maps to its own nested directory derived from a SHA-256 digest
    of the link's identifying parts.
    """

    def __init__(self, *, cache_dir: Path) -> None:
        """Remember ``cache_dir`` as the root under which artifacts are kept."""
        self._cache_dir = cache_dir

    def get_cache_directory_for_link(self, link: Link) -> Path:
        """Compute the cache directory used for artifacts of ``link``.

        The key is built from the link URL without fragment, the link hash
        (when both hash name and value are set) and the subdirectory fragment
        (when present). It is serialized as compact, key-sorted JSON and
        hashed with SHA-256. The hex digest is split into 2/2/2/remaining
        characters, which become nested directory names under the cache root.
        """
        key_parts = {"url": link.url_without_fragment}

        if link.hash_name is not None and link.hash is not None:
            key_parts[link.hash_name] = link.hash

        if link.subdirectory_fragment:
            key_parts["subdirectory"] = link.subdirectory_fragment

        key = hashlib.sha256(
            json.dumps(
                key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
            ).encode("ascii")
        ).hexdigest()

        split_key = [key[:2], key[2:4], key[4:6], key[6:]]

        return self._cache_dir.joinpath(*split_key)

    def get_cached_archive_for_link(
        self,
        link: Link,
        *,
        strict: bool,
        env: Env | None = None,
    ) -> Path | None:
        """Pick one cached archive for ``link``, or ``None`` if nothing fits.

        In strict mode only the cached file whose name equals
        ``link.filename`` is returned and ``env`` is not consulted. Otherwise
        ``env`` is required: a wheel supported by ``env`` is ranked by its
        minimum supported tag index, any non-wheel archive is ranked ``inf``,
        and the lowest-ranked archive is returned.
        """
        # Implication "not strict -> env must be given".
        assert strict or env is not None

        archives = self._get_cached_archives_for_link(link)
        if not archives:
            return None

        candidates: list[tuple[float | None, Path]] = []
        for archive in archives:
            if strict:
                # in strict mode return the original cached archive instead of the
                # prioritized archive type.
                if link.filename == archive.name:
                    return archive
                continue

            assert env is not None

            if archive.suffix != ".whl":
                # Non-wheel archives sort after every supported wheel.
                candidates.append((float("inf"), archive))
                continue

            try:
                wheel = Wheel(archive.name)
            except InvalidWheelName:
                continue

            if not wheel.is_supported_by_environment(env):
                continue

            candidates.append(
                (wheel.get_minimum_supported_index(env.supported_tags), archive),
            )

        if not candidates:
            return None

        return min(candidates)[1]

    def _get_cached_archives_for_link(self, link: Link) -> list[Path]:
        """List the archives stored in the cache directory of ``link``.

        Files are collected per archive type in the order ``whl``, ``tar.gz``,
        ``tar.bz2``, ``bz2``, ``zip``. Each file is reported once.
        """
        cache_dir = self.get_cache_directory_for_link(link)

        archive_types = ["whl", "tar.gz", "tar.bz2", "bz2", "zip"]
        # "*.bz2" also matches every "*.tar.bz2" file; keying a dict on the
        # path drops the second hit while keeping the first-seen order.
        unique_paths: dict[Path, None] = {}
        for archive_type in archive_types:
            for archive in cache_dir.glob(f"*.{archive_type}"):
                unique_paths.setdefault(archive, None)

        return list(unique_paths)
Loading

0 comments on commit 36ca327

Please sign in to comment.