Skip to content

Commit

Permalink
use packaging.metadata instead of pkginfo
Browse files Browse the repository at this point in the history
  • Loading branch information
radoering committed Dec 23, 2023
1 parent 83706ba commit c427971
Show file tree
Hide file tree
Showing 7 changed files with 253 additions and 251 deletions.
28 changes: 17 additions & 11 deletions src/poetry/inspection/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,9 @@
if TYPE_CHECKING:
from collections.abc import Iterator

from packaging.metadata import RawMetadata
from poetry.core.packages.project_package import ProjectPackage

from poetry.inspection.lazy_wheel import MemoryWheel


logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -233,7 +232,9 @@ def to_package(
return package

@classmethod
def _from_distribution(cls, dist: pkginfo.Distribution) -> PackageInfo:
def _from_distribution(
cls, dist: pkginfo.BDist | pkginfo.SDist | pkginfo.Wheel
) -> PackageInfo:
"""
Helper method to parse package information from a `pkginfo.Distribution`
instance.
Expand All @@ -244,7 +245,7 @@ def _from_distribution(cls, dist: pkginfo.Distribution) -> PackageInfo:

if dist.requires_dist:
requirements = list(dist.requires_dist)
elif isinstance(dist, (pkginfo.BDist, pkginfo.SDist, pkginfo.Wheel)):
else:
requires = Path(dist.filename) / "requires.txt"
if requires.exists():
text = requires.read_text(encoding="utf-8")
Expand All @@ -258,9 +259,8 @@ def _from_distribution(cls, dist: pkginfo.Distribution) -> PackageInfo:
requires_python=dist.requires_python,
)

if isinstance(dist, (pkginfo.BDist, pkginfo.SDist, pkginfo.Wheel)):
info._source_type = "file"
info._source_url = Path(dist.filename).resolve().as_posix()
info._source_type = "file"
info._source_url = Path(dist.filename).resolve().as_posix()

return info

Expand Down Expand Up @@ -524,13 +524,19 @@ def from_wheel(cls, path: Path) -> PackageInfo:
return PackageInfo()

@classmethod
def from_memory_wheel(cls, wheel: MemoryWheel) -> PackageInfo:
def from_wheel_metadata(cls, metadata: RawMetadata) -> PackageInfo:
"""
Gather package information from a partially fetched wheel kept in memory.
Gather package information from metadata of a remote wheel.
:param path: Path to wheel.
:param metadata: metadata of the wheel.
"""
return cls._from_distribution(wheel)
return cls(
name=metadata.get("name"),
version=metadata.get("version"),
summary=metadata.get("summary"),
requires_dist=metadata.get("requires_dist"),
requires_python=metadata.get("requires_python"),
)

@classmethod
def from_bdist(cls, path: Path) -> PackageInfo:
Expand Down
59 changes: 26 additions & 33 deletions src/poetry/inspection/lazy_wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from zipfile import BadZipFile
from zipfile import ZipFile

from pkginfo import Distribution
from packaging.metadata import parse_email
from requests.models import CONTENT_CHUNK_SIZE
from requests.models import HTTPError
from requests.models import Response
Expand All @@ -31,6 +31,7 @@
from collections.abc import Iterable
from collections.abc import Iterator

from packaging.metadata import RawMetadata
from requests import Session

from poetry.utils.authenticator import Authenticator
Expand Down Expand Up @@ -58,29 +59,10 @@ def __str__(self) -> str:
return f"Wheel '{self.name}' located at {self.location} is invalid."


class MemoryWheel(Distribution):
    """Distribution whose metadata is read from a lazily fetched wheel.

    The raw metadata bytes are obtained by scanning the zip archive that
    ``lazy_file`` wraps (see ``read``).
    """

    def __init__(self, lazy_file: LazyWheelOverHTTP) -> None:
        """Remember *lazy_file* and extract the metadata from it right away."""
        self.lazy_file = lazy_file
        # Presumably inherited from Distribution and expected to end up
        # calling read() below -- TODO confirm.
        self.extractMetadata()

    def read(self) -> bytes:
        """Return the bytes of the first metadata-looking archive member.

        Every member whose name contains ``"METADATA"`` is a candidate.
        Candidates are tried from the shallowest path to the deepest, and the
        first one whose content contains ``b"Metadata-Version"`` is returned.

        :raises ValueError: if no candidate contains the marker.
        """
        with ZipFile(self.lazy_file) as archive:
            split_names = (
                member.split("/")
                for member in archive.namelist()
                if "METADATA" in member
            )
            # Sort by number of path components (ties fall back to comparing
            # the component lists themselves).
            by_depth = sorted((len(parts), parts) for parts in split_names)
            for _, parts in by_depth:
                candidate = "/".join(parts)
                logger.debug(f"read {candidate}")
                data = archive.read(candidate)
                if b"Metadata-Version" in data:
                    return data
            # NOTE(review): reached only when the loop finishes without
            # returning, i.e. the behaviour of a for/else with no break.
            raise ValueError(f"No METADATA in archive: {self.lazy_file.name}")


def memory_wheel_from_url(
def metadata_from_wheel_url(
name: str, url: str, session: Session | Authenticator
) -> MemoryWheel:
"""Return a MemoryWheel (compatible to pkginfo.Wheel) from the given wheel URL.
) -> RawMetadata:
"""Fetch metadata from the given wheel URL.
This uses HTTP range requests to only fetch the portion of the wheel
containing metadata, just enough for the object to be constructed.
Expand All @@ -92,10 +74,10 @@ def memory_wheel_from_url(
# After context manager exit, lazy_file.name will point to a deleted file path.
# Add `delete_backing_file=False` to disable this for debugging.
with LazyWheelOverHTTP(url, session) as lazy_file:
# prefetch metadata to reduce the number of range requests
# (we know that METADATA is the only file from the wheel we need)
lazy_file.prefetch_metadata(name)
return MemoryWheel(lazy_file)
metadata_bytes = lazy_file.read_metadata(name)

metadata, _ = parse_email(metadata_bytes)
return metadata

except (BadZipFile, UnsupportedWheel):
# We assume that these errors have occurred because the wheel contents
Expand Down Expand Up @@ -720,7 +702,7 @@ def _extract_content_length(
self._domains_without_negative_range.add(domain)
return file_length, tail

def prefetch_metadata(self, name: str) -> None:
def _prefetch_metadata(self, name: str) -> str:
"""Locate the *.dist-info/METADATA entry from a temporary ``ZipFile`` wrapper,
and download it.
Expand All @@ -729,7 +711,7 @@ def prefetch_metadata(self, name: str) -> None:
can be downloaded in a single ranged GET request."""
logger.debug("begin prefetching METADATA for %s", name)

dist_info_prefix = re.compile(r"^[^/]*\.dist-info/METADATA")
metadata_regex = re.compile(r"^[^/]*\.dist-info/METADATA$")
start: int | None = None
end: int | None = None

Expand All @@ -738,25 +720,36 @@ def prefetch_metadata(self, name: str) -> None:
# should be set large enough to avoid this).
zf = ZipFile(self)

filename = ""
for info in zf.infolist():
if start is None:
if dist_info_prefix.search(info.filename):
if metadata_regex.search(info.filename):
filename = info.filename
start = info.header_offset
continue
else:
# The last .dist-info/ entry may be before the end of the file if the
# wheel's entries are sorted lexicographically (which is unusual).
if not dist_info_prefix.search(info.filename):
if not metadata_regex.search(info.filename):
end = info.header_offset
break
if start is None:
raise UnsupportedWheel(
f"no {dist_info_prefix!r} found for {name} in {self.name}"
f"no {metadata_regex!r} found for {name} in {self.name}"
)
# If it is the last entry of the zip, then give us everything
# until the start of the central directory.
if end is None:
end = zf.start_dir
logger.debug("fetch METADATA")
logger.debug(f"fetch {filename}")
self.ensure_downloaded(start, end)
logger.debug("done prefetching METADATA for %s", name)

return filename

def read_metadata(self, name: str) -> bytes:
    """Return the raw bytes of the wheel's METADATA file.

    :param name: passed through to ``_prefetch_metadata``.
    :return: content of the archive member whose filename
        ``_prefetch_metadata`` reports.
    """
    with ZipFile(self) as archive:
        # Prefetch first so that the read below needs fewer range requests.
        metadata_path = self._prefetch_metadata(name)
        return archive.read(metadata_path)
6 changes: 3 additions & 3 deletions src/poetry/repositories/http_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from poetry.config.config import Config
from poetry.inspection.lazy_wheel import HTTPRangeRequestUnsupported
from poetry.inspection.lazy_wheel import memory_wheel_from_url
from poetry.inspection.lazy_wheel import metadata_from_wheel_url
from poetry.repositories.cached_repository import CachedRepository
from poetry.repositories.exceptions import PackageNotFound
from poetry.repositories.exceptions import RepositoryError
Expand Down Expand Up @@ -118,8 +118,8 @@ def _get_info_from_wheel(self, url: str) -> PackageInfo:
# or we don't know yet, we try range requests.
if self._lazy_wheel and self._supports_range_requests.get(netloc, True):
try:
package_info = PackageInfo.from_memory_wheel(
memory_wheel_from_url(link.filename, link.url, self.session)
package_info = PackageInfo.from_wheel_metadata(
metadata_from_wheel_url(link.filename, link.url, self.session)
)
except HTTPRangeRequestUnsupported:
# Do not set to False if we already know that the domain supports
Expand Down
148 changes: 0 additions & 148 deletions tests/inspection/conftest.py

This file was deleted.

Loading

0 comments on commit c427971

Please sign in to comment.