From bcdad504318d0cf8232f84f5069f0ee641d0035c Mon Sep 17 00:00:00 2001 From: Avasam Date: Sat, 14 Jan 2023 22:00:09 -0500 Subject: [PATCH 1/8] get sdist requires from egg-info --- stub_uploader/metadata.py | 64 +++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index 859dbf35..0fe2de54 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -4,7 +4,10 @@ import graphlib import os import re -from collections.abc import Iterator, Iterable +import tarfile +from collections.abc import Iterable, Iterator +from glob import glob +from pathlib import Path from typing import Any, Optional import requests @@ -170,12 +173,50 @@ def verify_typeshed_req(req: Requirement) -> None: } +def validate_response(resp: requests.Response, req: Requirement): + if resp.status_code != 200: + raise InvalidRequires( + f"Expected dependency {req} to be accessible on PyPI, but got {resp.status_code}" + ) + + +def get_sdist_requires(sdist_data: dict[str, str], req: Requirement) -> list[str]: + filename = sdist_data["filename"] + resp = requests.get(sdist_data["url"], stream=True) + validate_response(resp, req) + with open(filename, "wb") as file: + file.write(resp.raw.read()) + + folder_name = "" + with tarfile.open(filename) as file_in: + file_in.extractall() + for info in file_in: + folder_name = info.name + # Assume a single folder + break + + sdist_requires: list[str] = [] + requires_filepath = Path(folder_name) / f"*.egg-info" / "requires.txt" + matches = glob(str(requires_filepath)) + for match in matches: + with open(match) as requires_file: + sdist_requires = [ + canonical_name(Requirement(line).name) + for line in requires_file.readlines() + ] + # Assume a single *.egg-info folder + break + return sdist_requires + + def verify_external_req( req: Requirement, upstream_distribution: Optional[str], _unsafe_ignore_allowlist: bool = False, # used for tests ) -> None: - if canonical_name(req.name) in uploaded_packages.read(): + req_canonical_name = canonical_name(req.name) + + if req_canonical_name in uploaded_packages.read(): raise InvalidRequires( f"Expected dependency {req} to not be uploaded from stub_uploader" ) @@ -190,30 +231,27 @@ def verify_external_req( f"There is no upstream distribution on PyPI, so cannot verify {req}" ) - resp = requests.get(f"https://pypi.org/pypi/{upstream_distribution}/json") - if resp.status_code != 200: + if req.name not in EXTERNAL_REQ_ALLOWLIST and not _unsafe_ignore_allowlist: raise InvalidRequires( - f"Expected dependency {req} to be accessible on PyPI, but got {resp.status_code}" + f"Expected dependency {req} to be present in the allowlist" ) + resp = requests.get(f"https://pypi.org/pypi/{upstream_distribution}/json") + validate_response(resp, req) data = resp.json() - # TODO: consider allowing external dependencies for stubs for packages that do not ship wheels. - # Note that we can't build packages from sdists, since that can execute arbitrary code. - # We could do some hacky setup.py parsing though... # TODO: PyPI doesn't seem to have version specific requires_dist. This does mean we can be # broken by new releases of upstream packages, even if they do not match the version spec we # have for the upstream distribution. if req.name not in [ Requirement(r).name for r in (data["info"].get("requires_dist") or []) ]: - raise InvalidRequires( - f"Expected dependency {req} to be listed in {upstream_distribution}'s requires_dist" - ) + return # Ok! - if req.name not in EXTERNAL_REQ_ALLOWLIST and not _unsafe_ignore_allowlist: + if req_canonical_name not in get_sdist_requires(data["urls"][-1], req): raise InvalidRequires( - f"Expected dependency {req} to be present in the allowlist" + f"Expected dependency {req} to be listed in {upstream_distribution}'s " + + "requires_dist or the sdist's *.egg-info/requires.txt" ) From 3b955417ca36d2589602bc01c5f2c8093115047c Mon Sep 17 00:00:00 2001 From: Avasam Date: Sat, 14 Jan 2023 22:11:30 -0500 Subject: [PATCH 2/8] Fix mypy --- stub_uploader/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index 0fe2de54..6b84fa0c 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -173,7 +173,7 @@ def verify_typeshed_req(req: Requirement) -> None: } -def validate_response(resp: requests.Response, req: Requirement): +def validate_response(resp: requests.Response, req: Requirement) -> None: if resp.status_code != 200: raise InvalidRequires( f"Expected dependency {req} to be accessible on PyPI, but got {resp.status_code}" From c9913b5d110243ae8bb8d74471815b529c423a9e Mon Sep 17 00:00:00 2001 From: Avasam Date: Sat, 14 Jan 2023 22:12:58 -0500 Subject: [PATCH 3/8] invert condition --- stub_uploader/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index 6b84fa0c..eecb7237 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -243,7 +243,7 @@ def verify_external_req( # TODO: PyPI doesn't seem to have version specific requires_dist. This does mean we can be # broken by new releases of upstream packages, even if they do not match the version spec we # have for the upstream distribution. - if req.name not in [ + if req.name in [ Requirement(r).name for r in (data["info"].get("requires_dist") or []) ]: return # Ok! From 030084099eb8ce86e3de2fc8e898d977c606e789 Mon Sep 17 00:00:00 2001 From: Avasam Date: Sun, 15 Jan 2023 05:41:06 -0500 Subject: [PATCH 4/8] Skip empty lines and extras --- stub_uploader/metadata.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index eecb7237..76ff6108 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -180,7 +180,9 @@ def validate_response(resp: requests.Response, req: Requirement) -> None: ) -def get_sdist_requires(sdist_data: dict[str, str], req: Requirement) -> list[str]: +def get_sdist_requires_canonical( + sdist_data: dict[str, str], req: Requirement +) -> list[str]: filename = sdist_data["filename"] resp = requests.get(sdist_data["url"], stream=True) validate_response(resp, req) @@ -203,6 +205,8 @@ def get_sdist_requires(sdist_data: dict[str, str], req: Requirement) -> list[str sdist_requires = [ canonical_name(Requirement(line).name) for line in requires_file.readlines() + # Skip empty lines and extras + if line[0] not in {"\n", "["} ] # Assume a single *.egg-info folder break @@ -248,7 +252,7 @@ def verify_external_req( ]: return # Ok! - if req_canonical_name not in get_sdist_requires(data["urls"][-1], req): + if req_canonical_name not in get_sdist_requires_canonical(data["urls"][-1], req): raise InvalidRequires( f"Expected dependency {req} to be listed in {upstream_distribution}'s " + "requires_dist or the sdist's *.egg-info/requires.txt" From 331cd3597125cad4c70b50991fcd30997a717a19 Mon Sep 17 00:00:00 2001 From: Avasam Date: Sun, 15 Jan 2023 06:05:35 -0500 Subject: [PATCH 5/8] Check for url_data["packagetype"] == "sdist" --- stub_uploader/metadata.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index 76ff6108..aff46ee3 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -242,7 +242,7 @@ def verify_external_req( resp = requests.get(f"https://pypi.org/pypi/{upstream_distribution}/json") validate_response(resp, req) - data = resp.json() + data: dict[str, Any] = resp.json() # TODO: PyPI doesn't seem to have version specific requires_dist. This does mean we can be # broken by new releases of upstream packages, even if they do not match the version spec we @@ -252,7 +252,18 @@ def verify_external_req( ]: return # Ok! - if req_canonical_name not in get_sdist_requires_canonical(data["urls"][-1], req): + sdist_data: dict[str, Any] | None = next( + ( + url_data + for url_data in reversed(data["urls"]) + if url_data["packagetype"] == "sdist" + ), + None, + ) + if not ( + sdist_data + and req_canonical_name in get_sdist_requires_canonical(sdist_data, req) + ): raise InvalidRequires( f"Expected dependency {req} to be listed in {upstream_distribution}'s " + "requires_dist or the sdist's *.egg-info/requires.txt" From e233fd87250bca19e0efc29093bba459de3bd890 Mon Sep 17 00:00:00 2001 From: Avasam Date: Sun, 15 Jan 2023 06:16:27 -0500 Subject: [PATCH 6/8] yield generator of Requirement instead of return list of str --- stub_uploader/metadata.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index aff46ee3..6f0c1874 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -5,7 +5,7 @@ import os import re import tarfile -from collections.abc import Iterable, Iterator +from collections.abc import Generator, Iterable from glob import glob from pathlib import Path from typing import Any, Optional @@ -180,9 +180,9 @@ def validate_response(resp: requests.Response, req: Requirement) -> None: ) -def get_sdist_requires_canonical( +def get_sdist_requires( sdist_data: dict[str, str], req: Requirement -) -> list[str]: +) -> Generator[Requirement, None, None]: filename = sdist_data["filename"] resp = requests.get(sdist_data["url"], stream=True) validate_response(resp, req) @@ -197,20 +197,18 @@ def get_sdist_requires_canonical( # Assume a single folder break - sdist_requires: list[str] = [] - requires_filepath = Path(folder_name) / f"*.egg-info" / "requires.txt" + requires_filepath = Path(folder_name, "*.egg-info", "requires.txt") matches = glob(str(requires_filepath)) for match in matches: + lines: list[str] = [] with open(match) as requires_file: - sdist_requires = [ - canonical_name(Requirement(line).name) - for line in requires_file.readlines() - # Skip empty lines and extras - if line[0] not in {"\n", "["} - ] + lines = requires_file.readlines() + for line in lines: + # Skip empty lines and extras + if line[0] not in {"\n", "["}: + yield Requirement(line) # Assume a single *.egg-info folder break - return sdist_requires def verify_external_req( @@ -262,7 +260,8 @@ def verify_external_req( ) if not ( sdist_data - and req_canonical_name in get_sdist_requires_canonical(sdist_data, req) + and req_canonical_name + in [canonical_name(r.name) for r in get_sdist_requires(sdist_data, req)] ): raise InvalidRequires( f"Expected dependency {req} to be listed in {upstream_distribution}'s " @@ -272,7 +271,7 @@ def verify_external_req( def sort_by_dependency( typeshed_dir: str, distributions: Iterable[str] -) -> Iterator[str]: +) -> Generator[str, None, None]: # Just a simple topological sort. Unlike previous versions of the code, we do not rely # on this to perform validation, like requiring the graph to be complete. # We only use this to help with edge cases like multiple packages being uploaded From 57905b467bc3828735839256f07ce90992ceed66 Mon Sep 17 00:00:00 2001 From: Avasam Date: Tue, 2 May 2023 10:01:06 -0400 Subject: [PATCH 7/8] Redundant variable initialization --- stub_uploader/metadata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index 6f0c1874..ba94f6c1 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -200,7 +200,6 @@ def get_sdist_requires( requires_filepath = Path(folder_name, "*.egg-info", "requires.txt") matches = glob(str(requires_filepath)) for match in matches: - lines: list[str] = [] with open(match) as requires_file: lines = requires_file.readlines() for line in lines: From 1fb9cdb582e1ade23df0be4e59aaa3b57ee69266 Mon Sep 17 00:00:00 2001 From: Avasam Date: Wed, 3 May 2023 12:20:28 -0400 Subject: [PATCH 8/8] use temp folder for archive download and manipulation use glob to not have to assume match count --- stub_uploader/metadata.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/stub_uploader/metadata.py b/stub_uploader/metadata.py index c54b0d55..d8d732ec 100644 --- a/stub_uploader/metadata.py +++ b/stub_uploader/metadata.py @@ -8,6 +8,7 @@ from collections.abc import Generator, Iterable from glob import glob from pathlib import Path +import tempfile from typing import Any, Optional import requests @@ -184,24 +185,23 @@ def validate_response(resp: requests.Response, req: Requirement) -> None: ) -def get_sdist_requires( +def extract_sdist_requires( sdist_data: dict[str, str], req: Requirement ) -> Generator[Requirement, None, None]: - filename = sdist_data["filename"] + tmpdir = tempfile.mkdtemp() + archive_path = Path(tmpdir, sdist_data["filename"]) + resp = requests.get(sdist_data["url"], stream=True) validate_response(resp, req) - with open(filename, "wb") as file: + with open(archive_path, "wb") as file: file.write(resp.raw.read()) - folder_name = "" - with tarfile.open(filename) as file_in: - file_in.extractall() - for info in file_in: - folder_name = info.name - # Assume a single folder - break + with tarfile.open(archive_path) as file_in: + file_in.extractall(tmpdir) - requires_filepath = Path(folder_name, "*.egg-info", "requires.txt") + # Only a single folder with ".egg-info/requires.txt" in the archive should exist + # but this supports possible edge-cases and doesn't require knowing any variable name in the path. + requires_filepath = Path(tmpdir, "*", "*.egg-info", "requires.txt") matches = glob(str(requires_filepath)) for match in matches: with open(match) as requires_file: @@ -210,8 +210,6 @@ def get_sdist_requires( # Skip empty lines and extras if line[0] not in {"\n", "["}: yield Requirement(line) - # Assume a single *.egg-info folder - break def verify_external_req( @@ -266,7 +264,7 @@ def verify_external_req( if not ( sdist_data and req_canonical_name - in [canonical_name(r.name) for r in get_sdist_requires(sdist_data, req)] + in [canonical_name(r.name) for r in extract_sdist_requires(sdist_data, req)] ): raise InvalidRequires( f"Expected dependency {req} to be listed in {upstream_distribution}'s "