From 12b25a9ad75342467c78314b5bc4e41514a0a1dd Mon Sep 17 00:00:00 2001 From: Adrien Schildknecht Date: Mon, 2 Oct 2023 14:55:26 +0200 Subject: [PATCH 1/2] url2purl: handle pypi.org urls pypi.org replaced pypi.python.org in 2017 and so most urls are now using this. See: https://packaging.python.org/en/latest/glossary/#term-pypi.org Make sure we route this domain properly so that we don't generate generic purls. Input: https://pypi.org/packages/source/z/zc.recipe.egg/zc.recipe.egg-2.0.0.tar.gz Before: pkg:generic/zc.recipe.egg-2.0.0.tar.gz?download... After: pkg:pypi/zc.recipe.egg@2.0.0 --- src/packageurl/contrib/url2purl.py | 6 +++++- tests/contrib/data/url2purl.json | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/packageurl/contrib/url2purl.py b/src/packageurl/contrib/url2purl.py index 0f51b2e..d74a422 100644 --- a/src/packageurl/contrib/url2purl.py +++ b/src/packageurl/contrib/url2purl.py @@ -277,6 +277,7 @@ def build_rubygems_purl(uri): return purl_from_pattern("rubygems", rubygems_pattern, uri) +# https://pypi.org/packages/source/a/anyjson/anyjson-0.3.3.tar.gz # https://pypi.python.org/packages/source/a/anyjson/anyjson-0.3.3.tar.gz # https://pypi.python.org/packages/2.6/t/threadpool/threadpool-1.2.7-py2.6.egg # https://pypi.python.org/packages/any/s/setuptools/setuptools-0.6c11-1.src.rpm @@ -296,7 +297,10 @@ def build_rubygems_purl(uri): ) -@purl_router.route("https?://.+python.+org/packages/.*") +@purl_router.route( + "https?://pypi.org/packages/.*", + "https?://.+python.+org/packages/.*", +) def build_pypi_purl(uri): path = unquote_plus(urlparse(uri).path) last_segment = path.split("/")[-1] diff --git a/tests/contrib/data/url2purl.json b/tests/contrib/data/url2purl.json index ab92a05..fbcad26 100644 --- a/tests/contrib/data/url2purl.json +++ b/tests/contrib/data/url2purl.json @@ -121,6 +121,7 @@ "https://rubygems.org/downloads/unf-0.1.3.gem": "pkg:rubygems/unf@0.1.3", "https://rubygems.org/downloads/yajl-ruby-1.2.0.gem": 
"pkg:rubygems/yajl-ruby@1.2.0", "https://rubygems.org/gems/i18n-js-3.0.11.gem": "pkg:rubygems/i18n-js@3.0.11", + "https://pypi.org/packages/source/z/zc.recipe.egg/zc.recipe.egg-2.0.0.tar.gz": "pkg:pypi/zc.recipe.egg@2.0.0", "https://pypi.python.org/packages/source/z/zc.recipe.egg/zc.recipe.egg-2.0.0.tar.gz": "pkg:pypi/zc.recipe.egg@2.0.0", "https://pypi.python.org/packages/source/p/python-openid/python-openid-2.2.5.zip": "pkg:pypi/python-openid@2.2.5", "https://pypi.python.org/packages/38/e2/b23434f4030bbb1af3bcdbb2ecff6b11cf2e467622446ce66a08e99f2ea9/pluggy-0.4.0.zip#md5=447a92368175965d2fbacaef9f3df842": "pkg:pypi/pluggy@0.4.0", @@ -257,4 +258,4 @@ "http://apt-rpm.org/": null, "": null, "https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/android-notifier/android-notifier-desktop-0.5.1-1.i386.rpm": "pkg:generic/code.google.com/android-notifier?download_url=https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/android-notifier/android-notifier-desktop-0.5.1-1.i386.rpm" -} \ No newline at end of file +} From 2e7ceda1d52be67c14cc8d8c44774e2528157267 Mon Sep 17 00:00:00 2001 From: Adrien Schildknecht Date: Mon, 2 Oct 2023 15:36:01 +0200 Subject: [PATCH 2/2] url2purl: handle pypi project urls Some tools are reporting `https://pypi.org/project/<name>/<version>` urls for pypi packages; let's make sure we can generate proper purls. Input: https://pypi.org/project/widgetsnbextension/3.0.7 Before: pkg:generic/3.0.7?download_url=...
After: pkg:pypi/widgetsnbextension@3.0.7 --- src/packageurl/contrib/url2purl.py | 14 +++++++++++--- tests/contrib/data/url2purl.json | 4 ++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/packageurl/contrib/url2purl.py b/src/packageurl/contrib/url2purl.py index d74a422..b61b743 100644 --- a/src/packageurl/contrib/url2purl.py +++ b/src/packageurl/contrib/url2purl.py @@ -298,12 +298,13 @@ def build_rubygems_purl(uri): @purl_router.route( - "https?://pypi.org/packages/.*", - "https?://.+python.+org/packages/.*", + "https?://pypi.org/(packages|project)/.+", + "https?://.+python.+org/(packages|project)/.*", ) def build_pypi_purl(uri): path = unquote_plus(urlparse(uri).path) - last_segment = path.split("/")[-1] + segments = path.split("/") + last_segment = segments[-1] # /wheel-0.29.0-py2.py3-none-any.whl if last_segment.endswith(".whl"): @@ -315,6 +316,13 @@ def build_pypi_purl(uri): version=match.group("version"), ) + if segments[1] == "project": + return PackageURL( + "pypi", + name=segments[2], + version=segments[3] if len(segments) > 3 else None, + ) + return purl_from_pattern("pypi", pypi_pattern, last_segment) diff --git a/tests/contrib/data/url2purl.json b/tests/contrib/data/url2purl.json index fbcad26..821e566 100644 --- a/tests/contrib/data/url2purl.json +++ b/tests/contrib/data/url2purl.json @@ -122,6 +122,10 @@ "https://rubygems.org/downloads/yajl-ruby-1.2.0.gem": "pkg:rubygems/yajl-ruby@1.2.0", "https://rubygems.org/gems/i18n-js-3.0.11.gem": "pkg:rubygems/i18n-js@3.0.11", "https://pypi.org/packages/source/z/zc.recipe.egg/zc.recipe.egg-2.0.0.tar.gz": "pkg:pypi/zc.recipe.egg@2.0.0", + "https://pypi.org/project/widgetsnbextension": "pkg:pypi/widgetsnbextension", + "https://pypi.org/project/widgetsnbextension/3.0.7/": "pkg:pypi/widgetsnbextension@3.0.7", + "https://pypi.org/project/widgetsnbextension/3.0.7/#files": "pkg:pypi/widgetsnbextension@3.0.7", + "https://pypi.python.org/project/widgetsnbextension/3.0.7/": 
"pkg:pypi/widgetsnbextension@3.0.7", "https://pypi.python.org/packages/source/z/zc.recipe.egg/zc.recipe.egg-2.0.0.tar.gz": "pkg:pypi/zc.recipe.egg@2.0.0", "https://pypi.python.org/packages/source/p/python-openid/python-openid-2.2.5.zip": "pkg:pypi/python-openid@2.2.5", "https://pypi.python.org/packages/38/e2/b23434f4030bbb1af3bcdbb2ecff6b11cf2e467622446ce66a08e99f2ea9/pluggy-0.4.0.zip#md5=447a92368175965d2fbacaef9f3df842": "pkg:pypi/pluggy@0.4.0",