Skip to content

Commit

Permalink
merge_cachi2_sboms: support multi-purl packages
Browse files Browse the repository at this point in the history
It's unclear at this time whether this will be needed. Syft seems to use
at most one purl for every SPDX package, but Cachi2 could conceivably
want to group multiple purls under a single package in some cases.

For example, two golang purls where the only difference is the `type` qualifier:

    ?type=module vs. ?type=package

Instead of raising an error for any SPDX package with 2 or more purls,
only raise the error if the purls aren't "similar enough" (identical
except for the qualifiers).

To be extra safe, change the _is_cachi2_non_registry_dependency check to
inspect all purls. That is the only piece of code that depends on the
qualifiers.

Signed-off-by: Adam Cmiel <acmiel@redhat.com>
  • Loading branch information
chmeliik committed Dec 17, 2024
1 parent b1e06a2 commit d4ee394
Showing 1 changed file with 25 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class SBOMItem(Protocol):
# Name of the SBOM item, as a string.
def name(self) -> str: ...
# Version of the SBOM item, as a string.
def version(self) -> str: ...
# A single purl for the SBOM item, or None when it has none.
def purl(self) -> PackageURL | None: ...
# Every purl attached to the SBOM item, as a list (empty when it has none).
def all_purls(self) -> list[PackageURL]: ...


@dataclass
Expand All @@ -37,6 +38,10 @@ def purl(self) -> PackageURL | None:
return try_parse_purl(purl_str)
return None

def all_purls(self) -> list[PackageURL]:
    """Return the result of ``purl()`` wrapped in a list, or an empty list when it is falsy."""
    parsed = self.purl()
    if not parsed:
        return []
    return [parsed]


def wrap_as_cdx(items: list[dict[str, Any]]) -> list[CDXComponent]:
    """Wrap each raw dict in a ``CDXComponent``, preserving the input order."""
    return [CDXComponent(item) for item in items]
Expand All @@ -60,12 +65,24 @@ def version(self) -> str:
return self.data.get("versionInfo") or ""

def purl(self) -> PackageURL | None:
    """Return the first purl of this package, or None when it has none.

    When the package carries two or more purls they are first checked with
    ``_verify_purl_similarity``, which raises if they differ too much.
    """
    candidates = self.all_purls()
    if not candidates:
        return None
    if len(candidates) >= 2:
        # Picking the first purl is only meaningful if the rest are near-identical.
        self._verify_purl_similarity(candidates)
    return candidates[0]

def all_purls(self) -> list[PackageURL]:
    """Return every purl listed in this package's ``externalRefs``.

    Collects the ``referenceLocator`` of each external reference whose
    ``referenceType`` is ``"purl"`` and parses it with ``try_parse_purl``.
    Entries for which ``try_parse_purl`` returns a falsy value are dropped
    (presumably purl strings that could not be parsed).

    A package may carry more than one purl; this method does not reject that.
    ``purl()`` is the place where multiple purls are checked for similarity.
    """
    purl_strings = [
        ref["referenceLocator"] for ref in self.data.get("externalRefs") or [] if ref["referenceType"] == "purl"
    ]
    # No "more than one purl" error here: multi-purl packages are supported,
    # and an early raise/return would make the list result below unreachable.
    return list(filter(None, map(try_parse_purl, purl_strings)))

def _verify_purl_similarity(self, purls: list[PackageURL]) -> None:
    """Raise ValueError unless all given purls are identical once qualifiers are removed.

    The purls of a single package must be "similar enough" for the purposes of
    this script, i.e. differ only in their qualifiers. Beyond that, cachi2 and
    syft are trusted not to group purls of unrelated packages.
    """
    # Compare string forms with the qualifiers blanked out; duplicates collapse in the set.
    less_detailed_purls = {candidate._replace(qualifiers=None).to_string() for candidate in purls}
    if len(less_detailed_purls) == 1:
        return
    raise ValueError(f"The purls for an SPDX package are too different: {sorted(less_detailed_purls)}")


def wrap_as_spdx(items: list[dict[str, Any]]) -> list[SPDXPackage]:
Expand Down Expand Up @@ -110,12 +127,12 @@ def _is_cachi2_non_registry_dependency(component: SBOMItem) -> bool:
Note that this function is only applicable for PyPI or NPM components.
"""
purl = component.purl()
if not purl:
return False

qualifiers = purl.qualifiers or {}
return purl.type in ("pypi", "npm") and ("vcs_url" in qualifiers or "download_url" in qualifiers)
def is_external(purl: PackageURL) -> bool:
qualifiers = purl.qualifiers or {}
return purl.type in ("pypi", "npm") and ("vcs_url" in qualifiers or "download_url" in qualifiers)

return any(map(is_external, component.all_purls()))


def _unique_key_cachi2(component: SBOMItem) -> str:
Expand Down

0 comments on commit d4ee394

Please sign in to comment.