Skip to content

Commit

Permalink
WIP: report yarn patches as pedigree not components
Browse files Browse the repository at this point in the history
Instead of reporting yarn patches as independent Components in the SBOM,
report them instead as Pedigree for the parent, non-patch Component.

Yarn has the concept of "builtin" patches that are applied by yarn
itself to make certain features of yarn work. These are currently ignored
and not reported.

Signed-off-by: Taylor Madore <tmadore@redhat.com>
  • Loading branch information
taylormadore committed Jan 13, 2025
1 parent a3a2410 commit fb53a2e
Show file tree
Hide file tree
Showing 4 changed files with 283 additions and 95 deletions.
90 changes: 65 additions & 25 deletions cachi2/core/package_managers/yarn/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from packageurl import PackageURL

from cachi2.core.errors import PackageManagerError, PackageRejected, UnsupportedFeature
from cachi2.core.models.sbom import Component
from cachi2.core.models.sbom import Component, Patch, PatchDiff, Pedigree
from cachi2.core.package_managers.yarn.locators import (
FileLocator,
HttpsLocator,
Expand Down Expand Up @@ -165,8 +165,18 @@ def create_components(
packages: list[Package], project: Project, output_dir: RootedPath
) -> list[Component]:
"""Create SBOM components for all the packages parsed from the 'yarn info' output."""
package_mapping = {package.parsed_locator: package for package in packages}
component_resolver = _ComponentResolver(package_mapping, project, output_dir)
package_mapping: dict[Locator, Package] = {}
patch_locators: list[PatchLocator] = []

# Patches are not components themselves, but they are necessary to
# resolve pedigree for their non-patch parent package components
for package in packages:
if isinstance(package.parsed_locator, PatchLocator):
patch_locators.append(package.parsed_locator)
else:
package_mapping[package.parsed_locator] = package

component_resolver = _ComponentResolver(package_mapping, patch_locators, project, output_dir)
return [component_resolver.get_component(package) for package in package_mapping.values()]


Expand All @@ -192,11 +202,41 @@ class _CouldNotResolve(ValueError):

class _ComponentResolver:
def __init__(
self, package_mapping: Mapping[Locator, Package], project: Project, output_dir: RootedPath
self,
package_mapping: Mapping[Locator, Package],
patch_locators: list[PatchLocator],
project: Project,
output_dir: RootedPath,
) -> None:
self._project = project
self._output_dir = output_dir
self._package_mapping = package_mapping
self._pedigree_mapping = self._get_pedigree_mapping(patch_locators)

def _get_pedigree_mapping(self, patch_locators: list[PatchLocator]) -> dict[Locator, Pedigree]:
"""Map locators for dependencies that get patched to their Pedigree."""
pedigree_mapping: dict[Locator, Pedigree] = {}
for patch_locator in patch_locators:
# Filter out builtin patches
patch_paths: list[Path] = [p for p in patch_locator.patches if isinstance(p, Path)]
if not patch_paths:
continue

patched_package = self._get_patched_package(patch_locator)
pedigree = pedigree_mapping.setdefault(patched_package, Pedigree(patches=[]))
for patch_path in patch_paths:
patch_url = self._get_patch_url(patch_locator, patch_path)
pedigree.patches.append(Patch(type="unofficial", diff=PatchDiff(url=patch_url)))

return pedigree_mapping

def _get_patched_package(self, patch_locator: PatchLocator) -> Locator:
"""Return the non-patch parent package for a given patch locator."""
patched_locator = patch_locator.package
while isinstance(patched_locator, PatchLocator):
patched_locator = patched_locator.package

return patched_locator

def get_component(self, package: Package) -> Component:
"""Create an SBOM component for a yarn Package."""
Expand All @@ -217,6 +257,7 @@ def get_component(self, package: Package) -> Component:
name=resolved_package.name,
version=resolved_package.version,
purl=purl,
pedigree=self._pedigree_mapping.get(package.parsed_locator),
)

@staticmethod
Expand Down Expand Up @@ -262,10 +303,7 @@ def _generate_purl_for_package(package: _ResolvedPackage, project: Project) -> s
subpath = str(normalized.subpath_from_root)

elif isinstance(package.locator, PatchLocator):
# ignore patch locators
# the actual dependency that is patched is reported separately
# the patch itself will be reported via SBOM pedigree patches
pass
raise _CouldNotResolve("Patches cannot be resolved into Components")
else:
assert_never(package.locator)

Expand Down Expand Up @@ -329,23 +367,7 @@ def log_for_locator(msg: str, *args: Any, level: int = logging.DEBUG) -> None:
)
name, version = self._read_name_version_from_packjson(packjson)
elif isinstance(locator, PatchLocator):
if (
package.cache_path
# yarn info seems to always report the cache path for patch dependencies,
# but the path doesn't always exist
and (cache_path := self._cache_path_as_rooted(package.cache_path)).path.exists()
):
log_for_locator("reading package name from %s", cache_path.subpath_from_root)
name = self._read_name_from_cache(cache_path)
elif orig_package := self._package_mapping.get(locator.package):
log_for_locator("resolving the name of the original package")
name = self._resolve_package(orig_package).name
else:
raise _CouldNotResolve(
"the 'yarn info' output does not include either an existing zip archive "
"or the original unpatched package",
)
version = package.version
raise _CouldNotResolve("Patches cannot be resolved into Components")
else:
# This line can never be reached assuming type-checker checks are passing
# https://typing.readthedocs.io/en/latest/source/unreachable.html#assert-never-and-exhaustiveness-checking
Expand Down Expand Up @@ -412,3 +434,21 @@ def _cache_path_as_rooted(self, cache_path: str) -> RootedPath:
return self._project_subpath(cache_path)
else:
return self._output_dir.join_within_root(cache_path)

def _get_patch_url(self, patch_locator: PatchLocator, patch_path: Path) -> str:
"""Return a PURL-style VCS URL qualifier with subpath for a Patch."""
if patch_locator.locator is None:
raise UnsupportedFeature(
(
f"{patch_locator} is missing an associated workspace locator "
"and Cachi2 expects all non-builtin yarn patches to have one"
)
)

project_path = self._project.source_dir
workspace_path = patch_locator.locator.relpath
normalized = self._project.source_dir.join_within_root(workspace_path, patch_path)
repo_url = get_repo_id(project_path.root).as_vcs_url_qualifier()
subpath_from_root = str(normalized.subpath_from_root)

return f"{repo_url}#{subpath_from_root}"
48 changes: 36 additions & 12 deletions tests/integration/test_data/yarn_e2e_test/bom.json
Original file line number Diff line number Diff line change
Expand Up @@ -314,18 +314,16 @@
},
{
"name": "cachito-npm-without-deps",
"properties": [
{
"name": "cachi2:found_by",
"value": "cachi2"
}
],
"purl": "pkg:npm/cachito-npm-without-deps@1.0.0",
"type": "library",
"version": "1.0.0"
},
{
"name": "cachito-npm-without-deps",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@70515793108df42547d3320c7ea4cd6b6e505c46#.yarn/patches/ccto-wo-deps-git@github.com-e0fce8c89c.patch"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down Expand Up @@ -662,6 +660,16 @@
},
{
"name": "fsevents",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@70515793108df42547d3320c7ea4cd6b6e505c46#my-patches/fsevents.patch"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down Expand Up @@ -961,6 +969,22 @@
},
{
"name": "left-pad",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@70515793108df42547d3320c7ea4cd6b6e505c46#my-patches/left-pad.patch"
},
"type": "unofficial"
},
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@70515793108df42547d3320c7ea4cd6b6e505c46#my-patches/left-pad-2.patch"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down
48 changes: 36 additions & 12 deletions tests/integration/test_data/yarn_v4/bom.json
Original file line number Diff line number Diff line change
Expand Up @@ -314,18 +314,16 @@
},
{
"name": "cachito-npm-without-deps",
"properties": [
{
"name": "cachi2:found_by",
"value": "cachi2"
}
],
"purl": "pkg:npm/cachito-npm-without-deps@1.0.0",
"type": "library",
"version": "1.0.0"
},
{
"name": "cachito-npm-without-deps",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@53a2bfe8d5ee7ed9c2f752fe75831a881d54895f#.yarn/patches/ccto-wo-deps-git@github.com-e0fce8c89c.patch"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down Expand Up @@ -662,6 +660,16 @@
},
{
"name": "fsevents",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@53a2bfe8d5ee7ed9c2f752fe75831a881d54895f#my-patches/fsevents.patch"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down Expand Up @@ -961,6 +969,22 @@
},
{
"name": "left-pad",
"pedigree": {
"patches": [
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@53a2bfe8d5ee7ed9c2f752fe75831a881d54895f#my-patches/left-pad.patch"
},
"type": "unofficial"
},
{
"diff": {
"url": "git+https://github.com/cachito-testing/cachi2-yarn-berry.git@53a2bfe8d5ee7ed9c2f752fe75831a881d54895f#my-patches/left-pad-2.patch"
},
"type": "unofficial"
}
]
},
"properties": [
{
"name": "cachi2:found_by",
Expand Down
Loading

0 comments on commit fb53a2e

Please sign in to comment.