Skip to content

Commit

Permalink
Load CycloneDX SBOMs dependencies #1145 (#1344)
Browse files Browse the repository at this point in the history
Signed-off-by: tdruez <tdruez@nexb.com>
  • Loading branch information
tdruez authored Jul 30, 2024
1 parent 0654894 commit 71f3d45
Show file tree
Hide file tree
Showing 17 changed files with 377 additions and 12 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ v34.7.2 (unreleased)
"delivery".
https://github.com/nexB/scancode.io/issues/1325

- Add support for creating dependencies using the ``load_sboms`` pipeline on CycloneDX
SBOM inputs.
https://github.com/nexB/scancode.io/issues/1145

- Add a new Dependency view that renders the project dependencies as a tree.
https://github.com/nexB/scancode.io/issues/1145

v34.7.1 (2024-07-15)
--------------------

Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ doc8:
@${ACTIVATE} doc8 --max-line-length 100 --ignore-path docs/_build/ --quiet docs/

valid:
@echo "-> Run Ruff linter"
@${ACTIVATE} ruff check --fix
@echo "-> Run Ruff format"
@${ACTIVATE} ruff format
@echo "-> Run Ruff linter"
@${ACTIVATE} ruff check --fix

check:
@echo "-> Run Ruff linter validation (pycodestyle, bandit, isort, and more)"
Expand Down
14 changes: 14 additions & 0 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2976,6 +2976,20 @@ def filter(self, *args, **kwargs):

return super().filter(*args, **kwargs)

def non_root_packages(self):
    """
    Return the packages that have at least one package parent.

    These are the packages referenced through a ``Dependency.resolved_to`` FK,
    selected by requiring a non-null ``resolved_from_dependencies`` relation.
    """
    # NOTE(review): filtering across this reverse relation may return the same
    # package once per matching dependency; confirm whether callers expect
    # distinct rows.
    has_parent_lookup = {"resolved_from_dependencies__isnull": False}
    return self.filter(**has_parent_lookup)

def root_packages(self):
    """
    Return the packages that are directly related to the Project.

    These packages are not referenced through any ``Dependency.resolved_to``
    FK, which is expressed as a null ``resolved_from_dependencies`` relation.
    """
    no_parent_lookup = {"resolved_from_dependencies__isnull": True}
    return self.filter(**no_parent_lookup)


class AbstractPackage(models.Model):
"""These fields should be kept in line with `packagedcode.models.PackageData`."""
Expand Down
7 changes: 6 additions & 1 deletion scanpipe/pipelines/load_sbom.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def steps(cls):
cls.get_sbom_inputs,
cls.get_packages_from_sboms,
cls.create_packages_from_sboms,
cls.create_dependencies_from_sboms,
)

def get_sbom_inputs(self):
Expand All @@ -62,8 +63,12 @@ def get_packages_from_sboms(self):
)

def create_packages_from_sboms(self):
"""Create the packages and dependencies from the SBOM, in the database."""
"""Create the packages declared in the SBOMs."""
resolve.create_packages_and_dependencies(
project=self.project,
packages=self.packages,
)

def create_dependencies_from_sboms(self):
    """Create the dependency relationships declared in the SBOMs."""
    # Delegates to the resolve pipe, which works from the current project.
    current_project = self.project
    resolve.create_dependencies_from_packages_extra_data(project=current_project)
2 changes: 1 addition & 1 deletion scanpipe/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def update_or_create_dependency(
where Dependency data is imported from a scancode-toolkit scan, where the
root path segments are not stripped for `datafile_path`.
If the dependency is resolved and a resolved package is created, we have the
corresponsing package_uid at `resolved_to`.
corresponding package_uid at `resolved_to`.
"""
if ignore_dependency_scope(project, dependency_data):
return # Do not create the DiscoveredDependency record.
Expand Down
35 changes: 30 additions & 5 deletions scanpipe/pipes/cyclonedx.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,18 @@ def is_cyclonedx_bom(input_location):
return False


def cyclonedx_component_to_package_data(cdx_component):
def cyclonedx_component_to_package_data(cdx_component, dependencies=None):
"""Return package_data from CycloneDX component."""
dependencies = dependencies or {}
extra_data = {}

# Store the original bom_ref and dependencies for future processing.
bom_ref = str(cdx_component.bom_ref)
if bom_ref:
extra_data["bom_ref"] = bom_ref
if depends_on := dependencies.get(bom_ref):
extra_data["depends_on"] = depends_on

package_url_dict = {}
if cdx_component.purl:
package_url_dict = cdx_component.purl.to_dict(encode=True)
Expand Down Expand Up @@ -271,14 +279,15 @@ def is_empty(value):
return cyclonedx_document_json


def resolve_cyclonedx_packages(input_location):
"""Resolve the packages from the `input_location` CycloneDX document file."""
def get_bom_instance_from_file(input_location):
"""Return a Bom instance from the `input_location` CycloneDX document file."""
input_path = Path(input_location)
document_data = input_path.read_text()

if str(input_location).endswith(".xml"):
cyclonedx_document = SafeElementTree.fromstring(document_data)
cyclonedx_bom = Bom.from_xml(cyclonedx_document)
return cyclonedx_bom

elif str(input_location).endswith(".json"):
cyclonedx_document = json.loads(document_data)
Expand All @@ -294,9 +303,25 @@ def resolve_cyclonedx_packages(input_location):
raise ValueError(error_msg)

cyclonedx_bom = Bom.from_json(data=cyclonedx_document)
return cyclonedx_bom

else:

def resolve_cyclonedx_packages(input_location):
"""Resolve the packages from the `input_location` CycloneDX document file."""
cyclonedx_bom = get_bom_instance_from_file(input_location)
if not cyclonedx_bom:
return []

components = get_components(cyclonedx_bom)
return [cyclonedx_component_to_package_data(component) for component in components]

# Store the ``bom_ref`` and the ``depends_on`` values on the extra_data field for
# the dependency resolution that takes place after the package creation.
dependencies = defaultdict(list)
for entry in cyclonedx_bom.dependencies:
if depends_on := [str(dep.ref) for dep in entry.dependencies]:
dependencies[str(entry.ref)].extend(depends_on)

return [
cyclonedx_component_to_package_data(component, dependencies)
for component in components
]
51 changes: 51 additions & 0 deletions scanpipe/pipes/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,19 @@

import json
import sys
import uuid
from pathlib import Path

from django.core.exceptions import MultipleObjectsReturned
from django.core.exceptions import ObjectDoesNotExist

from attributecode.model import About
from packagedcode import APPLICATION_PACKAGE_DATAFILE_HANDLERS
from packagedcode.licensing import get_license_detections_and_expression
from packageurl import PackageURL
from python_inspector.api import resolve_dependencies

from scanpipe.models import DiscoveredDependency
from scanpipe.models import DiscoveredPackage
from scanpipe.pipes import cyclonedx
from scanpipe.pipes import flag
Expand Down Expand Up @@ -108,6 +113,52 @@ def create_packages_and_dependencies(project, packages, resolved=False):
update_or_create_dependency(project, dependency_data)


def create_dependencies_from_packages_extra_data(project):
    """
    Create Dependency objects from the ``depends_on`` values stored in the
    Package ``extra_data`` and return the number of objects created.

    The Package instances must already be saved in the database: each
    ``depends_on`` entry is a ``bom_ref`` that is looked up among the project
    packages through their ``extra_data["bom_ref"]`` value.
    When that lookup does not return exactly one package, an error is recorded
    on the project and the entry is skipped.
    """
    all_packages = project.discoveredpackages.all()
    parent_packages = all_packages.filter(
        extra_data__has_key="depends_on"
    ).prefetch_related("codebase_resources")

    created_count = 0
    for parent_package in parent_packages:
        # Only attach a datafile resource when the package has exactly one
        # related codebase resource; otherwise leave it unset.
        resources = parent_package.codebase_resources.all()
        datafile_resource = resources[0] if len(resources) == 1 else None

        for bom_ref in parent_package.extra_data.get("depends_on", []):
            try:
                child_package = all_packages.get(extra_data__bom_ref=bom_ref)
            except (ObjectDoesNotExist, MultipleObjectsReturned):
                # A missing match and an ambiguous match (several packages)
                # are both reported with the same message.
                project.add_error(
                    description=f"Could not find resolved_to package entry: {bom_ref}.",
                    model="create_dependencies",
                )
                continue

            DiscoveredDependency.objects.create(
                project=project,
                dependency_uid=str(uuid.uuid4()),
                for_package=parent_package,
                resolved_to_package=child_package,
                datafile_resource=datafile_resource,
                is_runtime=True,
                is_resolved=True,
                is_direct=True,
            )
            created_count += 1

    return created_count


def get_packages_from_manifest(input_location, package_registry=None):
"""
Resolve packages or get packages data from a package manifest file/
Expand Down
6 changes: 6 additions & 0 deletions scanpipe/templates/scanpipe/dependency_list.html
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@
</div>

<div class="container is-fluid mb-3">
<a href="{% url 'project_dependency_tree' project.slug %}" class="is-pulled-right">
<span class="icon">
<i class="fa-solid fa-sitemap"></i>
</span>
<span>View the dependency tree</span>
</a>
<table class="table is-bordered is-striped is-narrow is-hoverable is-fullwidth">
{% include 'scanpipe/includes/list_view_thead.html' %}
<tbody>
Expand Down
76 changes: 76 additions & 0 deletions scanpipe/templates/scanpipe/project_dependency_tree.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{% extends "scanpipe/base.html" %}

{% block title %}ScanCode.io: {{ project.name }} - Dependency tree{% endblock %}

{% block content %}
<div id="content-header" class="container is-max-widescreen mb-3">
{% include 'scanpipe/includes/navbar_header.html' %}
<section class="mx-5">
<div class="is-flex is-justify-content-space-between">
{% include 'scanpipe/includes/breadcrumb.html' with linked_project=True current="Dependency tree" %}
</div>
</section>
</div>

<div class="container is-max-widescreen mb-3">
{% if recursion_error %}
<article class="message is-danger">
<div class="message-body">
The dependency tree cannot be rendered as it contains circular references.
{{ message|linebreaksbr }}
</div>
</article>
{% endif %}
<div id="tree"></div>
</div>
{% endblock %}

{% block scripts %}
<script src="https://d3js.org/d3.v7.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6"></script>
{{ dependency_tree|json_script:"dependency_tree" }}
{{ row_count|json_script:"row_count" }}
{{ max_depth|json_script:"max_depth" }}
<script>
// Parse the JSON payload embedded in the element with id "dependency_tree".
const data = JSON.parse(document.getElementById("dependency_tree").textContent);
const hierarchyData = d3.hierarchy(data);
// Per-column and per-row sizes used to derive the plot dimensions below.
// NOTE(review): `rowWidth` is only used to compute `height`, so it acts as a
// row height despite its name.
const columnWidth = 110;
const rowWidth = 25;
const columnCount = hierarchyData.height;
const rowCount = hierarchyData.links().length;
// One extra column and one extra row are added on top of the computed counts.
const width = columnWidth * (columnCount + 1);
const height = rowWidth * (rowCount + 1);

// Return a layout callback that positions each node of a hierarchy:
// `y` is set to the node depth and `x` to the index at which `eachBefore`
// visits the node. The callback itself returns nothing.
function indent() {
  function layout(root) {
    root.eachBefore(function (node, position) {
      node.y = node.depth;
      node.x = position;
    });
  }
  return layout;
}

// Build the tree figure with the Observable Plot tree mark and attach it to
// the element with id "tree".
// https://observablehq.com/plot/marks/tree
const plot = Plot.plot({
axis: null,
margin: 10,
marginLeft: 40,
marginRight: 160,
width: width,
height: height,
marks: [
// Only the leaves are passed as data; each leaf is described by a path built
// from the names of its ancestors.
Plot.tree(hierarchyData.leaves(), {
// The ancestors list is reversed before joining, presumably so that the path
// reads from the root down to the node -- TODO confirm against the d3 docs.
// NOTE(review): names are joined with "|", the same character used as
// `delimiter`; a name containing "|" would likely be split into extra segments.
path: (node) => node.ancestors().reverse().map(({ data: { name } }) => name).join("|"),
delimiter: "|",
treeLayout: indent,
strokeWidth: 1,
curve: "step-before",
fontSize: 14,
textStroke: "none"
})
]
});

document.getElementById("tree").appendChild(plot);
</script>
{% endblock %}
35 changes: 34 additions & 1 deletion scanpipe/tests/pipes/test_cyclonedx.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def test_scanpipe_cyclonedx_component_to_package_data(self):
"extracted_license_statement": "OFL-1.1\nApache-2.0",
"version": "0.10.2",
"extra_data": {
"bom_ref": "pkg:pypi/toml@0.10.2?extension=tar.gz",
"externalReferences": {
"advisories": ["https://cyclonedx.org/advisories"],
"bom": ["https://cyclonedx.org/bom"],
Expand Down Expand Up @@ -198,6 +199,22 @@ def test_scanpipe_cyclonedx_component_to_package_data_encoded_purl_name(self):
expected = {"name": "a:/b:name", "version": "1.0", "type": "type"}
self.assertEqual(expected, package_data)

def test_scanpipe_cyclonedx_get_bom_instance_from_file(self):
    # An invalid document is rejected with a ValueError carrying the
    # validation message.
    invalid_sbom = self.data / "missing_schema.json"
    expected_error = (
        'CycloneDX document "missing_schema.json" is not valid:\n'
        "Additional properties are not allowed ('invalid_entry' was unexpected)"
    )
    with self.assertRaises(ValueError) as cm:
        cyclonedx.get_bom_instance_from_file(invalid_sbom)
    self.assertIn(expected_error, str(cm.exception))

    # A valid document is loaded as a Bom exposing its components and
    # dependencies.
    valid_sbom = self.data / "laravel-7.12.0" / "bom.1.4.json"
    bom = cyclonedx.get_bom_instance_from_file(valid_sbom)
    self.assertIsInstance(bom, Bom)
    self.assertEqual(62, len(bom.components))
    self.assertEqual(63, len(bom.dependencies))

def test_scanpipe_cyclonedx_resolve_cyclonedx_packages(self):
input_location = self.data / "missing_schema.json"
with self.assertRaises(ValueError) as cm:
Expand Down Expand Up @@ -236,12 +253,28 @@ def test_scanpipe_cyclonedx_resolve_cyclonedx_packages(self):
packages = cyclonedx.resolve_cyclonedx_packages(input_location)
self.assertEqual(62, len(packages))

def test_scanpipe_cyclonedx_resolve_cyclonedx_packages_dependencies(self):
    # The resolved package data keeps the original ``bom_ref`` and the
    # ``depends_on`` references in its ``extra_data``.
    sbom_location = self.data / "laravel-7.12.0" / "bom.1.4.json"
    packages = cyclonedx.resolve_cyclonedx_packages(sbom_location)
    self.assertEqual(62, len(packages))

    first_extra_data = packages[0]["extra_data"]
    self.assertEqual("asm89/stack-cors-1.3.0.0", first_extra_data["bom_ref"])
    self.assertEqual(
        ["symfony/http-foundation-5.4.16.0", "symfony/http-kernel-5.4.16.0"],
        first_extra_data["depends_on"],
    )

def test_scanpipe_cyclonedx_resolve_cyclonedx_packages_pre_validation(self):
# This SBOM includes multiple deserialization issues that are "fixed"
# by the pre-validation cleanup.
input_location = self.data / "broken_sbom.json"
package_data = cyclonedx.resolve_cyclonedx_packages(input_location)
self.assertEqual([{"name": "asgiref"}], package_data)
self.assertEqual(
[{"extra_data": {"bom_ref": "pkg:pypi/asgiref@3.3.0"}, "name": "asgiref"}],
package_data,
)

def test_scanpipe_cyclonedx_cleanup_components_properties(self):
cyclonedx_document_json = {
Expand Down
7 changes: 5 additions & 2 deletions scanpipe/tests/pipes/test_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,10 @@ def test_scanpipe_pipes_outputs_get_cyclonedx_bom_dependency_tree(self):
a = make_package(project, "pkg:type/a")
b = make_package(project, "pkg:type/b")
c = make_package(project, "pkg:type/c")
make_package(project, "pkg:type/z")

# A -> B -> C
# Project -> A -> B -> C
# Project -> Z
make_dependency(project, for_package=a, resolved_to_package=b)
make_dependency(project, for_package=b, resolved_to_package=c)

Expand All @@ -296,12 +298,13 @@ def test_scanpipe_pipes_outputs_get_cyclonedx_bom_dependency_tree(self):

expected = [
{
"dependsOn": ["pkg:type/a", "pkg:type/b", "pkg:type/c"],
"dependsOn": ["pkg:type/a", "pkg:type/b", "pkg:type/c", "pkg:type/z"],
"ref": str(project.uuid),
},
{"dependsOn": ["pkg:type/b"], "ref": "pkg:type/a"},
{"dependsOn": ["pkg:type/c"], "ref": "pkg:type/b"},
{"ref": "pkg:type/c"},
{"ref": "pkg:type/z"},
]
self.assertEqual(expected, results_json["dependencies"])

Expand Down
Loading

0 comments on commit 71f3d45

Please sign in to comment.