Skip to content

Commit

Permalink
Refactor the dependency inclusion in SPDX output #1145
Browse files Browse the repository at this point in the history
Signed-off-by: tdruez <tdruez@nexb.com>
  • Loading branch information
tdruez committed Jul 31, 2024
1 parent 406069c commit bcc4221
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 48 deletions.
78 changes: 41 additions & 37 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3579,6 +3579,15 @@ class DiscoveredDependency(
system and application packages discovered in the code under analysis.
Dependencies are usually collected from parsed package data such as a package
manifest or lockfile.
This class manages dependencies with the following considerations:
1. A dependency can be associated with a Package via the "for_package" field.
In this case, it is termed a "Package's dependency". If there is no such
association, the dependency is considered a "Project's dependency".
2. A dependency can also be linked to a Package through the "resolved_to_package"
field. When this link exists, the dependency is considered "resolved".
"""

# Overrides the `project` field to set the proper `related_name`.
Expand Down Expand Up @@ -3729,6 +3738,18 @@ def datafile_path(self):
if self.datafile_resource:
return self.datafile_resource.path

@property
def is_project_dependency(self):
    """
    Return True when this dependency has no ``for_package`` association,
    i.e. it is a "Project's dependency" rather than a "Package's dependency".
    """
    return not self.for_package_id

@property
def is_for_package(self):
    """
    Return True when this dependency is associated with a Package through
    the ``for_package`` field (a "Package's dependency").
    """
    return True if self.for_package_id else False

@property
def is_resolved_to_package(self):
    """
    Return True when this dependency is linked to a Package through the
    ``resolved_to_package`` field, meaning the dependency is "resolved".
    """
    linked_package_id = self.resolved_to_package_id
    return bool(linked_package_id)

@classmethod
def create_from_data(
cls,
Expand All @@ -3752,51 +3773,34 @@ def create_from_data(
not stripped for `datafile_path`.
"""
dependency_data = dependency_data.copy()
required_fields = ["purl", "dependency_uid"]
missing_values = [
field_name
for field_name in required_fields
if not dependency_data.get(field_name)
]

if missing_values:
message = (
f"No values for the following required fields: "
f"{', '.join(missing_values)}"
)
project_packages_qs = project.discoveredpackages

project.add_warning(description=message, model=cls, details=dependency_data)
return
if not dependency_data.get("dependency_uid"):
dependency_data["dependency_uid"] = str(uuid.uuid4())

if not for_package:
for_package_uid = dependency_data.get("for_package_uid")
if for_package_uid:
for_package = project.discoveredpackages.get(
package_uid=for_package_uid
)
for_package_uid = dependency_data.get("for_package_uid")
if not for_package and for_package_uid:
for_package = project_packages_qs.get(package_uid=for_package_uid)

if not resolved_to_package:
resolved_to_uid = dependency_data.get("resolved_to_uid")
if resolved_to_uid:
resolved_to_package = project.discoveredpackages.get(
package_uid=resolved_to_uid
)
resolved_to_uid = dependency_data.get("resolved_to_uid")
if not resolved_to_package and resolved_to_uid:
resolved_to_package = project_packages_qs.get(package_uid=resolved_to_uid)

if not datafile_resource:
datafile_path = dependency_data.get("datafile_path")
if datafile_path:
if strip_datafile_path_root:
segments = datafile_path.split("/")
datafile_path = "/".join(segments[1:])
datafile_resource = project.codebaseresources.get(path=datafile_path)
datafile_path = dependency_data.get("datafile_path")
if not datafile_resource and datafile_path:
if strip_datafile_path_root:
segments = datafile_path.split("/")
datafile_path = "/".join(segments[1:])
datafile_resource = project.codebaseresources.get(path=datafile_path)

if datasource_id:
dependency_data["datasource_id"] = datasource_id

# Set purl fields from `purl`
# Set package_url fields from the ``purl`` string.
purl = dependency_data.get("purl")
purl_mapping = PackageURL.from_string(purl).to_dict()
dependency_data.update(**purl_mapping)
if purl:
purl_data_dict = PackageURL.from_string(purl).to_dict()
dependency_data.update(**purl_data_dict)

cleaned_data = {
field_name: value
Expand Down Expand Up @@ -3830,7 +3834,7 @@ def populate_dependency_uuid(cls, dependency_data):
def spdx_id(self):
return f"SPDXRef-scancodeio-{self._meta.model_name}-{self.dependency_uid}"

def as_spdx(self):
def as_spdx_package(self):
"""Return this Dependency as an SPDX Package entry."""
from scanpipe.pipes import spdx

Expand Down
1 change: 0 additions & 1 deletion scanpipe/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,6 @@ def get_dependencies(project, dependency_data):
Given a `dependency_data` mapping, get a list of DiscoveredDependency objects
for that `project` with similar dependency data.
"""
dependency = None
dependency_uid = dependency_data.get("dependency_uid")
extracted_requirement = dependency_data.get("extracted_requirement") or ""

Expand Down
39 changes: 30 additions & 9 deletions scanpipe/pipes/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,28 @@ def _get_spdx_extracted_licenses(license_expressions):
return extracted_licenses


def get_dependency_as_spdx_relationship(dependency, document_spdx_id, packages_as_spdx):
    """
    Return the spdx.Relationship describing the provided ``dependency``.

    The relationship is ``<child> DEPENDENCY_OF <parent>`` where:

    - the parent is the ``for_package`` of the dependency when it has one,
      otherwise the document identified by ``document_spdx_id``;
    - the child is the ``resolved_to_package`` of the dependency when it has
      one, otherwise the dependency itself converted to an SPDX package.

    Side effect: in the unresolved case, the SPDX package built from the
    dependency is appended to the ``packages_as_spdx`` list, in place.
    """
    # Parent side: a Package's dependency hangs off its package, a Project's
    # dependency hangs off the document.
    parent_id = (
        dependency.for_package.spdx_id
        if dependency.is_for_package
        else document_spdx_id
    )

    # Child side: reuse the resolved package when available, else emit the
    # dependency itself as a package entry.
    if not dependency.is_resolved_to_package:
        unresolved_package = dependency.as_spdx_package()
        packages_as_spdx.append(unresolved_package)
        child_id = unresolved_package.spdx_id
    else:
        child_id = dependency.resolved_to_package.spdx_id

    return spdx.Relationship(
        spdx_id=child_id,
        related_spdx_id=parent_id,
        relationship="DEPENDENCY_OF",
    )


def to_spdx(project, include_files=False):
"""
Generate output for the provided ``project`` in SPDX document format.
Expand All @@ -540,6 +562,7 @@ def to_spdx(project, include_files=False):
discoveredpackage_qs = get_queryset(project, "discoveredpackage")
discovereddependency_qs = get_queryset(project, "discovereddependency")

document_spdx_id = f"SPDXRef-DOCUMENT-{project.uuid}"
packages_as_spdx = []
license_expressions = []
relationships = []
Expand All @@ -550,15 +573,12 @@ def to_spdx(project, include_files=False):
license_expressions.append(license_expression)

for dependency in discovereddependency_qs:
packages_as_spdx.append(dependency.as_spdx())
if dependency.for_package:
relationships.append(
spdx.Relationship(
spdx_id=dependency.spdx_id,
related_spdx_id=dependency.for_package.spdx_id,
relationship="DEPENDENCY_OF",
)
)
spdx_relationship = get_dependency_as_spdx_relationship(
dependency,
document_spdx_id,
packages_as_spdx,
)
relationships.append(spdx_relationship)

files_as_spdx = []
if include_files:
Expand All @@ -568,6 +588,7 @@ def to_spdx(project, include_files=False):
]

document = spdx.Document(
spdx_id=document_spdx_id,
name=f"scancodeio_{project.name}",
namespace=f"https://scancode.io/spdxdocs/{project.uuid}",
creation_info=spdx.CreationInfo(tool=f"ScanCode.io-{scancodeio_version}"),
Expand Down
2 changes: 1 addition & 1 deletion scanpipe/tests/data/asgiref/asgiref-3.3.0.spdx.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"spdxVersion": "SPDX-2.3",
"dataLicense": "CC0-1.0",
"SPDXID": "SPDXRef-DOCUMENT",
"SPDXID": "SPDXRef-DOCUMENT-2f5f5927-2cad-4ecb-9043-fda5337bd501",
"name": "scancodeio_asgiref",
"documentNamespace": "https://scancode.io/spdxdocs/2f5f5927-2cad-4ecb-9043-fda5337bd501",
"creationInfo": {
Expand Down

0 comments on commit bcc4221

Please sign in to comment.