Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SPDX format support for SBOM #608

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 2 additions & 123 deletions cachi2/core/models/property_semantics.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import functools
from dataclasses import dataclass, field
from itertools import groupby
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple
from typing import TYPE_CHECKING, Iterable, Optional

if TYPE_CHECKING:
from typing_extensions import Self, assert_never

from cachi2.core.models.sbom import Component, Property, SPDXPackage, SPDXRelation
from cachi2.core.models.sbom import Component, Property


def merge_component_properties(components: Iterable[Component]) -> list[Component]:
Expand Down Expand Up @@ -101,124 +101,3 @@ def merge(self, other: "Self") -> "Self":
pip_package_binary=self.pip_package_binary or other.pip_package_binary,
bundler_package_binary=self.bundler_package_binary or other.bundler_package_binary,
)


def merge_relationships(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels really weird for a reviewer to have to read "initial implementation", trying to get a grasp of it only to realize a few commits later in the same patch series that much of it is simply dropped.
My guess is that proposing this commit standalone is meant as a way of giving credit to the original author of the "initial implementation" commit? Be that as it may, and while it is a noble gesture, strictly from a review POV these line-dropping changes should be squashed into the original commit to reduce the confusion and declutter the overall diff.
The common practice is to just use a bunch of Signed-off-bys with every contributor's name/email. It's not like you need to take the authorship away from commit 1. I believe readability of the changes in such a complex topic as adding SPDX support should take precedence over any noble gestures in the name of collaboration :).

relationships_list: List[List[SPDXRelation]], doc_ids: List[str], packages: List[SPDXPackage]
) -> Tuple[List[SPDXRelation], List[SPDXPackage]]:
"""Merge SPDX relationships.
Function takes relationships lists, list of spdx document ids and unified list of packages.
For all relationships lists, map and inverse map of relations are created.
These maps are used to find root elements of the SPDX document.
For each relationships list, a map and an inverse map of relations are created. An SPDX document
usually contains a root package containing all real packages. The root element is found by
searching through the map and the inverse map of relationships. An element which has an entry in
the map containing other elements and whose entry in the inverse map points to the SPDX document
id is considered the root element.
Packages are searched in the relationships and their ID is stored as middle element.
"""

def map_relationships(
    relationships: List[SPDXRelation],
) -> Tuple[Dict[str, List[str]], Dict[str, str]]:
    """Build the forward and inverse lookup tables for the given relationships.

    The forward map is keyed by the ``spdxElementId`` of each relationship and
    holds the list of every ``relatedSpdxElement`` it refers to, in the order
    the relationships are given.

    The inverse map goes the other way: it is keyed by ``relatedSpdxElement``
    and holds a single ``spdxElementId``. When the same related element appears
    in several relationships, the last one seen wins.
    """
    related_by_element: Dict[str, List[str]] = {}
    element_by_related: Dict[str, str] = {}

    for relation in relationships:
        element_id = relation.spdxElementId
        related_id = relation.relatedSpdxElement

        if element_id not in related_by_element:
            related_by_element[element_id] = []
        related_by_element[element_id].append(related_id)

        # Plain assignment: a later relationship overwrites an earlier owner.
        element_by_related[related_id] = element_id

    return related_by_element, element_by_related

def process_relation(
    rel: SPDXRelation,
    doc_main: Optional[str],
    doc_other: Optional[str],
    root_package_main: str,
    root_package_other: Optional[str],
    merged_relationships: List[SPDXRelation],
) -> None:
    """Rewrite one SPDX relationship and collect it if it touches a known package.

    A copy of ``rel`` is built in which:

    * ``spdxElementId`` becomes ``root_package_main`` when it equals
      ``root_package_other``;
    * ``relatedSpdxElement`` becomes ``doc_main`` when it equals
      ``root_package_other``.

    The copy is appended to ``merged_relationships`` only when at least one of
    its two ends is present in ``package_ids``, which is taken from the
    enclosing scope rather than passed in.

    ``doc_other`` is part of the signature but is not read by this function.
    """
    element_id = rel.spdxElementId
    if element_id == root_package_other:
        element_id = root_package_main

    related_id = rel.relatedSpdxElement
    if related_id == root_package_other:
        related_id = doc_main

    rewritten = SPDXRelation(
        spdxElementId=element_id,
        relatedSpdxElement=related_id,
        relationshipType=rel.relationshipType,
    )

    # Kept from the original: re-check the element id after construction.
    if rewritten.spdxElementId == root_package_other:
        rewritten.spdxElementId = root_package_main

    touches_known_package = (
        rewritten.spdxElementId in package_ids or rewritten.relatedSpdxElement in package_ids
    )
    if not touches_known_package:
        return
    merged_relationships.append(rewritten)

# SPDXIDs of every package passed in; process_relation reads this set when
# deciding whether a rewritten relationship is kept.
package_ids = {pkg.SPDXID for pkg in packages}
# Work on a shallow copy so the appends/pops below do not touch the caller's list.
_packages = packages[:]
maps = []
inv_maps = []
root_package_ids = []
# One (map, inverse map) pair per relationships list.
for relationships in relationships_list:
_map, inv_map = map_relationships(relationships)
maps.append(_map)
inv_maps.append(inv_map)

# For each document, the root package id is the first key of the map whose
# inverse-map entry equals that document's id; None when there is no such key.
# zip() stops at the shortest of maps / inv_maps / doc_ids.
for _map, _inv_map, doc_id in zip(maps, inv_maps, doc_ids):
root_package_id = next((r for r, c in _map.items() if _inv_map.get(r) == doc_id), None)
root_package_ids.append(root_package_id)

merged_relationships = []

# The first document's root package is treated as the main root.
# NOTE(review): this indexes [0] unguarded - an empty relationships_list would
# raise IndexError here; confirm callers never pass one.
root_package_main = root_package_ids[0]
# No root found for the first document: add a placeholder package with an
# empty name to the copied package list.
# NOTE(review): indentation is lost in this view; presumably the reassignment
# of root_package_main and the DESCRIBES relationship below also belong to
# this `if` body - confirm against the real file.
if not root_package_main:
_packages.append(
SPDXPackage(
SPDXID="SPDXRef-DocumentRoot-File-",
name="",
)
)
root_package_main = "SPDXRef-DocumentRoot-File-"
merged_relationships.append(
SPDXRelation(
spdxElementId=doc_ids[0],
relatedSpdxElement=root_package_main,
relationshipType="DESCRIBES",
)
)

doc_main = doc_ids[0]

# Every relationship of every document (the first one included) goes through
# process_relation, which appends the ones it keeps to merged_relationships.
for relationships, doc_id, root_package_id in zip(
relationships_list, doc_ids, root_package_ids
):
for rel in relationships:
process_relation(
rel, doc_main, doc_id, root_package_main, root_package_id, merged_relationships
)

# Remove root packages of other elements from the list of packages
# (only the first package with a matching SPDXID is removed; a root id of
# None matches nothing unless a package has SPDXID None).
for _root_package in root_package_ids[1:]:
found_root_packages: List[Optional[SPDXPackage]] = [
x for x in _packages if x.SPDXID == _root_package
]
root_package: Optional[SPDXPackage] = (found_root_packages or [None])[0]
if root_package:
_packages.pop(_packages.index(root_package))
return merged_relationships, _packages
Loading