Skip to content

Commit

Permalink
Add an "Improve Packages from PurlDB" action in the Product details view
Browse files Browse the repository at this point in the history
#45

Signed-off-by: tdruez <tdruez@nexb.com>
  • Loading branch information
tdruez authored Sep 2, 2024
1 parent 45cc6ba commit a8cb6f8
Show file tree
Hide file tree
Showing 14 changed files with 409 additions and 26 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ Release notes
- Add a Vulnerabilities tab in the Product details view.
https://github.com/aboutcode-org/dejacode/issues/95

- Add an "Improve Packages from PurlDB" action in the Product details view.
https://github.com/aboutcode-org/dejacode/issues/45

### Version 5.1.0

- Upgrade Python version to 3.12 and Django to 5.0.x
Expand Down
24 changes: 21 additions & 3 deletions component_catalog/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2490,7 +2490,7 @@ def create_from_url(cls, url, user):
package = cls.create_from_data(user, package_data)
return package

def get_purldb_entries(self, user, max_request_call=0, timeout=None):
def get_purldb_entries(self, user, max_request_call=0, timeout=10):
"""
Return the PurlDB entries that correspond to this Package instance.
Expand All @@ -2515,14 +2515,32 @@ def get_purldb_entries(self, user, max_request_call=0, timeout=None):
if self.download_url:
payloads.append({"download_url": self.download_url})

purldb = PurlDB(user.dataspace)
for index, payload in enumerate(payloads):
if max_request_call and index >= max_request_call:
return

packages_data = PurlDB(user.dataspace).find_packages(payload, timeout)
if packages_data:
if packages_data := purldb.find_packages(payload, timeout):
return packages_data

def update_from_purldb(self, user):
    """
    Fill the empty fields of this Package using its PurlDB entry.

    The first entry returned by ``get_purldb_entries`` is used as the data
    source and is adjusted in place before being handed to
    ``update_from_data`` with ``override=False``.
    Return the value of ``update_from_data``, or ``None`` when no PurlDB
    entry is found.
    """
    entries = self.get_purldb_entries(user)
    if entries:
        entry = entries[0]

        # PurlDB provides a datetime string such as "2019-11-18T00:00:00Z",
        # only the part before the "T" is kept.
        release_date = entry.get("release_date")
        if release_date:
            entry["release_date"] = release_date.split("T")[0]

        # Expose the PurlDB declared license under the "license_expression" key.
        entry["license_expression"] = entry.get("declared_license_expression")

        return self.update_from_data(user, entry, override=False)


class PackageAssignedLicense(DataspacedModel):
package = models.ForeignKey(
Expand Down
60 changes: 53 additions & 7 deletions component_catalog/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1670,22 +1670,22 @@ def test_package_model_update_from_data(self):
self.assertEqual([], updated_fields)

new_data = {
"name": "new name",
"version": "1.0",
"filename": "new_filename",
"notes": "Some notes",
"unknown_field": "value",
}
updated_fields = package.update_from_data(self.user, data=new_data)
self.assertEqual(["version"], updated_fields)
self.assertEqual(["notes"], updated_fields)
package.refresh_from_db()
# Already has a value, not updated
self.assertEqual("name", package.name)
self.assertEqual("package.zip", package.filename)
# Empty field, updated
self.assertEqual(new_data["version"], package.version)
self.assertEqual(new_data["notes"], package.notes)

updated_fields = package.update_from_data(self.user, data=new_data, override=True)
self.assertEqual(["name"], updated_fields)
self.assertEqual(["filename"], updated_fields)
package.refresh_from_db()
self.assertEqual(new_data["name"], package.name)
self.assertEqual(new_data["filename"], package.filename)

@mock.patch("component_catalog.models.collect_package_data")
def test_package_model_create_from_url(self, mock_collect):
Expand Down Expand Up @@ -2523,6 +2523,52 @@ def test_package_model_inferred_url_property(self):
expected = "https://github.com/package-url/packageurl-python/tree/v0.10.4"
self.assertEqual(expected, package1.inferred_url)

@mock.patch("component_catalog.models.Package.get_purldb_entries")
def test_package_model_update_from_purldb(self, mock_get_purldb_entries):
    """Check the fields updated on a Package from a mocked PurlDB entry."""
    purldb_entry = {
        "uuid": "326aa7a8-4f28-406d-89f9-c1404916925b",
        "purl": "pkg:pypi/django@3.0",
        "type": "pypi",
        "name": "django",
        "version": "3.0",
        "primary_language": "Python",
        "description": "Description",
        "release_date": "2019-11-18T00:00:00Z",
        "parties": [],
        "keywords": ["Keyword1", "Keyword2"],
        "download_url": "https://files.pythonhosted.org/packages/38/Django-3.0.tar.gz",
        "sha1": "96ae8d8dd673d4fc92ce2cb2df9cdab6f6fd7d9f",
        "sha256": "0a1efde1b685a6c30999ba00902f23613cf5db864c5a1532d2edf3eda7896a37",
        "copyright": "(c) Copyright",
        "declared_license_expression": "(bsd-simplified AND bsd-new) AND unknown",
    }
    mock_get_purldb_entries.return_value = [purldb_entry]

    package = Package.objects.create(filename="package", dataspace=self.dataspace)
    updated = package.update_from_purldb(self.user)

    # Note: PURL fields are never updated.
    self.assertEqual(
        [
            "primary_language",
            "description",
            "release_date",
            "keywords",
            "download_url",
            "sha1",
            "sha256",
            "copyright",
            "declared_license_expression",
            "license_expression",
        ],
        updated,
    )

    package.refresh_from_db()

    # Handle release_date separately: it is compared through its str() form.
    self.assertEqual(purldb_entry["release_date"], str(package.release_date))

    # Every other updated field must hold the value found in the PurlDB entry.
    for field_name in updated:
        if field_name == "release_date":
            continue
        self.assertEqual(purldb_entry[field_name], getattr(package, field_name))

def test_package_model_vulnerability_queryset_mixin(self):
package1 = make_package(self.dataspace, is_vulnerable=True)
package2 = make_package(self.dataspace)
Expand Down
4 changes: 2 additions & 2 deletions component_catalog/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3364,7 +3364,7 @@ def test_component_catalog_package_add_view_initial_data(

puyrldb_entry = {
"filename": "abbot-1.4.0.jar",
"release_date": "2015-09-22",
"release_date": "2010-05-24T00:00:00Z",
"type": "maven",
"namespace": "abbot",
"name": "abbot",
Expand All @@ -3388,7 +3388,7 @@ def test_component_catalog_package_add_view_initial_data(
expected = {
"filename": "abbot-1.4.0.jar",
"keywords": ["keyword1", "keyword2"],
"release_date": "2015-09-22",
"release_date": "2010-05-24T00:00:00Z",
"type": "maven",
"namespace": "abbot",
"name": "abbot",
Expand Down
10 changes: 9 additions & 1 deletion dje/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@
" either nexB master reference data or installation-specific data."
)

# PackageURL._fields
PURL_FIELDS = ("type", "namespace", "name", "version", "qualifiers", "subpath")


def is_dataspace_related(model_class):
"""
Expand Down Expand Up @@ -779,7 +782,12 @@ def update_from_data(self, user, data, override=False):
updated_fields = []

for field_name, value in data.items():
if value in EMPTY_VALUES or field_name not in model_fields:
skip_reasons = [
value in EMPTY_VALUES,
field_name not in model_fields,
field_name in PURL_FIELDS,
]
if any(skip_reasons):
continue

current_value = getattr(self, field_name, None)
Expand Down
81 changes: 77 additions & 4 deletions dje/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from django.template.defaultfilters import pluralize

from django_rq import job
from guardian.shortcuts import get_perms as guardian_get_perms
from notifications.signals import notify

from dejacode_toolkit.scancodeio import ScanCodeIO
from dje.utils import is_available
Expand Down Expand Up @@ -92,16 +94,17 @@ def scancodeio_submit_scan(uris, user_uuid, dataspace_uuid):
Submit the provided `uris` to ScanCode.io as an asynchronous task.
Only publicly available URLs are sent to ScanCode.io.
"""
from dje.models import DejacodeUser

logger.info(
f"Entering scancodeio_submit_scan task with "
f"uris={uris} user_uuid={user_uuid} dataspace_uuid={dataspace_uuid}"
)

DejacodeUser = apps.get_model("dje", "DejacodeUser")

try:
user = DejacodeUser.objects.get(uuid=user_uuid, dataspace__uuid=dataspace_uuid)
except ObjectDoesNotExist:
logger.error(f"[scancodeio_submit_scan]: User uuid={user_uuid} does not exists.")
return

if not isinstance(uris, list):
Expand All @@ -118,20 +121,20 @@ def scancodeio_submit_scan(uris, user_uuid, dataspace_uuid):
@job
def scancodeio_submit_project(scancodeproject_uuid, user_uuid, pipeline_name):
"""Submit the provided SBOM file to ScanCode.io as an asynchronous task."""
from dje.models import DejacodeUser

logger.info(
f"Entering scancodeio_submit_project task with "
f"scancodeproject_uuid={scancodeproject_uuid} user_uuid={user_uuid} "
f"pipeline_name={pipeline_name}"
)

DejacodeUser = apps.get_model("dje", "DejacodeUser")
ScanCodeProject = apps.get_model("product_portfolio", "scancodeproject")
scancode_project = ScanCodeProject.objects.get(uuid=scancodeproject_uuid)

try:
user = DejacodeUser.objects.get(uuid=user_uuid)
except ObjectDoesNotExist:
logger.error(f"[scancodeio_submit_project]: User uuid={user_uuid} does not exists.")
return

scancodeio = ScanCodeIO(user.dataspace)
Expand Down Expand Up @@ -228,6 +231,76 @@ def pull_project_data_from_scancodeio(scancodeproject_uuid):
scancode_project.notify(verb=notification_verb, description=description)


@job("default", timeout=1200)
def improve_packages_from_purldb(product_uuid, user_uuid):
    """
    Improve the Packages of a Product using PurlDB data as an asynchronous task.

    The task stops early, after logging an error, when the user or the product
    cannot be found, or when the user does not have the "change_product"
    permission on the product.

    A ScanCodeProject of type IMPROVE_FROM_PURLDB is created to track the run:
    its status is set to FAILURE, with the exception message as import log,
    when the update raises, and to SUCCESS otherwise.
    On success, the user is notified and a History entry is logged on the
    product when at least one package was updated.
    """
    logger.info(
        f"Entering improve_packages_from_purldb task with "
        f"product_uuid={product_uuid} user_uuid={user_uuid}"
    )

    DejacodeUser = apps.get_model("dje", "DejacodeUser")
    History = apps.get_model("dje", "History")
    Product = apps.get_model("product_portfolio", "product")
    ScanCodeProject = apps.get_model("product_portfolio", "scancodeproject")

    try:
        user = DejacodeUser.objects.get(uuid=user_uuid)
    except ObjectDoesNotExist:
        logger.error(f"[improve_packages_from_purldb]: User uuid={user_uuid} does not exists.")
        return

    try:
        product = Product.objects.get_queryset(user).get(uuid=product_uuid)
    except ObjectDoesNotExist:
        logger.error(
            f"[improve_packages_from_purldb]: Product uuid={product_uuid} does not exists."
        )
        return

    perms = guardian_get_perms(user, product)
    has_change_permission = "change_product" in perms
    if not has_change_permission:
        logger.error("[improve_packages_from_purldb]: Permission denied.")
        return

    scancode_project = ScanCodeProject.objects.create(
        product=product,
        dataspace=product.dataspace,
        type=ScanCodeProject.ProjectType.IMPROVE_FROM_PURLDB,
        status=ScanCodeProject.Status.IMPORT_STARTED,
        created_by=user,
    )

    try:
        updated_packages = product.improve_packages_from_purldb(user)
    except Exception as e:
        # Task boundary: record the failure and stop here.
        # Without this return, `updated_packages` is unbound below and the
        # FAILURE status would never survive the rest of the task.
        logger.error(f"[improve_packages_from_purldb]: {e}")
        scancode_project.update(
            status=ScanCodeProject.Status.FAILURE,
            import_log=str(e),
        )
        return

    logger.info(f"[improve_packages_from_purldb]: {len(updated_packages)} updated from PurlDB.")
    verb = "Improved packages from PurlDB:"
    if updated_packages:
        description = ", ".join([str(package) for package in updated_packages])
        History.log_change(user, product, message=f"{verb} {description}")
    else:
        description = "No packages updated from PurlDB data."

    scancode_project.update(
        status=ScanCodeProject.Status.SUCCESS,
        import_log=[verb, description],
    )

    notify.send(
        sender=user,
        verb=verb,
        action_object=product,
        recipient=user,
        description=description,
    )


@job("default", timeout="3h")
def update_vulnerabilities():
"""Fetch vulnerabilities for all Dataspaces that enable vulnerablecodedb access."""
Expand Down
18 changes: 18 additions & 0 deletions product_portfolio/migrations/0007_alter_scancodeproject_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.0.6 on 2024-09-02 15:24

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``ScanCodeProject.type`` so its choices include "Improve from PurlDB"."""

    dependencies = [
        ("product_portfolio", "0006_productdependency"),
    ]

    operations = [
        migrations.AlterField(
            model_name="scancodeproject",
            name="type",
            field=models.CharField(
                choices=[
                    ("IMPORT_FROM_MANIFEST", "Import from Manifest"),
                    ("LOAD_SBOMS", "Load SBOMs"),
                    ("PULL_FROM_SCANCODEIO", "Pull from ScanCode.io"),
                    ("IMPROVE_FROM_PURLDB", "Improve from PurlDB"),
                ],
                db_index=True,
                help_text="The type of import, for the ProjectType choices.",
                max_length=50,
            ),
        ),
    ]
24 changes: 24 additions & 0 deletions product_portfolio/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,9 @@ def get_import_manifests_url(self):
def get_pull_project_data_url(self):
    """Return the URL built by ``get_url`` for the "pull_project_data" name."""
    url_name = "pull_project_data"
    return self.get_url(url_name)

def get_improve_packages_from_purldb_url(self):
    """Return the URL built by ``get_url`` for the "improve_packages_from_purldb" name."""
    url_name = "improve_packages_from_purldb"
    return self.get_url(url_name)

def can_be_changed_by(self, user):
perms = guardian.shortcuts.get_perms(user, self)
has_change_permission_on_product = "change_product" in perms
Expand Down Expand Up @@ -492,6 +495,15 @@ def scan_all_packages_task(self, user):
dataspace_uuid=user.dataspace.uuid,
)

def improve_packages_from_purldb(self, user):
    """
    Update all Packages assigned to the Product using PurlDB data.

    Return the list of packages for which ``update_from_purldb`` reported
    at least one updated field.
    """
    return [
        package
        for package in self.packages.all()
        if package.update_from_purldb(user)
    ]

def fetch_vulnerabilities(self):
    """Fetch and update the vulnerabilities of all the Packages of this Product."""
    packages, dataspace = self.all_packages, self.dataspace
    return fetch_for_queryset(packages, dataspace)
Expand Down Expand Up @@ -1204,13 +1216,23 @@ def generate_input_file_path(instance, filename):
return f"{dataspace}/scancode_project/{instance.uuid}/{filename}"


class ScanCodeProjectQuerySet(ProductSecuredQuerySet):
    """QuerySet providing status-based filters for ScanCodeProject."""

    def in_progress(self):
        """Limit the QuerySet to the SUBMITTED and IMPORT_STARTED statuses."""
        status = ScanCodeProject.Status
        return self.filter(status__in=[status.SUBMITTED, status.IMPORT_STARTED])


class ScanCodeProject(HistoryFieldsMixin, DataspacedModel):
"""Wrap a ScanCode.io Project."""

class ProjectType(models.TextChoices):
IMPORT_FROM_MANIFEST = "IMPORT_FROM_MANIFEST", _("Import from Manifest")
LOAD_SBOMS = "LOAD_SBOMS", _("Load SBOMs")
PULL_FROM_SCANCODEIO = "PULL_FROM_SCANCODEIO", _("Pull from ScanCode.io")
IMPROVE_FROM_PURLDB = "IMPROVE_FROM_PURLDB", _("Improve from PurlDB")

class Status(models.TextChoices):
SUBMITTED = "submitted"
Expand Down Expand Up @@ -1266,6 +1288,8 @@ class Status(models.TextChoices):
default=dict,
)

objects = DataspacedManager.from_queryset(ScanCodeProjectQuerySet)()

class Meta:
unique_together = ("dataspace", "uuid")
ordering = ["-created_date"]
Expand Down
Loading

0 comments on commit a8cb6f8

Please sign in to comment.