diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9af9b172..1e1e24da 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -107,6 +107,9 @@ Release notes - Add a Vulnerabilities tab in the Product details view. https://github.com/aboutcode-org/dejacode/issues/95 +- Add an "Improve Packages from PurlDB" action in the Product details view. + https://github.com/aboutcode-org/dejacode/issues/45 + ### Version 5.1.0 - Upgrade Python version to 3.12 and Django to 5.0.x diff --git a/component_catalog/models.py b/component_catalog/models.py index a01bb891..eb099fc9 100644 --- a/component_catalog/models.py +++ b/component_catalog/models.py @@ -2490,7 +2490,7 @@ def create_from_url(cls, url, user): package = cls.create_from_data(user, package_data) return package - def get_purldb_entries(self, user, max_request_call=0, timeout=None): + def get_purldb_entries(self, user, max_request_call=0, timeout=10): """ Return the PurlDB entries that correspond to this Package instance. @@ -2515,14 +2515,32 @@ def get_purldb_entries(self, user, max_request_call=0, timeout=None): if self.download_url: payloads.append({"download_url": self.download_url}) + purldb = PurlDB(user.dataspace) for index, payload in enumerate(payloads): if max_request_call and index >= max_request_call: return - packages_data = PurlDB(user.dataspace).find_packages(payload, timeout) - if packages_data: + if packages_data := purldb.find_packages(payload, timeout): return packages_data + def update_from_purldb(self, user): + """ + Find this Package in the PurlDB and update empty fields with PurlDB data + when available. 
+ """ + purldb_entries = self.get_purldb_entries(user) + if not purldb_entries: + return + + package_data = purldb_entries[0] + # The format from PURLDB is "2019-11-18T00:00:00Z" + if release_date := package_data.get("release_date"): + package_data["release_date"] = release_date.split("T")[0] + package_data["license_expression"] = package_data.get("declared_license_expression") + + updated_fields = self.update_from_data(user, package_data, override=False) + return updated_fields + class PackageAssignedLicense(DataspacedModel): package = models.ForeignKey( diff --git a/component_catalog/tests/test_models.py b/component_catalog/tests/test_models.py index 1cdb302e..98ead008 100644 --- a/component_catalog/tests/test_models.py +++ b/component_catalog/tests/test_models.py @@ -1670,22 +1670,22 @@ def test_package_model_update_from_data(self): self.assertEqual([], updated_fields) new_data = { - "name": "new name", - "version": "1.0", + "filename": "new_filename", + "notes": "Some notes", "unknown_field": "value", } updated_fields = package.update_from_data(self.user, data=new_data) - self.assertEqual(["version"], updated_fields) + self.assertEqual(["notes"], updated_fields) package.refresh_from_db() # Already has a value, not updated - self.assertEqual("name", package.name) + self.assertEqual("package.zip", package.filename) # Empty field, updated - self.assertEqual(new_data["version"], package.version) + self.assertEqual(new_data["notes"], package.notes) updated_fields = package.update_from_data(self.user, data=new_data, override=True) - self.assertEqual(["name"], updated_fields) + self.assertEqual(["filename"], updated_fields) package.refresh_from_db() - self.assertEqual(new_data["name"], package.name) + self.assertEqual(new_data["filename"], package.filename) @mock.patch("component_catalog.models.collect_package_data") def test_package_model_create_from_url(self, mock_collect): @@ -2523,6 +2523,52 @@ def test_package_model_inferred_url_property(self): expected = 
"https://github.com/package-url/packageurl-python/tree/v0.10.4" self.assertEqual(expected, package1.inferred_url) + @mock.patch("component_catalog.models.Package.get_purldb_entries") + def test_package_model_update_from_purldb(self, mock_get_purldb_entries): + purldb_entry = { + "uuid": "326aa7a8-4f28-406d-89f9-c1404916925b", + "purl": "pkg:pypi/django@3.0", + "type": "pypi", + "name": "django", + "version": "3.0", + "primary_language": "Python", + "description": "Description", + "release_date": "2019-11-18T00:00:00Z", + "parties": [], + "keywords": ["Keyword1", "Keyword2"], + "download_url": "https://files.pythonhosted.org/packages/38/Django-3.0.tar.gz", + "sha1": "96ae8d8dd673d4fc92ce2cb2df9cdab6f6fd7d9f", + "sha256": "0a1efde1b685a6c30999ba00902f23613cf5db864c5a1532d2edf3eda7896a37", + "copyright": "(c) Copyright", + "declared_license_expression": "(bsd-simplified AND bsd-new) AND unknown", + } + + mock_get_purldb_entries.return_value = [purldb_entry] + package1 = Package.objects.create(filename="package", dataspace=self.dataspace) + updated_fields = package1.update_from_purldb(self.user) + # Note: PURL fields are never updated. 
+ expected = [ + "primary_language", + "description", + "release_date", + "keywords", + "download_url", + "sha1", + "sha256", + "copyright", + "declared_license_expression", + "license_expression", + ] + self.assertEqual(expected, updated_fields) + + package1.refresh_from_db() + # Handle release_date separately + updated_fields.remove("release_date") + self.assertEqual(purldb_entry["release_date"], str(package1.release_date)) + + for field_name in updated_fields: + self.assertEqual(purldb_entry[field_name], getattr(package1, field_name)) + def test_package_model_vulnerability_queryset_mixin(self): package1 = make_package(self.dataspace, is_vulnerable=True) package2 = make_package(self.dataspace) diff --git a/component_catalog/tests/test_views.py b/component_catalog/tests/test_views.py index 78f8b79d..7e3f31d8 100644 --- a/component_catalog/tests/test_views.py +++ b/component_catalog/tests/test_views.py @@ -3364,7 +3364,7 @@ def test_component_catalog_package_add_view_initial_data( puyrldb_entry = { "filename": "abbot-1.4.0.jar", - "release_date": "2015-09-22", + "release_date": "2010-05-24T00:00:00Z", "type": "maven", "namespace": "abbot", "name": "abbot", @@ -3388,7 +3388,7 @@ def test_component_catalog_package_add_view_initial_data( expected = { "filename": "abbot-1.4.0.jar", "keywords": ["keyword1", "keyword2"], - "release_date": "2015-09-22", + "release_date": "2010-05-24T00:00:00Z", "type": "maven", "namespace": "abbot", "name": "abbot", diff --git a/dje/models.py b/dje/models.py index 315bcd90..6d4fa452 100644 --- a/dje/models.py +++ b/dje/models.py @@ -61,6 +61,9 @@ " either nexB master reference data or installation-specific data." 
) +# PackageURL._fields +PURL_FIELDS = ("type", "namespace", "name", "version", "qualifiers", "subpath") + def is_dataspace_related(model_class): """ @@ -779,7 +782,12 @@ def update_from_data(self, user, data, override=False): updated_fields = [] for field_name, value in data.items(): - if value in EMPTY_VALUES or field_name not in model_fields: + skip_reasons = [ + value in EMPTY_VALUES, + field_name not in model_fields, + field_name in PURL_FIELDS, + ] + if any(skip_reasons): continue current_value = getattr(self, field_name, None) diff --git a/dje/tasks.py b/dje/tasks.py index 1e0e0d7c..a13f1cba 100644 --- a/dje/tasks.py +++ b/dje/tasks.py @@ -20,6 +20,8 @@ from django.template.defaultfilters import pluralize from django_rq import job +from guardian.shortcuts import get_perms as guardian_get_perms +from notifications.signals import notify from dejacode_toolkit.scancodeio import ScanCodeIO from dje.utils import is_available @@ -92,16 +94,17 @@ def scancodeio_submit_scan(uris, user_uuid, dataspace_uuid): Submit the provided `uris` to ScanCode.io as an asynchronous task. Only publicly available URLs are sent to ScanCode.io. 
""" - from dje.models import DejacodeUser - logger.info( f"Entering scancodeio_submit_scan task with " f"uris={uris} user_uuid={user_uuid} dataspace_uuid={dataspace_uuid}" ) + DejacodeUser = apps.get_model("dje", "DejacodeUser") + try: user = DejacodeUser.objects.get(uuid=user_uuid, dataspace__uuid=dataspace_uuid) except ObjectDoesNotExist: + logger.error(f"[scancodeio_submit_scan]: User uuid={user_uuid} does not exists.") return if not isinstance(uris, list): @@ -118,20 +121,20 @@ def scancodeio_submit_scan(uris, user_uuid, dataspace_uuid): @job def scancodeio_submit_project(scancodeproject_uuid, user_uuid, pipeline_name): """Submit the provided SBOM file to ScanCode.io as an asynchronous task.""" - from dje.models import DejacodeUser - logger.info( f"Entering scancodeio_submit_project task with " f"scancodeproject_uuid={scancodeproject_uuid} user_uuid={user_uuid} " f"pipeline_name={pipeline_name}" ) + DejacodeUser = apps.get_model("dje", "DejacodeUser") ScanCodeProject = apps.get_model("product_portfolio", "scancodeproject") scancode_project = ScanCodeProject.objects.get(uuid=scancodeproject_uuid) try: user = DejacodeUser.objects.get(uuid=user_uuid) except ObjectDoesNotExist: + logger.error(f"[scancodeio_submit_project]: User uuid={user_uuid} does not exists.") return scancodeio = ScanCodeIO(user.dataspace) @@ -228,6 +231,76 @@ def pull_project_data_from_scancodeio(scancodeproject_uuid): scancode_project.notify(verb=notification_verb, description=description) +@job("default", timeout=1200) +def improve_packages_from_purldb(product_uuid, user_uuid): + logger.info( + f"Entering improve_packages_from_purldb task with " + f"product_uuid={product_uuid} user_uuid={user_uuid}" + ) + + DejacodeUser = apps.get_model("dje", "DejacodeUser") + History = apps.get_model("dje", "History") + Product = apps.get_model("product_portfolio", "product") + ScanCodeProject = apps.get_model("product_portfolio", "scancodeproject") + + try: + user = DejacodeUser.objects.get(uuid=user_uuid) 
+ except ObjectDoesNotExist: + logger.error(f"[improve_packages_from_purldb]: User uuid={user_uuid} does not exists.") + return + + try: + product = Product.objects.get_queryset(user).get(uuid=product_uuid) + except ObjectDoesNotExist: + logger.error( + f"[improve_packages_from_purldb]: Product uuid={product_uuid} does not exists." + ) + return + + perms = guardian_get_perms(user, product) + has_change_permission = "change_product" in perms + if not has_change_permission: + logger.error("[improve_packages_from_purldb]: Permission denied.") + return + + scancode_project = ScanCodeProject.objects.create( + product=product, + dataspace=product.dataspace, + type=ScanCodeProject.ProjectType.IMPROVE_FROM_PURLDB, + status=ScanCodeProject.Status.IMPORT_STARTED, + created_by=user, + ) + + try: + updated_packages = product.improve_packages_from_purldb(user) + except Exception as e: + scancode_project.update( + status=ScanCodeProject.Status.FAILURE, + import_log=str(e), + ) + + logger.info(f"[improve_packages_from_purldb]: {len(updated_packages)} updated from PurlDB.") + verb = "Improved packages from PurlDB:" + if updated_packages: + description = ", ".join([str(package) for package in updated_packages]) + History.log_change(user, product, message=f"{verb} {description}") + else: + description = "No packages updated from PurlDB data." 
+ + scancode_project.update( + status=ScanCodeProject.Status.SUCCESS, + import_log=[verb, description], + ) + + notify.send( + sender=user, + verb=verb, + action_object=product, + recipient=user, + description=description, + ) + + @job("default", timeout="3h") def update_vulnerabilities(): """Fetch vulnerabilities for all Dataspaces that enable vulnerablecodedb access.""" diff --git a/product_portfolio/migrations/0007_alter_scancodeproject_type.py b/product_portfolio/migrations/0007_alter_scancodeproject_type.py new file mode 100644 index 00000000..52dd9681 --- /dev/null +++ b/product_portfolio/migrations/0007_alter_scancodeproject_type.py @@ -0,0 +1,18 @@ +# Generated by Django 5.0.6 on 2024-09-02 15:24 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('product_portfolio', '0006_productdependency'), + ] + + operations = [ + migrations.AlterField( + model_name='scancodeproject', + name='type', + field=models.CharField(choices=[('IMPORT_FROM_MANIFEST', 'Import from Manifest'), ('LOAD_SBOMS', 'Load SBOMs'), ('PULL_FROM_SCANCODEIO', 'Pull from ScanCode.io'), ('IMPROVE_FROM_PURLDB', 'Improve from PurlDB')], db_index=True, help_text='The type of import, for the ProjectType choices.', max_length=50), + ), + ] diff --git a/product_portfolio/models.py b/product_portfolio/models.py index 09255964..2715bf47 100644 --- a/product_portfolio/models.py +++ b/product_portfolio/models.py @@ -298,6 +298,9 @@ def get_import_manifests_url(self): def get_pull_project_data_url(self): return self.get_url("pull_project_data") + def get_improve_packages_from_purldb_url(self): + return self.get_url("improve_packages_from_purldb") + def can_be_changed_by(self, user): perms = guardian.shortcuts.get_perms(user, self) has_change_permission_on_product = "change_product" in perms @@ -492,6 +495,15 @@ def scan_all_packages_task(self, user): dataspace_uuid=user.dataspace.uuid, ) + def improve_packages_from_purldb(self, user): + """Update 
all Packages assigned to the Product using PurlDB data.""" + updated_packages = [] + for package in self.packages.all(): + updated_fields = package.update_from_purldb(user) + if updated_fields: + updated_packages.append(package) + return updated_packages + def fetch_vulnerabilities(self): """Fetch and update the vulnerabilties of all the Package of this Product.""" return fetch_for_queryset(self.all_packages, self.dataspace) @@ -1204,6 +1216,15 @@ def generate_input_file_path(instance, filename): return f"{dataspace}/scancode_project/{instance.uuid}/{filename}" +class ScanCodeProjectQuerySet(ProductSecuredQuerySet): + def in_progress(self): + in_progress_statuses = [ + ScanCodeProject.Status.SUBMITTED, + ScanCodeProject.Status.IMPORT_STARTED, + ] + return self.filter(status__in=in_progress_statuses) + + class ScanCodeProject(HistoryFieldsMixin, DataspacedModel): """Wrap a ScanCode.io Project.""" @@ -1211,6 +1232,7 @@ class ProjectType(models.TextChoices): IMPORT_FROM_MANIFEST = "IMPORT_FROM_MANIFEST", _("Import from Manifest") LOAD_SBOMS = "LOAD_SBOMS", _("Load SBOMs") PULL_FROM_SCANCODEIO = "PULL_FROM_SCANCODEIO", _("Pull from ScanCode.io") + IMPROVE_FROM_PURLDB = "IMPROVE_FROM_PURLDB", _("Improve from PurlDB") class Status(models.TextChoices): SUBMITTED = "submitted" @@ -1266,6 +1288,8 @@ class Status(models.TextChoices): default=dict, ) + objects = DataspacedManager.from_queryset(ScanCodeProjectQuerySet)() + class Meta: unique_together = ("dataspace", "uuid") ordering = ["-created_date"] diff --git a/product_portfolio/templates/product_portfolio/product_details.html b/product_portfolio/templates/product_portfolio/product_details.html index beec6a38..51487456 100644 --- a/product_portfolio/templates/product_portfolio/product_details.html +++ b/product_portfolio/templates/product_portfolio/product_details.html @@ -33,8 +33,9 @@ {% if has_scan_all_packages or has_change_permission %}
{% endif %} @@ -311,5 +317,12 @@ }) }); + {% endif %} {% endblock %} \ No newline at end of file diff --git a/product_portfolio/tests/__init__.py b/product_portfolio/tests/__init__.py index 7c7b3ddd..2d8b9197 100644 --- a/product_portfolio/tests/__init__.py +++ b/product_portfolio/tests/__init__.py @@ -8,6 +8,8 @@ from component_catalog.models import Component from component_catalog.models import Package +from component_catalog.tests import make_component +from component_catalog.tests import make_package from dje.tests import make_string from product_portfolio.models import Product from product_portfolio.models import ProductComponent @@ -30,10 +32,34 @@ def make_product(dataspace, inventory=None, **data): for instance in inventory: if isinstance(instance, Package): - ProductPackage.objects.create(product=product, package=instance, dataspace=dataspace) + make_product_package(product, package=instance) if isinstance(instance, Component): - ProductComponent.objects.create( - product=product, component=instance, dataspace=dataspace - ) + make_product_component(product, component=instance) return product + + +def make_product_package(product, package=None): + dataspace = product.dataspace + + if not package: + package = make_package(dataspace) + + return ProductPackage.objects.create( + product=product, + package=package, + dataspace=dataspace, + ) + + +def make_product_component(product, component=None): + dataspace = product.dataspace + + if not component: + component = make_component(dataspace) + + return ProductComponent.objects.create( + product=product, + component=component, + dataspace=dataspace, + ) diff --git a/product_portfolio/tests/test_models.py b/product_portfolio/tests/test_models.py index 7ddb42e2..f93085d3 100644 --- a/product_portfolio/tests/test_models.py +++ b/product_portfolio/tests/test_models.py @@ -6,6 +6,8 @@ # See https://aboutcode.org for more information about AboutCode FOSS projects. 
# +from unittest import mock + from django.contrib.contenttypes.models import ContentType from django.core.exceptions import ValidationError from django.db.models import ProtectedError @@ -37,6 +39,7 @@ from product_portfolio.models import ProductSecuredManager from product_portfolio.models import ProductStatus from product_portfolio.models import ScanCodeProject +from product_portfolio.tests import make_product_package from workflow.models import RequestTemplate @@ -483,6 +486,18 @@ def test_product_model_actions_on_status_change(self): product.refresh_from_db() self.assertEqual(1, product.request_count) + @mock.patch("component_catalog.models.Package.update_from_purldb") + def test_product_model_improve_packages_from_purldb(self, mock_update_from_purldb): + mock_update_from_purldb.return_value = 1 + + make_product_package(self.product1) + make_product_package(self.product1) + self.assertEqual(2, self.product1.packages.count()) + + updated_packages = self.product1.improve_packages_from_purldb(self.super_user) + self.assertEqual(2, len(updated_packages)) + self.assertEqual(2, mock_update_from_purldb.call_count) + def test_productcomponent_model_license_expression_handle_assigned_licenses(self): p1 = ProductComponent.objects.create( product=self.product1, name="p1", dataspace=self.dataspace diff --git a/product_portfolio/tests/test_views.py b/product_portfolio/tests/test_views.py index 5d5d1baa..23daf1c7 100644 --- a/product_portfolio/tests/test_views.py +++ b/product_portfolio/tests/test_views.py @@ -34,6 +34,7 @@ from dje.models import Dataspace from dje.models import History from dje.outputs import get_spdx_extracted_licenses +from dje.tasks import improve_packages_from_purldb from dje.tasks import logger as tasks_logger from dje.tasks import pull_project_data_from_scancodeio from dje.tasks import scancodeio_submit_project @@ -57,6 +58,7 @@ from product_portfolio.models import ProductStatus from product_portfolio.models import ScanCodeProject from 
product_portfolio.tests import make_product +from product_portfolio.tests import make_product_package from product_portfolio.views import ManageComponentGridView from workflow.models import Request from workflow.models import RequestTemplate @@ -583,7 +585,7 @@ def test_product_portfolio_detail_view_inventory_tab_display_scan_features( self.assertContains(response, expected1) self.assertContains(response, expected2) - def test_product_detail_view_inventory_tab_display_vulnerabilities(self): + def test_product_portfolio_detail_view_inventory_tab_display_vulnerabilities(self): ProductPackage.objects.create( product=self.product1, package=self.package1, dataspace=self.dataspace ) @@ -637,7 +639,7 @@ def test_product_portfolio_detail_view_feature_field_grouping_in_inventory_tab(s response, '