diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a854bc7aa..0c25e11ba 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -99,16 +99,42 @@ v31.0.0 (2022-08-25) - Package data detected from a file are now stored on the CodebaseResource.package_data field. - A second processing step is now done after scanning for Package data, where - Package Resources are determined and DiscoveredPackages are created. + Package Resources are determined and DiscoveredPackages and + DiscoveredDependencies are created. https://github.com/nexB/scancode.io/issues/444 - ``CodebaseResource.for_packages`` now returns a list of ``DiscoveredPackage.package_uid`` or ``DiscoveredPackage.package_url`` if ``DiscoveredPackage.package_uid`` is not present. This is done to reflect the - how scancode-toolkit's JSON output returns ``package_uid`` in the + how scancode-toolkit's JSON output returns ``package_uid`` values in the ``for_packages`` field for Resources. +- Add the model DiscoveredDependency. This represents Package dependencies + discovered in a Project. The ``scan_codebase`` and ``scan_packages`` pipelines + have been updated to create DiscoveredDependency objects. The Project API has + been updated with new fields: + + - ``dependency_count`` + - The number of DiscoveredDependencies associated with the project. + + - ``discovered_dependencies_summary`` + - A mapping that contains the following fields: + + - ``total`` + - The number of DiscoveredDependencies associated with the project. + - ``is_runtime`` + - The number of runtime dependencies. + - ``is_optional`` + - The number of optional dependencies. + - ``is_resolved`` + - The number of resolved dependencies. + + These values are also available on the Project view. + https://github.com/nexB/scancode.io/issues/447 + +- The ``dependencies`` field has been removed from the DiscoveredPackage model. 
+ v30.2.0 (2021-12-17) -------------------- diff --git a/docs/rest-api.rst b/docs/rest-api.rst index 051536fb7..ec4189140 100644 --- a/docs/rest-api.rst +++ b/docs/rest-api.rst @@ -188,7 +188,7 @@ The project details view returns all information available about a project. "codebase_resources_summary": { "application-package": 1 }, - "discovered_package_summary": { + "discovered_packages_summary": { "total": 1, "with_missing_resources": 0, "with_modified_resources": 0 diff --git a/scanpipe/api/serializers.py b/scanpipe/api/serializers.py index 07917a709..166adfd76 100644 --- a/scanpipe/api/serializers.py +++ b/scanpipe/api/serializers.py @@ -26,6 +26,7 @@ from scanpipe.api import ExcludeFromListViewMixin from scanpipe.models import CodebaseResource +from scanpipe.models import DiscoveredDependency from scanpipe.models import DiscoveredPackage from scanpipe.models import Project from scanpipe.models import ProjectError @@ -112,7 +113,8 @@ class ProjectSerializer( runs = RunSerializer(many=True, read_only=True) input_sources = serializers.JSONField(source="input_sources_list", read_only=True) codebase_resources_summary = serializers.SerializerMethodField() - discovered_package_summary = serializers.SerializerMethodField() + discovered_packages_summary = serializers.SerializerMethodField() + discovered_dependencies_summary = serializers.SerializerMethodField() class Meta: model = Project @@ -136,8 +138,10 @@ class Meta: "error_count", "resource_count", "package_count", + "dependency_count", "codebase_resources_summary", - "discovered_package_summary", + "discovered_packages_summary", + "discovered_dependencies_summary", ) exclude_from_list_view = [ @@ -147,15 +151,17 @@ class Meta: "error_count", "resource_count", "package_count", + "dependency_count", "codebase_resources_summary", - "discovered_package_summary", + "discovered_packages_summary", + "discovered_dependencies_summary", ] def get_codebase_resources_summary(self, project): queryset = 
project.codebaseresources.all() return count_group_by(queryset, "status") - def get_discovered_package_summary(self, project): + def get_discovered_packages_summary(self, project): base_qs = project.discoveredpackages return { "total": base_qs.count(), @@ -163,6 +169,15 @@ def get_discovered_package_summary(self, project): "with_modified_resources": base_qs.exclude(modified_resources=[]).count(), } + def get_discovered_dependencies_summary(self, project): + base_qs = project.discovereddependencies + return { + "total": base_qs.count(), + "is_runtime": base_qs.filter(is_runtime=True).count(), + "is_optional": base_qs.filter(is_optional=True).count(), + "is_resolved": base_qs.filter(is_resolved=True).count(), + } + def create(self, validated_data): """ Creates a new `project` with `upload_file` and `pipeline` as optional. @@ -219,6 +234,29 @@ class Meta: ] +class DiscoveredDependencySerializer(serializers.ModelSerializer): + purl = serializers.ReadOnlyField() + for_package_uid = serializers.ReadOnlyField() + datafile_path = serializers.ReadOnlyField() + package_type = serializers.ReadOnlyField(source="type") + + class Meta: + model = DiscoveredDependency + fields = [ + "purl", + "package_type", + "extracted_requirement", + "scope", + "is_runtime", + "is_optional", + "is_resolved", + "dependency_uid", + "for_package_uid", + "datafile_path", + "datasource_id", + ] + + class ProjectErrorSerializer(serializers.ModelSerializer): traceback = serializers.SerializerMethodField() @@ -257,6 +295,7 @@ def get_model_serializer(model_class): serializer = { CodebaseResource: CodebaseResourceSerializer, DiscoveredPackage: DiscoveredPackageSerializer, + DiscoveredDependency: DiscoveredDependencySerializer, ProjectError: ProjectErrorSerializer, }.get(model_class, None) diff --git a/scanpipe/api/views.py b/scanpipe/api/views.py index 6428f8e27..8ed93ec6e 100644 --- a/scanpipe/api/views.py +++ b/scanpipe/api/views.py @@ -36,6 +36,7 @@ from rest_framework.response import Response from 
scanpipe.api.serializers import CodebaseResourceSerializer +from scanpipe.api.serializers import DiscoveredDependencySerializer from scanpipe.api.serializers import DiscoveredPackageSerializer from scanpipe.api.serializers import PipelineSerializer from scanpipe.api.serializers import ProjectErrorSerializer @@ -180,6 +181,16 @@ def packages(self, request, *args, **kwargs): return Response(serializer.data) + @action(detail=True) + def dependencies(self, request, *args, **kwargs): + project = self.get_object() + queryset = project.discovereddependencies.all() + + paginated_qs = self.paginate_queryset(queryset) + serializer = DiscoveredDependencySerializer(paginated_qs, many=True) + + return Response(serializer.data) + @action(detail=True) def errors(self, request, *args, **kwargs): project = self.get_object() diff --git a/scanpipe/filters.py b/scanpipe/filters.py index f954f1f4c..8efbd8338 100644 --- a/scanpipe/filters.py +++ b/scanpipe/filters.py @@ -32,6 +32,7 @@ from packageurl.contrib.django.filters import PackageURLFilter from scanpipe.models import CodebaseResource +from scanpipe.models import DiscoveredDependency from scanpipe.models import DiscoveredPackage from scanpipe.models import Project from scanpipe.models import ProjectError @@ -159,6 +160,7 @@ class ProjectFilterSet(FilterSetUtilsMixin, django_filters.FilterSet): "created_date", "name", "discoveredpackages_count", + "discovereddependencies_count", "codebaseresources_count", "projecterrors_count", ], @@ -169,6 +171,8 @@ class ProjectFilterSet(FilterSetUtilsMixin, django_filters.FilterSet): ("-name", "Name (z-A)"), ("-discoveredpackages_count", "Packages (+)"), ("discoveredpackages_count", "Packages (-)"), + ("-discovereddependencies_count", "Dependencies (+)"), + ("discovereddependencies_count", "Dependencies (-)"), ("-codebaseresources_count", "Resources (+)"), ("codebaseresources_count", "Resources (-)"), ("-projecterrors_count", "Errors (+)"), @@ -364,6 +368,32 @@ class Meta: ] +class 
DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet): + search = django_filters.CharFilter( + label="Search", field_name="name", lookup_expr="icontains" + ) + purl = PackageURLFilter(label="Package URL") + + class Meta: + model = DiscoveredDependency + fields = [ + "search", + "purl", + "dependency_uid", + "type", + "namespace", + "name", + "version", + "qualifiers", + "subpath", + "scope", + "is_runtime", + "is_optional", + "is_resolved", + "datasource_id", + ] + + class ErrorFilterSet(FilterSetUtilsMixin, django_filters.FilterSet): search = django_filters.CharFilter( label="Search", field_name="message", lookup_expr="icontains" diff --git a/scanpipe/migrations/0022_rename_dependencies_discoveredpackage_dependencies_data_and_more.py b/scanpipe/migrations/0022_rename_dependencies_discoveredpackage_dependencies_data_and_more.py new file mode 100644 index 000000000..5744091ec --- /dev/null +++ b/scanpipe/migrations/0022_rename_dependencies_discoveredpackage_dependencies_data_and_more.py @@ -0,0 +1,93 @@ +# Generated by Django 4.0.6 on 2022-08-13 00:27 + +from django.db import migrations, models +import django.db.models.deletion +import scanpipe.models + + +class Migration(migrations.Migration): + + dependencies = [ + ('scanpipe', '0021_codebaseresource_package_data'), + ] + + operations = [ + migrations.RenameField( + model_name='discoveredpackage', + old_name='dependencies', + new_name='dependencies_data', + ), + migrations.CreateModel( + name='DiscoveredDependency', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('dependency_uid', models.CharField(help_text='The unique identifier of this dependency.', max_length=1024)), + ('purl', models.CharField(help_text='The Package URL of this dependency.', max_length=1024)), + ('extracted_requirement', models.CharField(blank=True, help_text='The version requirements of this dependency.', max_length=64)), + ('scope', models.CharField(blank=True, 
help_text='The scope of this dependency, how it is used in a project.', max_length=64)), + ('is_runtime', models.BooleanField(default=False)), + ('is_optional', models.BooleanField(default=False)), + ('is_resolved', models.BooleanField(default=False)), + ('datasource_id', models.CharField(blank=True, help_text='The identifier for the datafile handler used to obtain this dependency.', max_length=64)), + ('datafile_resource', models.ForeignKey(blank=True, editable=False, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='dependencies', to='scanpipe.codebaseresource')), + ('for_package', models.ForeignKey(blank=True, editable=False, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='dependencies', to='scanpipe.discoveredpackage')), + ('project', models.ForeignKey(editable=False, on_delete=django.db.models.deletion.CASCADE, related_name='discovereddependencies', to='scanpipe.project')), + ], + options={ + 'verbose_name': 'discovered dependency', + 'verbose_name_plural': 'discovered dependencies', + 'ordering': ['-is_runtime', '-is_resolved', 'is_optional', 'dependency_uid'], + }, + bases=(models.Model, scanpipe.models.SaveProjectErrorMixin), + ), + migrations.AddConstraint( + model_name='discovereddependency', + constraint=models.UniqueConstraint(condition=models.Q(('dependency_uid', ''), _negated=True), fields=('project', 'dependency_uid'), name='scanpipe_discovereddependency_unique_dependency_uid_within_project'), + ), + migrations.AddField( + model_name='discovereddependency', + name='name', + field=models.CharField(blank=True, help_text='Name of the package.', max_length=100), + ), + migrations.AddField( + model_name='discovereddependency', + name='namespace', + field=models.CharField(blank=True, help_text='Package name prefix, such as Maven groupid, Docker image owner, GitHub user or organization, etc.', max_length=255), + ), + migrations.AddField( + model_name='discovereddependency', + name='qualifiers', + 
field=models.CharField(blank=True, help_text='Extra qualifying data for a package such as the name of an OS, architecture, distro, etc.', max_length=1024), + ), + migrations.AddField( + model_name='discovereddependency', + name='subpath', + field=models.CharField(blank=True, help_text='Extra subpath within a package, relative to the package root.', max_length=200), + ), + migrations.AddField( + model_name='discovereddependency', + name='type', + field=models.CharField(blank=True, help_text='A short code to identify the type of this package. For example: gem for a Rubygem, docker for a container, pypi for a Python Wheel or Egg, maven for a Maven Jar, deb for a Debian package, etc.', max_length=16), + ), + migrations.AddField( + model_name='discovereddependency', + name='version', + field=models.CharField(blank=True, help_text='Version of the package.', max_length=100), + ), + migrations.AlterModelOptions( + name="discovereddependency", + options={ + "ordering": [ + "-is_runtime", + "-is_resolved", + "is_optional", + "dependency_uid", + "for_package", + "datafile_resource", + "datasource_id", + ], + "verbose_name": "discovered dependency", + "verbose_name_plural": "discovered dependencies", + }, + ), + ] diff --git a/scanpipe/migrations/0023_migrate_dependencies.py b/scanpipe/migrations/0023_migrate_dependencies.py new file mode 100644 index 000000000..de412e5a8 --- /dev/null +++ b/scanpipe/migrations/0023_migrate_dependencies.py @@ -0,0 +1,49 @@ +# Generated by Django 4.0.6 on 2022-08-12 23:13 + +from django.core.exceptions import MultipleObjectsReturned, ObjectDoesNotExist +from django.db import migrations + + +def migrate_dependencies_to_discovereddependencies(apps, schema_editor): + DiscoveredPackage = apps.get_model('scanpipe', 'DiscoveredPackage') + DiscoveredDependency = apps.get_model('scanpipe', 'DiscoveredDependency') + + package_with_dependencies = DiscoveredPackage.objects.exclude(dependencies_data=[]) + + for package in package_with_dependencies: + for 
dependency_data in package.dependencies_data: + project = package.project + + # Remove non-supported fields from the data dict + dependency_data.pop("extra_data", None) + dependency_data.pop("resolved_package", None) + + for_package_uid = dependency_data.get("for_package_uid") + try: + for_package = project.discoveredpackages.get(package_uid=for_package_uid) + except (ObjectDoesNotExist, MultipleObjectsReturned): + for_package = None + + datafile_path = dependency_data.get("datafile_path") + try: + datafile_resource = project.codebaseresources.get(path=datafile_path) + except (ObjectDoesNotExist, MultipleObjectsReturned): + datafile_resource = None + + DiscoveredDependency.objects.create( + project=project, + for_package=for_package, + datafile_resource=datafile_resource, + **dependency_data, + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ('scanpipe', '0022_rename_dependencies_discoveredpackage_dependencies_data_and_more'), + ] + + operations = [ + migrations.RunPython(migrate_dependencies_to_discovereddependencies), + ] diff --git a/scanpipe/migrations/0024_migrate_dependency_uid_to_purl_fields.py b/scanpipe/migrations/0024_migrate_dependency_uid_to_purl_fields.py new file mode 100644 index 000000000..58168d4de --- /dev/null +++ b/scanpipe/migrations/0024_migrate_dependency_uid_to_purl_fields.py @@ -0,0 +1,31 @@ +# Generated by Django 4.0.6 on 2022-08-15 22:25 + +from django.db import migrations +from django.db.models import Q +from packageurl import PackageURL + + +def migrate_dependency_uids_to_purl_fields(apps, schema_editor): + DiscoveredDependency = apps.get_model('scanpipe', 'DiscoveredDependency') + + qs = DiscoveredDependency.objects.exclude( + Q(purl="") | Q(purl__isnull=True) + ) + for dependency in qs: + purl_mapping = PackageURL.from_string(dependency.purl).to_dict() + for field_name, value in purl_mapping.items(): + if not value: + continue + setattr(dependency, field_name, value) + dependency.save() + + +class 
Migration(migrations.Migration): + + dependencies = [ + ('scanpipe', '0023_migrate_dependencies'), + ] + + operations = [ + migrations.RunPython(migrate_dependency_uids_to_purl_fields), + ] diff --git a/scanpipe/migrations/0025_remove_discovereddependency_purl_and_more.py b/scanpipe/migrations/0025_remove_discovereddependency_purl_and_more.py new file mode 100644 index 000000000..3b53d1195 --- /dev/null +++ b/scanpipe/migrations/0025_remove_discovereddependency_purl_and_more.py @@ -0,0 +1,21 @@ +# Generated by Django 4.0.6 on 2022-08-16 19:37 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('scanpipe', '0024_migrate_dependency_uid_to_purl_fields'), + ] + + operations = [ + migrations.RemoveField( + model_name='discoveredpackage', + name='dependencies_data', + ), + migrations.RemoveField( + model_name='discovereddependency', + name='purl', + ), + ] diff --git a/scanpipe/models.py b/scanpipe/models.py index 6649fc9ad..0c693329d 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -62,6 +62,7 @@ from commoncode.hash import multi_checksums from packageurl import PackageURL from packageurl import normalize_qualifiers +from packageurl.contrib.django.models import PackageURLMixin from packageurl.contrib.django.models import PackageURLQuerySetMixin from rest_framework.authtoken.models import Token from rq.command import send_stop_job_command @@ -82,6 +83,10 @@ class RunInProgressError(Exception): """Run are in progress or queued on this project.""" +# PackageURL._fields +PURL_FIELDS = ("type", "namespace", "name", "version", "qualifiers", "subpath") + + class UUIDPKModel(models.Model): uuid = models.UUIDField( verbose_name=_("UUID"), @@ -511,6 +516,7 @@ def reset(self, keep_input=True): self.projecterrors, self.runs, self.discoveredpackages, + self.discovereddependencies, self.codebaseresources, ] @@ -877,6 +883,13 @@ def package_count(self): """ return self.discoveredpackages.count() + @cached_property + def 
dependency_count(self): + """ + Returns the number of dependencies related to this project. + """ + return self.discovereddependencies.count() + @cached_property def error_count(self): """ @@ -997,6 +1010,39 @@ def add_errors(self, errors): self.add_error(error) +class UpdateFromDataMixin: + """ + Adds a method to update an object instance from a `data` dict. + """ + + def update_from_data(self, data, override=False): + """ + Update this object instance with the provided `data`. + The `save()` is called only if at least one field was modified. + """ + model_fields = self.__class__.model_fields() + updated_fields = [] + + for field_name, value in data.items(): + skip_reasons = [ + not value, + field_name not in model_fields, + field_name in PURL_FIELDS, + ] + if any(skip_reasons): + continue + + current_value = getattr(self, field_name, None) + if not current_value or (current_value != value and override): + setattr(self, field_name, value) + updated_fields.append(field_name) + + if updated_fields: + self.save() + + return updated_fields + + class RunQuerySet(ProjectRelatedQuerySet): def not_started(self): """ @@ -1508,6 +1554,7 @@ class Compliance(models.TextChoices): "provided policies." 
), ) + package_data = models.JSONField( default=list, blank=True, @@ -1804,6 +1851,7 @@ class DiscoveredPackage( ProjectRelatedModel, ExtraDataFieldMixin, SaveProjectErrorMixin, + UpdateFromDataMixin, AbstractPackage, ): """ @@ -1820,11 +1868,6 @@ class DiscoveredPackage( ) missing_resources = models.JSONField(default=list, blank=True) modified_resources = models.JSONField(default=list, blank=True) - dependencies = models.JSONField( - default=list, - blank=True, - help_text=_("A list of dependencies for this package."), - ) package_uid = models.CharField( max_length=1024, blank=True, @@ -1868,15 +1911,11 @@ def purl(self): """ return self.package_url - @classmethod - def purl_fields(cls): - return PackageURL._fields - @classmethod def extract_purl_data(cls, package_data): purl_data = {} - for field_name in cls.purl_fields(): + for field_name in PURL_FIELDS: value = package_data.get(field_name) if field_name == "qualifiers": value = normalize_qualifiers(value, encode=True) @@ -1924,32 +1963,190 @@ def create_from_data(cls, project, package_data): discovered_package.save(save_error=False, capture_exception=False) return discovered_package - def update_from_data(self, package_data, override=False): - """ - Update this discovered package instance with the provided `package_data`. - The `save()` is called only if at least one field was modified. 
- """ - model_fields = DiscoveredPackage.model_fields() - updated_fields = [] - for field_name, value in package_data.items(): - skip_reasons = [ - not value, - field_name not in model_fields, - field_name in self.purl_fields(), - ] - if any(skip_reasons): - continue +class DiscoveredDependencyQuerySet(ProjectRelatedQuerySet): + pass - current_value = getattr(self, field_name, None) - if not current_value or (current_value != value and override): - setattr(self, field_name, value) - updated_fields.append(field_name) - if updated_fields: - self.save() +class DiscoveredDependency( + ProjectRelatedModel, + SaveProjectErrorMixin, + UpdateFromDataMixin, + PackageURLMixin, +): + """ + A project's Discovered Dependencies are records of the dependencies used by + system and application packages discovered in the code under analysis. + """ - return updated_fields + # Overrides the `project` field from `ProjectRelatedModel` to set the proper + # `related_name`. + project = models.ForeignKey( + Project, + related_name="discovereddependencies", + on_delete=models.CASCADE, + editable=False, + ) + + dependency_uid = models.CharField( + max_length=1024, + help_text=_("The unique identifier of this dependency."), + ) + for_package = models.ForeignKey( + DiscoveredPackage, + related_name="dependencies", + on_delete=models.CASCADE, + editable=False, + blank=True, + null=True, + ) + datafile_resource = models.ForeignKey( + CodebaseResource, + related_name="dependencies", + on_delete=models.CASCADE, + editable=False, + blank=True, + null=True, + ) + extracted_requirement = models.CharField( + max_length=64, + blank=True, + help_text=_("The version requirements of this dependency."), + ) + scope = models.CharField( + max_length=64, + blank=True, + help_text=_("The scope of this dependency, how it is used in a project."), + ) + + is_runtime = models.BooleanField(default=False) + is_optional = models.BooleanField(default=False) + is_resolved = models.BooleanField(default=False) + + 
datasource_id = models.CharField( + max_length=64, + blank=True, + help_text=_( + "The identifier for the datafile handler used to obtain this dependency." + ), + ) + + objects = DiscoveredDependencyQuerySet.as_manager() + + class Meta: + verbose_name = "discovered dependency" + verbose_name_plural = "discovered dependencies" + ordering = [ + "-is_runtime", + "-is_resolved", + "is_optional", + "dependency_uid", + "for_package", + "datafile_resource", + "datasource_id", + ] + constraints = [ + models.UniqueConstraint( + fields=["project", "dependency_uid"], + condition=~Q(dependency_uid=""), + name="%(app_label)s_%(class)s_unique_dependency_uid_within_project", + ), + ] + + def __str__(self): + return self.dependency_uid + + def get_absolute_url(self): + return reverse("dependency_detail", args=[self.project_id, self.pk]) + + @property + def purl(self): + return self.package_url + + @property + def package_type(self): + return self.type + + @cached_property + def for_package_uid(self): + if self.for_package: + return self.for_package.package_uid + + @cached_property + def datafile_path(self): + if self.datafile_resource: + return self.datafile_resource.path + + @classmethod + def create_from_data( + cls, + project, + dependency_data, + for_package=None, + datafile_resource=None, + strip_datafile_path_root=False, + ): + """ + Creates and returns a DiscoveredDependency for a `project` from the + `dependency_data`. + + If `strip_datafile_path_root` is True, then `create_from_data()` will + strip the root path segment from the `datafile_path` of + `dependency_data` before looking up the corresponding CodebaseResource + for `datafile_path`. This is used in the case where Dependency data is + imported from a scancode-toolkit scan, where the root path segments are + not stripped for `datafile_path`s. 
+ """ + required_fields = ["purl", "dependency_uid"] + missing_values = [ + field_name + for field_name in required_fields + if not dependency_data.get(field_name) + ] + + if missing_values: + message = ( + f"No values for the following required fields: " + f"{', '.join(missing_values)}" + ) + + project.add_error(error=message, model=cls, details=dependency_data) + return + + if not for_package: + for_package_uid = dependency_data.get("for_package_uid") + if for_package_uid: + for_package = project.discoveredpackages.get( + package_uid=for_package_uid + ) + + if not datafile_resource: + datafile_path = dependency_data.get("datafile_path") + if datafile_path: + if strip_datafile_path_root: + segments = datafile_path.split("/") + datafile_path = "/".join(segments[1:]) + datafile_resource = project.codebaseresources.get(path=datafile_path) + + # Set purl fields from `purl` + purl = dependency_data.get("purl") + purl_mapping = PackageURL.from_string(purl).to_dict() + dependency_data.update(**purl_mapping) + + cleaned_dependency_data = { + field_name: value + for field_name, value in dependency_data.items() + if field_name in DiscoveredDependency.model_fields() and value + } + discovered_dependency = cls( + project=project, + for_package=for_package, + datafile_resource=datafile_resource, + **cleaned_dependency_data, + ) + discovered_dependency.save() + + return discovered_dependency class WebhookSubscription(UUIDPKModel, ProjectRelatedModel): diff --git a/scanpipe/pipes/__init__.py b/scanpipe/pipes/__init__.py index 50f42d4a4..6a702a78f 100644 --- a/scanpipe/pipes/__init__.py +++ b/scanpipe/pipes/__init__.py @@ -30,6 +30,7 @@ from django.db.models import Count from scanpipe.models import CodebaseResource +from scanpipe.models import DiscoveredDependency from scanpipe.models import DiscoveredPackage from scanpipe.pipes import scancode @@ -104,6 +105,40 @@ def update_or_create_package(project, package_data, codebase_resource=None): return package +def 
update_or_create_dependencies( + project, dependency_data, strip_datafile_path_root=False +): + """ + Gets, updates or creates a DiscoveredDependency then returns it. + Uses the `project` and `dependency_data` mapping to lookup and creates the + DiscoveredDependency using its dependency_uid and for_package_uid as a unique key. + + If `strip_datafile_path_root` is True, then + `DiscoveredDependency.create_from_data()` will strip the root path segment + from the `datafile_path` of `dependency_data` before looking up the + corresponding CodebaseResource for `datafile_path`. This is used in the case + where Dependency data is imported from a scancode-toolkit scan, where the + root path segments are not stripped for `datafile_path`s. + """ + try: + dependency = project.discovereddependencies.get( + dependency_uid=dependency_data.get("dependency_uid") + ) + except DiscoveredDependency.DoesNotExist: + dependency = None + + if dependency: + dependency.update_from_data(dependency_data) + else: + dependency = DiscoveredDependency.create_from_data( + project, + dependency_data, + strip_datafile_path_root=strip_datafile_path_root, + ) + + return dependency + + def analyze_scanned_files(project): """ Sets the status for CodebaseResource to unknown or no license. 
diff --git a/scanpipe/pipes/output.py b/scanpipe/pipes/output.py index ba2df69e7..464a76eaa 100644 --- a/scanpipe/pipes/output.py +++ b/scanpipe/pipes/output.py @@ -82,6 +82,9 @@ def to_csv(project): querysets = [ project.discoveredpackages.all(), + project.discovereddependencies.all().prefetch_related( + "for_package", "datafile_resource" + ), project.codebaseresources.without_symlinks(), ] @@ -125,6 +128,7 @@ def __iter__(self): yield "{\n" yield from self.serialize(label="headers", generator=self.get_headers) yield from self.serialize(label="packages", generator=self.get_packages) + yield from self.serialize(label="dependencies", generator=self.get_dependencies) yield from self.serialize(label="files", generator=self.get_files, latest=True) yield "}" @@ -178,6 +182,24 @@ def get_packages(self, project): for obj in packages.iterator(): yield self.encode(DiscoveredPackageSerializer(obj).data) + def get_dependencies(self, project): + from scanpipe.api.serializers import DiscoveredDependencySerializer + + dependencies = ( + project.discovereddependencies.all() + .prefetch_related("for_package", "datafile_resource") + .order_by( + "type", + "namespace", + "name", + "version", + "datasource_id", + ) + ) + + for obj in dependencies.iterator(): + yield self.encode(DiscoveredDependencySerializer(obj).data) + def get_files(self, project): from scanpipe.api.serializers import CodebaseResourceSerializer @@ -381,6 +403,9 @@ def to_xlsx(project): querysets = [ project.discoveredpackages.all(), + project.discovereddependencies.all().prefetch_related( + "for_package", "datafile_resource" + ), project.codebaseresources.without_symlinks(), project.projecterrors.all(), ] diff --git a/scanpipe/pipes/scancode.py b/scanpipe/pipes/scancode.py index 822b3842b..b9a88b745 100644 --- a/scanpipe/pipes/scancode.py +++ b/scanpipe/pipes/scancode.py @@ -45,6 +45,7 @@ from scanpipe import pipes from scanpipe.models import CodebaseResource +from scanpipe.models import DiscoveredPackage logger = 
logging.getLogger("scanpipe.pipes") @@ -311,11 +312,12 @@ def scan_for_files(project): def scan_for_application_packages(project): """ - Runs a package scan on files without a status for a `project`, then create - DiscoveredPackage instances from the detected package data. + Runs a package scan on files without a status for a `project`, + then create DiscoveredPackage and DiscoveredDependency instances + from the detected package data - Multiprocessing is enabled by default on this pipe, the number of processes - can be controlled through the SCANCODEIO_PROCESSES setting. + Multiprocessing is enabled by default on this pipe, the number of processes can be + controlled through the SCANCODEIO_PROCESSES setting. """ resource_qs = project.codebaseresources.no_status() @@ -378,9 +380,11 @@ def assemble_packages(project): package_data = item.to_dict() pipes.update_or_create_package(project, package_data) elif isinstance(item, packagedcode_models.Dependency): - # We will handle Dependencies when we properly implement the - # DiscoveredDependency model - pass + dependency_data = item.to_dict() + pipes.update_or_create_dependencies( + project, + dependency_data, + ) elif isinstance(item, CodebaseResource): seen_resource_paths.add(item.path) else: @@ -475,6 +479,29 @@ def create_discovered_packages(project, scanned_codebase): pipes.update_or_create_package(project, package_data) +def create_discovered_dependencies( + project, scanned_codebase, strip_datafile_path_root=False +): + """ + Saves the dependencies of a ScanCode `scanned_codebase` scancode.resource.Codebase + object to the database as a DiscoveredDependency of `project`. + + If `strip_datafile_path_root` is True, then + `DiscoveredDependency.create_from_data()` will strip the root path segment + from the `datafile_path` of `dependency_data` before looking up the + corresponding CodebaseResource for `datafile_path`. 
This is used in the case + where Dependency data is imported from a scancode-toolkit scan, where the + root path segments are not stripped for `datafile_path`s. + """ + if hasattr(scanned_codebase.attributes, "dependencies"): + for dependency_data in scanned_codebase.attributes.dependencies: + pipes.update_or_create_dependencies( + project, + dependency_data, + strip_datafile_path_root=strip_datafile_path_root, + ) + + def set_codebase_resource_for_package(codebase_resource, discovered_package): """ Assigns the `discovered_package` to the `codebase_resource` and set its @@ -566,3 +593,6 @@ def create_inventory_from_scan(project, input_location): scanned_codebase = get_virtual_codebase(project, input_location) create_discovered_packages(project, scanned_codebase) create_codebase_resources(project, scanned_codebase) + create_discovered_dependencies( + project, scanned_codebase, strip_datafile_path_root=True + ) diff --git a/scanpipe/templates/scanpipe/base.html b/scanpipe/templates/scanpipe/base.html index d31361dab..c48d2617f 100644 --- a/scanpipe/templates/scanpipe/base.html +++ b/scanpipe/templates/scanpipe/base.html @@ -20,6 +20,7 @@ .tag-header {border-radius: 4px 4px 0 0!important;} .border-bottom-radius {border-bottom-left-radius: 6px; border-bottom-right-radius: 6px;} .tabs .border-left-radius {border-bottom-left-radius: 4px; border-top-left-radius: 4px;} + .tab-content pre {border-radius: 4px;} .border-no-top-left-radius {border-bottom-left-radius: 6px; border-bottom-right-radius: 6px; border-top-right-radius: 6px;} .border-dashed {border-width: 2px; border-style: dashed; border-color: rgb(229, 231, 235);} .is-height-auto {height: auto;} diff --git a/scanpipe/templates/scanpipe/dependency_detail.html b/scanpipe/templates/scanpipe/dependency_detail.html new file mode 100644 index 000000000..23fbf86c2 --- /dev/null +++ b/scanpipe/templates/scanpipe/dependency_detail.html @@ -0,0 +1,24 @@ +{% extends "scanpipe/base.html" %} +{% load static humanize %} + +{% 
block title %}ScanCode.io: {{ project.name }} - {{ object.name }}{% endblock %} + +{% block content %} +
+ {% include 'scanpipe/includes/navbar_header.html' %} +
{% include 'scanpipe/includes/messages.html' %}
+ +
+ +
+ + {% include 'scanpipe/tabset/tabset.html' %} +
+{% endblock %} \ No newline at end of file diff --git a/scanpipe/templates/scanpipe/dependency_list.html b/scanpipe/templates/scanpipe/dependency_list.html new file mode 100644 index 000000000..8a84ffa40 --- /dev/null +++ b/scanpipe/templates/scanpipe/dependency_list.html @@ -0,0 +1,69 @@ +{% extends "scanpipe/base.html" %} + +{% block title %}ScanCode.io: {{ project.name }} - Dependencies{% endblock %} + +{% block content %} +
+ {% include 'scanpipe/includes/navbar_header.html' %} +
+
+ {% include 'scanpipe/includes/breadcrumb.html' with linked_project=True current="Dependencies" %} + {% include 'scanpipe/includes/search_field.html' with extra_class="is-small" %} +
+ {% include 'scanpipe/includes/pagination_header.html' %} + {% include 'scanpipe/includes/filters_breadcrumb.html' with filterset=filter only %} +
+
+ +
+
+ + {% include 'scanpipe/includes/list_view_thead.html' %} + + {% for dependency in object_list %} + + + + + + + + + + + + + {% endfor %} + +
+ {{ dependency.purl }} + + {{ dependency.type }} + + {{ dependency.extracted_requirement }} + + {{ dependency.scope }} + + {{ dependency.is_runtime }} + + {{ dependency.is_optional }} + + {{ dependency.is_resolved }} + + {% if dependency.for_package %} + {{ dependency.for_package.purl }} + {% endif %} + + {% if dependency.datafile_resource %} + {{ dependency.datafile_resource.name }} + {% endif %} + + {{ dependency.datasource_id }} +
+
+ + {% if is_paginated %} + {% include 'scanpipe/includes/pagination.html' with page_obj=page_obj %} + {% endif %} +
+{% endblock %} \ No newline at end of file diff --git a/scanpipe/templates/scanpipe/error_list.html b/scanpipe/templates/scanpipe/error_list.html index 0eb9e2d30..68ce8facf 100644 --- a/scanpipe/templates/scanpipe/error_list.html +++ b/scanpipe/templates/scanpipe/error_list.html @@ -1,5 +1,7 @@ {% extends "scanpipe/base.html" %} +{% block title %}ScanCode.io: {{ project.name }} - Errors{% endblock %} + {% block content %}
{% include 'scanpipe/includes/navbar_header.html' %} diff --git a/scanpipe/templates/scanpipe/includes/project_list_table.html b/scanpipe/templates/scanpipe/includes/project_list_table.html index f9bfb6ba1..50aa9e5ed 100644 --- a/scanpipe/templates/scanpipe/includes/project_list_table.html +++ b/scanpipe/templates/scanpipe/includes/project_list_table.html @@ -17,6 +17,15 @@ 0 {% endif %} + + {% if project.discovereddependencies_count %} + + {{ project.discovereddependencies_count|intcomma }} + + {% else %} + 0 + {% endif %} + {% if project.codebaseresources_count %} diff --git a/scanpipe/templates/scanpipe/includes/project_summary_level.html b/scanpipe/templates/scanpipe/includes/project_summary_level.html index 4cbd099ad..d7e05b6e8 100644 --- a/scanpipe/templates/scanpipe/includes/project_summary_level.html +++ b/scanpipe/templates/scanpipe/includes/project_summary_level.html @@ -16,6 +16,22 @@

+
+
+

Dependencies

+

+ {% if project.dependency_count %} + + {{ project.dependency_count|intcomma }} + + {% else %} + + {{ project.dependency_count|intcomma }} + + {% endif %} +

+
+

Resources

diff --git a/scanpipe/templates/scanpipe/package_list.html b/scanpipe/templates/scanpipe/package_list.html index e49e5a3fb..0f2ad9556 100644 --- a/scanpipe/templates/scanpipe/package_list.html +++ b/scanpipe/templates/scanpipe/package_list.html @@ -1,5 +1,7 @@ {% extends "scanpipe/base.html" %} +{% block title %}ScanCode.io: {{ project.name }} - Packages{% endblock %} + {% block content %}
{% include 'scanpipe/includes/navbar_header.html' %} diff --git a/scanpipe/templates/scanpipe/project_detail.html b/scanpipe/templates/scanpipe/project_detail.html index 7c146b30b..686be1ca0 100644 --- a/scanpipe/templates/scanpipe/project_detail.html +++ b/scanpipe/templates/scanpipe/project_detail.html @@ -120,6 +120,30 @@

{% endif %} + {% if project.dependency_count %} +
+

+ Discovered Dependencies + {{ project.dependency_count|intcomma }} +

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {% endif %} + {% if project.resource_count %}

@@ -174,6 +198,12 @@

{{ package_licenses|json_script:"package_licenses" }} {{ package_types|json_script:"package_types" }} {% endif %} + {% if project.dependency_count %} + {{ dependency_package_type|json_script:"dependency_package_type" }} + {{ dependency_is_runtime|json_script:"dependency_is_runtime" }} + {{ dependency_is_optional|json_script:"dependency_is_optional" }} + {{ dependency_is_resolved|json_script:"dependency_is_resolved" }} + {% endif %}