From 7932272cc445275a7fe493803494e034ca24a8ce Mon Sep 17 00:00:00 2001 From: Nicolas Nobelis Date: Thu, 30 Mar 2023 10:38:05 +0200 Subject: [PATCH] feat(fossid-webapp): Map FossID snippets to the `ScanSummary` When FossID identifies a file matching snippets, it is a pending file. An operator needs to log in to the FossID UI and use the license of a snippet or manually enter different license information. Then the file is marked as "identified". Currently, the FossID scanner in ORT returns the list of all pending files in `ScanSummary` issues, with a severity of `HINT`. This commit maps the snippets of pending files using the newly-created snippet data model. The pending files are still listed as issues; this listing will be removed in a future commit as it is a breaking change. Signed-off-by: Nicolas Nobelis --- .../src/main/kotlin/scanners/fossid/FossId.kt | 115 +++++++++++++++++- .../scanners/fossid/FossIdScanResults.kt | 4 +- .../test/kotlin/scanners/fossid/FossIdTest.kt | 92 +++++++++++++- 3 files changed, 207 insertions(+), 4 deletions(-) diff --git a/scanner/src/main/kotlin/scanners/fossid/FossId.kt b/scanner/src/main/kotlin/scanners/fossid/FossId.kt index dfcca886d596e..e77ca07cc28f3 100644 --- a/scanner/src/main/kotlin/scanners/fossid/FossId.kt +++ b/scanner/src/main/kotlin/scanners/fossid/FossId.kt @@ -27,6 +27,9 @@ import kotlin.time.Duration.Companion.minutes import kotlin.time.Duration.Companion.seconds import kotlin.time.measureTimedValue +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.async +import kotlinx.coroutines.awaitAll import kotlinx.coroutines.delay import kotlinx.coroutines.runBlocking import kotlinx.coroutines.withTimeoutOrNull @@ -48,6 +51,7 @@ import org.ossreviewtoolkit.clients.fossid.listIgnoredFiles import org.ossreviewtoolkit.clients.fossid.listMarkedAsIdentifiedFiles import org.ossreviewtoolkit.clients.fossid.listPendingFiles import org.ossreviewtoolkit.clients.fossid.listScansForProject +import 
org.ossreviewtoolkit.clients.fossid.listSnippets import org.ossreviewtoolkit.clients.fossid.model.Project import org.ossreviewtoolkit.clients.fossid.model.Scan import org.ossreviewtoolkit.clients.fossid.model.rules.RuleScope @@ -56,28 +60,40 @@ import org.ossreviewtoolkit.clients.fossid.model.status.DownloadStatus import org.ossreviewtoolkit.clients.fossid.model.status.ScanStatus import org.ossreviewtoolkit.clients.fossid.runScan import org.ossreviewtoolkit.downloader.VersionControlSystem +import org.ossreviewtoolkit.model.ArtifactProvenance +import org.ossreviewtoolkit.model.Hash import org.ossreviewtoolkit.model.Issue +import org.ossreviewtoolkit.model.LicenseFinding import org.ossreviewtoolkit.model.Package +import org.ossreviewtoolkit.model.PackageProvider import org.ossreviewtoolkit.model.Provenance +import org.ossreviewtoolkit.model.RemoteArtifact import org.ossreviewtoolkit.model.RepositoryProvenance import org.ossreviewtoolkit.model.ScanResult import org.ossreviewtoolkit.model.ScanSummary import org.ossreviewtoolkit.model.ScannerDetails import org.ossreviewtoolkit.model.Severity +import org.ossreviewtoolkit.model.TextLocation import org.ossreviewtoolkit.model.UnknownProvenance import org.ossreviewtoolkit.model.VcsType import org.ossreviewtoolkit.model.config.DownloaderConfiguration import org.ossreviewtoolkit.model.config.Options import org.ossreviewtoolkit.model.config.ScannerConfiguration import org.ossreviewtoolkit.model.createAndLogIssue +import org.ossreviewtoolkit.model.utils.PurlType +import org.ossreviewtoolkit.model.utils.Snippet +import org.ossreviewtoolkit.model.utils.SnippetFinding import org.ossreviewtoolkit.scanner.AbstractScannerWrapperFactory import org.ossreviewtoolkit.scanner.PackageScannerWrapper import org.ossreviewtoolkit.scanner.ProvenanceScannerWrapper import org.ossreviewtoolkit.scanner.ScanContext import org.ossreviewtoolkit.scanner.ScannerCriteria +import org.ossreviewtoolkit.utils.common.collectMessages import 
org.ossreviewtoolkit.utils.common.enumSetOf import org.ossreviewtoolkit.utils.common.replaceCredentialsInUri import org.ossreviewtoolkit.utils.ort.showStackTrace +import org.ossreviewtoolkit.utils.spdx.SpdxConstants +import org.ossreviewtoolkit.utils.spdx.toSpdx /** * A wrapper for [FossID](https://fossid.com/). @@ -746,7 +762,23 @@ class FossId internal constructor( "${pendingFiles.size} pending files have been returned for scan '$scanCode'." } - return RawResults(identifiedFiles, markedAsIdentifiedFiles, listIgnoredFiles, pendingFiles) + val snippets = runBlocking(Dispatchers.IO) { + pendingFiles.map { + async { + logger.info { "Listing snippet for $it..." } + val snippetResponse = service.listSnippets(config.user, config.apiKey, scanCode, it) + .checkResponse("list snippets") + val snippets = requireNotNull(snippetResponse.data) { + "Snippet could not be listed. Response was ${snippetResponse.message}." + } + logger.info { "${snippets.size} snippets." } + + it to snippets.toSet() + } + }.awaitAll().toMap() + } + + return RawResults(identifiedFiles, markedAsIdentifiedFiles, listIgnoredFiles, pendingFiles, snippets) } /** @@ -760,10 +792,61 @@ class FossId internal constructor( scanId: String ): ScanResult { // TODO: Maybe get issues from FossID (see has_failed_scan_files, get_failed_files and maybe get_scan_log). + + // TODO: Deprecation: Remove the pending files in issues. This is a breaking change. 
val issues = rawResults.listPendingFiles.mapTo(mutableListOf()) { Issue(source = name, message = "Pending identification for '$it'.", severity = Severity.HINT) } + val snippetFindings = mutableSetOf() + val fakeLocation = TextLocation(".", TextLocation.UNKNOWN_LINE) + snippetFindings += rawResults.listSnippets.flatMap { (file, rawSnippets) -> + val snippets = rawSnippets.map { + val license = it.artifactLicense?.let { + runCatching { + LicenseFinding.createAndMap( + it, + fakeLocation, + detectedLicenseMapping = scannerConfig.detectedLicenseMapping + ).license + }.onFailure { spdxException -> + issues += FossId.createAndLogIssue( + source = "FossId", + message = "Failed to parse license '$it' as an SPDX expression:" + + " ${spdxException.collectMessages()}" + ) + }.getOrNull() + } ?: SpdxConstants.NOASSERTION.toSpdx() + + // FossID does not return the hash of the remote artifact: it returns instead, in the property + // "match_file_id", the MD5 hash of the matched file IN the remote artifact. + val snippetProvenance = it.url?.let { url -> + ArtifactProvenance(RemoteArtifact(url, Hash.NONE)) + } ?: UnknownProvenance + val purlType = it.url?.let { url -> urlToPackageType(url, issues)?.toString() } ?: "generic" + + // TODO: FossId doesn't return the line numbers of the match, only the character range. One must use + // another call "getMatchedLine" to retrieve the matched line numbers. Unfortunately, this is a call + // per snippet which is too expensive. When it is available for a batch of snippets, it can be used + // here. 
+ Snippet( + it.score.toFloat(), + TextLocation(it.file, TextLocation.UNKNOWN_LINE), + snippetProvenance, + "pkg:$purlType/${it.author}/${it.artifact}@${it.version}", + license + ) + } + + val sourceLocation = TextLocation(file, TextLocation.UNKNOWN_LINE) + snippets.map { + SnippetFinding( + sourceLocation, + it + ) + } + } + val ignoredFiles = rawResults.listIgnoredFiles.associateBy { it.path } val (licenseFindings, copyrightFindings) = rawResults.markedAsIdentifiedFiles.ifEmpty { @@ -776,6 +859,7 @@ class FossId internal constructor( packageVerificationCode = "", licenseFindings = licenseFindings.toSortedSet(), copyrightFindings = copyrightFindings.toSortedSet(), + snippetFindings = snippetFindings, issues = issues ) @@ -786,4 +870,33 @@ class FossId internal constructor( mapOf(SCAN_CODE_KEY to scanCode, SCAN_ID_KEY to scanId, SERVER_URL_KEY to config.serverUrl) ) } + + /** + * Return the [PurlType] as determined from the given [url], or null if there is no match. An issue will be added to + * [issues] in this case. + */ + private fun urlToPackageType(url: String, issues: MutableList): PurlType? = + when (val provider = PackageProvider.get(url)) { + PackageProvider.COCOAPODS -> PurlType.COCOAPODS + PackageProvider.CRATES_IO -> PurlType.CARGO + PackageProvider.DEBIAN -> PurlType.DEBIAN + PackageProvider.GITHUB -> PurlType.GITHUB + PackageProvider.GITLAB -> PurlType.GITLAB + PackageProvider.GOLANG -> PurlType.GOLANG + PackageProvider.MAVEN_CENTRAL, PackageProvider.MAVEN_GOOGLE -> PurlType.MAVEN + PackageProvider.NPM_JS -> PurlType.NPM + PackageProvider.NUGET -> PurlType.NUGET + PackageProvider.PACKAGIST -> PurlType.COMPOSER + PackageProvider.PYPI -> PurlType.PYPI + PackageProvider.RUBYGEMS -> PurlType.GEM + null -> null + + else -> { + issues += FossId.createAndLogIssue( + source = "FossId", + message = "Cannot determine PURL type for url '$url' and provider '$provider'." 
+ ) + null + } + } } diff --git a/scanner/src/main/kotlin/scanners/fossid/FossIdScanResults.kt b/scanner/src/main/kotlin/scanners/fossid/FossIdScanResults.kt index 3553ba764be08..fc420014ea72c 100644 --- a/scanner/src/main/kotlin/scanners/fossid/FossIdScanResults.kt +++ b/scanner/src/main/kotlin/scanners/fossid/FossIdScanResults.kt @@ -22,6 +22,7 @@ package org.ossreviewtoolkit.scanner.scanners.fossid import org.ossreviewtoolkit.clients.fossid.model.identification.identifiedFiles.IdentifiedFile import org.ossreviewtoolkit.clients.fossid.model.identification.ignored.IgnoredFile import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.MarkedAsIdentifiedFile +import org.ossreviewtoolkit.clients.fossid.model.result.Snippet import org.ossreviewtoolkit.clients.fossid.model.summary.Summarizable import org.ossreviewtoolkit.model.CopyrightFinding import org.ossreviewtoolkit.model.Issue @@ -37,7 +38,8 @@ internal data class RawResults( val identifiedFiles: List, val markedAsIdentifiedFiles: List, val listIgnoredFiles: List, - val listPendingFiles: List + val listPendingFiles: List, + val listSnippets: Map> ) /** diff --git a/scanner/src/test/kotlin/scanners/fossid/FossIdTest.kt b/scanner/src/test/kotlin/scanners/fossid/FossIdTest.kt index bf74903377c8c..8a1c5b6f0eb3a 100644 --- a/scanner/src/test/kotlin/scanners/fossid/FossIdTest.kt +++ b/scanner/src/test/kotlin/scanners/fossid/FossIdTest.kt @@ -62,6 +62,7 @@ import org.ossreviewtoolkit.clients.fossid.listIgnoredFiles import org.ossreviewtoolkit.clients.fossid.listMarkedAsIdentifiedFiles import org.ossreviewtoolkit.clients.fossid.listPendingFiles import org.ossreviewtoolkit.clients.fossid.listScansForProject +import org.ossreviewtoolkit.clients.fossid.listSnippets import org.ossreviewtoolkit.clients.fossid.model.Scan import org.ossreviewtoolkit.clients.fossid.model.identification.common.LicenseMatchType import org.ossreviewtoolkit.clients.fossid.model.identification.identifiedFiles.IdentifiedFile @@ 
-69,6 +70,8 @@ import org.ossreviewtoolkit.clients.fossid.model.identification.ignored.IgnoredF import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.License import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.LicenseFile import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.MarkedAsIdentifiedFile +import org.ossreviewtoolkit.clients.fossid.model.result.MatchType +import org.ossreviewtoolkit.clients.fossid.model.result.Snippet import org.ossreviewtoolkit.clients.fossid.model.rules.IgnoreRule import org.ossreviewtoolkit.clients.fossid.model.rules.RuleScope import org.ossreviewtoolkit.clients.fossid.model.rules.RuleType @@ -78,23 +81,29 @@ import org.ossreviewtoolkit.clients.fossid.model.status.UnversionedScanDescripti import org.ossreviewtoolkit.clients.fossid.runScan import org.ossreviewtoolkit.downloader.VersionControlSystem import org.ossreviewtoolkit.downloader.vcs.Git +import org.ossreviewtoolkit.model.ArtifactProvenance import org.ossreviewtoolkit.model.CopyrightFinding +import org.ossreviewtoolkit.model.Hash import org.ossreviewtoolkit.model.Identifier import org.ossreviewtoolkit.model.Issue import org.ossreviewtoolkit.model.LicenseFinding import org.ossreviewtoolkit.model.Package import org.ossreviewtoolkit.model.PackageType +import org.ossreviewtoolkit.model.RemoteArtifact import org.ossreviewtoolkit.model.ScanResult import org.ossreviewtoolkit.model.Severity import org.ossreviewtoolkit.model.TextLocation import org.ossreviewtoolkit.model.VcsInfo import org.ossreviewtoolkit.model.VcsType import org.ossreviewtoolkit.model.config.ScannerConfiguration +import org.ossreviewtoolkit.model.utils.Snippet as OrtSnippet +import org.ossreviewtoolkit.model.utils.SnippetFinding import org.ossreviewtoolkit.scanner.ScanContext import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.SCAN_CODE_KEY import 
org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.SCAN_ID_KEY import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.SERVER_URL_KEY import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.convertGitUrlToProjectName +import org.ossreviewtoolkit.utils.spdx.SpdxExpression @Suppress("LargeClass") class FossIdTest : WordSpec({ @@ -314,6 +323,7 @@ class FossIdTest : WordSpec({ summary.licenseFindings shouldContainExactlyInAnyOrder expectedLicenseFindings } + // TODO: Deprecation: Remove the pending files in issues. This is a breaking change. "report pending files as issues" { val projectCode = projectCode(PROJECT) val scanCode = scanCode(PROJECT, null) @@ -328,7 +338,7 @@ class FossIdTest : WordSpec({ .expectCheckScanStatus(scanCode, ScanStatus.FINISHED) .expectCreateScan(projectCode, scanCode, vcsInfo, "") .expectDownload(scanCode) - .mockFiles(scanCode, pendingRange = 4..5) + .mockFiles(scanCode, pendingRange = 4..5, snippetRange = 1..5) val fossId = createFossId(config) @@ -341,6 +351,34 @@ class FossIdTest : WordSpec({ summary.issues.map { it.copy(timestamp = Instant.EPOCH) } shouldBe expectedIssues } + "report pending files as snippets" { + val projectCode = projectCode(PROJECT) + val scanCode = scanCode(PROJECT, null) + val config = createConfig(deltaScans = false) + val vcsInfo = createVcsInfo() + val scan = createScan(vcsInfo.url, "${vcsInfo.revision}_other", scanCode) + val pkgId = createIdentifier(index = 42) + + FossIdRestService.create(config.serverUrl) + .expectProjectRequest(projectCode) + .expectListScans(projectCode, listOf(scan)) + .expectCheckScanStatus(scanCode, ScanStatus.FINISHED) + .expectCreateScan(projectCode, scanCode, vcsInfo, "") + .expectDownload(scanCode) + .mockFiles(scanCode, pendingRange = 1..5, snippetRange = 1..5) + + val fossId = createFossId(config) + + val summary = fossId.scan(createPackage(pkgId, vcsInfo)).summary + + val expectedPendingFile = (1..5).map(::createPendingFile).toSet() + val 
expectedSnippetFindings = (1..5).map(::createSnippetFindings).flatten() + + summary.snippetFindings shouldHaveSize expectedPendingFile.size * 5 + summary.snippetFindings.map { it.sourceLocation.path }.toSet() shouldBe expectedPendingFile + summary.snippetFindings shouldBe expectedSnippetFindings + } + "create a new project if none exists yet" { val projectCode = projectCode(PROJECT) val scanCode = scanCode(PROJECT, null) @@ -1238,6 +1276,52 @@ private fun createIgnoredFile(index: Int): IgnoredFile = */ private fun createPendingFile(index: Int): String = "/pending/file/$index" +/** + * Generate a FossID snippet based on the given [index]. + */ +private fun createSnippet(index: Int): Snippet = Snippet( + index, + "created$index", + index, + index, + index, + MatchType.PARTIAL, + "reason$index", + "author$index", + "artifact$index", + "version$index", + "MIT", + "releaseDate$index", + "mirror$index", + "file$index", + "fileLicense$index", + "url$index", + "hits$index", + index, + "updated$index", + "cpe$index", + "$index", + "matchField$index", + "classification$index", + "highlighting$index" +) + +/** + * Generate a ORT snippet finding based on the given [index]. + */ +private fun createSnippetFindings(index: Int): Set = (1..5).map { snippetIndex -> + SnippetFinding( + TextLocation("/pending/file/$index", TextLocation.UNKNOWN_LINE), + OrtSnippet( + snippetIndex.toFloat(), + TextLocation("file$snippetIndex", TextLocation.UNKNOWN_LINE), + ArtifactProvenance(RemoteArtifact("url$snippetIndex", Hash.NONE)), + "pkg:generic/author$snippetIndex/artifact$snippetIndex@version$snippetIndex", + SpdxExpression.Companion.parse("MIT") + ) + ) +}.toSet() + /** * Prepare this service mock to answer a request for a project with the given [projectCode]. Return a response with * the given [status] and [error]. 
@@ -1348,12 +1432,14 @@ private fun FossIdServiceWithVersion.mockFiles( identifiedRange: IntRange = IntRange.EMPTY, markedRange: IntRange = IntRange.EMPTY, ignoredRange: IntRange = IntRange.EMPTY, - pendingRange: IntRange = IntRange.EMPTY + pendingRange: IntRange = IntRange.EMPTY, + snippetRange: IntRange = IntRange.EMPTY ): FossIdServiceWithVersion { val identifiedFiles = identifiedRange.map(::createIdentifiedFile) val markedFiles = markedRange.map(::createMarkedIdentifiedFile) val ignoredFiles = ignoredRange.map(::createIgnoredFile) val pendingFiles = pendingRange.map(::createPendingFile) + val snippets = snippetRange.map(::createSnippet) coEvery { listIdentifiedFiles(USER, API_KEY, scanCode) } returns PolymorphicResponseBody( @@ -1367,6 +1453,8 @@ private fun FossIdServiceWithVersion.mockFiles( PolymorphicResponseBody(status = 1, data = PolymorphicList(ignoredFiles)) coEvery { listPendingFiles(USER, API_KEY, scanCode) } returns PolymorphicResponseBody(status = 1, data = PolymorphicList(pendingFiles)) + coEvery { listSnippets(USER, API_KEY, scanCode, any()) } returns + PolymorphicResponseBody(status = 1, data = PolymorphicList(snippets)) return this }