Skip to content

Commit

Permalink
feat(fossid-webapp): Map FossID snippets to the ScanSummary
Browse files Browse the repository at this point in the history
When FossID identifies a file matching snippets, it is a pending file.
An operator needs to log in to the FossID UI and use the license of a snippet
or manually enter different license information. Then the file is
marked as "identified".

Currently, the FossID scanner in ORT returns the list of all pending
files in `ScanSummary` issues, with a severity of `HINT`. This commit
maps the snippets of pending files using the newly-created snippet data
model.

The pending files are still listed as issues: This will be removed in a
future commit as it is a breaking change.

Signed-off-by: Nicolas Nobelis <nicolas.nobelis@bosch.io>
  • Loading branch information
nnobelis committed May 11, 2023
1 parent 9eda92f commit 7932272
Show file tree
Hide file tree
Showing 3 changed files with 207 additions and 4 deletions.
115 changes: 114 additions & 1 deletion scanner/src/main/kotlin/scanners/fossid/FossId.kt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ import kotlin.time.Duration.Companion.minutes
import kotlin.time.Duration.Companion.seconds
import kotlin.time.measureTimedValue

import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.delay
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.withTimeoutOrNull
Expand All @@ -48,6 +51,7 @@ import org.ossreviewtoolkit.clients.fossid.listIgnoredFiles
import org.ossreviewtoolkit.clients.fossid.listMarkedAsIdentifiedFiles
import org.ossreviewtoolkit.clients.fossid.listPendingFiles
import org.ossreviewtoolkit.clients.fossid.listScansForProject
import org.ossreviewtoolkit.clients.fossid.listSnippets
import org.ossreviewtoolkit.clients.fossid.model.Project
import org.ossreviewtoolkit.clients.fossid.model.Scan
import org.ossreviewtoolkit.clients.fossid.model.rules.RuleScope
Expand All @@ -56,28 +60,40 @@ import org.ossreviewtoolkit.clients.fossid.model.status.DownloadStatus
import org.ossreviewtoolkit.clients.fossid.model.status.ScanStatus
import org.ossreviewtoolkit.clients.fossid.runScan
import org.ossreviewtoolkit.downloader.VersionControlSystem
import org.ossreviewtoolkit.model.ArtifactProvenance
import org.ossreviewtoolkit.model.Hash
import org.ossreviewtoolkit.model.Issue
import org.ossreviewtoolkit.model.LicenseFinding
import org.ossreviewtoolkit.model.Package
import org.ossreviewtoolkit.model.PackageProvider
import org.ossreviewtoolkit.model.Provenance
import org.ossreviewtoolkit.model.RemoteArtifact
import org.ossreviewtoolkit.model.RepositoryProvenance
import org.ossreviewtoolkit.model.ScanResult
import org.ossreviewtoolkit.model.ScanSummary
import org.ossreviewtoolkit.model.ScannerDetails
import org.ossreviewtoolkit.model.Severity
import org.ossreviewtoolkit.model.TextLocation
import org.ossreviewtoolkit.model.UnknownProvenance
import org.ossreviewtoolkit.model.VcsType
import org.ossreviewtoolkit.model.config.DownloaderConfiguration
import org.ossreviewtoolkit.model.config.Options
import org.ossreviewtoolkit.model.config.ScannerConfiguration
import org.ossreviewtoolkit.model.createAndLogIssue
import org.ossreviewtoolkit.model.utils.PurlType
import org.ossreviewtoolkit.model.utils.Snippet
import org.ossreviewtoolkit.model.utils.SnippetFinding
import org.ossreviewtoolkit.scanner.AbstractScannerWrapperFactory
import org.ossreviewtoolkit.scanner.PackageScannerWrapper
import org.ossreviewtoolkit.scanner.ProvenanceScannerWrapper
import org.ossreviewtoolkit.scanner.ScanContext
import org.ossreviewtoolkit.scanner.ScannerCriteria
import org.ossreviewtoolkit.utils.common.collectMessages
import org.ossreviewtoolkit.utils.common.enumSetOf
import org.ossreviewtoolkit.utils.common.replaceCredentialsInUri
import org.ossreviewtoolkit.utils.ort.showStackTrace
import org.ossreviewtoolkit.utils.spdx.SpdxConstants
import org.ossreviewtoolkit.utils.spdx.toSpdx

/**
* A wrapper for [FossID](https://fossid.com/).
Expand Down Expand Up @@ -746,7 +762,23 @@ class FossId internal constructor(
"${pendingFiles.size} pending files have been returned for scan '$scanCode'."
}

return RawResults(identifiedFiles, markedAsIdentifiedFiles, listIgnoredFiles, pendingFiles)
val snippets = runBlocking(Dispatchers.IO) {
pendingFiles.map {
async {
logger.info { "Listing snippet for $it..." }
val snippetResponse = service.listSnippets(config.user, config.apiKey, scanCode, it)
.checkResponse("list snippets")
val snippets = requireNotNull(snippetResponse.data) {
"Snippet could not be listed. Response was ${snippetResponse.message}."
}
logger.info { "${snippets.size} snippets." }

it to snippets.toSet()
}
}.awaitAll().toMap()
}

return RawResults(identifiedFiles, markedAsIdentifiedFiles, listIgnoredFiles, pendingFiles, snippets)
}

/**
Expand All @@ -760,10 +792,61 @@ class FossId internal constructor(
scanId: String
): ScanResult {
// TODO: Maybe get issues from FossID (see has_failed_scan_files, get_failed_files and maybe get_scan_log).

// TODO: Deprecation: Remove the pending files in issues. This is a breaking change.
val issues = rawResults.listPendingFiles.mapTo(mutableListOf()) {
Issue(source = name, message = "Pending identification for '$it'.", severity = Severity.HINT)
}

val snippetFindings = mutableSetOf<SnippetFinding>()
val fakeLocation = TextLocation(".", TextLocation.UNKNOWN_LINE)
snippetFindings += rawResults.listSnippets.flatMap { (file, rawSnippets) ->
val snippets = rawSnippets.map {
val license = it.artifactLicense?.let {
runCatching {
LicenseFinding.createAndMap(
it,
fakeLocation,
detectedLicenseMapping = scannerConfig.detectedLicenseMapping
).license
}.onFailure { spdxException ->
issues += FossId.createAndLogIssue(
source = "FossId",
message = "Failed to parse license '$it' as an SPDX expression:" +
" ${spdxException.collectMessages()}"
)
}.getOrNull()
} ?: SpdxConstants.NOASSERTION.toSpdx()

// FossID does not return the hash of the remote artifact: it returns instead, in the property
// "match_file_id", the MD5 hash of the matched file IN the remote artifact.
val snippetProvenance = it.url?.let { url ->
ArtifactProvenance(RemoteArtifact(url, Hash.NONE))
} ?: UnknownProvenance
val purlType = it.url?.let { url -> urlToPackageType(url, issues)?.toString() } ?: "generic"

// TODO: FossId doesn't return the line numbers of the match, only the character range. One must use
// another call "getMatchedLine" to retrieve the matched line numbers. Unfortunately, this is a call
// per snippet which is too expensive. When it is available for a batch of snippets, it can be used
// here.
Snippet(
it.score.toFloat(),
TextLocation(it.file, TextLocation.UNKNOWN_LINE),
snippetProvenance,
"pkg:$purlType/${it.author}/${it.artifact}@${it.version}",
license
)
}

val sourceLocation = TextLocation(file, TextLocation.UNKNOWN_LINE)
snippets.map {
SnippetFinding(
sourceLocation,
it
)
}
}

val ignoredFiles = rawResults.listIgnoredFiles.associateBy { it.path }

val (licenseFindings, copyrightFindings) = rawResults.markedAsIdentifiedFiles.ifEmpty {
Expand All @@ -776,6 +859,7 @@ class FossId internal constructor(
packageVerificationCode = "",
licenseFindings = licenseFindings.toSortedSet(),
copyrightFindings = copyrightFindings.toSortedSet(),
snippetFindings = snippetFindings,
issues = issues
)

Expand All @@ -786,4 +870,33 @@ class FossId internal constructor(
mapOf(SCAN_CODE_KEY to scanCode, SCAN_ID_KEY to scanId, SERVER_URL_KEY to config.serverUrl)
)
}

/**
 * Map the [PackageProvider] detected for the given [url] to the matching [PurlType].
 *
 * Return null if no provider is detected for [url]. Also return null if a provider is detected that has no mapping
 * below; only in this latter case an issue is added to [issues].
 */
private fun urlToPackageType(url: String, issues: MutableList<Issue>): PurlType? {
    // No provider known for this URL: there is nothing to map and nothing to report.
    val provider = PackageProvider.get(url) ?: return null

    return when (provider) {
        PackageProvider.COCOAPODS -> PurlType.COCOAPODS
        PackageProvider.CRATES_IO -> PurlType.CARGO
        PackageProvider.DEBIAN -> PurlType.DEBIAN
        PackageProvider.GITHUB -> PurlType.GITHUB
        PackageProvider.GITLAB -> PurlType.GITLAB
        PackageProvider.GOLANG -> PurlType.GOLANG
        PackageProvider.MAVEN_CENTRAL, PackageProvider.MAVEN_GOOGLE -> PurlType.MAVEN
        PackageProvider.NPM_JS -> PurlType.NPM
        PackageProvider.NUGET -> PurlType.NUGET
        PackageProvider.PACKAGIST -> PurlType.COMPOSER
        PackageProvider.PYPI -> PurlType.PYPI
        PackageProvider.RUBYGEMS -> PurlType.GEM

        else -> {
            // A provider was recognized, but none of the branches above maps it to a PURL type.
            issues += FossId.createAndLogIssue(
                source = "FossId",
                message = "Cannot determine PURL type for url '$url' and provider '$provider'."
            )

            null
        }
    }
}
}
4 changes: 3 additions & 1 deletion scanner/src/main/kotlin/scanners/fossid/FossIdScanResults.kt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ package org.ossreviewtoolkit.scanner.scanners.fossid
import org.ossreviewtoolkit.clients.fossid.model.identification.identifiedFiles.IdentifiedFile
import org.ossreviewtoolkit.clients.fossid.model.identification.ignored.IgnoredFile
import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.MarkedAsIdentifiedFile
import org.ossreviewtoolkit.clients.fossid.model.result.Snippet
import org.ossreviewtoolkit.clients.fossid.model.summary.Summarizable
import org.ossreviewtoolkit.model.CopyrightFinding
import org.ossreviewtoolkit.model.Issue
Expand All @@ -37,7 +38,8 @@ internal data class RawResults(
val identifiedFiles: List<IdentifiedFile>,
val markedAsIdentifiedFiles: List<MarkedAsIdentifiedFile>,
val listIgnoredFiles: List<IgnoredFile>,
val listPendingFiles: List<String>
val listPendingFiles: List<String>,
val listSnippets: Map<String, Set<Snippet>>
)

/**
Expand Down
92 changes: 90 additions & 2 deletions scanner/src/test/kotlin/scanners/fossid/FossIdTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,16 @@ import org.ossreviewtoolkit.clients.fossid.listIgnoredFiles
import org.ossreviewtoolkit.clients.fossid.listMarkedAsIdentifiedFiles
import org.ossreviewtoolkit.clients.fossid.listPendingFiles
import org.ossreviewtoolkit.clients.fossid.listScansForProject
import org.ossreviewtoolkit.clients.fossid.listSnippets
import org.ossreviewtoolkit.clients.fossid.model.Scan
import org.ossreviewtoolkit.clients.fossid.model.identification.common.LicenseMatchType
import org.ossreviewtoolkit.clients.fossid.model.identification.identifiedFiles.IdentifiedFile
import org.ossreviewtoolkit.clients.fossid.model.identification.ignored.IgnoredFile
import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.License
import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.LicenseFile
import org.ossreviewtoolkit.clients.fossid.model.identification.markedAsIdentified.MarkedAsIdentifiedFile
import org.ossreviewtoolkit.clients.fossid.model.result.MatchType
import org.ossreviewtoolkit.clients.fossid.model.result.Snippet
import org.ossreviewtoolkit.clients.fossid.model.rules.IgnoreRule
import org.ossreviewtoolkit.clients.fossid.model.rules.RuleScope
import org.ossreviewtoolkit.clients.fossid.model.rules.RuleType
Expand All @@ -78,23 +81,29 @@ import org.ossreviewtoolkit.clients.fossid.model.status.UnversionedScanDescripti
import org.ossreviewtoolkit.clients.fossid.runScan
import org.ossreviewtoolkit.downloader.VersionControlSystem
import org.ossreviewtoolkit.downloader.vcs.Git
import org.ossreviewtoolkit.model.ArtifactProvenance
import org.ossreviewtoolkit.model.CopyrightFinding
import org.ossreviewtoolkit.model.Hash
import org.ossreviewtoolkit.model.Identifier
import org.ossreviewtoolkit.model.Issue
import org.ossreviewtoolkit.model.LicenseFinding
import org.ossreviewtoolkit.model.Package
import org.ossreviewtoolkit.model.PackageType
import org.ossreviewtoolkit.model.RemoteArtifact
import org.ossreviewtoolkit.model.ScanResult
import org.ossreviewtoolkit.model.Severity
import org.ossreviewtoolkit.model.TextLocation
import org.ossreviewtoolkit.model.VcsInfo
import org.ossreviewtoolkit.model.VcsType
import org.ossreviewtoolkit.model.config.ScannerConfiguration
import org.ossreviewtoolkit.model.utils.Snippet as OrtSnippet
import org.ossreviewtoolkit.model.utils.SnippetFinding
import org.ossreviewtoolkit.scanner.ScanContext
import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.SCAN_CODE_KEY
import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.SCAN_ID_KEY
import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.SERVER_URL_KEY
import org.ossreviewtoolkit.scanner.scanners.fossid.FossId.Companion.convertGitUrlToProjectName
import org.ossreviewtoolkit.utils.spdx.SpdxExpression

@Suppress("LargeClass")
class FossIdTest : WordSpec({
Expand Down Expand Up @@ -314,6 +323,7 @@ class FossIdTest : WordSpec({
summary.licenseFindings shouldContainExactlyInAnyOrder expectedLicenseFindings
}

// TODO: Deprecation: Remove the pending files in issues. This is a breaking change.
"report pending files as issues" {
val projectCode = projectCode(PROJECT)
val scanCode = scanCode(PROJECT, null)
Expand All @@ -328,7 +338,7 @@ class FossIdTest : WordSpec({
.expectCheckScanStatus(scanCode, ScanStatus.FINISHED)
.expectCreateScan(projectCode, scanCode, vcsInfo, "")
.expectDownload(scanCode)
.mockFiles(scanCode, pendingRange = 4..5)
.mockFiles(scanCode, pendingRange = 4..5, snippetRange = 1..5)

val fossId = createFossId(config)

Expand All @@ -341,6 +351,34 @@ class FossIdTest : WordSpec({
summary.issues.map { it.copy(timestamp = Instant.EPOCH) } shouldBe expectedIssues
}

// Check that the snippets listed for pending files end up as snippet findings in the scan summary.
"report pending files as snippets" {
val projectCode = projectCode(PROJECT)
val scanCode = scanCode(PROJECT, null)
val config = createConfig(deltaScans = false)
val vcsInfo = createVcsInfo()
val scan = createScan(vcsInfo.url, "${vcsInfo.revision}_other", scanCode)
val pkgId = createIdentifier(index = 42)

// Mock five pending files. The snippet mock answers for any path, so each pending file gets the same five snippets.
FossIdRestService.create(config.serverUrl)
.expectProjectRequest(projectCode)
.expectListScans(projectCode, listOf(scan))
.expectCheckScanStatus(scanCode, ScanStatus.FINISHED)
.expectCreateScan(projectCode, scanCode, vcsInfo, "")
.expectDownload(scanCode)
.mockFiles(scanCode, pendingRange = 1..5, snippetRange = 1..5)

val fossId = createFossId(config)

val summary = fossId.scan(createPackage(pkgId, vcsInfo)).summary

// Expected values: the five pending file paths, and five snippet findings for each of them.
val expectedPendingFile = (1..5).map(::createPendingFile).toSet()
val expectedSnippetFindings = (1..5).map(::createSnippetFindings).flatten()

// 5 pending files x 5 snippets each.
summary.snippetFindings shouldHaveSize expectedPendingFile.size * 5
// Every finding must have one of the pending files as its source location.
summary.snippetFindings.map { it.sourceLocation.path }.toSet() shouldBe expectedPendingFile
// NOTE(review): the expected value is a List; presumably the actual value is a Set, so this relies on how the matcher
// compares the two collection types - confirm.
summary.snippetFindings shouldBe expectedSnippetFindings
}

"create a new project if none exists yet" {
val projectCode = projectCode(PROJECT)
val scanCode = scanCode(PROJECT, null)
Expand Down Expand Up @@ -1238,6 +1276,52 @@ private fun createIgnoredFile(index: Int): IgnoredFile =
*/
private fun createPendingFile(index: Int): String {
    // All pending files share the same fake directory; only the trailing index differs.
    return "/pending/file/$index"
}

/**
 * Create a FossID [Snippet] for tests. All constructor arguments are derived from the given [index], except for the
 * match type, which is always [MatchType.PARTIAL], and one argument fixed to "MIT" (presumably the artifact license).
 */
private fun createSnippet(index: Int): Snippet {
    // One of the string arguments is just the index rendered as text.
    val indexAsText = index.toString()

    return Snippet(
        index,
        "created$index",
        index,
        index,
        index,
        MatchType.PARTIAL,
        "reason$index",
        "author$index",
        "artifact$index",
        "version$index",
        "MIT",
        "releaseDate$index",
        "mirror$index",
        "file$index",
        "fileLicense$index",
        "url$index",
        "hits$index",
        index,
        "updated$index",
        "cpe$index",
        indexAsText,
        "matchField$index",
        "classification$index",
        "highlighting$index"
    )
}

/**
 * Build the ORT snippet findings expected for the pending file with the given [index]: five findings that share the
 * source location of that file and differ only in their snippet, which is numbered from 1 to 5.
 */
private fun createSnippetFindings(index: Int): Set<SnippetFinding> {
    // All findings of one pending file point to the same source location.
    val sourceLocation = TextLocation("/pending/file/$index", TextLocation.UNKNOWN_LINE)

    return (1..5).mapTo(mutableSetOf<SnippetFinding>()) { snippetIndex ->
        val snippet = OrtSnippet(
            snippetIndex.toFloat(),
            TextLocation("file$snippetIndex", TextLocation.UNKNOWN_LINE),
            ArtifactProvenance(RemoteArtifact("url$snippetIndex", Hash.NONE)),
            "pkg:generic/author$snippetIndex/artifact$snippetIndex@version$snippetIndex",
            SpdxExpression.parse("MIT")
        )

        SnippetFinding(sourceLocation, snippet)
    }
}

/**
* Prepare this service mock to answer a request for a project with the given [projectCode]. Return a response with
* the given [status] and [error].
Expand Down Expand Up @@ -1348,12 +1432,14 @@ private fun FossIdServiceWithVersion.mockFiles(
identifiedRange: IntRange = IntRange.EMPTY,
markedRange: IntRange = IntRange.EMPTY,
ignoredRange: IntRange = IntRange.EMPTY,
pendingRange: IntRange = IntRange.EMPTY
pendingRange: IntRange = IntRange.EMPTY,
snippetRange: IntRange = IntRange.EMPTY
): FossIdServiceWithVersion {
val identifiedFiles = identifiedRange.map(::createIdentifiedFile)
val markedFiles = markedRange.map(::createMarkedIdentifiedFile)
val ignoredFiles = ignoredRange.map(::createIgnoredFile)
val pendingFiles = pendingRange.map(::createPendingFile)
val snippets = snippetRange.map(::createSnippet)

coEvery { listIdentifiedFiles(USER, API_KEY, scanCode) } returns
PolymorphicResponseBody(
Expand All @@ -1367,6 +1453,8 @@ private fun FossIdServiceWithVersion.mockFiles(
PolymorphicResponseBody(status = 1, data = PolymorphicList(ignoredFiles))
coEvery { listPendingFiles(USER, API_KEY, scanCode) } returns
PolymorphicResponseBody(status = 1, data = PolymorphicList(pendingFiles))
coEvery { listSnippets(USER, API_KEY, scanCode, any()) } returns
PolymorphicResponseBody(status = 1, data = PolymorphicList(snippets))

return this
}
Expand Down

0 comments on commit 7932272

Please sign in to comment.