Skip to content

Commit

Permalink
feat(scanner): Add support for ScanCode output format version 3
Browse files Browse the repository at this point in the history
While in previous output formats the primary elements for license
entries were single license keys (which needed to be grouped into
expressions), starting with output format version 3 the primary license
entries are expressions.

Resolves #6617.

Signed-off-by: Sebastian Schuberth <sschuberth@gmail.com>
  • Loading branch information
sschuberth committed Sep 1, 2023
1 parent e35f94d commit 606bd5f
Show file tree
Hide file tree
Showing 4 changed files with 820 additions and 25 deletions.
69 changes: 55 additions & 14 deletions plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModel.kt
Original file line number Diff line number Diff line change
Expand Up @@ -49,25 +49,66 @@ data class Options(
val input: List<String>
)

@Serializable
data class FileEntry(
val path: String,
val type: String,
val licenses: List<LicenseEntry>,
val copyrights: List<CopyrightEntry>,
sealed interface FileEntry {
val path: String
val type: String
val licenses: List<LicenseEntry>
val copyrights: List<CopyrightEntry>
val scanErrors: List<String>
)

/**
 * A [FileEntry] whose license entries are provided directly via the [licenses] constructor property. The result
 * parser registers this class as the default [FileEntry] deserializer for output formats without a version and for
 * the major versions 1 and 2.
 */
@Serializable
data class Version1(
override val path: String,
override val type: String,
override val licenses: List<LicenseEntry>,
override val copyrights: List<CopyrightEntry>,
override val scanErrors: List<String>
) : FileEntry

/**
 * A [FileEntry] that carries [licenseDetections] instead of a flat list of license entries. The result parser
 * registers this class as the default [FileEntry] deserializer for all major versions other than 1 and 2.
 */
@Serializable
data class Version3(
override val path: String,
override val type: String,
val licenseDetections: List<LicenseDetection>,
override val copyrights: List<CopyrightEntry>,
override val scanErrors: List<String>
) : FileEntry {
// Flatten the matches of all detections so that the license entries are available via the common interface property.
override val licenses = licenseDetections.flatMap { it.matches }
}
}

@Serializable
data class LicenseEntry(
val key: String,
val score: Float,
val spdxLicenseKey: String? = null, // This might be explicitly set to null in JSON.
val startLine: Int,
val endLine: Int,
val matchedRule: LicenseRule
data class LicenseDetection(
val matches: List<LicenseEntry>
)

/**
 * The properties of a license entry that are available independently of the output format version: the
 * [licenseExpression], the [startLine] and [endLine] of the match, and its [score].
 */
sealed interface LicenseEntry {
val licenseExpression: String
val startLine: Int
val endLine: Int
val score: Float

/**
 * A license entry that is identified by a single license [key]. It has no license expression property of its own
 * in the constructor; instead [licenseExpression] is taken from the [matchedRule].
 */
@Serializable
data class Version1(
val key: String,
override val score: Float,
val spdxLicenseKey: String? = null, // This might be explicitly set to null in JSON.
override val startLine: Int,
override val endLine: Int,
val matchedRule: LicenseRule
) : LicenseEntry {
// Expose the expression of the matched rule via the common interface property.
override val licenseExpression = matchedRule.licenseExpression
}

/**
 * A license entry that carries its [licenseExpression] directly as a constructor property.
 */
@Serializable
data class Version3(
override val score: Float,
override val startLine: Int,
override val endLine: Int,
override val licenseExpression: String
) : LicenseEntry
}

@Serializable
data class LicenseRule(
val licenseExpression: String
Expand Down
27 changes: 19 additions & 8 deletions plugins/scanners/scancode/src/main/kotlin/ScanCodeResultParser.kt
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ import org.ossreviewtoolkit.utils.spdx.toSpdxId

import org.semver4j.Semver

const val MAX_SUPPORTED_OUTPUT_FORMAT_MAJOR_VERSION = 2
const val MAX_SUPPORTED_OUTPUT_FORMAT_MAJOR_VERSION = 3

private val LICENSE_REF_PREFIX_SCAN_CODE = "${SpdxConstants.LICENSE_REF_PREFIX}${ScanCode.SCANNER_NAME.lowercase()}-"
private val TIMESTAMP_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HHmmss.n").withZone(ZoneId.of("UTC"))
Expand All @@ -65,10 +65,20 @@ fun parseResult(result: JsonElement): ScanCodeResult {
// Select the correct set of (de-)serializers bundled in a module for parsing the respective format version.
val module = when (outputFormatVersion?.major) {
null, 1 -> SerializersModule {
polymorphicDefaultDeserializer(FileEntry::class) { FileEntry.Version1.serializer() }
polymorphicDefaultDeserializer(LicenseEntry::class) { LicenseEntry.Version1.serializer() }
polymorphicDefaultDeserializer(CopyrightEntry::class) { CopyrightEntry.Version1.serializer() }
}

2 -> SerializersModule {
polymorphicDefaultDeserializer(FileEntry::class) { FileEntry.Version1.serializer() }
polymorphicDefaultDeserializer(LicenseEntry::class) { LicenseEntry.Version1.serializer() }
polymorphicDefaultDeserializer(CopyrightEntry::class) { CopyrightEntry.Version2.serializer() }
}

else -> SerializersModule {
polymorphicDefaultDeserializer(FileEntry::class) { FileEntry.Version3.serializer() }
polymorphicDefaultDeserializer(LicenseEntry::class) { LicenseEntry.Version3.serializer() }
polymorphicDefaultDeserializer(CopyrightEntry::class) { CopyrightEntry.Version2.serializer() }
}
}
Expand Down Expand Up @@ -119,25 +129,26 @@ fun ScanCodeResult.toScanSummary(): ScanSummary {
val filesOfTypeFile = files.filter { it.type == "file" }

// Build a map of all ScanCode license keys in the result associated with their corresponding SPDX ID.
val scanCodeKeyToSpdxIdMappings = files.flatMap { file ->
file.licenses.map { license ->
license.key to getSpdxId(license.spdxLicenseKey, license.key)
}
}.toMap()
val scanCodeKeyToSpdxIdMappings = licenseReferences?.associate { it.key to it.spdxLicenseKey }
?: files.flatMap { file ->
file.licenses.filterIsInstance<LicenseEntry.Version1>().map { license ->
license.key to getSpdxId(license.spdxLicenseKey, license.key)
}
}.toMap()

filesOfTypeFile.forEach { file ->
// ScanCode creates separate license entries for each license in an expression. Deduplicate these by grouping by
// the same expression.
val licenses = file.licenses.groupBy {
LicenseMatch(it.matchedRule.licenseExpression, it.startLine, it.endLine, it.score)
LicenseMatch(it.licenseExpression, it.startLine, it.endLine, it.score)
}.map {
// Arbitrarily take the first of the duplicate license entries.
it.value.first()
}

licenses.mapTo(licenseFindings) { license ->
// ScanCode uses its own license keys as identifiers in license expressions.
val spdxLicenseExpression = license.matchedRule.licenseExpression.mapLicense(scanCodeKeyToSpdxIdMappings)
val spdxLicenseExpression = license.licenseExpression.mapLicense(scanCodeKeyToSpdxIdMappings)

LicenseFinding(
license = spdxLicenseExpression,
Expand Down
Loading

0 comments on commit 606bd5f

Please sign in to comment.