Skip to content

Commit

Permalink
ScanCode: Gracefully handle underscores in license keys
Browse files Browse the repository at this point in the history
ScanCode may return SPDX license keys containing underscore characters,
which are not allowed, see [1]. This results in ORT's scanner crashing
due to an SpdxException when it tries to parse the SPDX license key.

This issue first occurred in 2020 and was fixed by [2]. It got
re-introduced recently by [3].

Fix the issue based on the idea of the original fix [2]. The touched
function becomes less efficient. Deliberately don't refactor for
efficiency because this fix can be reverted as soon as [3] is fixed.

[1] aboutcode-org/scancode-toolkit#2813
[2] fb0370f
[3] #4523

Signed-off-by: Frank Viernau <frank.viernau@here.com>
  • Loading branch information
fviernau committed Jan 25, 2022
1 parent def4677 commit 7960410
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ private fun getLicenseFindings(result: JsonNode, parseExpressions: Boolean): Lis
*/
private fun getSpdxLicenseId(license: JsonNode): String {
// There is a bug in ScanCode 3.0.2 that returns an empty string instead of null for licenses unknown to SPDX.
val id = license["spdx_license_key"].textValueOrEmpty()
val id = license["spdx_license_key"].textValueOrEmpty().replace('_', '-')

// For regular SPDX IDs, return early here.
if (id.isNotEmpty() && !id.startsWith(LICENSE_REF_PREFIX)) return id
Expand Down
26 changes: 16 additions & 10 deletions utils/spdx/src/main/kotlin/Utils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -160,17 +160,23 @@ private fun getLicenseTextResource(id: String): URL? =

/**
 * Matches a leading "LicenseRef-<namespace>-" part of a license ID, where the namespace consists of one or more word
 * characters. The regex is created lazily on first access.
 */
private val LICENSE_REF_FILENAME_REGEX by lazy { Regex("^LicenseRef-\\w+-") }

private fun getLicenseTextFile(id: String, dir: File): File? =
id.replace(LICENSE_REF_FILENAME_REGEX, "").let { idWithoutLicenseRefNamespace ->
sequenceOf(
id,
id.removePrefix("LicenseRef-"),
idWithoutLicenseRefNamespace,
"$idWithoutLicenseRefNamespace.LICENSE"
).mapNotNull { filename ->
dir.resolve(filename).takeIf { it.isFile }
}.firstOrNull()
/**
 * Return the file in [dir] that contains the license text for the license [id], or null if there is no such file.
 *
 * The candidate file names are, in order of preference: the [id] itself, the [id] without the "LicenseRef-" prefix,
 * the [id] without the "LicenseRef-<namespace>-" prefix, and the latter with a ".LICENSE" extension. File names are
 * compared after normalizing underscores to hyphens, see [normalizeLicenseTextFilename].
 */
private fun getLicenseTextFile(id: String, dir: File): File? {
    val idWithoutLicenseRefNamespace = id.replace(LICENSE_REF_FILENAME_REGEX, "")

    // A set created by setOf() keeps insertion order, so iterating it honors the preference order documented above.
    val candidateFilenames = setOf(
        id,
        id.removePrefix("LicenseRef-"),
        idWithoutLicenseRefNamespace,
        "$idWithoutLicenseRefNamespace.LICENSE"
    )

    // File.listFiles() returns null if dir is not a directory or cannot be read; treat that like an empty directory
    // instead of failing with a NullPointerException.
    val filesByNormalizedName = dir.listFiles().orEmpty()
        .filter { it.isFile }
        .map { file -> normalizeLicenseTextFilename(file.name) to file }

    // Look up the candidates in order of preference so that the result does not depend on the (unspecified) order in
    // which the file system lists the directory entries.
    return candidateFilenames.asSequence().mapNotNull { candidate ->
        filesByNormalizedName.firstOrNull { (normalizedName, _) -> normalizedName == candidate }?.second
    }.firstOrNull()
}

/**
 * Replace underscores by hyphens in the part of [filename] before the last dot, leaving the extension untouched, to
 * work around https://github.com/nexB/scancode-toolkit/issues/2813. A [filename] without any dot is normalized as a
 * whole and does not get a trailing dot appended.
 */
private fun normalizeLicenseTextFilename(filename: String): String {
    val extensionStart = filename.lastIndexOf('.')
    if (extensionStart < 0) return filename.replace('_', '-')

    return filename.substring(0, extensionStart).replace('_', '-') + filename.substring(extensionStart)
}

/**
 * Return a list that starts with the ScanCode license text directory, if it is not null, followed by the given
 * [licenseTextDirectories], with duplicate entries removed.
 */
private fun addScanCodeLicenseTextsDir(licenseTextDirectories: List<File>): List<File> {
    val scanCodeDirs = listOfNotNull(scanCodeLicenseTextDir)
    val allDirs = scanCodeDirs + licenseTextDirectories

    return allDirs.distinct()
}

0 comments on commit 7960410

Please sign in to comment.