Skip to content

Commit

Permalink
feat(scanner): Create and store file listings for each provenance
Browse files Browse the repository at this point in the history
Store the files as a compressed blob using `xz`, as this leads to better
results compared to `gzip` or `zip` while still being reasonably fast.
The compressed sizes of `JSON` and `YAML` files are similar. So, choose
`YAML` for better readability.

Signed-off-by: Frank Viernau <frank_viernau@epam.com>
  • Loading branch information
fviernau committed May 5, 2023
1 parent b7aac90 commit 01325dd
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,6 @@ data class FileListingStorageConfiguration(
)
}
}

/**
 * Return the receiver if it is not null, otherwise a [FileListingStorageConfiguration] created with its default
 * constructor arguments.
 */
fun FileListingStorageConfiguration?.ortDefault(): FileListingStorageConfiguration {
    if (this != null) return this

    return FileListingStorageConfiguration()
}
12 changes: 12 additions & 0 deletions scanner/src/main/kotlin/ScanController.kt
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,18 @@ class ScanController(
fun getAllProvenances(): Set<KnownProvenance> {
    // Collect into a set so that a provenance shared by several nested provenances is only contained once.
    val allProvenances = mutableSetOf<KnownProvenance>()

    nestedProvenances.values.forEach { nestedProvenance ->
        allProvenances.addAll(nestedProvenance.getProvenances())
    }

    return allProvenances
}

/**
 * Return a map from each provenance, including those of sub-repositories, to the identifiers of all packages the
 * provenance belongs to.
 */
fun getIdsByProvenance(): Map<KnownProvenance, Set<Identifier>> =
    buildMap<_, MutableSet<Identifier>> {
        for ((pkg, nestedProvenance) in getNestedProvenancesByPackage()) {
            for (provenance in nestedProvenance.getProvenances()) {
                // A provenance can be shared by several packages, so accumulate the ids per provenance.
                val ids = getOrPut(provenance) { mutableSetOf() }
                ids.add(pkg.id)
            }
        }
    }

/**
* Get all provenances for which no scan result for the provided [scanner] is available.
*/
Expand Down
45 changes: 45 additions & 0 deletions scanner/src/main/kotlin/Scanner.kt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ package org.ossreviewtoolkit.scanner
import java.io.File
import java.nio.file.StandardCopyOption
import java.time.Instant
import java.util.concurrent.atomic.AtomicInteger

import kotlin.io.path.moveTo
import kotlin.time.measureTime
Expand Down Expand Up @@ -50,19 +51,22 @@ import org.ossreviewtoolkit.model.config.DownloaderConfiguration
import org.ossreviewtoolkit.model.config.Options
import org.ossreviewtoolkit.model.config.ScannerConfiguration
import org.ossreviewtoolkit.model.config.createFileArchiver
import org.ossreviewtoolkit.model.config.ortDefault
import org.ossreviewtoolkit.model.createAndLogIssue
import org.ossreviewtoolkit.scanner.provenance.NestedProvenance
import org.ossreviewtoolkit.scanner.provenance.NestedProvenanceResolver
import org.ossreviewtoolkit.scanner.provenance.NestedProvenanceScanResult
import org.ossreviewtoolkit.scanner.provenance.PackageProvenanceResolver
import org.ossreviewtoolkit.scanner.provenance.ProvenanceDownloader
import org.ossreviewtoolkit.scanner.utils.FileListingResolver
import org.ossreviewtoolkit.utils.common.collectMessages
import org.ossreviewtoolkit.utils.common.safeDeleteRecursively
import org.ossreviewtoolkit.utils.ort.Environment
import org.ossreviewtoolkit.utils.ort.showStackTrace

const val TOOL_NAME = "scanner"

@Suppress("TooManyFunctions")
class Scanner(
val scannerConfig: ScannerConfiguration,
val downloaderConfig: DownloaderConfiguration,
Expand Down Expand Up @@ -92,6 +96,11 @@ class Scanner(

private val archiver = scannerConfig.archive.createFileArchiver()

// Resolver used to look up and create file listings per provenance. Its storage is created from the scanner
// configuration's file listing storage settings, with ortDefault() supplying a fallback configuration if none is set.
// NOTE(review): the provenance downloader is presumably used to fetch the sources a listing is created from - confirm
// against FileListingResolver.
private val fileListingResolver = FileListingResolver(
storage = scannerConfig.fileListingStorage.ortDefault().createStorage(),
provenanceDownloader = provenanceDownloader
)

suspend fun scan(ortResult: OrtResult, skipExcluded: Boolean, labels: Map<String, String>): OrtResult {
val startTime = Instant.now()

Expand Down Expand Up @@ -163,6 +172,7 @@ class Scanner(
runProvenanceScanners(controller, context)
runPathScanners(controller, context)

createMissingFileListings(controller)
createMissingArchives(controller)

val results = controller.getNestedScanResultsByPackage().entries.associateTo(sortedMapOf()) {
Expand Down Expand Up @@ -577,6 +587,41 @@ class Scanner(
}
}

/**
 * Create file listings for all provenances known to the [controller] for which the [fileListingResolver] does not
 * have a file listing yet. If creating a file listing fails, an error issue is added to the [controller] for every
 * package identifier associated with the affected provenance, and processing continues with the other provenances.
 */
private fun createMissingFileListings(controller: ScanController) {
    val idsByProvenance = controller.getIdsByProvenance()
    val provenancesMissingFileListings = idsByProvenance.keys.filterNot { fileListingResolver.has(it) }
    val total = provenancesMissingFileListings.size

    logger.info { "Creating file listings for $total provenances." }

    val duration = measureTime {
        // The provenances are processed by a parallel stream, so an atomic counter is used to number the log messages.
        val previousIndex = AtomicInteger(0)

        provenancesMissingFileListings.parallelStream().forEach { provenance ->
            val index = previousIndex.incrementAndGet()

            logger.info { "Creating file listing for provenance $index of $total." }

            // NOTE(review): addIssue() is invoked from the worker threads of the parallel stream; confirm that
            // ScanController.addIssue() is safe to call concurrently.
            kotlin.runCatching {
                fileListingResolver.resolve(provenance)
            }.onFailure { exception ->
                // Report the failure for every package that shares this provenance.
                idsByProvenance.getValue(provenance).forEach { id ->
                    controller.addIssue(
                        id,
                        Issue(
                            source = "Downloader",
                            message = "Could not create file listing for " +
                                "'${id.toCoordinates()}': ${exception.collectMessages()}",
                            severity = Severity.ERROR
                        )
                    )
                }
            }
        }
    }

    // Use the lazy lambda form like the other log statements so the message is only built if info logging is enabled.
    logger.info { "Created file listing for $total provenances in $duration." }
}

private fun createMissingArchives(controller: ScanController) {
// TODO: The archives are currently created in a way compatible with the existing implementation in the
// PathScanner. This allows to keep using existing file archives without changing the logic used to
Expand Down

0 comments on commit 01325dd

Please sign in to comment.