Skip to content

Commit

Permalink
feat(scanner): Create and store file listings for each provenance
Browse files Browse the repository at this point in the history
Store each file listing as a compressed blob using `xz`, as this leads to
better results compared to `gzip` or `zip` while still being reasonably fast.
The compressed sizes of `JSON` and `YAML` files are similar, so choose
`YAML` for better readability.

Signed-off-by: Frank Viernau <frank_viernau@epam.com>
  • Loading branch information
fviernau committed May 8, 2023
1 parent ad7e861 commit 21f4cbb
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,6 @@ data class FileListingStorageConfiguration(
)
}
}

/**
 * Return this configuration if it is not null, or a [FileListingStorageConfiguration] created with its default
 * constructor arguments otherwise.
 */
fun FileListingStorageConfiguration?.ortDefault(): FileListingStorageConfiguration {
    if (this != null) return this

    return FileListingStorageConfiguration()
}
12 changes: 12 additions & 0 deletions scanner/src/main/kotlin/ScanController.kt
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,18 @@ class ScanController(
fun getAllProvenances(): Set<KnownProvenance> {
    // Collect into a set so that a provenance contained in several nested provenances is returned only once.
    val allProvenances = mutableSetOf<KnownProvenance>()

    for (nestedProvenance in nestedProvenances.values) {
        allProvenances += nestedProvenance.getProvenances()
    }

    return allProvenances
}

/**
 * Map every provenance, sub-repositories included, to the identifiers of all packages it belongs to.
 */
fun getIdsByProvenance(): Map<KnownProvenance, Set<Identifier>> {
    val idsByProvenance = mutableMapOf<KnownProvenance, MutableSet<Identifier>>()

    for ((pkg, nestedProvenance) in getNestedProvenancesByPackage()) {
        for (provenance in nestedProvenance.getProvenances()) {
            // Several packages may share a provenance, so accumulate the identifiers per provenance.
            idsByProvenance.getOrPut(provenance) { mutableSetOf() } += pkg.id
        }
    }

    return idsByProvenance
}

/**
* Get all provenances for which no scan result for the provided [scanner] is available.
*/
Expand Down
48 changes: 47 additions & 1 deletion scanner/src/main/kotlin/Scanner.kt
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,22 @@ import org.ossreviewtoolkit.model.config.DownloaderConfiguration
import org.ossreviewtoolkit.model.config.Options
import org.ossreviewtoolkit.model.config.ScannerConfiguration
import org.ossreviewtoolkit.model.config.createFileArchiver
import org.ossreviewtoolkit.model.config.ortDefault
import org.ossreviewtoolkit.model.createAndLogIssue
import org.ossreviewtoolkit.scanner.provenance.NestedProvenance
import org.ossreviewtoolkit.scanner.provenance.NestedProvenanceResolver
import org.ossreviewtoolkit.scanner.provenance.NestedProvenanceScanResult
import org.ossreviewtoolkit.scanner.provenance.PackageProvenanceResolver
import org.ossreviewtoolkit.scanner.provenance.ProvenanceDownloader
import org.ossreviewtoolkit.scanner.utils.FileListingResolver
import org.ossreviewtoolkit.utils.common.collectMessages
import org.ossreviewtoolkit.utils.common.safeDeleteRecursively
import org.ossreviewtoolkit.utils.ort.Environment
import org.ossreviewtoolkit.utils.ort.showStackTrace

// Name of this tool; used as the `source` of the provenance resolution issues created by the scanner.
const val TOOL_NAME = "scanner"

@Suppress("TooManyFunctions")
class Scanner(
val scannerConfig: ScannerConfiguration,
val downloaderConfig: DownloaderConfiguration,
Expand Down Expand Up @@ -92,6 +95,11 @@ class Scanner(

// File archiver created from the `archive` section of the scanner configuration.
private val archiver = scannerConfig.archive.createFileArchiver()

// Resolver for file listings. It uses the storage created from the configured file listing storage, falling back
// to the default file listing storage configuration if none is configured.
private val fileListingResolver = scannerConfig.fileListingStorage.ortDefault().createStorage().let { storage ->
    FileListingResolver(storage = storage, provenanceDownloader = provenanceDownloader)
}

suspend fun scan(ortResult: OrtResult, skipExcluded: Boolean, labels: Map<String, String>): OrtResult {
val startTime = Instant.now()

Expand Down Expand Up @@ -163,6 +171,7 @@ class Scanner(
runProvenanceScanners(controller, context)
runPathScanners(controller, context)

createMissingFileListings(controller)
createMissingArchives(controller)

val results = controller.getNestedScanResultsByPackage().entries.associateTo(sortedMapOf()) {
Expand Down Expand Up @@ -192,7 +201,7 @@ class Scanner(
}.onFailure {
controller.addProvenanceResolutionIssue(
pkg.id,
Issue(source = TOOL_NAME, severity = Severity.ERROR, message = it.collectMessages())
Issue(source = TOOL_NAME, message = it.collectMessages())
)
}
}
Expand Down Expand Up @@ -577,6 +586,43 @@ class Scanner(
}
}

/**
 * Create file listings for all provenances known to the [controller] for which [fileListingResolver] does not have
 * one yet. The listings are resolved concurrently on [Dispatchers.IO]. If resolving a listing fails, an error issue
 * is added to the [controller] for every package identifier associated with the affected provenance, and the
 * remaining listings are still created.
 */
private suspend fun createMissingFileListings(controller: ScanController) {
    val idsByProvenance = controller.getIdsByProvenance()
    val provenancesMissingFileListings = idsByProvenance.keys.filterNot { fileListingResolver.has(it) }

    logger.info { "Creating file listings for ${provenancesMissingFileListings.size} provenances." }

    val duration = measureTime {
        withContext(Dispatchers.IO) {
            provenancesMissingFileListings.mapIndexed { index, provenance ->
                // The index from mapIndexed() is zero-based, so shift it by one to log "1 of N" up to "N of N".
                val position = index + 1

                async {
                    logger.info {
                        "Creating file listing for provenance $position of ${provenancesMissingFileListings.size}."
                    }

                    // NOTE(review): runCatching() also captures CancellationException; confirm whether cancellation
                    // should be rethrown here instead of being reported as an issue.
                    runCatching {
                        fileListingResolver.resolve(provenance)
                    }.onFailure {
                        // A provenance can belong to several packages, so report the failure for each of them.
                        idsByProvenance.getValue(provenance).forEach { id ->
                            controller.addIssue(
                                id,
                                Issue(
                                    source = "Downloader",
                                    message = "Could not create file listing for " +
                                        "'${id.toCoordinates()}': ${it.collectMessages()}",
                                    severity = Severity.ERROR
                                )
                            )
                        }
                    }
                }
            }.awaitAll()
        }
    }

    logger.info { "Created file listing for ${provenancesMissingFileListings.size} provenances in $duration." }
}

private fun createMissingArchives(controller: ScanController) {
// TODO: The archives are currently created in a way compatible with the existing implementation in the
// PathScanner. This allows to keep using existing file archives without changing the logic used to
Expand Down
2 changes: 2 additions & 0 deletions scanner/src/main/kotlin/utils/FileListingResolver.kt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ internal class FileListingResolver(
createFileListing(dir).also { storage.writeFileListing(provenance, it) }
}
}

/**
 * Return whether the [storage] already has a file for the given [provenance].
 */
fun has(provenance: KnownProvenance): Boolean {
    return storage.hasFile(provenance)
}
}

private fun ProvenanceFileStorage.writeFileListing(provenance: KnownProvenance, fileListing: FileListing) {
Expand Down

0 comments on commit 21f4cbb

Please sign in to comment.