Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scanner: Create and store file listings for each resolved provenance #6970

Merged
merged 6 commits into from
May 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions integrations/schemas/ort-configuration-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,9 @@
"detectedLicenseMapping": {
"$ref": "#/definitions/DetectedLicenseMapping"
},
"fileListingStorage": {
"$ref": "#/definitions/FileListingStorage"
},
"options": {
"$ref": "#/definitions/ScannerOptions"
},
Expand Down Expand Up @@ -336,6 +339,18 @@
}
}
},
"FileListingStorage": {
"type": "object",
"additionalProperties": false,
"properties": {
"fileStorage": {
"$ref": "#/definitions/FileStorage"
},
"postgresStorage": {
"$ref": "#/definitions/PostgresConfig"
}
}
},
"FileStorage": {
"type": "object",
"additionalProperties": false,
Expand Down
75 changes: 75 additions & 0 deletions model/src/main/kotlin/config/FileListingStorageConfiguration.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
fviernau marked this conversation as resolved.
Show resolved Hide resolved
* Copyright (C) 2023 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
* License-Filename: LICENSE
*/

package org.ossreviewtoolkit.model.config

import org.apache.logging.log4j.kotlin.Logging

import org.ossreviewtoolkit.model.utils.DatabaseUtils
import org.ossreviewtoolkit.model.utils.FileProvenanceFileStorage
import org.ossreviewtoolkit.model.utils.PostgresProvenanceFileStorage
import org.ossreviewtoolkit.model.utils.ProvenanceFileStorage
import org.ossreviewtoolkit.utils.ort.ortDataDirectory
import org.ossreviewtoolkit.utils.ort.storage.FileStorage
import org.ossreviewtoolkit.utils.ort.storage.LocalFileStorage

/** The table name passed to [PostgresProvenanceFileStorage] when file listings are stored via 'postgresStorage'. */
private const val TABLE_NAME = "provenance_file_listings"
/** The file name passed to [FileProvenanceFileStorage] when file listings are stored in a file storage. */
private const val FILENAME = "file_listings.xz"

/**
 * The configuration of the storage used for file listings. If both [fileStorage] and [postgresStorage] are set, a
 * warning is logged when the configuration is constructed.
 */
data class FileListingStorageConfiguration(
    /**
     * The configuration of the [FileStorage] in which the file listings are stored.
     */
    val fileStorage: FileStorageConfiguration? = null,

    /**
     * The configuration of the [PostgresProvenanceFileStorage] in which the file listings are stored.
     */
    val postgresStorage: PostgresStorageConfiguration? = null
) {
    private companion object : Logging

    init {
        // Only a single storage can be used, so more than one configured storage is worth a warning.
        val configuredStorages = listOfNotNull(fileStorage, postgresStorage)

        if (configuredStorages.size > 1) {
            FileListingStorageConfiguration.logger.warn {
                "'fileStorage' and 'postgresStorage' are both configured but only one storage can be used. " +
                    "Using 'fileStorage'."
            }
        }
    }
}

/**
 * Create the [ProvenanceFileStorage] described by this configuration. A configured 'fileStorage' takes precedence
 * over a configured 'postgresStorage'. If neither is configured, or if the receiver is null, a [LocalFileStorage]
 * located at "scanner/file-listings" below the [ortDataDirectory] is used.
 */
fun FileListingStorageConfiguration?.createStorage(): ProvenanceFileStorage {
    val fileStorageConfig = this?.fileStorage
    if (fileStorageConfig != null) {
        return FileProvenanceFileStorage(
            storage = fileStorageConfig.createFileStorage(),
            filename = FILENAME
        )
    }

    val postgresStorageConfig = this?.postgresStorage
    if (postgresStorageConfig != null) {
        val dataSource = DatabaseUtils.createHikariDataSource(
            config = postgresStorageConfig.connection,
            applicationNameSuffix = "file-listings"
        )

        return PostgresProvenanceFileStorage(dataSource = dataSource, tableName = TABLE_NAME)
    }

    // Nothing is configured, so fall back to a local directory below the ORT data directory.
    val defaultDirectory = ortDataDirectory.resolve("scanner/file-listings")

    return FileProvenanceFileStorage(
        storage = LocalFileStorage(defaultDirectory),
        filename = FILENAME
    )
}
5 changes: 5 additions & 0 deletions model/src/main/kotlin/config/ScannerConfiguration.kt
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ data class ScannerConfiguration(
"LicenseRef-scancode-unknown-spdx" to SpdxConstants.NOASSERTION
),

/**
* The storage used to store the file listings by provenance.
*/
val fileListingStorage: FileListingStorageConfiguration? = null,

/**
* Scanner specific configuration options. The key needs to match the name of the scanner class, e.g. "ScanCode"
* for the ScanCode wrapper. See the documentation of the scanner for available options.
Expand Down
18 changes: 18 additions & 0 deletions model/src/main/resources/reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,24 @@ ort:
BSD (Three Clause License): 'BSD-3-clause'
LicenseRef-scancode-generic-cla: 'NOASSERTION'

fileListingStorage:
fileStorage:
localFileStorage:
directory: ~/.ort/scanner/provenance-file-listings
compression: false

postgresStorage:
connection:
url: 'jdbc:postgresql://your-postgresql-server:5444/your-database'
schema: public
username: username
password: password
sslmode: required
sslcert: /defaultdir/postgresql.crt
sslkey: /defaultdir/postgresql.pk8
sslrootcert: /defaultdir/root.crt
parallelTransactions: 5

options:
# A map of maps from scanner class names to scanner-specific key-value pairs.
ScanCode:
Expand Down
24 changes: 24 additions & 0 deletions model/src/test/kotlin/config/OrtConfigurationTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,30 @@ class OrtConfigurationTest : WordSpec({
"LicenseRef-scancode-generic-cla" to "NOASSERTION"
)

fileListingStorage shouldNotBeNull {
fileStorage shouldNotBeNull {
httpFileStorage should beNull()
localFileStorage shouldNotBeNull {
directory shouldBe File("~/.ort/scanner/provenance-file-listings")
}
}

postgresStorage shouldNotBeNull {
with(connection) {
url shouldBe "jdbc:postgresql://your-postgresql-server:5444/your-database"
schema shouldBe "public"
username shouldBe "username"
password shouldBe "password"
sslmode shouldBe "required"
sslcert shouldBe "/defaultdir/postgresql.crt"
sslkey shouldBe "/defaultdir/postgresql.pk8"
sslrootcert shouldBe "/defaultdir/root.crt"
}

type shouldBe StorageType.PROVENANCE_BASED
}
}

options shouldNotBeNull {
get("ScanCode") shouldNotBeNull {
this shouldContainExactly mapOf(
Expand Down
64 changes: 64 additions & 0 deletions scanner/src/main/kotlin/FileListing.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
fviernau marked this conversation as resolved.
Show resolved Hide resolved
* Copyright (C) 2023 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
* License-Filename: LICENSE
*/

package org.ossreviewtoolkit.scanner

import com.fasterxml.jackson.databind.annotation.JsonSerialize
import com.fasterxml.jackson.databind.util.StdConverter

import java.util.SortedSet

import org.ossreviewtoolkit.scanner.FileListing.FileEntry
import org.ossreviewtoolkit.utils.common.StringSortedSetConverter
import org.ossreviewtoolkit.utils.common.getDuplicates

/**
* The model to store a file listing for a resolved provenance.
*/
fviernau marked this conversation as resolved.
Show resolved Hide resolved
internal data class FileListing(
    /**
     * The glob expressions that were used to match the directories which are excluded from this file listing.
     */
    @JsonSerialize(converter = StringSortedSetConverter::class)
    val ignorePatterns: Set<String>,

    /**
     * The files contained in the resolved provenance, without the files located in a directory that is ignored by
     * [ignorePatterns].
     */
    @JsonSerialize(converter = FileEntrySortedSetConverter::class)
    val files: Set<FileEntry>
) {
    init {
        // Every path may occur only once, so reject entries that share a path.
        val duplicatePaths = files.getDuplicates { entry -> entry.path }.keys

        require(duplicatePaths.isEmpty()) {
            "The file listing contains duplicate paths which is not allowed: ${duplicatePaths.joinToString()}."
        }
    }

    /**
     * A single entry of a [FileListing], consisting of the [path] of a file and the [sha1] value stored for it.
     */
    data class FileEntry(
        val path: String,
        val sha1: String
    )
}

/**
 * A Jackson converter that turns a set of [FileEntry]s into a [SortedSet] which is ordered by the path of the entries.
 */
private class FileEntrySortedSetConverter : StdConverter<Set<FileEntry>, SortedSet<FileEntry>>() {
    override fun convert(value: Set<FileEntry>): SortedSet<FileEntry> {
        val byPath = compareBy(FileEntry::path)
        return value.toSortedSet(byPath)
    }
}
12 changes: 12 additions & 0 deletions scanner/src/main/kotlin/ScanController.kt
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,18 @@ class ScanController(
fun getAllProvenances(): Set<KnownProvenance> =
nestedProvenances.values.flatMapTo(mutableSetOf()) { it.getProvenances() }

/**
 * Return a map from each provenance, including the provenances of sub-repositories, to the identifiers of all
 * packages whose nested provenance contains it.
 */
fun getIdsByProvenance(): Map<KnownProvenance, Set<Identifier>> {
    val idsByProvenance = mutableMapOf<KnownProvenance, MutableSet<Identifier>>()

    for ((pkg, nestedProvenance) in getNestedProvenancesByPackage()) {
        for (provenance in nestedProvenance.getProvenances()) {
            // A provenance can be shared by several packages, so collect all their ids in one set.
            idsByProvenance.getOrPut(provenance) { mutableSetOf() } += pkg.id
        }
    }

    return idsByProvenance
}
mnonnenmacher marked this conversation as resolved.
Show resolved Hide resolved

/**
* Get all provenances for which no scan result for the provided [scanner] is available.
*/
Expand Down
52 changes: 47 additions & 5 deletions scanner/src/main/kotlin/Scanner.kt
Original file line number Diff line number Diff line change
Expand Up @@ -45,24 +45,26 @@ import org.ossreviewtoolkit.model.RepositoryProvenance
import org.ossreviewtoolkit.model.ScanResult
import org.ossreviewtoolkit.model.ScanSummary
import org.ossreviewtoolkit.model.ScannerRun
import org.ossreviewtoolkit.model.Severity
import org.ossreviewtoolkit.model.config.DownloaderConfiguration
import org.ossreviewtoolkit.model.config.Options
import org.ossreviewtoolkit.model.config.ScannerConfiguration
import org.ossreviewtoolkit.model.config.createFileArchiver
import org.ossreviewtoolkit.model.config.createStorage
import org.ossreviewtoolkit.model.createAndLogIssue
import org.ossreviewtoolkit.scanner.provenance.NestedProvenance
import org.ossreviewtoolkit.scanner.provenance.NestedProvenanceResolver
import org.ossreviewtoolkit.scanner.provenance.NestedProvenanceScanResult
import org.ossreviewtoolkit.scanner.provenance.PackageProvenanceResolver
import org.ossreviewtoolkit.scanner.provenance.ProvenanceDownloader
import org.ossreviewtoolkit.scanner.utils.FileListingResolver
import org.ossreviewtoolkit.utils.common.collectMessages
import org.ossreviewtoolkit.utils.common.safeDeleteRecursively
import org.ossreviewtoolkit.utils.ort.Environment
import org.ossreviewtoolkit.utils.ort.showStackTrace

const val TOOL_NAME = "scanner"

@Suppress("TooManyFunctions")
class Scanner(
val scannerConfig: ScannerConfiguration,
val downloaderConfig: DownloaderConfiguration,
Expand Down Expand Up @@ -92,6 +94,11 @@ class Scanner(

private val archiver = scannerConfig.archive.createFileArchiver()

private val fileListingResolver = FileListingResolver(
storage = scannerConfig.fileListingStorage.createStorage(),
provenanceDownloader = provenanceDownloader
)

suspend fun scan(ortResult: OrtResult, skipExcluded: Boolean, labels: Map<String, String>): OrtResult {
val startTime = Instant.now()

Expand Down Expand Up @@ -163,6 +170,7 @@ class Scanner(
runProvenanceScanners(controller, context)
runPathScanners(controller, context)

createMissingFileListings(controller)
createMissingArchives(controller)

val results = controller.getNestedScanResultsByPackage().entries.associateTo(sortedMapOf()) {
Expand Down Expand Up @@ -192,7 +200,7 @@ class Scanner(
}.onFailure {
controller.addProvenanceResolutionIssue(
pkg.id,
Issue(source = TOOL_NAME, severity = Severity.ERROR, message = it.collectMessages())
Issue(source = TOOL_NAME, message = it.collectMessages())
sschuberth marked this conversation as resolved.
Show resolved Hide resolved
)
}
}
Expand Down Expand Up @@ -222,7 +230,6 @@ class Scanner(
id,
Issue(
source = TOOL_NAME,
severity = Severity.ERROR,
message = "Could not resolve nested provenance for package " +
"'${id.toCoordinates()}': ${it.collectMessages()}"
)
Expand Down Expand Up @@ -577,6 +584,42 @@ class Scanner(
}
}

/**
 * Create the file listings for all provenances known to the [controller] for which the file listing resolver does not
 * have a file listing yet. The provenances are processed concurrently on [Dispatchers.IO], with one coroutine per
 * provenance.
 *
 * If creating a file listing fails, an issue is added to the [controller] for every package identifier associated with
 * the affected provenance, and the remaining provenances are still processed. Coroutine cancellation is not treated as
 * such a failure but is propagated to the caller.
 */
private suspend fun createMissingFileListings(controller: ScanController) {
    val idsByProvenance = controller.getIdsByProvenance()
    val provenancesMissingFileListings = idsByProvenance.keys.filterNot { fileListingResolver.has(it) }

    logger.info { "Creating file listings for ${provenancesMissingFileListings.size} provenances." }

    val duration = measureTime {
        withContext(Dispatchers.IO) {
            provenancesMissingFileListings.mapIndexed { index, provenance ->
                async {
                    logger.info {
                        // The index is zero-based, so add one to report progress as "1 of N" up to "N of N".
                        "Creating file listing for provenance ${index + 1} of " +
                            "${provenancesMissingFileListings.size}."
                    }

                    runCatching {
                        fileListingResolver.resolve(provenance)
                    }.onFailure { error ->
                        // 'runCatching' also catches cancellation. Rethrow it so that cancelling the scan is not
                        // reported as a file listing issue and structured concurrency keeps working.
                        if (error is kotlin.coroutines.cancellation.CancellationException) throw error

                        idsByProvenance.getValue(provenance).forEach { id ->
                            controller.addIssue(
                                id,
                                Issue(
                                    source = "Downloader",
                                    message = "Could not create file listing for " +
                                        "'${id.toCoordinates()}': ${error.collectMessages()}"
                                )
                            )
                        }
                    }
                }
            }.awaitAll()
        }
    }

    logger.info { "Created file listing for ${provenancesMissingFileListings.size} provenances in $duration." }
}

private fun createMissingArchives(controller: ScanController) {
// TODO: The archives are currently created in a way compatible with the existing implementation in the
// PathScanner. This allows to keep using existing file archives without changing the logic used to
Expand Down Expand Up @@ -610,8 +653,7 @@ class Scanner(
Issue(
source = "Downloader",
message = "Could not create file archive for " +
"'${pkg.id.toCoordinates()}': ${it.collectMessages()}",
severity = Severity.ERROR
"'${pkg.id.toCoordinates()}': ${it.collectMessages()}"
)
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ import org.ossreviewtoolkit.utils.ort.createOrtTempDir
/**
* An interface that provides functionality to download source code.
*/
interface ProvenanceDownloader {
fun interface ProvenanceDownloader {
fviernau marked this conversation as resolved.
Show resolved Hide resolved
/**
* Download the source code specified by the provided [provenance] and return the path to the directory that
* contains the downloaded source code.
Expand Down
Loading