Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor license checker task to work with Apache Rat v0.16.1 #16121

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Bump `com.azure:azure-core-http-netty` from 1.15.3 to 1.15.4 ([#16133](https://github.com/opensearch-project/OpenSearch/pull/16133))
- Bump `org.jline:jline` from 3.26.3 to 3.27.0 ([#16135](https://github.com/opensearch-project/OpenSearch/pull/16135))
- Bump `netty` from 4.1.112.Final to 4.1.114.Final ([#16182](https://github.com/opensearch-project/OpenSearch/pull/16182))
- Bump `org.apache.rat:apache-rat` from 0.15 to 0.16.1 ([#16121](https://github.com/opensearch-project/OpenSearch/pull/16121))

### Changed
- Add support for docker compose v2 in TestFixturesPlugin ([#16049](https://github.com/opensearch-project/OpenSearch/pull/16049))
Expand Down
2 changes: 1 addition & 1 deletion buildSrc/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ dependencies {
api 'com.netflix.nebula:gradle-extra-configurations-plugin:10.0.0'
api 'com.netflix.nebula:nebula-publishing-plugin:21.0.0'
api 'com.netflix.nebula:gradle-info-plugin:12.1.6'
api 'org.apache.rat:apache-rat:0.15'
api 'org.apache.rat:apache-rat:0.16.1'
api "commons-io:commons-io:${props.getProperty('commonsio')}"
api "net.java.dev.jna:jna:5.14.0"
api 'com.github.johnrengelman:shadow:8.1.1'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,30 +28,36 @@
*/
package org.opensearch.gradle.precommit

import org.apache.rat.anttasks.Report
import org.apache.rat.anttasks.SubstringLicenseMatcher
import org.apache.rat.license.SimpleLicenseFamily
import org.apache.rat.Defaults
import org.apache.rat.ReportConfiguration
import org.apache.rat.Reporter
import org.apache.rat.analysis.matchers.SimpleTextMatcher
import org.apache.rat.anttasks.License
import org.apache.rat.anttasks.ResourceCollectionContainer
import org.apache.rat.license.ILicense
import org.apache.rat.license.ILicenseFamily
import org.apache.rat.license.ILicenseFamilyBuilder
import org.apache.rat.utils.DefaultLog
import org.apache.tools.ant.types.resources.FileResource
import org.apache.tools.ant.types.resources.Union
import org.gradle.api.tasks.OutputFile
import org.opensearch.gradle.AntTask
import org.gradle.api.file.FileCollection
import org.gradle.api.tasks.Input
import org.gradle.api.tasks.InputFiles
import org.gradle.api.tasks.IgnoreEmptyDirectories;
import org.gradle.api.tasks.OutputFile
import org.gradle.api.tasks.IgnoreEmptyDirectories
import org.gradle.api.tasks.PathSensitive
import org.gradle.api.tasks.PathSensitivity
import org.gradle.api.tasks.SkipWhenEmpty

import java.nio.file.Files

/**
* Checks files for license headers.
* <p>
* This is a port of the apache lucene check
*/
class LicenseHeadersTask extends AntTask {

@OutputFile
File reportFile = new File(project.buildDir, 'reports/licenseHeaders/rat.log')
File reportFile = new File(project.buildDir, 'reports/licenseHeaders/rat1.log')

/** Allowed license families for this project. */
@Input
Expand Down Expand Up @@ -87,6 +93,29 @@ class LicenseHeadersTask extends AntTask {
return project.sourceSets.collect({it.allJava})
}

/**
 * Builds a single Rat license definition backed by one text matcher.
 * <p>
 * A license family is created from the given category and name, registered in a
 * one-element family set, and used to build a {@link License} whose only matcher
 * is a {@link SimpleTextMatcher} for the given pattern.
 *
 * @param licenseCategory   category of the license family (e.g. "AL2", "GEN")
 * @param licenseFamilyName name of the license family
 * @param pattern           text handed to the {@link SimpleTextMatcher}
 * @return the license built against the newly created family
 */
private static ILicense generateRatLicense(String licenseCategory, String licenseFamilyName, String pattern) {
    ILicenseFamily family = new ILicenseFamilyBuilder()
        .setLicenseFamilyCategory(licenseCategory)
        .setLicenseFamilyName(licenseFamilyName)
        .build()

    // The license is resolved against the set of known families, so the set
    // has to contain the family created above.
    SortedSet<ILicenseFamily> knownFamilies = new TreeSet<ILicenseFamily>()
    knownFamilies.add(family)

    License ratLicense = new License()
    ratLicense.setName(family.getFamilyName())
    ratLicense.setFamily(family.getFamilyCategory())
    ratLicense.add(new SimpleTextMatcher(pattern))

    return ratLicense.build(knownFamilies)
}

/**
Comment on lines +96 to 119
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This configuration should be done in the XML configuration file.

* Add a new license type.
*
Expand All @@ -105,73 +134,59 @@ class LicenseHeadersTask extends AntTask {

@Override
protected void runAnt(AntBuilder ant) {
ant.project.addTaskDefinition('ratReport', Report)
ant.project.addDataTypeDefinition('substringMatcher', SubstringLicenseMatcher)
ant.project.addDataTypeDefinition('approvedLicense', SimpleLicenseFamily)

Files.deleteIfExists(reportFile.toPath())

// run rat, going to the file
ant.ratReport(reportFile: reportFile.absolutePath, addDefaultLicenseMatchers: true) {
for (FileCollection dirSet : javaFiles) {
for (File dir: dirSet.srcDirs) {
// sometimes these dirs don't exist, e.g. site-plugin has no actual java src/main...
if (dir.exists()) {
ant.fileset(dir: dir, excludes: excludes.join(' '))
}
}
}

// BSD 4-clause stuff (is disallowed below)
// we keep this here, in case someone adds BSD code for some reason, it should never be allowed.
substringMatcher(licenseFamilyCategory: "BSD4 ",
licenseFamilyName: "Original BSD License (with advertising clause)") {
pattern(substring: "All advertising materials")
}

// Apache
substringMatcher(licenseFamilyCategory: "AL ",
licenseFamilyName: "Apache") {
// Apache license (ES)
pattern(substring: "Licensed to Elasticsearch under one or more contributor")
}

List<ILicense> approvedLicenses = List.of(
// Apache 2
generateRatLicense("AL2", "Apache License Version 2.0", "Licensed to the Apache Software Foundation (ASF)"),
// Generated code from Protocol Buffer compiler
generateRatLicense("GEN", "Generated", "Generated by the protocol buffer compiler. DO NOT EDIT!"),
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I missed this in my example but you should be able to add it to the configuration file fairly easily following the example I have there.

Please submit a bug report to Rat indicating that the string "Generated by the protocol buffer compiler. DO NOT EDIT!" should be added to the list of generated text tags.

// Apache (ES)
generateRatLicense("AL", "Apache", "Licensed to Elasticsearch under one or more contributor"),
// SPDX
substringMatcher(licenseFamilyCategory: "SPDX ",
licenseFamilyName: "SPDX") {
// Apache license (OpenSearch)
pattern(substring: "SPDX-License-Identifier: Apache-2.0")
pattern(substring: "Copyright OpenSearch Contributors.")
}

generateRatLicense("SPDX", "SPDX", "SPDX-License-Identifier: Apache-2.0"),
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is covered in the standard AL license definition. See the configuration file in my long comment for an example of how this is done.

// SPDX: Apache license (OpenSearch)
generateRatLicense("SPDX-ES", "SPDX", "Copyright OpenSearch Contributors."),
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This license makes no sense. It is not a license but a copyright notice. Just placing the copyright in the file is not enough to make it a license.

// Generated resources
substringMatcher(licenseFamilyCategory: "GEN ",
licenseFamilyName: "Generated") {
// parsers generated by antlr
pattern(substring: "ANTLR GENERATED CODE")
}

// Vendored Code
substringMatcher(licenseFamilyCategory: "VEN ",
licenseFamilyName: "Vendored") {
pattern(substring: "@notice")
}

// license types added by the project
for (Map.Entry<String, String[]> additional : additionalLicenses.entrySet()) {
String category = additional.getKey().substring(0, 5)
String family = additional.getKey().substring(5)
substringMatcher(licenseFamilyCategory: category,
licenseFamilyName: family) {
pattern(substring: additional.getValue())
}
}
generateRatLicense("GEN", "Generated", "ANTLR GENERATED CODE"),
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one I have in the example. The protocol buffer generated code string should go into the same license.

// Vendored code (files carrying an "@notice" marker)
generateRatLicense("VEN", "Vendored", "@notice"),
)

// Currently Apache Rat doesn't display the header of a source file that hits a negative match,
// such as BSD4.
// Source: https://github.com/apache/creadur-rat/blob/apache-rat-project-0.16.1/apache-rat-core/src/main/resources/org/apache/rat/plain-rat.xsl#L85-L87
// Uncomment and integrate the negative matcher for BSD4 once Rat supports this.
// List<ILicense> disapprovedLicenses = List.of(
// // BSD 4-clause stuff (is disallowed below)
// // we keep this here, in case someone adds BSD code for some reason, it should never be allowed.
// generateRatLicense("BSD4", "Original BSD License (with advertising clause)", "All advertising materials"),
// )

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The above comment is incorrect. There are two ways to do this.

  1. Add BSD4 to the configuration file and DO NOT add it to the list of approved licenses.
  2. Explicitly call the configuration option and remove the id "BSD4" from the list of approved licenses.

ReportConfiguration configuration = new ReportConfiguration(DefaultLog.INSTANCE);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You probably don't want to log to the default log (std out). So create a wrapper around the Gradle logger that implements the Rat Log interface. See the example code I provided in the long comment.

configuration.setOut(reportFile)
configuration.setStyleSheet(Defaults.getPlainStyleSheet())
configuration.addLicensesIfNotPresent(approvedLicenses)
configuration.addApprovedLicenseCategories(approvedLicenses.stream().map(l -> l.getLicenseFamily().getFamilyCategory()).toList())

// License types added by the project
for (Map.Entry<String, String> additional : additionalLicenses.entrySet()) {
String category = additional.getKey().substring(0, 5)
String family = additional.getKey().substring(5)
configuration.addLicense(generateRatLicense(
category,
family,
additional.getValue(),
))
configuration.addApprovedLicenseCategory(category)
}

// approved categories
for (String licenseFamily : approvedLicenses) {
approvedLicense(familyName: licenseFamily)
Union union = new Union()
for (FileCollection dirSet : javaFiles) {
for (File file: dirSet) {
union.add(new FileResource(file))
}
}
configuration.setReportable(new ResourceCollectionContainer(union))
Reporter.report(configuration)

// check the license file for any errors, this should be fast.
boolean zeroUnknownLicenses = false
Expand All @@ -180,7 +195,6 @@ class LicenseHeadersTask extends AntTask {
if (line.startsWith("0 Unknown Licenses")) {
zeroUnknownLicenses = true
}

if (line.startsWith(" !")) {
foundProblemsWithFiles = true
}
Expand Down
Loading