From b7295b79de63177d59576c1ea44d7d1ff61c5877 Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Thu, 14 Apr 2022 15:07:23 -0400 Subject: [PATCH] Ensure SPDXIDs are valid (#955) --- internal/formats/common/spdxhelpers/spdxid.go | 13 ++++++ .../formats/common/spdxhelpers/spdxid_test.go | 39 ++++++++++++++++++ .../formats/spdx22json/model/element_id.go | 30 ++------------ .../formats/spdx22tagvalue/encoder_test.go | 29 ++++++++++++++ .../snapshot/TestSPDXJSONSPDXIDs.golden | 40 +++++++++++++++++++ .../formats/spdx22tagvalue/to_format_model.go | 2 +- 6 files changed, 125 insertions(+), 28 deletions(-) create mode 100644 internal/formats/common/spdxhelpers/spdxid.go create mode 100644 internal/formats/common/spdxhelpers/spdxid_test.go create mode 100644 internal/formats/spdx22tagvalue/test-fixtures/snapshot/TestSPDXJSONSPDXIDs.golden diff --git a/internal/formats/common/spdxhelpers/spdxid.go b/internal/formats/common/spdxhelpers/spdxid.go new file mode 100644 index 00000000000..f7a2180c98f --- /dev/null +++ b/internal/formats/common/spdxhelpers/spdxid.go @@ -0,0 +1,13 @@ +package spdxhelpers + +import ( + "regexp" +) + +var expr = regexp.MustCompile("[^a-zA-Z0-9.-]") + +// SPDX spec says SPDXID must be: +// "SPDXRef-"[idstring] where [idstring] is a unique string containing letters, numbers, ., and/or - +func SanitizeElementID(id string) string { + return expr.ReplaceAllString(id, "-") +} diff --git a/internal/formats/common/spdxhelpers/spdxid_test.go b/internal/formats/common/spdxhelpers/spdxid_test.go new file mode 100644 index 00000000000..b66ccd45ebd --- /dev/null +++ b/internal/formats/common/spdxhelpers/spdxid_test.go @@ -0,0 +1,39 @@ +package spdxhelpers + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_SanitizeElementID(t *testing.T) { + tests := []struct { + input string + expected string + }{ + { + input: "letters", + expected: "letters", + }, + { + input: "ssl-client", + expected: "ssl-client", + }, + { + input: "ssl_client", + expected: "ssl-client", + }, + { + input: "go-module-sigs.k8s.io/structured-merge-diff/v3", + expected: "go-module-sigs.k8s.io-structured-merge-diff-v3", + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + actual := SanitizeElementID(test.input) + + assert.Equal(t, test.expected, actual) + }) + } +} diff --git a/internal/formats/spdx22json/model/element_id.go b/internal/formats/spdx22json/model/element_id.go index 50251fe37ce..0973ce4bd21 100644 --- a/internal/formats/spdx22json/model/element_id.go +++ b/internal/formats/spdx22json/model/element_id.go @@ -1,5 +1,7 @@ package model +import "github.com/anchore/syft/internal/formats/common/spdxhelpers" + // ElementID represents the identifier string portion of an SPDX element // identifier. DocElementID should be used for any attributes which can // contain identifiers defined in a different SPDX document. @@ -7,31 +9,5 @@ package model type ElementID string func (e ElementID) String() string { - return "SPDXRef-" + string(e) -} - -// DocElementID represents an SPDX element identifier that could be defined -// in a different SPDX document, and therefore could have a "DocumentRef-" -// portion, such as Relationship and Annotations. -// ElementID is used for attributes in which a "DocumentRef-" portion cannot -// appear, such as a Package or File definition (since it is necessarily -// being defined in the present document). -// DocumentRefID will be the empty string for elements defined in the -// present document. -// DocElementIDs should NOT contain the mandatory 'DocumentRef-' or -// 'SPDXRef-' portions. -type DocElementID struct { - DocumentRefID string - ElementRefID ElementID -} - -// RenderDocElementID takes a DocElementID and returns the string equivalent, -// with the SPDXRef- prefix (and, if applicable, the DocumentRef- prefix) -// reinserted. -func (d DocElementID) String() string { - prefix := "" - if d.DocumentRefID != "" { - prefix = "DocumentRef-" + d.DocumentRefID + ":" - } - return prefix + d.ElementRefID.String() + return "SPDXRef-" + spdxhelpers.SanitizeElementID(string(e)) } diff --git a/internal/formats/spdx22tagvalue/encoder_test.go b/internal/formats/spdx22tagvalue/encoder_test.go index af8dd500a75..9c3dba9e994 100644 --- a/internal/formats/spdx22tagvalue/encoder_test.go +++ b/internal/formats/spdx22tagvalue/encoder_test.go @@ -6,6 +6,9 @@ import ( "testing" "github.com/anchore/syft/internal/formats/common/testutils" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" + "github.com/anchore/syft/syft/source" ) var updateSpdxTagValue = flag.Bool("update-spdx-tv", false, "update the *.golden files for spdx-tv encoders") @@ -31,6 +34,32 @@ func TestSPDXTagValueImageEncoder(t *testing.T) { ) } +func TestSPDXJSONSPDXIDs(t *testing.T) { + var pkgs []pkg.Package + for _, name := range []string{"some/slashes", "@at-sign", "under_scores"} { + p := pkg.Package{ + Name: name, + } + p.SetID() + pkgs = append(pkgs, p) + } + testutils.AssertEncoderAgainstGoldenSnapshot(t, + Format(), + sbom.SBOM{ + Artifacts: sbom.Artifacts{ + PackageCatalog: pkg.NewCatalog(pkgs...), + }, + Relationships: nil, + Source: source.Metadata{ + Scheme: source.DirectoryScheme, + }, + Descriptor: sbom.Descriptor{}, + }, + true, + spdxTagValueRedactor, + ) +} + func spdxTagValueRedactor(s []byte) []byte { // each SBOM reports the time it was generated, which is not useful during snapshot testing s = regexp.MustCompile(`Created: .*`).ReplaceAll(s, []byte("redacted")) diff --git a/internal/formats/spdx22tagvalue/test-fixtures/snapshot/TestSPDXJSONSPDXIDs.golden b/internal/formats/spdx22tagvalue/test-fixtures/snapshot/TestSPDXJSONSPDXIDs.golden new file mode 100644 index 00000000000..fb675caf785 --- /dev/null +++ b/internal/formats/spdx22tagvalue/test-fixtures/snapshot/TestSPDXJSONSPDXIDs.golden @@ -0,0 +1,40 @@ +SPDXVersion: SPDX-2.2 +DataLicense: CC0-1.0 +SPDXID: SPDXRef-DOCUMENT +DocumentName: . +DocumentNamespace: https://anchore.com/syft/dir/e69056a9-935e-4f00-b85f-9467f5d99a92 +LicenseListVersion: 3.16 +Creator: Organization: Anchore, Inc +Creator: Tool: syft-[not provided] +Created: 2022-04-13T16:38:03Z + +##### Package: @at-sign + +PackageName: @at-sign +SPDXID: SPDXRef-Package---at-sign-739e4f0d93fb8298 +PackageDownloadLocation: NOASSERTION +FilesAnalyzed: false +PackageLicenseConcluded: NONE +PackageLicenseDeclared: NONE +PackageCopyrightText: NOASSERTION + +##### Package: some/slashes + +PackageName: some/slashes +SPDXID: SPDXRef-Package--some-slashes-26db06648b24bff9 +PackageDownloadLocation: NOASSERTION +FilesAnalyzed: false +PackageLicenseConcluded: NONE +PackageLicenseDeclared: NONE +PackageCopyrightText: NOASSERTION + +##### Package: under_scores + +PackageName: under_scores +SPDXID: SPDXRef-Package--under-scores-250cbfefcdea318b +PackageDownloadLocation: NOASSERTION +FilesAnalyzed: false +PackageLicenseConcluded: NONE +PackageLicenseDeclared: NONE +PackageCopyrightText: NOASSERTION + diff --git a/internal/formats/spdx22tagvalue/to_format_model.go b/internal/formats/spdx22tagvalue/to_format_model.go index 2f0d84ba726..b189f977dba 100644 --- a/internal/formats/spdx22tagvalue/to_format_model.go +++ b/internal/formats/spdx22tagvalue/to_format_model.go @@ -96,7 +96,7 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2 for _, p := range catalog.Sorted() { // name should be guaranteed to be unique, but semantically useful and stable - id := fmt.Sprintf("Package-%+v-%s-%s", p.Type, p.Name, p.ID()) + id := spdxhelpers.SanitizeElementID(fmt.Sprintf("Package-%+v-%s-%s", p.Type, p.Name, p.ID())) // If the Concluded License is not the same as the Declared License, a written explanation should be provided // in the Comments on License field (section 3.16). With respect to NOASSERTION, a written explanation in