Skip to content

Commit

Permalink
Add digest property to parent and nested java package metadata (#941)
Browse files Browse the repository at this point in the history
  • Loading branch information
spiffcs authored Apr 8, 2022
1 parent e415bb2 commit 782b2e3
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 24 deletions.
32 changes: 31 additions & 1 deletion internal/formats/common/cyclonedxhelpers/external_references.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ import (
"fmt"
"strings"

syftFile "github.com/anchore/syft/syft/file"

"github.com/CycloneDX/cyclonedx-go"
"github.com/anchore/syft/syft/pkg"
)

//nolint:funlen, gocognit
func encodeExternalReferences(p pkg.Package) *[]cyclonedx.ExternalReference {
refs := []cyclonedx.ExternalReference{}
var refs []cyclonedx.ExternalReference
if hasMetadata(p) {
switch metadata := p.Metadata.(type) {
case pkg.ApkMetadata:
Expand Down Expand Up @@ -46,6 +49,19 @@ func encodeExternalReferences(p pkg.Package) *[]cyclonedx.ExternalReference {
Type: cyclonedx.ERTypeWebsite,
})
}
case pkg.JavaMetadata:
if len(metadata.ArchiveDigests) > 0 {
for _, digest := range metadata.ArchiveDigests {
refs = append(refs, cyclonedx.ExternalReference{
URL: "",
Type: cyclonedx.ERTypeBuildMeta,
Hashes: &[]cyclonedx.Hash{{
Algorithm: cyclonedx.HashAlgorithm(digest.Algorithm),
Value: digest.Value,
}},
})
}
}
case pkg.PythonPackageMetadata:
if metadata.DirectURLOrigin != nil && metadata.DirectURLOrigin.URL != "" {
ref := cyclonedx.ExternalReference{
Expand Down Expand Up @@ -79,6 +95,20 @@ func decodeExternalReferences(c *cyclonedx.Component, metadata interface{}) {
meta.Homepage = refURL(c, cyclonedx.ERTypeWebsite)
case *pkg.GemMetadata:
meta.Homepage = refURL(c, cyclonedx.ERTypeWebsite)
case *pkg.JavaMetadata:
var digests []syftFile.Digest
if ref := findExternalRef(c, cyclonedx.ERTypeBuildMeta); ref != nil {
if ref.Hashes != nil {
for _, hash := range *ref.Hashes {
digests = append(digests, syftFile.Digest{
Algorithm: string(hash.Algorithm),
Value: hash.Value,
})
}
}
}

meta.ArchiveDigests = digests
case *pkg.PythonPackageMetadata:
if meta.DirectURLOrigin == nil {
meta.DirectURLOrigin = &pkg.PythonDirectURLOriginInfo{}
Expand Down
1 change: 1 addition & 0 deletions internal/formats/common/spdxhelpers/external_refs.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,6 @@ func ExternalRefs(p pkg.Package) (externalRefs []ExternalRef) {
ReferenceType: PurlExternalRefType,
})
}

return externalRefs
}
8 changes: 8 additions & 0 deletions internal/formats/common/spdxhelpers/to_syft_model.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,14 @@ func extractMetadata(p *spdx.Package2_2, info pkgInfo) (pkg.MetadataType, interf
Architecture: arch,
Maintainer: p.PackageOriginatorPerson,
}
case pkg.JavaPkg:
var digests []file.Digest
for algorithm, value := range p.PackageChecksums {
digests = append(digests, file.Digest{Algorithm: string(algorithm), Value: value.Value})
}
return pkg.JavaMetadataType, pkg.JavaMetadata{
ArchiveDigests: digests,
}
}
return pkg.UnknownMetadataType, nil
}
Expand Down
22 changes: 20 additions & 2 deletions internal/formats/spdx22json/to_format_model.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,32 @@ func toPackages(catalog *pkg.Catalog, relationships []artifact.Relationship) []m
for _, p := range catalog.Sorted() {
license := spdxhelpers.License(p)
packageSpdxID := model.ElementID(p.ID()).String()

filesAnalyzed := false

// we generate digests for some Java packages
// see page 33 of the spdx specification for 2.2
// spdx.github.io/spdx-spec/package-information/#710-package-checksum-field
var checksums []model.Checksum
if p.MetadataType == pkg.JavaMetadataType {
javaMetadata := p.Metadata.(pkg.JavaMetadata)
if len(javaMetadata.ArchiveDigests) > 0 {
filesAnalyzed = true
for _, digest := range javaMetadata.ArchiveDigests {
checksums = append(checksums, model.Checksum{
Algorithm: digest.Algorithm,
ChecksumValue: digest.Value,
})
}
}
}
// note: the license concluded and declared should be the same since we are collecting license information
// from the project data itself (the installed package files).
packages = append(packages, model.Package{
Checksums: checksums,
Description: spdxhelpers.Description(p),
DownloadLocation: spdxhelpers.DownloadLocation(p),
ExternalRefs: spdxhelpers.ExternalRefs(p),
FilesAnalyzed: false,
FilesAnalyzed: filesAnalyzed,
HasFiles: fileIDsForPackage(packageSpdxID, relationships),
Homepage: spdxhelpers.Homepage(p),
// The Declared License is what the authors of a project believe govern the package
Expand Down
21 changes: 20 additions & 1 deletion internal/formats/spdx22tagvalue/to_format_model.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,24 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2
// the Comments on License field (section 3.16) is preferred.
license := spdxhelpers.License(p)

filesAnalyzed := false
checksums := make(map[spdx.ChecksumAlgorithm]spdx.Checksum)

// If the pkg type is Java we have attempted to generate a digest
// FilesAnalyzed should be true in this case
if p.MetadataType == pkg.JavaMetadataType {
javaMetadata := p.Metadata.(pkg.JavaMetadata)
if len(javaMetadata.ArchiveDigests) > 0 {
filesAnalyzed = true
for _, digest := range javaMetadata.ArchiveDigests {
checksums[spdx.ChecksumAlgorithm(digest.Algorithm)] = spdx.Checksum{
Algorithm: spdx.ChecksumAlgorithm(digest.Algorithm),
Value: digest.Value,
}
}
}
}

results[spdx.ElementID(id)] = &spdx.Package2_2{

// NOT PART OF SPEC
Expand Down Expand Up @@ -159,7 +177,7 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2

// Intent: A package can refer to a project, product, artifact, distribution or a component that is
// external to the SPDX document.
FilesAnalyzed: false,
FilesAnalyzed: filesAnalyzed,
// NOT PART OF SPEC: did FilesAnalyzed tag appear?
IsFilesAnalyzedTagPresent: true,

Expand All @@ -180,6 +198,7 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2
// to determine if any file in the original package has been changed. If the SPDX file is to be included
// in a package, this value should not be calculated. The SHA-1 algorithm will be used to provide the
// checksum by default.
PackageChecksums: checksums,

// note: based on the purpose above no discovered checksums should be provided, but instead, only
// tool-derived checksums.
Expand Down
23 changes: 16 additions & 7 deletions syft/file/digest_cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,30 +77,39 @@ func (i *DigestsCataloger) catalogLocation(resolver source.FileResolver, locatio
}
defer internal.CloseAndLogError(contentReader, location.VirtualPath)

digests, err := DigestsFromFile(contentReader, i.hashes)
if err != nil {
return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
}

return digests, nil
}

func DigestsFromFile(closer io.ReadCloser, hashes []crypto.Hash) ([]Digest, error) {
// create a set of hasher objects tied together with a single writer to feed content into
hashers := make([]hash.Hash, len(i.hashes))
writers := make([]io.Writer, len(i.hashes))
for idx, hashObj := range i.hashes {
hashers := make([]hash.Hash, len(hashes))
writers := make([]io.Writer, len(hashes))
for idx, hashObj := range hashes {
hashers[idx] = hashObj.New()
writers[idx] = hashers[idx]
}

size, err := io.Copy(io.MultiWriter(writers...), contentReader)
size, err := io.Copy(io.MultiWriter(writers...), closer)
if err != nil {
return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
return nil, err
}

if size == 0 {
return make([]Digest, 0), nil
}

result := make([]Digest, len(i.hashes))
result := make([]Digest, len(hashes))
// only capture digests when there is content. It is important to do this based on SIZE and not
// FILE TYPE. The reasoning is that it is possible for a tar to be crafted with a header-only
// file type but a body is still allowed.
for idx, hasher := range hashers {
result[idx] = Digest{
Algorithm: DigestAlgorithmName(i.hashes[idx]),
Algorithm: DigestAlgorithmName(hashes[idx]),
Value: fmt.Sprintf("%+x", hasher.Sum(nil)),
}
}
Expand Down
38 changes: 32 additions & 6 deletions syft/pkg/cataloger/java/archive_parser.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
package java

import (
"crypto"
"fmt"
"io"
"os"
"path"
"strings"

"github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
syftFile "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
)
Expand All @@ -34,6 +37,11 @@ var archiveFormatGlobs = []string{
// project that we can build in CI feel free to include it
}

// javaArchiveHashes lists every hash algorithm applied to a Java archive
// when computing its digests.
var javaArchiveHashes = []crypto.Hash{crypto.SHA1}

type archiveParser struct {
fileManifest file.ZipFileManifest
virtualPath string
Expand Down Expand Up @@ -101,6 +109,7 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error)
}

// find aux packages from pom.properties/pom.xml and potentially modify the existing parentPkg
// NOTE: we cannot generate sha1 digests from packages discovered via pom.properties/pom.xml
auxPkgs, err := j.discoverPkgsFromAllMavenFiles(parentPkg)
if err != nil {
return nil, nil, err
Expand Down Expand Up @@ -135,6 +144,7 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error)
// discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages.
func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
// search and parse java manifest files
// TODO: do we want to prefer or check for pom files over manifest here?
manifestMatches := j.fileManifest.GlobMatch(manifestGlob)
if len(manifestMatches) > 1 {
return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches)
Expand All @@ -157,15 +167,28 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
return nil, nil
}

archiveCloser, err := os.Open(j.archivePath)
if err != nil {
return nil, fmt.Errorf("unable to open archive path (%s): %w", j.archivePath, err)
}
defer archiveCloser.Close()

// grab and assign digest for the entire archive
digests, err := syftFile.DigestsFromFile(archiveCloser, javaArchiveHashes)
if err != nil {
log.Warnf("failed to create digest for file=%q: %+v", j.archivePath, err)
}

return &pkg.Package{
Name: selectName(manifest, j.fileInfo),
Version: selectVersion(manifest, j.fileInfo),
Language: pkg.Java,
Type: j.fileInfo.pkgType(),
MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{
VirtualPath: j.virtualPath,
Manifest: manifest,
VirtualPath: j.virtualPath,
Manifest: manifest,
ArchiveDigests: digests,
},
}, nil
}
Expand All @@ -181,12 +204,14 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([

var pkgs []*pkg.Package

properties, err := pomPropertiesByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomPropertiesGlob), j.virtualPath)
// pom.properties
properties, err := pomPropertiesByParentPath(j.archivePath, j.virtualPath, j.fileManifest.GlobMatch(pomPropertiesGlob))
if err != nil {
return nil, err
}

projects, err := pomProjectByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomXMLGlob), j.virtualPath)
// pom.xml
projects, err := pomProjectByParentPath(j.archivePath, j.virtualPath, j.fileManifest.GlobMatch(pomXMLGlob))
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -273,7 +298,7 @@ func discoverPkgsFromOpener(virtualPath, pathWithinArchive string, archiveOpener
return nestedPkgs, nestedRelationships, nil
}

func pomPropertiesByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProperties, error) {
func pomPropertiesByParentPath(archivePath, virtualPath string, extractPaths []string) (map[string]pkg.PomProperties, error) {
contentsOfMavenPropertiesFiles, err := file.ContentsFromZip(archivePath, extractPaths...)
if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err)
Expand All @@ -298,10 +323,11 @@ func pomPropertiesByParentPath(archivePath string, extractPaths []string, virtua

propertiesByParentPath[path.Dir(filePath)] = *pomProperties
}

return propertiesByParentPath, nil
}

func pomProjectByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProject, error) {
func pomProjectByParentPath(archivePath, virtualPath string, extractPaths []string) (map[string]pkg.PomProject, error) {
contentsOfMavenProjectFiles, err := file.ContentsFromZip(archivePath, extractPaths...)
if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err)
Expand Down
6 changes: 5 additions & 1 deletion syft/pkg/cataloger/java/archive_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,11 @@ func TestParseJar(t *testing.T) {
metadata := a.Metadata.(pkg.JavaMetadata)
metadata.Parent = nil

// redact Digest which is computed differently between CI and local
if len(metadata.ArchiveDigests) > 0 {
metadata.ArchiveDigests = nil
}

// ignore select fields (only works for the main section)
for _, field := range test.ignoreExtras {
if metadata.Manifest != nil && metadata.Manifest.Main != nil {
Expand Down Expand Up @@ -567,7 +572,6 @@ func TestParseNestedJar(t *testing.T) {
}
}
}

}
})
}
Expand Down
14 changes: 8 additions & 6 deletions syft/pkg/java_metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package pkg
import (
"strings"

"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/linux"

"github.com/anchore/syft/internal"
Expand All @@ -20,12 +21,13 @@ var jenkinsPluginPomPropertiesGroupIDs = []string{

// JavaMetadata encapsulates all Java ecosystem metadata for a package as well as an (optional) parent relationship.
type JavaMetadata struct {
VirtualPath string `json:"virtualPath" cyclonedx:"virtualPath"` // we need to include the virtual path in cyclonedx documents to prevent deduplication of jars within jars
Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"`
PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"`
PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"`
PURL string `hash:"ignore" json:"-"` // pURLs and CPEs are ignored for package IDs
Parent *Package `hash:"ignore" json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy).
VirtualPath string `json:"virtualPath" cyclonedx:"virtualPath"` // we need to include the virtual path in cyclonedx documents to prevent deduplication of jars within jars
Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"`
PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"`
PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"`
ArchiveDigests []file.Digest `hash:"ignore" json:"digest,omitempty"`
PURL string `hash:"ignore" json:"-"` // pURLs and CPEs are ignored for package IDs
Parent *Package `hash:"ignore" json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy).
}

// PomProperties represents the fields of interest extracted from a Java archive's pom.properties file.
Expand Down

0 comments on commit 782b2e3

Please sign in to comment.