Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add digest property to parent and nested java package metadata #941

Merged
merged 21 commits into from
Apr 8, 2022
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ require (
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect
golang.org/x/sys v0.0.0-20220209214540-3681064d5158 // indirect
golang.org/x/text v0.3.7 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20220218161850-94dd64e39d7c // indirect
google.golang.org/grpc v1.44.0 // indirect
Expand Down
35 changes: 35 additions & 0 deletions internal/file/digest.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package file

import (
"crypto/sha1" //nolint:gosec
"encoding/hex"
"io"
"os"

"golang.org/x/xerrors"

"github.com/anchore/syft/internal/log"
)

const DefaultDigestAlgorithm = "sha1"
spiffcs marked this conversation as resolved.
Show resolved Hide resolved

// Digest takes a filepath and returns a sha1 digest for the given contents
func Digest(filepath string) (digest string, err error) {
file, err := os.Open(filepath)
if err != nil {
return digest, xerrors.Errorf("unable to open file: %s - %w", filepath, err)
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
}

h := sha1.New() //nolint:gosec
if _, err := io.Copy(h, file); err != nil {
return digest, xerrors.Errorf("unable to calculate SHA-1 for %s: %w", filepath, err)
}

defer func() {
err := file.Close()
if err != nil {
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, filepath, err)
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
}
}()
return hex.EncodeToString(h.Sum(nil)), nil
}
3 changes: 2 additions & 1 deletion internal/file/opener.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ import (

// Opener is an object that stores a path to later be opened as a file.
type Opener struct {
path string
path string
digest string
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
}

// Open the stored path as a io.ReadCloser.
Expand Down
12 changes: 11 additions & 1 deletion internal/file/zip_file_traversal.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths
}

// ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted.
// It also preserves the digest of the discovered zip file on the opener so that data can be used later for metadata construction
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) {
results := make(map[string]Opener)

Expand All @@ -91,6 +92,12 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
// provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
defer tempFile.Close()

// grab and assign digest for the visited zip file
digest, err := Digest(file.FileInfo().Name())
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
log.Warnf("failed to parse digest for file (%s): %+v", file.Name, err)
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
}

zippedFile, err := file.Open()
if err != nil {
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
Expand All @@ -110,7 +117,10 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
}

results[file.Name] = Opener{path: tempFile.Name()}
results[file.Name] = Opener{
path: tempFile.Name(),
digest: digest,
}

return nil
}
Expand Down
26 changes: 25 additions & 1 deletion internal/formats/common/cyclonedxhelpers/external_references.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@ import (
"fmt"
"strings"

"github.com/anchore/syft/internal/file"
syftFile "github.com/anchore/syft/syft/file"

"github.com/CycloneDX/cyclonedx-go"
"github.com/anchore/syft/syft/pkg"
)

//nolint:funlen
kzantow marked this conversation as resolved.
Show resolved Hide resolved
func encodeExternalReferences(p pkg.Package) *[]cyclonedx.ExternalReference {
refs := []cyclonedx.ExternalReference{}
var refs []cyclonedx.ExternalReference
kzantow marked this conversation as resolved.
Show resolved Hide resolved
if hasMetadata(p) {
switch metadata := p.Metadata.(type) {
case pkg.ApkMetadata:
Expand Down Expand Up @@ -46,6 +50,17 @@ func encodeExternalReferences(p pkg.Package) *[]cyclonedx.ExternalReference {
Type: cyclonedx.ERTypeWebsite,
})
}
case pkg.JavaMetadata:
if metadata.Digest != nil {
refs = append(refs, cyclonedx.ExternalReference{
URL: "",
Type: cyclonedx.ERTypeBuildMeta,
Hashes: &[]cyclonedx.Hash{{
Algorithm: file.DefaultDigestAlgorithm,
Value: metadata.Digest.Value,
}},
})
}
case pkg.PythonPackageMetadata:
if metadata.DirectURLOrigin != nil && metadata.DirectURLOrigin.URL != "" {
ref := cyclonedx.ExternalReference{
Expand Down Expand Up @@ -79,6 +94,15 @@ func decodeExternalReferences(c *cyclonedx.Component, metadata interface{}) {
meta.Homepage = refURL(c, cyclonedx.ERTypeWebsite)
case *pkg.GemMetadata:
meta.Homepage = refURL(c, cyclonedx.ERTypeWebsite)
case *pkg.JavaMetadata:
meta.Digest = &syftFile.Digest{}
ref := findExternalRef(c, cyclonedx.ERTypeBuildMeta)
for _, hash := range *ref.Hashes {
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
if hash.Algorithm == file.DefaultDigestAlgorithm {
meta.Digest.Algorithm = file.DefaultDigestAlgorithm
meta.Digest.Value = hash.Value
}
}
case *pkg.PythonPackageMetadata:
if meta.DirectURLOrigin == nil {
meta.DirectURLOrigin = &pkg.PythonDirectURLOriginInfo{}
Expand Down
1 change: 1 addition & 0 deletions internal/formats/common/spdxhelpers/external_refs.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,6 @@ func ExternalRefs(p pkg.Package) (externalRefs []ExternalRef) {
ReferenceType: PurlExternalRefType,
})
}

return externalRefs
}
4 changes: 1 addition & 3 deletions internal/formats/spdx22json/model/annotation.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package model

import "time"
spiffcs marked this conversation as resolved.
Show resolved Hide resolved

type AnnotationType string

const (
Expand All @@ -12,7 +10,7 @@ const (
type Annotation struct {
// Identify when the comment was made. This is to be specified according to the combined date and time in the
// UTC format, as specified in the ISO 8601 standard.
AnnotationDate time.Time `json:"annotationDate"`
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
AnnotationDate string `json:"annotationDate"`
// Type of the annotation
AnnotationType AnnotationType `json:"annotationType"`
// This field identifies the person, organization or tool that has commented on a file, package, or the entire document.
Expand Down
21 changes: 19 additions & 2 deletions internal/formats/spdx22json/to_format_model.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"time"

"github.com/anchore/syft/internal"
internalFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/formats/common/spdxhelpers"
"github.com/anchore/syft/internal/formats/spdx22json/model"
"github.com/anchore/syft/internal/log"
Expand Down Expand Up @@ -55,14 +56,30 @@ func toPackages(catalog *pkg.Catalog, relationships []artifact.Relationship) []m
for _, p := range catalog.Sorted() {
license := spdxhelpers.License(p)
packageSpdxID := model.ElementID(p.ID()).String()

filesAnalyzed := false
checksums := make([]model.Checksum, 0)
spiffcs marked this conversation as resolved.
Show resolved Hide resolved

// we generate digest for some Java packages
// see page 33 of the spdx specification for 2.2
// https://spdx.dev/wp-content/uploads/sites/41/2020/08/SPDX-specification-2-2.pdf
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
if p.MetadataType == pkg.JavaMetadataType {
javaMetadata := p.Metadata.(pkg.JavaMetadata)
if javaMetadata.Digest != nil {
filesAnalyzed = true
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
checksums = append(checksums, model.Checksum{
Algorithm: internalFile.DefaultDigestAlgorithm,
ChecksumValue: javaMetadata.Digest.Value,
})
}
}
// note: the license concluded and declared should be the same since we are collecting license information
// from the project data itself (the installed package files).
packages = append(packages, model.Package{
Checksums: checksums,
Description: spdxhelpers.Description(p),
DownloadLocation: spdxhelpers.DownloadLocation(p),
ExternalRefs: spdxhelpers.ExternalRefs(p),
FilesAnalyzed: false,
FilesAnalyzed: filesAnalyzed,
HasFiles: fileIDsForPackage(packageSpdxID, relationships),
Homepage: spdxhelpers.Homepage(p),
// The Declared License is what the authors of a project believe govern the package
Expand Down
19 changes: 18 additions & 1 deletion internal/formats/spdx22tagvalue/to_format_model.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,22 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2
// the Comments on License field (section 3.16) is preferred.
license := spdxhelpers.License(p)

filesAnalyzed := false
checksums := make(map[spdx.ChecksumAlgorithm]spdx.Checksum)

// If the pkg type is Java we have attempted to generate a digest
// FilesAnalyzed should be true in this case
if p.MetadataType == pkg.JavaMetadataType {
javaMetadata := p.Metadata.(pkg.JavaMetadata)
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
if javaMetadata.Digest != nil {
filesAnalyzed = true
checksums[spdx.SHA1] = spdx.Checksum{
Algorithm: spdx.SHA1,
Value: javaMetadata.Digest.Value,
}
}
}

results[spdx.ElementID(id)] = &spdx.Package2_2{

// NOT PART OF SPEC
Expand Down Expand Up @@ -159,7 +175,7 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2

// Intent: A package can refer to a project, product, artifact, distribution or a component that is
// external to the SPDX document.
FilesAnalyzed: false,
FilesAnalyzed: filesAnalyzed,
// NOT PART OF SPEC: did FilesAnalyzed tag appear?
IsFilesAnalyzedTagPresent: true,

Expand All @@ -180,6 +196,7 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2
// to determine if any file in the original package has been changed. If the SPDX file is to be included
// in a package, this value should not be calculated. The SHA-1 algorithm will be used to provide the
// checksum by default.
PackageChecksums: checksums,

// note: based on the purpose above no discovered checksums should be provided, but instead, only
// tool-derived checksums.
Expand Down
24 changes: 20 additions & 4 deletions syft/pkg/cataloger/java/archive_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
syftFile "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
)
Expand Down Expand Up @@ -101,6 +102,7 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error)
}

// find aux packages from pom.properties/pom.xml and potentially modify the existing parentPkg
// NOTE: we cannot generate sha1 digests from packages discovered via pom.properties/pom.xml
auxPkgs, err := j.discoverPkgsFromAllMavenFiles(parentPkg)
if err != nil {
return nil, nil, err
Expand Down Expand Up @@ -135,6 +137,7 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error)
// discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages.
func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
// search and parse java manifest files
// TODO: do we want to prefer or check for pom files over manifest here?
kzantow marked this conversation as resolved.
Show resolved Hide resolved
manifestMatches := j.fileManifest.GlobMatch(manifestGlob)
if len(manifestMatches) > 1 {
return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches)
Expand All @@ -157,6 +160,12 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
return nil, nil
}

// grab and assign digest for the entire archive
digest, err := file.Digest(j.archivePath)
if err != nil {
log.Warnf("failed to parse digest for file (%s): %+v", j.archivePath, err)
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
}

return &pkg.Package{
Name: selectName(manifest, j.fileInfo),
Version: selectVersion(manifest, j.fileInfo),
Expand All @@ -166,6 +175,10 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
Metadata: pkg.JavaMetadata{
VirtualPath: j.virtualPath,
Manifest: manifest,
Digest: &syftFile.Digest{
Algorithm: file.DefaultDigestAlgorithm,
Value: digest,
},
},
}, nil
}
Expand All @@ -181,12 +194,14 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([

var pkgs []*pkg.Package

properties, err := pomPropertiesByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomPropertiesGlob), j.virtualPath)
// pom.properties
properties, err := pomPropertiesByParentPath(j.archivePath, j.virtualPath, j.fileManifest.GlobMatch(pomPropertiesGlob))
if err != nil {
return nil, err
}

projects, err := pomProjectByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomXMLGlob), j.virtualPath)
// pom.xml
projects, err := pomProjectByParentPath(j.archivePath, j.virtualPath, j.fileManifest.GlobMatch(pomXMLGlob))
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -273,7 +288,7 @@ func discoverPkgsFromOpener(virtualPath, pathWithinArchive string, archiveOpener
return nestedPkgs, nestedRelationships, nil
}

func pomPropertiesByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProperties, error) {
func pomPropertiesByParentPath(archivePath, virtualPath string, extractPaths []string) (map[string]pkg.PomProperties, error) {
kzantow marked this conversation as resolved.
Show resolved Hide resolved
contentsOfMavenPropertiesFiles, err := file.ContentsFromZip(archivePath, extractPaths...)
if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err)
Expand All @@ -298,10 +313,11 @@ func pomPropertiesByParentPath(archivePath string, extractPaths []string, virtua

propertiesByParentPath[path.Dir(filePath)] = *pomProperties
}

return propertiesByParentPath, nil
}

func pomProjectByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProject, error) {
func pomProjectByParentPath(archivePath, virtualPath string, extractPaths []string) (map[string]pkg.PomProject, error) {
contentsOfMavenProjectFiles, err := file.ContentsFromZip(archivePath, extractPaths...)
if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err)
Expand Down
6 changes: 5 additions & 1 deletion syft/pkg/cataloger/java/archive_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,11 @@ func TestParseJar(t *testing.T) {
metadata := a.Metadata.(pkg.JavaMetadata)
metadata.Parent = nil

// redact Digest which is computed differently between CI and local
if metadata.Digest != nil {
metadata.Digest = nil
}
kzantow marked this conversation as resolved.
Show resolved Hide resolved

// ignore select fields (only works for the main section)
for _, field := range test.ignoreExtras {
if metadata.Manifest != nil && metadata.Manifest.Main != nil {
Expand Down Expand Up @@ -567,7 +572,6 @@ func TestParseNestedJar(t *testing.T) {
}
}
}

}
})
}
Expand Down
2 changes: 2 additions & 0 deletions syft/pkg/java_metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package pkg
import (
"strings"

"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/linux"

"github.com/anchore/syft/internal"
Expand All @@ -24,6 +25,7 @@ type JavaMetadata struct {
Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"`
PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"`
PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"`
Digest *file.Digest `hash:"ignore" json:"digest,omitempty"`
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
PURL string `hash:"ignore" json:"-"` // pURLs and CPEs are ignored for package IDs
Parent *Package `hash:"ignore" json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy).
}
Expand Down