From acc79ebd436c5b4390f861bc6f50d8e9f75f80fe Mon Sep 17 00:00:00 2001 From: Richard Gomez Date: Sat, 13 Jan 2024 17:22:38 -0500 Subject: [PATCH] update ConditionalDetector logic --- pkg/detectors/detectors.go | 16 ++++++++----- pkg/detectors/parseur/parseur.go | 7 +++--- pkg/engine/engine.go | 2 +- pkg/sources/sources.go | 40 +++++++++++++++++++++++++++----- 4 files changed, 48 insertions(+), 17 deletions(-) diff --git a/pkg/detectors/detectors.go b/pkg/detectors/detectors.go index 0618435e91fc..4f9e40b24718 100644 --- a/pkg/detectors/detectors.go +++ b/pkg/detectors/detectors.go @@ -31,18 +31,22 @@ type Detector interface { // ConditionalDetector is an optional interface that a detector can implement to // skip chunks based on specific criteria. type ConditionalDetector interface { - // ScanChunk determines whether the detector should run. - ScanChunk(chunk sources.Chunk) bool + // MatchesChunk determines whether the detector should run. + MatchesChunk(chunk sources.Chunk) bool } -// FilenameConditions is a set of common conditions to be used by ConditionalDetector. +var lockFilePat = regexp.MustCompile(`(^|/)(package(-lock)?\.json|yarn\.lock)$`) + +// Conditions is a set of common conditions to be used by ConditionalDetector. // (Using anonymous structs is weird, but Go has no concept of static members... https://stackoverflow.com/a/55390104) -var FilenameConditions = struct { +var Conditions = struct { // LockFiles are a common source of false-positives. // https://github.com/trufflesecurity/trufflehog/issues/1460 - LockFiles *regexp.Regexp + IsLockFile func(path string) bool }{ - LockFiles: regexp.MustCompile(`(^|/)(package(-lock)?\.json|yarn\.lock)$`), + IsLockFile: func(path string) bool { + return lockFilePat.MatchString(path) + }, } // Versioner is an optional interface that a detector can implement to diff --git a/pkg/detectors/parseur/parseur.go b/pkg/detectors/parseur/parseur.go index adfa2023fa10..c7453630efd5 100644 --- a/pkg/detectors/parseur/parseur.go +++ b/pkg/detectors/parseur/parseur.go @@ -33,10 +33,9 @@ func (s Scanner) Keywords() []string { return []string{"parseur"} } -func (s Scanner) ScanChunk(chunk sources.Chunk) bool { - // TODO: Can |chunk.SourceMetadata| be nil? - if m, ok := chunk.SourceMetadata.GetData().(sources.GitSourceMetadata); ok { - return !detectors.FilenameConditions.LockFiles.MatchString(m.GetFile()) +func (s Scanner) MatchesChunk(chunk sources.Chunk) bool { + if m, ok := sources.NewGitSourceMetadata(chunk.SourceType, chunk.SourceMetadata); ok { + return !detectors.Conditions.IsLockFile(m.File) } return true } diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index e11461b3e238..ec8dadab4d71 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -478,7 +478,7 @@ func (e *Engine) detectorWorker(ctx context.Context) { for k, detector := range chunkSpecificDetectors { d, ok := detector.(detectors.ConditionalDetector) - if ok && !d.ScanChunk(*chunk) { + if ok && !d.MatchesChunk(*chunk) { ctx.Logger().V(4).Info("skipping detector for chunk", "detector", detector.Type().String(), "chunk", chunk) delete(chunkSpecificDetectors, k) continue diff --git a/pkg/sources/sources.go b/pkg/sources/sources.go index a929261dd741..149ae3c8093e 100644 --- a/pkg/sources/sources.go +++ b/pkg/sources/sources.go @@ -38,12 +38,40 @@ type Chunk struct { Verify bool } -// GitSourceMetadata defines a common interface for Git-based source metadata. -// For example, this should match Git, Azure, Bitbucket, GitHub, and Gitlab. -type GitSourceMetadata interface { - GetRepository() string - GetCommit() string - GetFile() string +// GitSourceMetadata defines a common struct for Git-based source metadata. +type GitSourceMetadata struct { + Repository string + Commit string + File string +} + +func NewGitSourceMetadata(source sourcespb.SourceType, data *source_metadatapb.MetaData) (*GitSourceMetadata, bool) { + if data == nil { + return nil, false + } + + switch source { + case sourcespb.SourceType_SOURCE_TYPE_GIT: + md := data.GetGit() + return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true + case sourcespb.SourceType_SOURCE_TYPE_AZURE_REPOS: + md := data.GetAzureRepos() + return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true + case sourcespb.SourceType_SOURCE_TYPE_BITBUCKET: + md := data.GetBitbucket() + return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true + case sourcespb.SourceType_SOURCE_TYPE_GERRIT: + md := data.GetGerrit() + return &GitSourceMetadata{md.GetProject(), md.GetCommit(), md.GetFile()}, true + case sourcespb.SourceType_SOURCE_TYPE_GITHUB: + md := data.GetGithub() + return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true + case sourcespb.SourceType_SOURCE_TYPE_GITLAB: + md := data.GetGitlab() + return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true + default: + return nil, false + } } // ChunkingTarget specifies criteria for a targeted chunking process.