From 048585dab172c3256147a3f31671ab2d670401cc Mon Sep 17 00:00:00 2001 From: Richard Gomez Date: Fri, 1 Sep 2023 00:12:39 -0400 Subject: [PATCH] feat: optional detectors --- pkg/detectors/detectors.go | 18 ++++++++++++++++++ pkg/detectors/parseur/parseur.go | 11 +++++++++++ pkg/engine/engine.go | 7 +++++++ pkg/sources/sources.go | 8 ++++++++ 4 files changed, 44 insertions(+) diff --git a/pkg/detectors/detectors.go b/pkg/detectors/detectors.go index ba50c9fbc82d6..0618435e91fc8 100644 --- a/pkg/detectors/detectors.go +++ b/pkg/detectors/detectors.go @@ -5,6 +5,7 @@ import ( "crypto/rand" "math/big" "net/url" + "regexp" "strings" "unicode" @@ -27,6 +28,23 @@ type Detector interface { Type() detectorspb.DetectorType } +// ConditionalDetector is an optional interface that a detector can implement to +// skip chunks based on specific criteria. +type ConditionalDetector interface { + // ScanChunk reports whether the detector should run on chunk; returning false skips the detector for that chunk. + ScanChunk(chunk sources.Chunk) bool +} + +// FilenameConditions groups common filename patterns for ConditionalDetector implementations to reuse. +// (An anonymous struct serves as a namespace because Go has no static members: https://stackoverflow.com/a/55390104) +var FilenameConditions = struct { + // LockFiles matches files named package.json, package-lock.json, or yarn.lock in any directory (so plain package.json is included too); such files are a common source of false positives. + // https://github.com/trufflesecurity/trufflehog/issues/1460 + LockFiles *regexp.Regexp +}{ + LockFiles: regexp.MustCompile(`(^|/)(package(-lock)?\.json|yarn\.lock)$`), +} + // Versioner is an optional interface that a detector can implement to // differentiate instances of the same detector type. 
type Versioner interface { diff --git a/pkg/detectors/parseur/parseur.go b/pkg/detectors/parseur/parseur.go index 4cad1da9dfcdd..adfa2023fa10c 100644 --- a/pkg/detectors/parseur/parseur.go +++ b/pkg/detectors/parseur/parseur.go @@ -7,6 +7,8 @@ import ( "regexp" "strings" + "github.com/trufflesecurity/trufflehog/v3/pkg/sources" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" @@ -16,6 +18,7 @@ type Scanner struct{} // Ensure the Scanner satisfies the interface at compile time var _ detectors.Detector = (*Scanner)(nil) +var _ detectors.ConditionalDetector = (*Scanner)(nil) var ( client = common.SaneHttpClient() @@ -30,6 +33,14 @@ func (s Scanner) Keywords() []string { return []string{"parseur"} } +func (s Scanner) ScanChunk(chunk sources.Chunk) bool { + // TODO: Can |chunk.SourceMetadata| be nil? NOTE(review): also confirm GetData() returns a value that implements GitSourceMetadata; if it does not, this branch never runs and nothing is skipped. + if m, ok := chunk.SourceMetadata.GetData().(sources.GitSourceMetadata); ok { + return !detectors.FilenameConditions.LockFiles.MatchString(m.GetFile()) + } + return true +} + // FromData will find and optionally verify Parseur secrets in a given set of bytes. 
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 38f0da44371a4..e11461b3e238e 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -477,6 +477,13 @@ func (e *Engine) detectorWorker(ctx context.Context) { e.ahoCorasickCore.PopulateMatchingDetectors(string(decoded.Chunk.Data), chunkSpecificDetectors) for k, detector := range chunkSpecificDetectors { + d, ok := detector.(detectors.ConditionalDetector) + if ok && !d.ScanChunk(*chunk) { + ctx.Logger().V(4).Info("skipping detector for chunk", "detector", detector.Type().String(), "chunk", chunk) // NOTE(review): this hands the whole chunk to the logger; confirm its contents are safe to log. + delete(chunkSpecificDetectors, k) + continue + } + decoded.Chunk.Verify = e.verify wgDetect.Add(1) e.detectableChunksChan <- detectableChunk{ diff --git a/pkg/sources/sources.go b/pkg/sources/sources.go index c6ab88f7e912a..a929261dd7410 100644 --- a/pkg/sources/sources.go +++ b/pkg/sources/sources.go @@ -38,6 +38,14 @@ type Chunk struct { Verify bool } +// GitSourceMetadata is the getter set (repository, commit, file) shared by Git-based source metadata. +// It is meant to match the Git, Azure, Bitbucket, GitHub, and GitLab metadata types (TODO confirm each one). +type GitSourceMetadata interface { + GetRepository() string + GetCommit() string + GetFile() string +} + // ChunkingTarget specifies criteria for a targeted chunking process. // Instead of collecting data indiscriminately, this struct allows the caller // to specify particular subsets of data they're interested in. This becomes