Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip detectors for known bad chunks #1741

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions pkg/detectors/detectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"errors"
"math/big"
"net/url"
"regexp"
"strings"
"unicode"

Expand Down Expand Up @@ -44,6 +45,27 @@ type CustomResultsCleaner interface {
ShouldCleanResultsIrrespectiveOfConfiguration() bool
}

// ConditionalDetector is an optional interface that a detector can implement to
// skip chunks based on specific criteria. Detectors that do not implement it
// are unaffected.
type ConditionalDetector interface {
// ShouldScanChunk reports whether the detector should run against the
// given chunk. Returning false signals that the detector should be
// skipped for that chunk.
ShouldScanChunk(chunk sources.Chunk) bool
}

// lockFilePat matches a slash-separated path whose final element is exactly
// package.json, package-lock.json, or yarn.lock.
var lockFilePat = regexp.MustCompile(`(^|/)(package(-lock)?\.json|yarn\.lock)$`)

// Conditions groups reusable predicates for ConditionalDetector
// implementations. An anonymous struct serves as a namespace here because Go
// has no static members (see https://stackoverflow.com/a/55390104).
var Conditions = struct {
	// IsLockFile reports whether path matches lockFilePat. Such files are a
	// common source of false positives; see
	// https://github.com/trufflesecurity/trufflehog/issues/1460
	IsLockFile func(path string) bool
}{
	// The pattern's MatchString method already has the required signature,
	// so it is used directly instead of being wrapped in a closure.
	IsLockFile: lockFilePat.MatchString,
}

// Versioner is an optional interface that a detector can implement to
// differentiate instances of the same detector type.
type Versioner interface {
Expand Down
10 changes: 10 additions & 0 deletions pkg/detectors/parseur/parseur.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"net/http"
"strings"

"github.com/trufflesecurity/trufflehog/v3/pkg/sources"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand All @@ -18,6 +20,7 @@ type Scanner struct {

// Ensure the Scanner satisfies the interface at compile time
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.ConditionalDetector = (*Scanner)(nil)

var (
defaultClient = common.SaneHttpClient()
Expand All @@ -32,6 +35,13 @@ func (s Scanner) Keywords() []string {
return []string{"parseur"}
}

// ShouldScanChunk reports whether this detector should run against chunk.
// When Git-style metadata can be derived from the chunk, the detector is
// skipped for files that detectors.Conditions.IsLockFile recognises; chunks
// without such metadata are always scanned.
func (s Scanner) ShouldScanChunk(chunk sources.Chunk) bool {
	gitMeta, isGit := sources.NewGitSourceMetadata(chunk.SourceType, chunk.SourceMetadata)
	if !isGit {
		// No file path is available to test, so do not skip.
		return true
	}
	return !detectors.Conditions.IsLockFile(gitMeta.File)
}

// FromData will find and optionally verify Parseur secrets in a given set of bytes.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)
Expand Down
5 changes: 5 additions & 0 deletions pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,11 @@ func (e *Engine) scannerWorker(ctx context.Context) {
}

for _, detector := range matchingDetectors {
if d, ok := detector.Detector.(detectors.ConditionalDetector); ok && !d.ShouldScanChunk(*chunk) {
ctx.Logger().V(4).Info("skipping detector for chunk", "detector", detector.Type().String(), "chunk", chunk)
continue
}

decoded.Chunk.Verify = e.shouldVerifyChunk(sourceVerify, detector, e.detectorVerificationOverrides)
wgDetect.Add(1)
e.detectableChunksChan <- detectableChunk{
Expand Down
36 changes: 36 additions & 0 deletions pkg/sources/sources.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,42 @@ type Chunk struct {
Verify bool
}

// GitSourceMetadata defines a common struct for Git-based source metadata.
// NewGitSourceMetadata fills it from the source-specific metadata of the Git,
// Azure Repos, Bitbucket, Gerrit, GitHub, and GitLab source types.
type GitSourceMetadata struct {
// Repository is the repository reported by the source's metadata (for
// Gerrit, the project).
Repository string
// Commit is the commit reported by the source's metadata.
Commit string
// File is the file reported by the source's metadata.
File string
Comment on lines +50 to +52
Copy link
Contributor Author

@rgmz rgmz Jan 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using a struct is much less satisfying than an interface. >:(

Also, should these be pointers?

}

// NewGitSourceMetadata converts source-specific metadata into the common
// GitSourceMetadata form.
//
// source selects which metadata variant is read from data. The second result
// is true only when source is one of the supported Git-based types (Git,
// Azure Repos, Bitbucket, Gerrit, GitHub, GitLab) and data actually carries
// the matching variant. In every other case, including a nil data, it returns
// (nil, false).
func NewGitSourceMetadata(source sourcespb.SourceType, data *source_metadatapb.MetaData) (*GitSourceMetadata, bool) {
	if data == nil {
		return nil, false
	}

	// The variant accessors (GetGit, GetGithub, ...) are expected to return
	// nil when data holds a different variant than source implies. Each case
	// checks for that so callers never receive an all-empty struct reported
	// as valid metadata.
	switch source {
	case sourcespb.SourceType_SOURCE_TYPE_GIT:
		if md := data.GetGit(); md != nil {
			return &GitSourceMetadata{
				Repository: md.GetRepository(),
				Commit:     md.GetCommit(),
				File:       md.GetFile(),
			}, true
		}
	case sourcespb.SourceType_SOURCE_TYPE_AZURE_REPOS:
		if md := data.GetAzureRepos(); md != nil {
			return &GitSourceMetadata{
				Repository: md.GetRepository(),
				Commit:     md.GetCommit(),
				File:       md.GetFile(),
			}, true
		}
	case sourcespb.SourceType_SOURCE_TYPE_BITBUCKET:
		if md := data.GetBitbucket(); md != nil {
			return &GitSourceMetadata{
				Repository: md.GetRepository(),
				Commit:     md.GetCommit(),
				File:       md.GetFile(),
			}, true
		}
	case sourcespb.SourceType_SOURCE_TYPE_GERRIT:
		// Gerrit metadata exposes a project rather than a repository.
		if md := data.GetGerrit(); md != nil {
			return &GitSourceMetadata{
				Repository: md.GetProject(),
				Commit:     md.GetCommit(),
				File:       md.GetFile(),
			}, true
		}
	case sourcespb.SourceType_SOURCE_TYPE_GITHUB:
		if md := data.GetGithub(); md != nil {
			return &GitSourceMetadata{
				Repository: md.GetRepository(),
				Commit:     md.GetCommit(),
				File:       md.GetFile(),
			}, true
		}
	case sourcespb.SourceType_SOURCE_TYPE_GITLAB:
		if md := data.GetGitlab(); md != nil {
			return &GitSourceMetadata{
				Repository: md.GetRepository(),
				Commit:     md.GetCommit(),
				File:       md.GetFile(),
			}, true
		}
	}

	// Unsupported source type, or the metadata variant did not match it.
	return nil, false
}

// ChunkingTarget specifies criteria for a targeted chunking process.
// Instead of collecting data indiscriminately, this struct allows the caller
// to specify particular subsets of data they're interested in. This becomes
Expand Down