From 6083cf3d9a7f63056ef1597c76a0802e5a3cf32d Mon Sep 17 00:00:00 2001 From: Richard Gomez Date: Sun, 1 Dec 2024 13:27:23 -0500 Subject: [PATCH] feat(npm): overhaul detector --- pkg/common/http.go | 13 + pkg/decoders/escaped_unicode.go | 2 +- pkg/detectors/npm/basic/.gitkeep | 0 pkg/detectors/npm/registry/parse.go | 236 ++++++++ pkg/detectors/npm/registry/parse_test.go | 493 +++++++++++++++++ pkg/detectors/npm/registry/registry.go | 114 ++++ pkg/detectors/npm/registry/registry_test.go | 521 ++++++++++++++++++ pkg/detectors/npm/registry/type.go | 46 ++ pkg/detectors/npm/registry/verify.go | 308 +++++++++++ pkg/detectors/npm/token/detector.go | 142 +++++ pkg/detectors/npm/token/new/new.go | 72 +++ .../token/new/new_integration_test.go} | 4 +- pkg/detectors/npm/token/new/new_test.go | 80 +++ pkg/detectors/npm/token/uuid/uuid.go | 72 +++ .../token/uuid/uuid_integration_test.go} | 4 +- pkg/detectors/npm/token/uuid/uuid_test.go | 127 +++++ pkg/detectors/npm/version.go | 31 ++ pkg/detectors/npmtoken/npmtoken.go | 78 --- pkg/detectors/npmtoken/npmtoken_test.go | 91 --- pkg/detectors/npmtokenv2/npmtokenv2.go | 79 --- pkg/detectors/npmtokenv2/npmtokenv2_test.go | 81 --- pkg/engine/defaults/defaults.go | 8 +- pkg/engine/engine.go | 2 +- 23 files changed, 2265 insertions(+), 339 deletions(-) create mode 100644 pkg/detectors/npm/basic/.gitkeep create mode 100644 pkg/detectors/npm/registry/parse.go create mode 100644 pkg/detectors/npm/registry/parse_test.go create mode 100644 pkg/detectors/npm/registry/registry.go create mode 100644 pkg/detectors/npm/registry/registry_test.go create mode 100644 pkg/detectors/npm/registry/type.go create mode 100644 pkg/detectors/npm/registry/verify.go create mode 100644 pkg/detectors/npm/token/detector.go create mode 100644 pkg/detectors/npm/token/new/new.go rename pkg/detectors/{npmtokenv2/npmtokenv2_integration_test.go => npm/token/new/new_integration_test.go} (99%) create mode 100644 pkg/detectors/npm/token/new/new_test.go create mode 100644 
pkg/detectors/npm/token/uuid/uuid.go rename pkg/detectors/{npmtoken/npmtoken_integration_test.go => npm/token/uuid/uuid_integration_test.go} (99%) create mode 100644 pkg/detectors/npm/token/uuid/uuid_test.go create mode 100644 pkg/detectors/npm/version.go delete mode 100644 pkg/detectors/npmtoken/npmtoken.go delete mode 100644 pkg/detectors/npmtoken/npmtoken_test.go delete mode 100644 pkg/detectors/npmtokenv2/npmtokenv2.go delete mode 100644 pkg/detectors/npmtokenv2/npmtokenv2_test.go diff --git a/pkg/common/http.go b/pkg/common/http.go index 782ac1747c7b..b02e38c0e444 100644 --- a/pkg/common/http.go +++ b/pkg/common/http.go @@ -3,6 +3,7 @@ package common import ( "crypto/tls" "crypto/x509" + "errors" "io" "net" "net/http" @@ -10,6 +11,7 @@ import ( "time" "github.com/hashicorp/go-retryablehttp" + "github.com/trufflesecurity/trufflehog/v3/pkg/feature" ) @@ -230,3 +232,14 @@ func SaneHttpClientTimeOut(timeout time.Duration) *http.Client { httpClient.Transport = NewCustomTransport(nil) return httpClient } + +// ErrIsNoSuchHost returns true if |err| is a determinate DNS lookup error. +// https://pkg.go.dev/net#DNSError +// +// WARNING: some well-known hosts, such as github.com, use a TTL of 0. +// It is possible for the lookup to fail intermittently under certain circumstances +// (e.g., local configuration issue or poor network conditions). +func ErrIsNoSuchHost(err error) bool { + var dnsErr *net.DNSError + return errors.As(err, &dnsErr) && dnsErr.IsNotFound +} diff --git a/pkg/decoders/escaped_unicode.go b/pkg/decoders/escaped_unicode.go index 31899ba1d7d2..02e92b603f4f 100644 --- a/pkg/decoders/escaped_unicode.go +++ b/pkg/decoders/escaped_unicode.go @@ -18,7 +18,7 @@ var _ Decoder = (*EscapedUnicode)(nil) // https://dencode.com/en/string/unicode-escape var ( // Standard Unicode notation. 
- //https://unicode.org/standard/principles.html + // https://unicode.org/standard/principles.html codePointPat = regexp.MustCompile(`\bU\+([a-fA-F0-9]{4}).?`) // Common escape sequence used in programming languages. diff --git a/pkg/detectors/npm/basic/.gitkeep b/pkg/detectors/npm/basic/.gitkeep new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/pkg/detectors/npm/registry/parse.go b/pkg/detectors/npm/registry/parse.go new file mode 100644 index 000000000000..06a3b52785e4 --- /dev/null +++ b/pkg/detectors/npm/registry/parse.go @@ -0,0 +1,236 @@ +package registry + +import ( + "fmt" + "strings" + + regexp "github.com/wasilibs/go-re2" +) + +var ( + domainPat = `(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z]{2,}` // this doesn't match single segment hosts (e.g., localhost) + ipV4Pat = `(?:[0-9]{1,3}\.){3}[0-9]{1,3}` // overly permissive but should be fine in context + hostPat = fmt.Sprintf(`(?:%s|%s)(?::\d{1,5})?`, domainPat, ipV4Pat) + + knownRegistryPat = func() *regexp.Regexp { + var sb strings.Builder + sb.WriteString(`(?i)((?:https?:)?//)(?:`) + // `registry.yarnpkg.com` is a reverse-proxy (https://github.com/yarnpkg/yarn/issues/889) + // `registry.npmmirror.com` and `registry.npm.taobao.org` are mirrors (https://stackoverflow.com/a/73147820) + sb.WriteString(`(registry\.(?:npmjs\.(?:com|eu|org(?:\.au)?)|npmmirror\.com|npm\.taobao\.org|yarnpkg\.com))`) + artifactoryPath := `/(?:artifactory|[a-z0-9._-]+)/api/npm/[a-z][a-z0-9._-]+` + artifactoryOldPath := `/(?:artifactory|[a-z0-9._-]+)/v\d\.\d/artifacts/[a-z][a-z0-9._-]+` // appears to be a path from older versions. 
+ sb.WriteString(`|([a-z0-9]+(?:[a-z0-9-]+[a-z0-9])?\.jfrog\.io` + artifactoryPath + `)`) // cloud + sb.WriteString(fmt.Sprintf(`|(%s(?:%s|%s))`, hostPat, artifactoryPath, artifactoryOldPath)) // hosted + // https://help.sonatype.com/repomanager2/node-packaged-modules-and-npm-registries + sb.WriteString(`|(` + hostPat + `/nexus/content/(?:groups|repositories)/[a-z0-9-][a-z0-9._-]+)`) + // https://help.sonatype.com/repomanager3/nexus-repository-administration/formats/npm-registry/configuring-npm + // TODO: Handle non-standard subdirectories like `example.com/artifacts/repository/npm-public` + sb.WriteString(`|(` + hostPat + `/(?:nexus/)?repository/[a-z0-9-][a-z0-9._-]+)`) + // https://docs.gitlab.com/ee/user/packages/npm_registry/ + sb.WriteString(`|(` + hostPat + `/api/v4/(?:groups/\d+/-/|projects/\d+/)?packages/npm)`) + // https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-npm-registry + sb.WriteString(`|(npm\.pkg\.github\.com)`) + // https://learn.microsoft.com/en-us/azure/devops/artifacts/get-started-npm?view=azure-devops&tabs=Windows + // https://stackoverflow.com/a/73495381 + azurePat := `pkgs\.dev\.azure\.com/[a-z0-9._-]+(?:/[a-z0-9._-]+)?` + vsPat := `[a-z0-9-]+\.pkgs\.visualstudio\.com(?:/[a-z0-9._-]+)?` + sb.WriteString(fmt.Sprintf(`|((?:%s|%s)/_packaging/[a-z0-9._-]+/npm(?:/registry)?)`, azurePat, vsPat)) + // https://www.jetbrains.com/help/space/using-an-npm-registry-with-npmjs-com.html + sb.WriteString(`|(npm\.pkg\.jetbrains\.space/[a-z0-9][a-z0-9-]{0,61}[a-z0-9]/p/[a-z0-9][a-z0-9-]*[a-z0-9]/[a-z0-9][a-z0-9-]+[a-z0-9])`) + // Only contain letters, numbers, and hyphens.
+ // Must begin and end with letter or number + sb.WriteString(`|((?:[a-z0-9][a-z0-9-]*[a-z0-9]-)?npm\.pkg\.dev/[a-z0-9-]+/[a-z0-9._-]+)`) + sb.WriteString(`|(npm(?:-proxy)?\.fury\.io/[a-z0-9._-]+)`) + sb.WriteString(`|([a-z0-9-]+\.d\.codeartifact\.[a-z-]+[^-]-\d\.amazonaws\.com/npm/[a-z0-9_-]+)`) + sb.WriteString(`)`) + + return regexp.MustCompile(sb.String()) + }() + + genericRegistryPat = func() *regexp.Regexp { + urlPat := fmt.Sprintf(`%s(?:/[a-z0-9._-]+)*`, hostPat) + + var sb strings.Builder + sb.WriteString(`(?i)`) + // .npmrc + // //(npm.example.com)/:_authToken=... + // TODO: match based on prefix or suffix? + // sb.WriteString(fmt.Sprintf(`(?:^|['"\x60 \t;#-])(//%s)(?:/?:_auth|_password|user)?`, urlPat)) + sb.WriteString(fmt.Sprintf(`(?:^|['"\x60 \t;#-])(//%s)(?:/?:_auth|_password|user)?`, urlPat)) + // registry=https://npm.example.com/ or @scope:registry=https://npm.example.com/ + sb.WriteString(fmt.Sprintf(`|registry.{1,50}?['"]?(https?://%s)/?['"]?`, urlPat)) + // @scope=https://npm.example.com/ (rare) + sb.WriteString(fmt.Sprintf(`|@[a-z0-9\-_]{1,50}['"]?[ \t]*(?:=[ \t]*)?['"]?(https?://%s)/?['"]?`, urlPat)) + // .yarnrc.toml + sb.WriteString(fmt.Sprintf(`|npmRegistryServer['"]?[ \t]*:[ \t]*['"]?(https?://%s)/?(?:['"]|\s)`, urlPat)) + sb.WriteString(fmt.Sprintf(`|npmRegistries['"]?[ \t]*:(?:.|\s){0,50}((?:https?:)?//%s)/?['"]?:`, urlPat)) + // .upmconfig.toml + sb.WriteString(fmt.Sprintf(`|\[npmAuth\.['"](https?://%s)/?['"]\]`, urlPat)) + + pat := regexp.MustCompile(sb.String()) + // fmt.Println(pat.String()) + // Sanity check to make sure the pattern doesn't contain a mistake. + if pat.NumSubexp() != 6 { + panic(fmt.Sprintf("Pattern |genericRegistryPat| should have 6 capture groups but has %d", pat.NumSubexp())) + } + return pat + }() + + // Common false-positives that can be safely ignored. 
+ invalidRegistryPat = func() *regexp.Regexp { + var sb strings.Builder + sb.WriteString("(?i)(") + sb.WriteString(`contoso\.pkgs\.visualstudio\.com/?`) + sb.WriteString(`|registry\.(blah\.(com|edu|eu|foo|org)|foo(bar)?\.(bar|cc|com|eu)|last\.thing|myorg\.com)/?`) + // sb.WriteString(`|r(egistry\.(npmmirror\.com|npm\.taobao\.org)|\.cnpmjs\.org)/?`) + sb.WriteString(`|\.terraform\.io/?`) + sb.WriteString(`|\.?lvh\.me/?`) + sb.WriteString(`|\.?example\.(com|org)/?`) + sb.WriteString(`|my[.-]registry\.com/?`) + sb.WriteString(`|some\.(host|(other\.)?registry)/?`) + sb.WriteString(`|npm\.im/?`) + sb.WriteString(`|npm\.mycustomregistry\.com/?`) + sb.WriteString(`|(www\.npmjs\.com|browsenpm\.org)/?`) + sb.WriteString(`|travis-ci\.org/`) + sb.WriteString(`|(api|developer|help|www)\.github\.com/`) + sb.WriteString(")") + return regexp.MustCompile(sb.String()) + }() +) + +// parseKnownRegistryURI +func parseKnownRegistryURI(data string, registryUri string) *Info { + matches := knownRegistryPat.FindStringSubmatch(registryUri) + if len(matches) == 0 { + return nil + } + + // Skip the first two indices: 0 is the entire match, 1 is the protocol. + index, uri := firstNonEmptyMatch(matches, 2) + info := &Info{ + Type: Type(index - 1), + } + info.Scheme, info.Uri = parseRegistryURLScheme(data, uri) + + // Ensure that things like "registry.yarnpkg.org" get substituted with the proper npm URL. + if info.Type == npm && info.Uri != defaultInfo.Uri { + info.Uri = defaultInfo.Uri + } + + if info.Uri == "" { + fmt.Printf("[k] input: '%s', parsed='%s', info='%s'\n", registryUri, uri, info.Uri) + } + + // Normalize the URI.
+ if info.Type == npm && info.Scheme != HttpsScheme { + info.Scheme = HttpsScheme + } else if info.Type == artifactoryCloud && info.Scheme != HttpsScheme { + info.Scheme = HttpsScheme + } else if info.Type == githubCloud && info.Scheme != HttpsScheme { + info.Scheme = HttpsScheme + } else if info.Type == azure { + if info.Scheme != HttpsScheme { + info.Scheme = HttpsScheme + } + if !strings.HasSuffix(strings.ToLower(info.Uri), "/registry") { + info.Uri = info.Uri + "/registry" + } + } else if info.Type == jetbrains && info.Scheme != HttpsScheme { + info.Scheme = HttpsScheme + } else if info.Type == googleArtifactRegistry && info.Scheme != HttpsScheme { + info.Scheme = HttpsScheme + } else if info.Type == gemfury && info.Scheme != HttpsScheme { + info.Scheme = HttpsScheme + } else if info.Type == awsCodeArtifact && info.Scheme != HttpsScheme { + info.Scheme = HttpsScheme + } + + // Ignore known false-positives. + if invalidRegistryPat.MatchString(uri) { + return nil + } + + return info +} + +// parseUnknownRegistryURI +func parseUnknownRegistryURI(data string, registryUri string) *Info { + scheme, uri := parseRegistryURLScheme(data, registryUri) + info := &Info{ + Type: other, + Scheme: scheme, + Uri: uri, + } + + if info.Uri == "" { + fmt.Printf("[uk] input: '%s', parsed='%s'\n", registryUri, uri) + } + // Ignore known false-positives. + if invalidRegistryPat.MatchString(uri) { + return nil + } + + return info +} + +// parseRegistryURLScheme attempts to find the Scheme of the provided |uri|. +// If |uri| does not have a scheme, it looks for context in the |data| chunk. +func parseRegistryURLScheme(data string, uri string) (Scheme, string) { + var ( + scheme = UnknownScheme + uriWithoutScheme string + ) + // If the match starts with "http" or "https", we can be confident about the scheme. + // Otherwise, it is UnknownScheme. + u := strings.ToLower(uri) // for case-insensitive comparison. Might not be the best way. 
+ if strings.HasPrefix(u, "https://") { + scheme = HttpsScheme + uriWithoutScheme = uri[8:] + } else if strings.HasPrefix(u, "http://") { + scheme = HttpScheme + uriWithoutScheme = uri[7:] + } else if strings.HasPrefix(u, "//") { + uriWithoutScheme = uri[2:] + } else { + uriWithoutScheme = uri + } + + // If the Scheme is UnknownScheme, look for other instances of the Uri that might have the scheme. + // + // scheme -> registry=https://example.com/repository/npm-proxy/ + // no scheme -> //example.com/repository/npm-proxy/:_authToken=123456 + if scheme == UnknownScheme { + var ( + uriPat = regexp.MustCompile(`(?i)(https?)://` + regexp.QuoteMeta(uriWithoutScheme)) // quote the URI so '.' and other metacharacters match literally + schemes = make(map[string]struct{}) + ) + for _, m := range uriPat.FindAllStringSubmatch(data, -1) { + schemes[strings.ToLower(m[1])] = struct{}{} + } + // Decisively HTTP or HTTPS; nothing or both is equally useless. + if len(schemes) == 1 { + if _, ok := schemes["https"]; ok { + scheme = HttpsScheme + } else { + scheme = HttpScheme + } + } + } + return scheme, uriWithoutScheme +} + +// firstNonEmptyMatch returns the index and value of the first non-empty match. +// If no non-empty match is found, it will return: 0, "". +func firstNonEmptyMatch(matches []string, skip int) (int, string) { + if len(matches) < skip { + return 0, "" + } + // The first index is the entire matched string.
+ for i, val := range matches[skip:] { + if val != "" { + return i + skip, val + } + } + return 0, "" +} diff --git a/pkg/detectors/npm/registry/parse_test.go b/pkg/detectors/npm/registry/parse_test.go new file mode 100644 index 000000000000..e30ef7917649 --- /dev/null +++ b/pkg/detectors/npm/registry/parse_test.go @@ -0,0 +1,493 @@ +package registry + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/assert" + regexp "github.com/wasilibs/go-re2" +) + +func TestHostPat(t *testing.T) { + tests := map[string]struct { + input string + expected string + }{ + // Valid - Domain + "domain - default registry": { + input: `registry.npmjs.org`, + expected: "registry.npmjs.org", + }, + "domain - dashes": { + input: `nexus.pas-mini.io`, + expected: "nexus.pas-mini.io", + }, + "domain - with port": { + input: `nexus3.my-company.tk:8081`, + expected: "nexus3.my-company.tk:8081", + }, + // Valid - IPv4 + "": { + input: `30.125.69.246`, + expected: "30.125.69.246", + }, + "ip - with port": { + input: `10.10.69.203:8081`, + expected: "10.10.69.203:8081", + }, + + // Invalid + "invalid - localhost with port": { + input: "localhost:8080", + }, + } + + p := regexp.MustCompile(hostPat) + for name, test := range tests { + t.Run(name, func(t *testing.T) { + actual := p.FindString(test.input) + if actual == "" { + if test.expected != "" { + t.Errorf("expecting %s but got nothing", test.expected) + return + } + return + } + + assert.Equal(t, test.expected, actual) + }) + } +} + +func TestNpm_KnownRegistryPat(t *testing.T) { + cases := map[Type]map[string][]string{ + npm: { + "//registry.npmjs.org/": {"//", "registry.npmjs.org"}, + "https://registry.npmjs.org/": {"https://", "registry.npmjs.org"}, + ` resolved "https://registry.yarnpkg.com/abstract-logging/-/abstract-logging-2.0.1.tgz#6b0c371df212db7129b57d2e7fcf282b8bf1c839"`: {"https://", "registry.yarnpkg.com"}, + }, + artifactoryHosted: { + 
"https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm/": {"https://", "artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm"}, + "registry=http://10.85.59.116/artifactory/v1.0/artifacts/npm/": {"http://", "10.85.59.116/artifactory/v1.0/artifacts/npm"}, + }, + artifactoryCloud: { + "//voomp.jfrog.io/artifactory/api/npm/vk-common-bk/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d": {"//", "voomp.jfrog.io/artifactory/api/npm/vk-common-bk"}, + "//trfhog.jfrog.io/trfhog/api/npm/npm/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d": {"//", "trfhog.jfrog.io/geckorobotics/api/npm/npm"}, + }, + nexusRepo2: { + "http://nexus.zenoss.eng:8081/nexus/content/repositories/npm/": {"http://", "nexus.zenoss.eng:8081/nexus/content/repositories/npm"}, + "http://nexus.pas-mini.io/nexus/content/repositories/npm-private/": {"http://", "nexus.pas-mini.io/nexus/content/repositories/npm-private"}, + }, + nexusRepo3: { + "registry=http://30.125.69.246/repository/npm-group/": {"http://", "30.125.69.246/repository/npm-group"}, + "https://repo.huaweicloud.com/repository/npm/": {"https://", "repo.huaweicloud.com/repository/npm"}, + "http://artifacts.lan.tribe56.com:8081/repository/npm-proxy/@babel/": {"http://", "artifacts.lan.tribe56.com:8081/repository/npm-proxy"}, + "http://10.10.69.203:8081/repository/npm-group/": {"http://", "10.10.69.203:8081/repository/npm-group"}, + "//nexus.public.prd.int.corp-devops.co.uk/repository/moon/": {"//", "nexus.public.prd.int.corp-devops.co.uk/repository/moon"}, + "//ec2-18-225-132-112.us-east-2.compute.amazonaws.com:8081/repository/postboard-server/": {"//", "ec2-18-225-132-112.us-east-2.compute.amazonaws.com:8081/repository/postboard-server"}, + `- name: NPM_PUBLISH_URL + description: "Maven repository url to where jenkins will upload releases artifacts" + required: true + value: "http://nexus3.my-company.tk:8081/repository/npm-releases/" +- name: NPM_PUBLISH_TOKEN + description: "Npm user used when upload artifacts" 
+ required: true + value: "NpmToken.b5505337-ffb2-3fac-8b3a-fcd81b8bb8fb"`: {"http://", "nexus3.my-company.tk:8081/repository/npm-releases"}, + `
1
2
3
[root@nexus3 ~]# cat ~/.npmrc
registry=http://registry.blog.co/repository/npm-group/
//registry.blog.co/repository/npm-group/:_authToken=NpmToken.72b83be3-4b24-3dd1-850f-056cd78bb513
`: {"http://", "registry.blog.co/repository/npm-group"}, + }, + gitlab: { + `"https://gitlab.matrix.org/api/v4/projects/27/packages/npm/@matrix-org/olm/-/@matrix-org/olm-3.2.3.tgz",`: {"https://", "gitlab.matrix.org/api/v4/projects/27/packages/npm"}, + "https://gitlab.com/api/v4/groups/123456/-/packages/npm/": {"https://", "gitlab.com/api/v4/groups/123456/-/packages/npm"}, // couldn't find a real example of this + }, + githubCloud: { + "https://npm.pkg.github.com/": {"https://", "npm.pkg.github.com"}, + "https://npm.pkg.github.com/company": {"https://", "npm.pkg.github.com"}, + }, + azure: { + "//pkgs.dev.azure.com/company/_packaging/feed/npm/": {"//", "pkgs.dev.azure.com/company/_packaging/feed/npm"}, + "https://pkgs.dev.azure.com/company/project/_packaging/feed/npm/registry/": {"https://", "pkgs.dev.azure.com/company/project/_packaging/feed/npm/registry"}, + "https://pkgs.dev.azure.com/company/project/_packaging/feed/npm/registry": {"https://", "pkgs.dev.azure.com/company/project/_packaging/feed/npm/registry"}, + "//pkgs.dev.azure.com/company/b675ba30-3f64-41c8-b35d-79c162dc3fd7/_packaging/feed/npm/": {"//", "pkgs.dev.azure.com/company/b675ba30-3f64-41c8-b35d-79c162dc3fd7/_packaging/feed/npm"}, + "//tso-na.pkgs.visualstudio.com/7bc545d8-bf9c-477e-bb91-17a982c30c2e/_packaging/feed/npm/registry/": {"//", "ftso-na.pkgs.visualstudio.com/7bc545d8-bf9c-477e-bb91-17a982c30c2e/_packaging/feed/npm/registry"}, + "//company.pkgs.visualstudio.com/project/_packaging/feed/npm/registry/": {"//", "company.pkgs.visualstudio.com/project/_packaging/feed/npm/registry"}, + "//company.pkgs.visualstudio.com/_packaging/feed/npm/registry/:username=bart": {"//", "company.pkgs.visualstudio.com/_packaging/feed/npm/registry"}, + }, + jetbrains: { + "//npm.pkg.jetbrains.space/multiplier/p/multiplier/npm/": {"//", "npm.pkg.jetbrains.space/multiplier/p/multiplier/npm"}, + "https://npm.pkg.jetbrains.space/dwan/p/main/npmempty/": {"https://", "npm.pkg.jetbrains.space/dwan/p/main/npmempty"}, + 
"https://npm.pkg.jetbrains.space/public/p/jetbrains-gamedev/jetbrains-gamedev/": {"https://", "npm.pkg.jetbrains.space/public/p/jetbrains-gamedev/jetbrains-gamedev"}, + }, + googleArtifactRegistry: { + "https://us-west1-npm.pkg.dev/company/project": {"https://", "us-west1-npm.pkg.dev/company/project"}, + "https://npm.pkg.dev/company/project": {"https://", "npm.pkg.dev/company/project"}, + "//europe-west4-npm.pkg.dev/corp-staging/corp-libs/:username=oauth2accesstoken": {"//", "europe-west4-npm.pkg.dev/corp-staging/corp-libs"}, + }, + gemfury: { + "//npm.fury.io/dependabot/": {"//", "npm.fury.io/dependabot"}, + }, + } + for group, inputs := range cases { + t.Run(group.String(), func(t *testing.T) { + for input, expected := range inputs { + matches := knownRegistryPat.FindStringSubmatch(input) + if len(matches) == 0 { + t.Errorf("no result for %s", input) + return + } + + index, uri := firstNonEmptyMatch(matches, 2) + rType := Type(index - 1) + if rType != group { + t.Errorf("expected type %s, got %s (%s)", group.String(), rType.String(), input) + } + if matches[1] != expected[0] { + t.Errorf("expected prefix %s, got %s (%s)", expected[0], matches[1], input) + } + if uri != expected[1] { + t.Errorf("expected uri %s, got %s (%s)", expected[1], uri, input) + } + } + }) + } +} + +func TestNpm_GenericRegistryPat(t *testing.T) { + cases := map[string]string{ + // .npmrc + "registry = https://npm.company.de:4873/": "https://npm.company.de:4873", + "registry=https://registry.npm.taobao.org/": "https://registry.npm.taobao.org", + `"registry" "https://registry.npmmirror.com/"`: "https://registry.npmmirror.com", + `@company:registry="https://npm.company.io"`: "https://npm.company.io", + "@marketwall:registry=http://10.0.0.13:4873": "http://10.0.0.13:4873", + `"@fortawesome:registry" "https://npm.fontawesome.com/"`: "https://npm.fontawesome.com", + "@example=https://api.bintray.example/npm/mycompany/myregistry": "https://api.bintray.example/npm/mycompany/myregistry", + 
`"@example" "https://api.bintray.example/npm/mycompany/myregistry"`: "https://api.bintray.example/npm/mycompany/myregistry", + "//npm.company.com/:_authToken='fake123'": "//npm.company.com", + "//registry-node.company.com/org/1123600651823311/registry/supermap/:_password=123fake": "//registry-node.company.com/org/1123600651823311/registry/supermap", + `"//npm.fontawesome.com/:_authToken" "XXXXXXX-my-token"`: "//npm.fontawesome.com", + `"//npm.fontawesome.com:_authToken" "XXXXXXX-my-token"`: "//npm.fontawesome.com", + `registry=http://55825a54e4454.registry.net:8443/`: "http://55825a54e4454.registry.net:8443", + // yarnrc.yml + `npmScopes: + "my-company": + npmAlwaysAuth: true + npmAuthToken: xxx-xxx + npmRegistryServer: "https://repo.company.org/npm"`: "https://repo.company.org/npm", + ` await fixture.exec("yarn config set npmRegistryServer http://npm.corp.xyz:8080");`: "http://npm.corp.xyz:8080", + `yarn config set npmScopes --json '{ "storybook": { "npmRegistryServer": "http://repo.company.org:6001/" } }'`: "http://repo.company.org:6001", + `yarn config set npmScopes.my-org.npmRegistryServer "https://repo.company.org/npm/nested"`: "https://repo.company.org/npm/nested", + ` npmScopes: + company: + npmRegistryServer: '${METAMASK_NPM_REGISTRY:-https://your.company.com/private/registry}'`: "https://your.company.com/private/registry", + // upmconfig.toml + `[npmAuth."https://api.bintray.com/npm/joe-company/my-registry"]`: "https://api.bintray.com/npm/joe-company/my-registry", + `echo "[npmAuth.'https://your.company.com/private/registry/']" >> ~/.upmconfig.toml`: "https://your.company.com/private/registry", + + // Invalid + `# token-substitute + +[![Build Status](https://travis-ci.org/trustpilot/node-token-substitute.svg?branch=master)](https://travis-ci.org/trustpilot/node-token-substitute) [![npm](https://img.shields.io/npm/v/token-substitute.svg)](https://www.npmjs.com/package/token-substitute)`: "", + } + for input, expected := range cases { + if 
knownRegistryPat.MatchString(input) { + t.Errorf("matches |knownRegistryPat|: %s", input) + continue + } + + matches := genericRegistryPat.FindStringSubmatch(input) + if len(matches) == 0 && expected != "" { + t.Errorf("received no matches for '%s'\n", input) + continue + } else if len(matches) > 0 && expected == "" { + t.Errorf("match not expected for '%s'\n", input) + continue + } + + _, match := firstNonEmptyMatch(matches, 1) + if match != expected { + t.Errorf("expected '%s', got '%s'\n\t(%s)", expected, match, input) + } + } +} + +func TestNpm_InvalidRegistryPat(t *testing.T) { + cases := []string{ + // npm + // https://github.com/npm/arborist/blob/6bc6c76b4ff156979509bc26a3c50020f69c8c0f/README.md?plain=1#L25 + " '@foo:registry': 'https://registry.foo.com/',", + // short-links + "This library can be installed from the\n[npm registry](https://npm.im/express-rate-limit), or from", + "(https://www.npmjs.com/package/token-substitute", + "www.npmjs.com", + + // NpmMirror.com (read-only mirror) + // "registry=https://registry.npmmirror.com", + // "registry=http://r.cnpmjs.org/", + // " npm i --registry=https://registry.npm.taobao.org", + + // Bun + // https://github.com/oven-sh/bun/blob/693a00dc5b99ad3eefd1d50bfbe3a11ee625a291/docs/install/registries.md?plain=1#L22 + "\"@myorg3\" = { token = \"$npm_token\", url = \"https://registry.myorg.com/\" }", + + // Lerna + // https://github.com/lerna/lerna/blob/3d747a176f632d6e1186e24c216527031c1744e6/e2e/create/src/create.spec.ts#L1047C13-L1049C15 + "\"publishConfig\": {\n \"registry\": \"my-registry.com\"\n },", + + // Renovate + // https://github.com/renovatebot/renovate/blob/b8d06fd3e007027064cfb5e93d0f14dcb7fead4d/lib/modules/datasource/npm/index.spec.ts#L255 + " const npmrc = 'registry=https://npm.mycustomregistry.com/';", + + // Terraform + "registry.terraform.io/providers/hashicorp/google/4.69.1/docs/resources/monitoring_notification_channel", + 
"www.terraform.io/docs/providers/google-beta/r/google_monitoring_notification_channel", + + // Test values from a common npm dependency. + // https://github.com/rexxars/registry-auth-token/blob/main/test/auth-token.test.js + // "registry=http://registry.npmjs.eu/", + "registry=http://registry.foobar.cc/", + "//registry.foobar.com/:username=foobar", + "registry=http://registry.foobar.eu", + "registry=http://registry.foo.bar", + "registry=http://some.host/registry/deep/path", + "//registry.blah.foo:_authToken=whatev", + "//registry.last.thing:_authToken=yep", + "//registry.blah.com/foo:_authToken=whatev", + "//registry.blah.org/foo/bar:_authToken=recurseExactlyOneLevel", + "//registry.blah.edu/foo/bar/baz:_authToken=recurseNoLevel", + "//registry.blah.eu:_authToken=yep", + "//contoso.pkgs.visualstudio.com/_packaging/MyFeed/npm/:_authToken=heider", + // "registry=http://localhost:8770/", + "travis-ci.org/rexxars/registry-auth-token", + + // other common examples + " npm config set registry http://npm.example.com/", + "npm install express --registry http://my.registry.com/ --verbose", + "'//some.other.registry/:_authToken", + "'//some.registry/:_authToken", + "//acme.example.org/:_authToken=TOKEN_FOR_ACME", + "browsenpm.org/package/registry-auth-token", + "https://developer.github.com/v3/media", + "https://api.github.com/orgs/octocat/hooks/1/deliveries", + "https://help.github.com/en/articles/virtual-environments-for-github-actions", + } + for _, input := range cases { + if !invalidRegistryPat.MatchString(input) { + t.Errorf("received match for '%s'\n", input) + } + } +} + +func TestNpm_ParseKnownRegistryUri(t *testing.T) { + cases := map[Type]struct { + data string + uri string + expected *Info + }{ + other: { + data: `//npm.fontawesome.com/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "npm.fontawesome.com", + expected: nil, + }, + npm: { + data: `//registry.yarnpk.org/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//registry.yarnpk.org", + 
expected: &Info{ + Scheme: HttpsScheme, + Uri: "registry.npmjs.org", + }, + }, + artifactoryCloud: { + data: `//company.jfrog.io/company/api/npm/npm/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//company.jfrog.io/company/api/npm/npm", + expected: &Info{ + Scheme: HttpsScheme, + Uri: "company.jfrog.io/company/api/npm/npm", + }, + }, + artifactoryHosted: { + data: "registry=http://artifactory.internal-dev.company.net/artifactory/api/npm/npm/\n//artifactory.internal-dev.company.net/artifactory/api/npm/npm/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d", + uri: "//artifactory.internal-dev.company.net/artifactory/api/npm/npm", + expected: &Info{ + Scheme: HttpScheme, + Uri: "artifactory.internal-dev.company.net/artifactory/api/npm/npm", + }, + }, + nexusRepo2: { + data: "registry=http://nexus.corp.org/nexus/content/repositories/npm-group/\n//nexus.corp.org/nexus/content/repositories/npm-group/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d", + uri: "//nexus.corp.org/nexus/content/repositories/npm-group", + expected: &Info{ + Scheme: HttpScheme, + Uri: "nexus.corp.org/nexus/content/repositories/npm-group", + }, + }, + nexusRepo3: { + data: "registry=https://nexus.corp.org/repository/npm-hosted/\n//nexus.corp.org/repository/npm-hosted/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d", + uri: "//nexus.corp.org/repository/npm-hosted", + expected: &Info{ + Scheme: HttpsScheme, + Uri: "nexus.corp.org/repository/npm-hosted", + }, + }, + gitlab: { + data: "@company:registry=https://gitlab.com/api/v4/projects/12354452/packages/npm/\n//gitlab.com/api/v4/projects/12354452/packages/npm/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d", + uri: "//gitlab.com/api/v4/projects/12354452/packages/npm", + expected: &Info{ + Scheme: HttpsScheme, + Uri: "gitlab.com/api/v4/projects/12354452/packages/npm", + }, + }, + githubCloud: { + data: `//npm.pkg.github.com/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//npm.pkg.github.com", + expected: &Info{ + Scheme: 
HttpsScheme, + Uri: "npm.pkg.github.com", + }, + }, + azure: { + data: `//pkgs.dev.azure.com/company/project/_packaging/feed/npm/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//pkgs.dev.azure.com/company/project/_packaging/feed/npm", + expected: &Info{ + Scheme: HttpsScheme, + Uri: "pkgs.dev.azure.com/company/project/_packaging/feed/npm/registry", + }, + }, + jetbrains: { + data: `//npm.pkg.jetbrains.space/public/p/jetbrains-gamedev/jetbrains-gamedev/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//npm.pkg.jetbrains.space/public/p/jetbrains-gamedev/jetbrains-gamedev", + expected: &Info{ + Scheme: HttpsScheme, + Uri: "npm.pkg.jetbrains.space/public/p/jetbrains-gamedev/jetbrains-gamedev", + }, + }, + googleArtifactRegistry: { + data: `//us-east1-npm.pkg.dev/company-dev-167118/project/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//us-east1-npm.pkg.dev/company-dev-167118/project", + expected: &Info{ + Scheme: HttpsScheme, + Uri: "us-east1-npm.pkg.dev/company-dev-167118/project", + }, + }, + gemfury: { + data: `//npm-proxy.fury.io/user/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//npm-proxy.fury.io/user", + expected: &Info{ + Scheme: HttpsScheme, + Uri: "npm-proxy.fury.io/user", + }, + }, + } + + for group, c := range cases { + t.Run(group.String(), func(t *testing.T) { + actual := parseKnownRegistryURI(c.data, c.uri) + if actual == nil { + if c.expected != nil { + t.Errorf("no result for %s", c.data) + } + return + } + + c.expected.Type = group + if diff := cmp.Diff(c.expected, actual); diff != "" { + t.Errorf("diff: (-expected +actual)\n%s", diff) + } + }) + } +} + +func TestNpm_ParseUnknownRegistryUri(t *testing.T) { + // Not exhaustive, parseUnknownRegistryURI doesn't do much. 
+ cases := []struct { + data string + uri string + expected *Info + }{ + { + data: `//npm.fontawesome.com/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "npm.fontawesome.com", + expected: &Info{ + Type: other, + Scheme: UnknownScheme, + Uri: "npm.fontawesome.com", + }, + }, + { + data: "@fortawesome:registry=https://npm.fontawesome.com\n//npm.fontawesome.com/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d", + uri: "npm.fontawesome.com", + expected: &Info{ + Type: other, + Scheme: HttpsScheme, + Uri: "npm.fontawesome.com", + }, + }, + } + + for _, c := range cases { + actual := parseUnknownRegistryURI(c.data, c.uri) + if actual == nil { + t.Errorf("no result for %s", c.data) + continue + } + + if diff := cmp.Diff(c.expected, actual); diff != "" { + t.Errorf("diff: (-expected +actual)\n%s", diff) + } + } +} + +func TestNpm_ParseRegistryURLScheme(t *testing.T) { + cases := []struct { + data string + uri string + expectedScheme Scheme + expectedUri string + }{ + { + data: `registry=HTTPS://NPM.EXAMPLE.COM`, + uri: "HTTPS://NPM.EXAMPLE.COM", + expectedScheme: HttpsScheme, + expectedUri: "NPM.EXAMPLE.COM", + }, + { + data: `registry=http://npm.example.com/`, + uri: "http://npm.example.com", + expectedScheme: HttpScheme, + expectedUri: "npm.example.com", + }, + { + data: `//repo.example.com/project/npm/:_authToken=abc123`, + uri: "repo.example.com/project/npm", + expectedScheme: UnknownScheme, + expectedUri: "repo.example.com/project/npm", + }, + { + data: `repo.example.com/project/npm`, + uri: "repo.example.com/project/npm", + expectedScheme: UnknownScheme, + expectedUri: "repo.example.com/project/npm", + }, + { + data: "registry=httpS://repo.example.com/project/npm\n//repo.example.com/project/npm/:_authToken=abc123", + uri: "repo.example.com/project/npm", + expectedScheme: HttpsScheme, + expectedUri: "repo.example.com/project/npm", + }, + { + data: "registry=htTp://repo.example.com/project/npm\n//repo.example.com/project/npm/:_authToken=abc123", + uri: 
"repo.example.com/project/npm", + expectedScheme: HttpScheme, + expectedUri: "repo.example.com/project/npm", + }, + } + + for _, c := range cases { + actualScheme, actualUri := parseRegistryURLScheme(c.data, c.uri) + if actualScheme != c.expectedScheme { + t.Errorf("Scheme: expected=%s, actual=%s", c.expectedScheme, actualScheme) + } + if actualUri != c.expectedUri { + t.Errorf("uri: expected=%s, actual=%s", c.expectedUri, actualUri) + } + } +} diff --git a/pkg/detectors/npm/registry/registry.go b/pkg/detectors/npm/registry/registry.go new file mode 100644 index 000000000000..1db27675edd5 --- /dev/null +++ b/pkg/detectors/npm/registry/registry.go @@ -0,0 +1,114 @@ +package registry + +import ( + "fmt" + "regexp" + + "github.com/trufflesecurity/trufflehog/v3/pkg/context" +) + +type Info struct { + Type Type + Scheme Scheme + Uri string +} + +// The Scheme of the registry URL. +type Scheme int + +const ( + UnknownScheme Scheme = iota + HttpScheme + HttpsScheme +) + +// String returns the HTTP prefix that corresponds to the enum: "", "http://", and "https://" respectively. +func (scheme Scheme) String() string { + return [...]string{ + "", + "http://", + "https://", + }[scheme] +} + +var defaultInfo = &Info{ + Type: npm, + Scheme: HttpsScheme, + Uri: "registry.npmjs.org", +} + +// FindTokenURL returns the specific registry associated with the |token| if a high confidence match is found in |data|. +// +// Common configurations: +// - npm: https://docs.npmjs.com/using-private-packages-in-a-ci-cd-workflow#create-and-check-in-a-project-specific-npmrc-file +// - Yarn (TODO) +// - Unity Package Manager (TODO) +func FindTokenURL(data string, token string) *Info { + // .npmrc stores auth as `//registry.com/path/:authToken=$TOKEN + // Therefore, we should be able to correlate a token to a registry with a high degree of confidence. + // TODO: handle other formats, such as Yarn. 
+ registryAuthPat := regexp.MustCompile(fmt.Sprintf( + // language=regexp + `(?i)(//%s(?:/[a-z0-9._-]+)*)/?:(?:_auth(?:Token)?|_password).{1,20}%s`, hostPat, regexp.QuoteMeta(token))) + matches := registryAuthPat.FindStringSubmatch(data) + if len(matches) == 0 { + return nil + } + + // A match was found, attempt to parse it into `Info`. + uri := matches[1] + info := parseKnownRegistryURI(data, uri) + if info == nil { + info = parseUnknownRegistryURI(data, uri) + } + return info +} + +// FindAllURLs returns all URLs in |data| that *look like* npm registries, keyed by their URI. +// These are not associated with a specific token; if none are found and |includeDefault| is true, the default registry is returned instead. +func FindAllURLs(ctx context.Context, data string, includeDefault bool) map[string]*Info { + registries := make(map[string]*Info) + + // Look for known high-confidence matches. + for _, matches := range knownRegistryPat.FindAllStringSubmatch(data, -1) { + var ( + scheme = matches[1] + _, uri = firstNonEmptyMatch(matches, 2) // first two matches are the entire string and protocol/prefix + info = parseKnownRegistryURI(data, scheme+uri) + ) + if info == nil || info.Uri == "" { + continue + } else if _, ok := registries[info.Uri]; ok { + continue + } + + logger(ctx).V(3).Info("Found KNOWN registry URL", "registry", info.Scheme.String()+info.Uri) + registries[info.Uri] = info + } + + // Attempt to parse any other low confidence matches. + for _, matches := range genericRegistryPat.FindAllStringSubmatch(data, -1) { + // Skip known registry patterns, those should have already been handled above.
+ if knownRegistryPat.MatchString(matches[0]) { + continue + } + + var ( + _, uri = firstNonEmptyMatch(matches, 1) // first match is the entire string + info = parseUnknownRegistryURI(data, uri) + ) + if info == nil || info.Uri == "" { + continue + } else if _, ok := registries[info.Uri]; ok { + continue + } + + logger(ctx).V(3).Info("Found UNKNOWN registry URL", "registry", info.Scheme.String()+info.Uri) + registries[info.Uri] = info + } + + if len(registries) == 0 && includeDefault { + registries[defaultInfo.Uri] = defaultInfo + } + return registries +} diff --git a/pkg/detectors/npm/registry/registry_test.go b/pkg/detectors/npm/registry/registry_test.go new file mode 100644 index 000000000000..96e2ca29e044 --- /dev/null +++ b/pkg/detectors/npm/registry/registry_test.go @@ -0,0 +1,521 @@ +package registry + +import ( + "fmt" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/trufflesecurity/trufflehog/v3/pkg/context" +) + +func TestNpm_FindTokenRegistry(t *testing.T) { + cases := map[string]struct { + data string + token string + expected *Info + }{ + ".npmrc / _auth / top-level / no registry": { + data: "_auth = \"cGFzc3dvcmQ=\"\nemail = john.doe@example.com", + token: "cGFzc3dvcmQ=", + expected: nil, + }, + // TODO: Associate top-level auth with top-level registry. 
+ // ".npmrc / _auth / top-level / registry": { + // input: "_auth = \"cGFzc3dvcmQ=\"\nalways-auth = true\nregistry=https://nexus.company.com/repository/npm-group/", + // token: "cGFzc3dvcmQ=", + // expected: &Info{ + // Type: nexusRepo3, + // Scheme: httpsScheme, + // Uri: "nexus.company.com/repository/npm-group", + // }, + // }, + ".npmrc / _auth / scoped / registry": { + data: "\"//artifactory.company.com/artifactory/api/npm/npm/:_auth\"=cGFzc3dvcmQ=\n", + token: "cGFzc3dvcmQ=", + expected: &Info{ + Type: artifactoryHosted, + Scheme: UnknownScheme, + Uri: "artifactory.company.com/artifactory/api/npm/npm", + }, + }, + ".npmrc / _authToken / no trailing slash": { + data: `"//artifactory.company.com/artifactory/api/npm/npm:_authToken" "=cGFzc3dvcmQ="`, + token: "cGFzc3dvcmQ=", + expected: &Info{ + Type: artifactoryHosted, + Scheme: UnknownScheme, + Uri: "artifactory.company.com/artifactory/api/npm/npm", + }, + }, + ".npmrc / _authToken / registry": { + data: `"//artifactory.company.com/artifactory/api/npm/npm/:_authToken" "=cGFzc3dvcmQ="`, + token: "cGFzc3dvcmQ=", + expected: &Info{ + Type: artifactoryHosted, + Scheme: UnknownScheme, + Uri: "artifactory.company.com/artifactory/api/npm/npm", + }, + }, + "cli / _authToken / registry": { + data: "npm config set @company:registry=https://npm.pkg.github.com/\nnpm config set //npm.pkg.github.com/:_authToken=ghp_sS3gaQUHaXSdwojeksTlaIAgJ7jWsn4D7gPO\n", + token: "ghp_sS3gaQUHaXSdwojeksTlaIAgJ7jWsn4D7gPO", + expected: &Info{ + Type: githubCloud, + Scheme: HttpsScheme, + Uri: "npm.pkg.github.com", + }, + }, + "cli / _authToken / multiple registries": { + data: "npm config set @other:registry=https://npm.pkg.github.com/\nnpm config set //npm.pkg.github.com/:_authToken=ghp_sS3gaQUHaXSdwojeksTlaIAgJ7jWsn4D7gPO\nnpm config set \"@fortawesome:registry\" https://npm.fontawesome.com/\nnpm config set \"//npm.fontawesome.com/:_authToken\" cGFzc3dvcmQ=", + token: "cGFzc3dvcmQ=", + expected: &Info{ + Type: other, + Scheme: HttpsScheme, 
+ Uri: "npm.fontawesome.com", + }, + }, + } + + for name, test := range cases { + t.Run(name, func(t *testing.T) { + actual := FindTokenURL(test.data, test.token) + + ignoreOpts := cmpopts.IgnoreFields(Info{}) + if diff := cmp.Diff(test.expected, actual, ignoreOpts); diff != "" { + t.Errorf("diff: (-expected +actual)\n%s", diff) + } + }) + } +} + +type registryTestCase struct { + input string + expected *Info +} + +func TestNpm_FindAllRegistryURLs_Known(t *testing.T) { + cases := map[string]registryTestCase{ + "npm - default": { + input: `NpmToken.35ea93c4-8c57-4a7c-8526-115b9eeeab8a`, + }, + "npm": { + input: "//registry.npmjs.org/:_authToken=cGFzc3dvcmQ=", + expected: &Info{ + Type: npm, + Scheme: HttpsScheme, + Uri: "registry.npmjs.org", + }, + }, + "artifactoryHosted": { + input: `//repo.company.com/artifactory/api/npm/npm-repo/:_password=cGFzc3dvcmQ=`, + expected: &Info{ + Type: artifactoryHosted, + Uri: "repo.company.com/artifactory/api/npm/npm-repo", + }, + }, + "artifactoryCloud": { + input: `//company.jfrog.io/company/api/npm/npm/:_authToken=cGFzc3dvcmQ=`, + expected: &Info{ + Type: artifactoryCloud, + Scheme: HttpsScheme, + Uri: "company.jfrog.io/company/api/npm/npm", + }, + }, + "nexusRepo2 - repository": { + input: `//nexus.company.org:8081/nexus/content/repositories/npm`, + expected: &Info{ + Type: nexusRepo2, + Uri: "nexus.company.org:8081/nexus/content/repositories/npm", + }, + }, + "nexusRepo2 - group": { + input: `//nexus.company.org:8081/nexus/content/groups/npm`, + expected: &Info{ + Type: nexusRepo2, + Uri: "nexus.company.org:8081/nexus/content/groups/npm", + }, + }, + "nexusRepo3": { + input: `//nexus.company.com/repository/npm-proxy`, + expected: &Info{ + Type: nexusRepo3, + Uri: "nexus.company.com/repository/npm-proxy", + }, + }, + "gitlab - project": { + input: `//gitlab.matrix.org/api/v4/projects/27/packages/npm/`, + expected: &Info{ + Type: gitlab, + Uri: "gitlab.matrix.org/api/v4/projects/27/packages/npm", + }, + }, + "gitlab - group": { 
+ input: `//gitlab.com/api/v4/groups/1234/-/packages/npm/`, + expected: &Info{ + Type: gitlab, + Uri: "gitlab.com/api/v4/groups/1234/-/packages/npm", + }, + }, + // This is apparently a thing? No idea, found it in the wild though. + "gitlab - top-level": { + input: `"//code.company.com/api/v4/packages/npm/:_authToken" "ZENNP-123456789"`, + expected: &Info{ + Type: gitlab, + Uri: "code.company.com/api/v4/packages/npm", + }, + }, + "gitlab - .yarnrc.yml - npmRegistries (2)": { + input: ` npmRegistries: + //npm.company.com/api/v4/packages/npm: + npmAlwaysAuth: true + npmAuthToken: ""`, + expected: &Info{ + Type: gitlab, + Scheme: UnknownScheme, + Uri: "npm.company.com/api/v4/packages/npm", + }, + }, + "github": { + input: `//npm.pkg.github.com/`, + expected: &Info{ + Type: githubCloud, + Scheme: HttpsScheme, + Uri: "npm.pkg.github.com", + }, + }, + "azure - org": { + input: `//pkgs.dev.azure.com/company/_packaging/feed/npm/registry/`, + expected: &Info{ + Type: azure, + Scheme: HttpsScheme, + Uri: "pkgs.dev.azure.com/company/_packaging/feed/npm/registry", + }, + }, + "azure - repo": { + input: `//pkgs.dev.azure.com/company/project/_packaging/feed/npm/`, + expected: &Info{ + Type: azure, + Scheme: HttpsScheme, + Uri: "pkgs.dev.azure.com/company/project/_packaging/feed/npm/registry", + }, + }, + "azure - visualstudio": { + input: `//company.pkgs.visualstudio.com/05337347-30ac-46d4-b46f-5f5cb80c6818/_packaging/feed/npm/registry/`, + expected: &Info{ + Type: azure, + Scheme: HttpsScheme, + Uri: "company.pkgs.visualstudio.com/05337347-30ac-46d4-b46f-5f5cb80c6818/_packaging/feed/npm/registry", + }, + }, + "google artifact registry": { + input: `@rbl:registry=https://us-central1-npm.pkg.dev/company/project/ +//us-central1-npm.pkg.dev/company/project/:_authToken="ya29.A0ARrdaM9VpQcc5egcSN7zzEGQLzvz5jZiXEkIDmnsV2RW3KBbhbq8qkRHMUcC6gxknE9LuDW3mt4Dz3teWYXfI-4WGr6_mTQqj60BhAg4sPA7wov7PM-E3QonNwTN9De41ARPJUyvfc8Mi2GVoYzle3MJ_8KNYo4" 
+//us-central1-npm.pkg.dev/company/project/:always-auth=true`, + expected: &Info{ + Type: googleArtifactRegistry, + Scheme: HttpsScheme, + Uri: "us-central1-npm.pkg.dev/company/project", + }, + }, + "jetbrains": { + input: `//npm.pkg.jetbrains.space/company/p/project/repo/`, + expected: &Info{ + Type: jetbrains, + Scheme: HttpsScheme, + Uri: "npm.pkg.jetbrains.space/company/p/project/repo", + }, + }, + "gemfury": { + input: `//npm.fury.io/user/`, + expected: &Info{ + Type: gemfury, + Scheme: HttpsScheme, + Uri: "npm.fury.io/user", + }, + }, + "aws - npmRegistries": { + input: `npmRegistries: + "https://compstak-prod-278693104475.d.codeartifact.us-east-1.amazonaws.com/npm/frontend/": + npmAlwaysAuth: true + npmAuthToken: "${CODEARTIFACT_AUTH_TOKEN}"`, + expected: &Info{ + Type: awsCodeArtifact, + Scheme: HttpsScheme, + Uri: "compstak-prod-278693104475.d.codeartifact.us-east-1.amazonaws.com/npm/frontend", + }, + }, + "aws - npmScopes": { + input: `npmScopes: + compstak: + npmAlwaysAuth: true + npmAuthToken: "${CODEARTIFACT_AUTH_TOKEN}" + npmRegistryServer: https://compstak-prod-278696104475.d.codeartifact.us-east-1.amazonaws.com/npm/frontend`, + expected: &Info{ + Type: awsCodeArtifact, + Scheme: HttpsScheme, + Uri: "compstak-prod-278696104475.d.codeartifact.us-east-1.amazonaws.com/npm/frontend", + }, + }, + } + + for name, tCase := range cases { + var expected Info + if tCase.expected != nil { + expected = *tCase.expected + } else { + expected = Info{} + } + + schemes := [...]Scheme{UnknownScheme, HttpScheme, HttpsScheme} + for _, scheme := range schemes { + var ( + expected = expected + uri = expected.Uri + input string + ) + + if expected.Scheme == UnknownScheme { + expected.Scheme = scheme + } + + if scheme == UnknownScheme { + input = tCase.input + } else if scheme == HttpScheme { + input = fmt.Sprintf("registry=http://%s/\n%s", uri, tCase.input) + } else { + input = fmt.Sprintf("registry=https://%s/\n%s", uri, tCase.input) + } + + t.Run(fmt.Sprintf("%s - %s", 
name, scheme.String()), func(t *testing.T) { + urls := FindAllURLs(context.Background(), input, false) + if len(urls) == 0 && expected.Uri == "" { + return + } else if len(urls) != 1 && expected.Uri != "" { + t.Errorf("expected 1 result, got %d (%v)", len(urls), urls) + return + } + + var actual Info + for _, i := range urls { + actual = *i + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("diff: (-expected +actual)\n%s", diff) + } + }) + } + } +} + +func TestNpm_FindAllRegistryURLs_Unknown(t *testing.T) { + cases := map[string]registryTestCase{ + "nothing": { + input: `NpmToken.35ea93c4-8c57-4a7c-8526-115b9eeeab8a`, + expected: nil, + }, + "package.json - publishConfig": { + input: `"\"publishConfig\": {\n \"registry\": \"http://repository.dsv.myhost/npmjs\"\n },`, + expected: &Info{ + Type: other, + Scheme: HttpScheme, + Uri: "repository.dsv.myhost/npmjs", + }, + }, + "cli - publish registry flag": { + input: `//npm publish --registry http://ec2-18-223-132-112.us-east-2.compute.amazonaws.com:8081/npm/`, + expected: &Info{ + Type: other, + Scheme: HttpScheme, + Uri: "ec2-18-223-132-112.us-east-2.compute.amazonaws.com:8081/npm", + }, + }, + "cli - publish scoped registry flag": { + input: `//npm publish --@myscope:registry=http://internal.company.com/packages/npmjs-registry/`, + expected: &Info{ + Type: other, + Scheme: HttpScheme, + Uri: "internal.company.com/packages/npmjs-registry", + }, + }, + "cli - config registry": { + input: `npm config set registry "https://npm.company.com/"`, + expected: &Info{ + Type: other, + Scheme: HttpsScheme, + Uri: "npm.company.com", + }, + }, + "cli - config scope registry": { + input: `npm config set "@company:registry" "https://npm.company.com/"`, + expected: &Info{ + Type: other, + Scheme: HttpsScheme, + Uri: "npm.company.com", + }, + }, + "cli - config authToken": { + input: `npm config set "//npm.company.com/:_authToken" token123`, + expected: &Info{ + Type: other, + Scheme: UnknownScheme, + Uri: 
"npm.company.com", + }, + }, + ".npmrc - registry": { + input: `"registry=https://npm.company.com/`, + expected: &Info{ + Type: other, + Scheme: HttpsScheme, + Uri: "npm.company.com", + }, + }, + ".npmrc - scope registry": { + input: `@company:registry = https://repo.company.com:8443/`, + expected: &Info{ + Type: other, + Scheme: HttpsScheme, + Uri: "repo.company.com:8443", + }, + }, + ".npmrc - scope registry, no equals": { + input: `"@company:registry" "https://artifacts.company.com/npm/"`, + expected: &Info{ + Type: other, + Scheme: HttpsScheme, + Uri: "artifacts.company.com/npm", + }, + }, + ".npmrc - scope": { + input: `@company = "https://repo.company.com/"`, + expected: &Info{ + Type: other, + Scheme: HttpsScheme, + Uri: "repo.company.com", + }, + }, + ".npmrc - _auth": { + input: `"//npm.company.com/:_auth" = "cGFzc3dvcmQ="`, + expected: &Info{ + Type: other, + Scheme: UnknownScheme, + Uri: "npm.company.com", + }, + }, + ".npmrc - _auth with https context": { + input: `"//npm.company.com/:_auth" = "cGFzc3dvcmQ=" +registry=https://npm.company.com/`, + expected: &Info{ + Type: other, + Scheme: HttpsScheme, + Uri: "npm.company.com", + }, + }, + ".npmrc - _auth with http context": { + input: `"//npm.company.com/:_auth" = "cGFzc3dvcmQ=" +registry=http://npm.company.com/`, + expected: &Info{ + Type: other, + Scheme: HttpScheme, + Uri: "npm.company.com", + }, + }, + ".npmrc - _password": { + input: `//npm.company.com/:_password=cGFzc3dvcmQ=`, + expected: &Info{ + Type: other, + Scheme: UnknownScheme, + Uri: "npm.company.com", + }, + }, + ".npmrc - ip": { + input: `@eventdex:registry=http://4.89.41.88:4873/ +//4.89.41.88:4873/:_authToken="DgcjMOa2QyMunSLr9YDzUA=="`, + expected: &Info{ + Type: other, + Scheme: HttpScheme, + Uri: "4.89.41.88:4873", + }, + }, + // https://docs.unity3d.com/Manual/upm-config-scoped.html + ".upmconfig.toml": { + input: `[npmAuth."https://api.bintray.example/npm/mycompany/myregistry"]`, + expected: &Info{ + Type: other, + Scheme: 
HttpsScheme, + Uri: "api.bintray.example/npm/mycompany/myregistry", + }, + }, + ".yarnrc.yml - npmScopes (1)": { + input: `npmScopes: + fortawesome: + npmAlwaysAuth: true + npmRegistryServer: "https://npm.fontawesome.com/" + npmAuthToken: "${20FCC725-C7FF-4BBF-3DE8-632C89A16C87}"`, + expected: &Info{ + Type: other, + Scheme: HttpsScheme, + Uri: "npm.fontawesome.com", + }, + }, + ".yarnrc.yml - npmRegistries (2)": { + input: `run: | + echo "npmRegistries:" >> ~/.yarnrc.yml + echo " //registry.company.com:" >> ~/.yarnrc.yml + echo " npmAuthToken: $NPM_TOKEN" >> ~/.yarnrc.yml`, + expected: &Info{ + Type: other, + Scheme: UnknownScheme, + Uri: "registry.company.com", + }, + }, + // TODO: https://github.com/renovatebot/renovate/blob/075a96c00aa53ede32576e924fe81b040789fc14/docs/usage/getting-started/private-packages.md + // "renovatebot": { + // input: ` matchHost: 'https://packages.my-company.com/myregistry/',`, + // expected: &Info{ + // Type: other, + // Scheme: HttpsScheme, + // Uri: "packages.my-company.com/myregistry", + // }, + // }, + // https://github.com/renovatebot/renovate/blob/b8d06fd3e007027064cfb5e93d0f14dcb7fead4d/lib/modules/manager/npm/post-update/rules.spec.ts#L39 + // additionalYarnRcYml: { + // npmRegistries: { + // '//registry.company.com/': { + // npmAuthIdent: 'user123:pass123', + // }, + // }, + // }, + + // Invalid + "invalid/readme": { + input: `# token-substitute + +[![Build Status](https://travis-ci.org/trustpilot/node-token-substitute.svg?branch=master)](https://travis-ci.org/trustpilot/node-token-substitute) [![npm](https://img.shields.io/npm/v/token-substitute.svg)](https://www.npmjs.com/package/token-substitute)`, + }, + } + + for name, tCase := range cases { + t.Run(name, func(t *testing.T) { + urls := FindAllURLs(context.Background(), tCase.input, false) + if len(urls) != 1 && tCase.expected != nil { + t.Errorf("expected 1 result for %s, got %d (%v)", tCase.input, len(urls), urls) + return + } else if len(urls) > 0 && tCase.expected == 
nil { + t.Errorf("expected no result for %s, got %d (%v)", tCase.input, len(urls), urls) + return + } + + var actualInfo *Info + for _, i := range urls { + actualInfo = i + } + + if diff := cmp.Diff(tCase.expected, actualInfo); diff != "" { + t.Errorf("diff: (-expected +actual)\n%s", diff) + } + }) + } +} diff --git a/pkg/detectors/npm/registry/type.go b/pkg/detectors/npm/registry/type.go new file mode 100644 index 000000000000..6793e0f5fc76 --- /dev/null +++ b/pkg/detectors/npm/registry/type.go @@ -0,0 +1,46 @@ +package registry + +// Type is used to indicate the registry implementation, if known. +// This is crucial for verification due to differences in behaviour. +type Type int + +const ( + /* + * Others npm registries include: + * - https://github.com/verdaccio/verdaccio + * - https://coding.net/help/docs/ci/practice/artifacts/npm.html + * - https://www.privjs.com + * - https://npm.fontawesome.com + */ + other Type = iota + npm + artifactoryCloud + artifactoryHosted + nexusRepo2 + nexusRepo3 + gitlab // TODO: create distinct type for self-hosted GitLab? 
+ githubCloud // TODO: self-hosted GitHub + azure + jetbrains + googleArtifactRegistry + gemfury + awsCodeArtifact +) + +func (t Type) String() string { + return [...]string{ + "other", + "npm", + "artifactoryCloud", + "artifactoryHosted", + "nexusRepo2", + "nexusRepo3", + "gitlab", + "githubCloud", + "azure", + "jetbrains", + "googleArtifactRegistry", + "gemfury", + "awsCodeArtifact", + }[t] +} diff --git a/pkg/detectors/npm/registry/verify.go b/pkg/detectors/npm/registry/verify.go new file mode 100644 index 000000000000..2a856266f99e --- /dev/null +++ b/pkg/detectors/npm/registry/verify.go @@ -0,0 +1,308 @@ +package registry + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strings" + + "github.com/go-logr/logr" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/context" +) + +// VerifyToken attempts to verify the |token| based on the registry's type (if known). +// +// NOTE: some known registry types DO NOT SUPPORT VERIFICATION (yet?) +func VerifyToken(ctx context.Context, client *http.Client, registryInfo *Info, token string) (bool, map[string]string, error) { + var ( + logCtx = context.WithValues(ctx, "registry", registryInfo.Uri, "token", token) + + whoamiRes *whoamiResponse + searchRes *searchResponse + allRes *allResponse + response *http.Response + err error + ) + if registryInfo.Scheme == UnknownScheme { + // Sanity check — this should never happen. + return false, nil, errors.New("registry scheme must be HTTP or HTTPS, not unknown") + } + + switch registryInfo.Type { + case other: + // These support various endpoints. 
+ var errs []error + whoamiRes, response, err = whoamiRequest(logCtx, client, registryInfo, token) + if whoamiRes != nil { + return true, map[string]string{"username": whoamiRes.Username}, nil + } + if err != nil { + if common.ErrIsNoSuchHost(err) { + return false, nil, err + } + errs = append(errs, err) + } + + searchRes, response, err = searchRequest(logCtx, client, registryInfo, token) + if searchRes != nil { + return true, nil, nil + } + if err != nil { + errs = append(errs, err) + } + + allRes, response, err = allRequest(logCtx, client, registryInfo, token) + if allRes != nil { + return true, nil, nil + } + if err != nil { + errs = append(errs, err) + } + + return false, nil, errors.Join(errs...) + case npm: + whoamiRes, response, err = whoamiRequest(logCtx, client, registryInfo, token) + if whoamiRes != nil { + return true, map[string]string{"username": whoamiRes.Username}, nil + } + return false, nil, err + case artifactoryCloud, artifactoryHosted: + // Returns {"username":"anonymous"} if no auth is provided. + // Using /AllEndpoint or /SearchEndpoint seems to return a vague "One or more query value parameters are null" error. Not sure why. + whoamiRes, response, err = whoamiRequest(logCtx, client, registryInfo, token) + if whoamiRes != nil && whoamiRes.Username != "anonymous" { + return true, map[string]string{"username": whoamiRes.Username}, nil + } + return false, nil, err + case nexusRepo2: + // Returns 401 if auth is invalid, doesn't support WhoamiEndpoint or SearchEndpoint. + allRes, response, err = allRequest(logCtx, client, registryInfo, token) + if allRes != nil { + return true, nil, nil + } + return false, nil, err + case nexusRepo3: + // Returns {"username":"anonymous"} or 401 for WhoamiEndpoint. Supports both AllEndpoint and SearchEndpoint. 
+ whoamiRes, response, err = whoamiRequest(logCtx, client, registryInfo, token) + if whoamiRes != nil && whoamiRes.Username != "anonymous" { + return true, map[string]string{"username": whoamiRes.Username}, nil + } + return false, nil, err + case gitlab: + // GitLab does not support any meta endpoints, only direct package lookups. + // https://docs.gitlab.com/ee/user/packages/npm_registry/#package-forwarding-to-npmjscom + // TODO: + return false, nil, fmt.Errorf("GitLab verification is not supported") + case githubCloud: + // Returns 403 if auth is invalid. + whoamiRes, response, err = whoamiRequest(logCtx, client, registryInfo, token) + if whoamiRes != nil { + return true, map[string]string{"username": whoamiRes.Username}, nil + } + if response != nil && response.StatusCode == http.StatusForbidden { + if err != nil && strings.Contains(err.Error(), "unexpected response status") { + err = nil + } + } + return false, nil, err + case azure: + // Doesn't support SearchEndpoint. + // https://github.com/MicrosoftDocs/azure-devops-docs/issues/10455 + whoamiRes, response, err = whoamiRequest(logCtx, client, registryInfo, token) + if whoamiRes != nil { + return true, map[string]string{"username": whoamiRes.Username}, nil + } + return false, nil, err + case jetbrains: + // Does not support AllEndpoint. + // Returns 401 if auth is invalid. + whoamiRes, response, err = whoamiRequest(logCtx, client, registryInfo, token) + if whoamiRes != nil && whoamiRes.Username != "internal" { + return true, map[string]string{"username": whoamiRes.Username}, nil + } + return false, nil, err + + case googleArtifactRegistry: + // Does not support WhoamiEndpoint, AllEndpoint, or SearchEndpoint. (https://stackoverflow.com/q/76470861) + // Returns 404 for valid token, 403 for invalid token. + // TODO + return false, nil, fmt.Errorf("Google Artifact Registry verification is not supported") + case gemfury: + // Returns 401 if auth is invalid. 
+ whoamiRes, response, err = whoamiRequest(logCtx, client, registryInfo, token) + if whoamiRes == nil { + return false, nil, err + } + return true, map[string]string{"username": whoamiRes.Username}, nil + case awsCodeArtifact: + // TODO + return false, nil, fmt.Errorf("AWS Code Artifact verification is not supported") + default: + return false, nil, fmt.Errorf("unrecognized registry type: %d", registryInfo.Type) + } +} + +func closeResponseBody(response *http.Response) { + if response == nil { + return + } + _, _ = io.Copy(io.Discard, response.Body) + _ = response.Body.Close() +} + +func logger(ctx context.Context) logr.Logger { + return ctx.Logger().WithName("npm") +} + +// whoamiRequest attempts to call the `/-/whoami` registry endpoint. +// See: https://github.com/npm/documentation/blob/f030a50fcf72bf3b8445c2ff63745644bbdb81c1/content/cli/v7/commands/npm-whoami.md?plain=1#L30 +func whoamiRequest( + ctx context.Context, + client *http.Client, + registryInfo *Info, + authValue string, +) (*whoamiResponse, *http.Response, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("%s%s/-/whoami", registryInfo.Scheme.String(), registryInfo.Uri), nil) + if err != nil { + return nil, nil, err + } + + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", authValue)) + res, err := client.Do(req) + if err != nil { + return nil, nil, err + } + defer closeResponseBody(res) + + logger(ctx).V(3).Info("Got whoami response", "status_code", res.StatusCode) + switch res.StatusCode { + case http.StatusOK: + var whoamiRes whoamiResponse + if err := json.NewDecoder(res.Body).Decode(&whoamiRes); err != nil { + return nil, res, err + } + + // It is possible for the response to be `{"username": null}`, `{"username":""}`, etc. + // While a valid token _can_ return an empty username, the registry is likely returning 200 for invalid auth.
+ if whoamiRes.Username == "" { + return nil, res, nil + } + return &whoamiRes, res, nil + case http.StatusUnauthorized: + return nil, res, nil + default: + body, _ := io.ReadAll(res.Body) + return nil, res, fmt.Errorf("unexpected response status %d for %s, body = %q", res.StatusCode, req.URL, string(body)) + } +} + +type whoamiResponse struct { + Username string `json:"username"` +} + +// searchRequest attempts to call the `/-/v1/search` registry endpoint. +// See: https://github.com/npm/registry/blob/main/docs/REGISTRY-API.md#get-v1search +func searchRequest( + ctx context.Context, + client *http.Client, + registryInfo *Info, + authValue string, +) (*searchResponse, *http.Response, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("%s%s/-/v1/search", registryInfo.Scheme.String(), registryInfo.Uri), nil) + if err != nil { + return nil, nil, fmt.Errorf("failed to construct search request: %s", err) + } + + query := url.Values{} + query.Add("text", "test") + query.Add("size", "1") + req.URL.RawQuery = query.Encode() + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", authValue)) + + res, err := client.Do(req) + if err != nil { + // A |tls.RecordHeaderError| likely means that the server is using HTTP, not HTTPS. + // TODO: Is it possible to handle the reverse case? 
+ // TODO: Handle this at the DoVerificationLevel + // var tlsErr tls.RecordHeaderError + // if errors.As(err, &tlsErr) && registryScheme == registry.HttpsScheme { + // return searchRequest(ctx, client, registry.HttpScheme, registryUri, authType, authValue) + // } + return nil, res, fmt.Errorf("search request failed: %w", err) + } + defer closeResponseBody(res) + + logger(ctx).V(3).Info("Got search response", "status_code", res.StatusCode) + switch res.StatusCode { + case http.StatusOK: + var searchRes searchResponse + if err := json.NewDecoder(res.Body).Decode(&searchRes); err != nil { + return nil, res, err + } + if (searchRes == searchResponse{}) { + return nil, res, fmt.Errorf("failed to decode search response JSON") + } + return &searchRes, res, nil + case http.StatusUnauthorized: + return nil, res, nil + default: + body, _ := io.ReadAll(res.Body) + return nil, res, fmt.Errorf("unexpected response status %d for %s, body = %q", res.StatusCode, req.URL, string(body)) + } +} + +type searchResponse struct { + Ok bool `json:"ok"` + Total int `json:"total"` + Time string `json:"time"` +} + +// allRequest attempts to call the `/-/all` registry endpoint. +// While the endpoint is deprecated, some older registries don't support the newer whoami/search endpoints. 
+// See: https://blog.npmjs.org/post/157615772423/deprecating-the-all-registry-endpoint.html +func allRequest( + ctx context.Context, + client *http.Client, + registryInfo *Info, + authValue string, +) (*allResponse, *http.Response, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("%s%s/-/all", registryInfo.Scheme.String(), registryInfo.Uri), nil) + if err != nil { + return nil, nil, fmt.Errorf("failed to construct all request: %s", err) + } + + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", authValue)) + res, err := client.Do(req) + if err != nil { + return nil, nil, err + } + defer closeResponseBody(res) + + logger(ctx).V(3).Info("Got all response", "status_code", res.StatusCode) + switch res.StatusCode { + case http.StatusOK: + var allRes allResponse + if err := json.NewDecoder(res.Body).Decode(&allRes); err != nil { + return nil, res, err + } + + if allRes.Updated == 0 { + return nil, res, fmt.Errorf("failed to decode all response JSON") + } + return &allRes, res, nil + case http.StatusUnauthorized: + return nil, res, nil + default: + body, _ := io.ReadAll(res.Body) + return nil, res, fmt.Errorf("unexpected response status %d for %s, body=%q", res.StatusCode, req.URL, string(body)) + } +} + +type allResponse struct { + Updated int `json:"_updated"` +} diff --git a/pkg/detectors/npm/token/detector.go b/pkg/detectors/npm/token/detector.go new file mode 100644 index 000000000000..37d23d6c70ea --- /dev/null +++ b/pkg/detectors/npm/token/detector.go @@ -0,0 +1,142 @@ +package token + +import ( + "crypto/tls" + "errors" + "net/http" + + "golang.org/x/exp/maps" + "golang.org/x/sync/singleflight" + + "github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/context" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npm/registry" + 
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +type BaseScanner struct { + client *http.Client +} + +func (s BaseScanner) Type() detectorspb.DetectorType { + return detectorspb.DetectorType_NpmToken +} + +func (s BaseScanner) Description() string { + return "NPM tokens are used to authenticate with NPM registries." +} + +type verifyResult struct { + IsVerified bool + ExtraData map[string]string + Error error +} + +var ( + noSuchHostCache = simple.NewCache[struct{}]() + verificationGroup singleflight.Group +) + +func (s BaseScanner) VerifyToken( + ctx context.Context, + data string, + token string, +) (bool, map[string]string, error) { + logger := ctx.Logger().WithName("npm") + if s.client == nil { + s.client = detectors.DetectorHttpClientWithNoLocalAddresses + } + + // Locate registry URL(s) in the data string. + registries := make(map[string]*registry.Info) + if r := registry.FindTokenURL(data, token); r != nil { + // A high-confidence match was found. + // e.g., |token|="s3cret" and |data| contains "//npm.company.com/:_authToken=s3cret". + registries[r.Uri] = r + logger.V(4).Info("Found high-confidence match for token", "token", token, "registry", r.Uri) + } else { + // A high confidence match was not found. + // Attempt to verify the token against any registries we can find. + for uri, info := range registry.FindAllURLs(ctx, data, true) { + registries[uri] = info + } + logger.V(4).Info("Found low-confidence matches for token", "token", token, "registries", maps.Keys(registries)) + } + + // Iterate through registries + errs := make([]error, 0, len(registries)) + for uri, info := range registries { + // Use cached value where possible. 
+		if noSuchHostCache.Exists(uri) {
+			logger.V(3).Info("Skipping invalid registry", "registry", uri)
+			continue
+		}
+
+		r, _, _ := verificationGroup.Do(uri+token, func() (any, error) {
+			logger.V(4).Info("Testing potential registry", "registry", uri, "token", token)
+			verified, extraData, err := doVerification(ctx, s.client, info, token)
+			if err != nil {
+				// TODO: narrow this in scope? Known hosts like `github.com` should be exempt.
+				if common.ErrIsNoSuchHost(err) {
+					noSuchHostCache.Set(uri, struct{}{})
+				}
+			}
+			return verifyResult{
+				IsVerified: verified,
+				ExtraData:  extraData,
+				Error:      err,
+			}, nil
+		})
+
+		res := r.(verifyResult)
+		if res.IsVerified {
+			return true, res.ExtraData, res.Error
+		}
+
+		errs = append(errs, res.Error)
+	}
+
+	return false, nil, errors.Join(errs...)
+}
+
+// doVerification reports whether |authValue| is valid for |registryInfo|, retrying once over HTTP if the HTTPS attempt fails with a TLS record-header error.
+func doVerification(
+	ctx context.Context,
+	client *http.Client,
+	registryInfo *registry.Info,
+	authValue string,
+) (bool, map[string]string, error) {
+	// If the scheme is "unknown", default to HTTPS.
+	if registryInfo.Scheme == registry.UnknownScheme {
+		registryInfo.Scheme = registry.HttpsScheme
+	}
+
+	isVerified, extraData, err := registry.VerifyToken(ctx, client, registryInfo, authValue)
+	if !isVerified {
+		if err != nil {
+			// If the scheme wasn't found when parsing we default to HTTPS, however, it might actually be HTTP.
+			// This re-attempts the request with HTTP.
+			//
+			// e.g., `//registry.example.com/:_authToken=...`
+			var tlsErr tls.RecordHeaderError
+			if errors.As(err, &tlsErr) && registryInfo.Scheme == registry.HttpsScheme {
+				r := *registryInfo // Retry on a copy so the caller's registry info keeps its scheme.
+				r.Scheme = registry.HttpScheme // NOTE(review): assumes registry.HttpScheme is the plain-HTTP constant; confirm in registry/type.go.
+				return doVerification(ctx, client, &r, authValue)
+			}
+		}
+		return false, nil, err // Propagate the failure; VerifyToken inspects it for ErrIsNoSuchHost and joins it into its result.
+	}
+
+	data := map[string]string{
+		"registry_type":  registryInfo.Type.String(),
+		"registry_url":   registryInfo.Uri,
+		"rotation_guide": "https://howtorotate.com/docs/tutorials/npm/",
+	}
+	for k, v := range extraData {
+		data[k] = v
+	}
+	return true, data, err
+}
diff --git a/pkg/detectors/npm/token/new/new.go b/pkg/detectors/npm/token/new/new.go
new file mode 100644
index 000000000000..364543210985
--- /dev/null
+++ b/pkg/detectors/npm/token/new/new.go
@@ -0,0 +1,72 @@
+package new
+
+import (
+	"context"
+	"errors"
+
+	regexp "github.com/wasilibs/go-re2"
+
+	logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npm"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npm/token"
+)
+
+type Scanner struct {
+	token.BaseScanner
+}
+
+// Ensure the Scanner satisfies the interfaces at compile time.
+var _ interface {
+	detectors.Detector
+	detectors.Versioner
+} = (*Scanner)(nil)
+
+func (s Scanner) Version() int { return int(npm.TokenNew) }
+
+// Keywords are used for efficiently pre-filtering chunks.
+// Use identifiers in the secret preferably, or the provider name.
+func (s Scanner) Keywords() []string {
+	return []string{"npm_"}
+}
+
+var tokenPat = regexp.MustCompile(`\b(npm_[a-zA-Z0-9]{36})\b`)
+
+func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
+	dataStr := string(data)
+	logCtx := logContext.AddLogger(ctx)
+
+	// Deduplicate results for more efficient handling.
+ tokens := make(map[string]struct{}) + for _, match := range tokenPat.FindAllStringSubmatch(dataStr, -1) { + m := match[1] + if detectors.StringShannonEntropy(m) < 4 { + continue + } + tokens[m] = struct{}{} + } + + // Handle results. + for t := range tokens { + r := detectors.Result{ + DetectorType: s.Type(), + Raw: []byte(t), + } + + if verify { + verified, extraData, vErr := s.VerifyToken(logCtx, dataStr, t) + r.Verified = verified + r.ExtraData = extraData + if vErr != nil { + if errors.Is(vErr, detectors.ErrNoLocalIP) { + continue + } + r.SetVerificationError(vErr) + } + } + + results = append(results, r) + } + + return +} diff --git a/pkg/detectors/npmtokenv2/npmtokenv2_integration_test.go b/pkg/detectors/npm/token/new/new_integration_test.go similarity index 99% rename from pkg/detectors/npmtokenv2/npmtokenv2_integration_test.go rename to pkg/detectors/npm/token/new/new_integration_test.go index 0a9d285c8d2e..fd033f1a8612 100644 --- a/pkg/detectors/npmtokenv2/npmtokenv2_integration_test.go +++ b/pkg/detectors/npm/token/new/new_integration_test.go @@ -1,7 +1,7 @@ //go:build detectors // +build detectors -package npmtokenv2 +package new import ( "context" @@ -10,9 +10,9 @@ import ( "time" "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) diff --git a/pkg/detectors/npm/token/new/new_test.go b/pkg/detectors/npm/token/new/new_test.go new file mode 100644 index 000000000000..ecc23ddff988 --- /dev/null +++ b/pkg/detectors/npm/token/new/new_test.go @@ -0,0 +1,80 @@ +package new + +import ( + "context" + "testing" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" +) + +type npmPatternTestCase struct { + input string + expected string +} + +func 
TestNpmTokenNew_Pattern(t *testing.T) { + cases := map[string]npmPatternTestCase{ + "no_context": { + input: `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`, + expected: "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", + }, + ".npmrc": { + input: `//registry.npmjs.org/:_authToken=npm_ZAQB7VuVmml1pMGorDFwyeEpuQrA8I4ypgPF`, + expected: "npm_ZAQB7VuVmml1pMGorDFwyeEpuQrA8I4ypgPF", + }, + "yaml_spec": { + input: ` - env: + NPM_TOKEN: npm_tCEMceczuiTXKQaBjGIaAezYQ63PqI972ANG`, + expected: "npm_tCEMceczuiTXKQaBjGIaAezYQ63PqI972ANG", + }, + "bashrc": { + input: `export NPM_TOKEN=npm_ySTLJHpS9DCwByClZBMyqRWptr2kB40hEjiS`, + expected: "npm_ySTLJHpS9DCwByClZBMyqRWptr2kB40hEjiS", + }, + + // Invalid + "invalid/placeholder_0": { + input: ` //registry.npmjs.org/:_authToken=npm_000000000000000000000000000000000000`, + }, + "invalid/placeholder_x": { + input: `//registry.npmjs.org/:_authToken=npm_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX`, + }, + "invalid/word_boundary": { + input: ` "image_small_url": "https://c10.patreonusercontent.com/3/eyJoIjo2NDAsInYiOiIzIiwidyI6NjQwfQ%3D%3D/patreon-media/campaign/1493621/91a5dc5347a741af89aaed35d2a82b5c?token-time=2145916800\u0026token-hash=Qznpm_uHiQAba4K3HTRZjrhQei4dU0tmZbaavLrM2FY%3D",`, + }, + "invalid/uppercase": { + input: `"operationId": "Npm_GetScopedPackageVersionFromRecycleBin",`, + }, + } + + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) + for name, test := range cases { + t.Run(name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return + } + + if len(results) == 0 { + if test.expected != "" { + t.Error("did not receive result") + } + return + } + + actual := string(results[0].Raw) + if test.expected != 
actual { + t.Errorf("expected '%s' != actual '%s'", test.expected, actual) + } + }) + } +} diff --git a/pkg/detectors/npm/token/uuid/uuid.go b/pkg/detectors/npm/token/uuid/uuid.go new file mode 100644 index 000000000000..1b6d2ee22040 --- /dev/null +++ b/pkg/detectors/npm/token/uuid/uuid.go @@ -0,0 +1,72 @@ +package uuid + +import ( + "context" + "errors" + + regexp "github.com/wasilibs/go-re2" + + logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npm" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npm/token" +) + +type Scanner struct { + token.BaseScanner +} + +// Ensure the Scanner satisfies the interfaces at compile time. +var _ interface { + detectors.Detector + detectors.Versioner +} = (*Scanner)(nil) + +func (s Scanner) Version() int { return int(npm.TokenUuid) } + +// Keywords are used for efficiently pre-filtering chunks. +// Use identifiers in the secret preferably, or the provider name. +func (s Scanner) Keywords() []string { + return []string{"npm", "NpmToken.", "_authToken"} +} + +var tokenPat = regexp.MustCompile(`(?:NpmToken\.|` + detectors.PrefixRegex([]string{"(?-i:NPM|[Nn]pm)", "(?-i:_authToken)"}) + `)\b(?i)([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})\b`) + +func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { + dataStr := string(data) + logCtx := logContext.AddLogger(ctx) + + // Deduplicate results for more efficient handling. + tokens := make(map[string]struct{}) + for _, match := range tokenPat.FindAllStringSubmatch(dataStr, -1) { + m := match[1] + if detectors.StringShannonEntropy(m) < 3 { + continue + } + tokens[m] = struct{}{} + } + + // Handle results. 
+ for t := range tokens { + r := detectors.Result{ + DetectorType: s.Type(), + Raw: []byte(t), + } + + if verify { + verified, extraData, vErr := s.VerifyToken(logCtx, dataStr, t) + r.Verified = verified + r.ExtraData = extraData + if vErr != nil { + if errors.Is(vErr, detectors.ErrNoLocalIP) { + continue + } + r.SetVerificationError(vErr) + } + } + + results = append(results, r) + } + + return +} diff --git a/pkg/detectors/npmtoken/npmtoken_integration_test.go b/pkg/detectors/npm/token/uuid/uuid_integration_test.go similarity index 99% rename from pkg/detectors/npmtoken/npmtoken_integration_test.go rename to pkg/detectors/npm/token/uuid/uuid_integration_test.go index a2d2b2357366..3f217cf209f8 100644 --- a/pkg/detectors/npmtoken/npmtoken_integration_test.go +++ b/pkg/detectors/npm/token/uuid/uuid_integration_test.go @@ -1,7 +1,7 @@ //go:build detectors // +build detectors -package npmtoken +package uuid import ( "context" @@ -10,9 +10,9 @@ import ( "time" "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) diff --git a/pkg/detectors/npm/token/uuid/uuid_test.go b/pkg/detectors/npm/token/uuid/uuid_test.go new file mode 100644 index 000000000000..0a8e2ab5ba46 --- /dev/null +++ b/pkg/detectors/npm/token/uuid/uuid_test.go @@ -0,0 +1,127 @@ +package uuid + +import ( + "context" + "testing" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" +) + +type npmPatternTestCase struct { + input string + expected string +} + +func TestNpmTokenUuid_Pattern(t *testing.T) { + cases := map[string]npmPatternTestCase{ + "npmrc/_authToken/top_level": { + input: `registry=https://nexus.company.com/repository/npm-group/ +_authToken=NpmToken.3e9adc26-5c1b-3fdf-901f-6df392a48616`, + expected: 
"3e9adc26-5c1b-3fdf-901f-6df392a48616", + }, + "npmrc/_authToken/scoped/npm": { + input: `loglevel=silent +registry=https://registry.npmjs.org/ +//registry.npmjs.org/:_authToken=fcb3b15d-4d4a-44dc-b92d-13ee9d25582d`, + expected: "fcb3b15d-4d4a-44dc-b92d-13ee9d25582d", + }, + "npmrc/_authToken/scoped/nexus": { + input: ` echo email=jdoe@company.com > .npmrc + echo always-auth=true >> .npmrc + echo registry=https://nexus.company.com:8443/repository/npm-registry/ >> .npmrc + echo //nexus.company.com:8443/repository/npm-registry/:_authToken=NpmToken.de093289-9551-3238-a766-9d2c694f2600 >> .npmrc`, + expected: "de093289-9551-3238-a766-9d2c694f2600", + }, + "npmrc/_authToken/scopegd/other(1)": { + input: `@fontawesome:registry=https://npm.fontawesome.com/ +//npm.fontawesome.com/:_authToken=E8EC7793-A630-49AA-3351-6887EE647296`, + expected: "E8EC7793-A630-49AA-3351-6887EE647296", + }, + "yarn/npmAuthToken/scoped": { + input: `npmScopes: + fortawesome: + npmAlwaysAuth: true + npmRegistryServer: "https://npm.fontawesome.com/" + npmAuthToken: "${20FCC725-C7FF-4BBF-3DE8-632C89A16C87}"`, + expected: "20FCC725-C7FF-4BBF-3DE8-632C89A16C87", + }, + "misc(1)": { + input: `CI: "true" + NPM_PUBLISH_URL: "http://nexus3.company.net:8081/repository/npm-releases/" + NPM_PUBLISH_TOKEN: "b5505337-ffb2-3fac-8b3a-fcd81b8bb8fb"`, + expected: "b5505337-ffb2-3fac-8b3a-fcd81b8bb8fb", + }, + "misc(2)": { + input: `- name: NPM_PUBLISH_TOKEN + description: "Npm user used when upload artifacts" + required: true + value: "NpmToken.b5505337-ffb2-3fac-8b3a-fcd81b8ab8fb"`, + expected: "b5505337-ffb2-3fac-8b3a-fcd81b8ab8fb", + }, + "misc(3)": { + input: `root@4f5ec7bfe603:/# cd && cat .npmrc +//192.168.1.253:8081/repository/npm-group-local/:_authToken=NpmToken.7385beb7-2f92-3295-8ccf-8020132d6232`, + expected: "7385beb7-2f92-3295-8ccf-8020132d6232", + }, + "misc(4)": { + input: `ENV NPM_TOKEN "16b46f03-f1fb-4dce-9a98-c7e685751e67"`, + expected: "16b46f03-f1fb-4dce-9a98-c7e685751e67", + }, + "misc(5)": { 
+ input: // https://github.com/arnaud-deprez/jenkins-docker-openshift/blob/60bb4dbe4d5484ff3f81697c26892dda4cd33930/charts/jenkins-openshift/values.yaml#L209 + ` CI: "true" + NPM_MIRROR_URL: "http://nexus3:8081/repository/npm-public/" + NPM_PUBLISH_URL: "http://nexus3:8081/repository/npm-releases/" + NPM_PUBLISH_TOKEN: "b5505337-ffb2-3fac-8b3a-fcd81b8bb8fb"`, + expected: "b5505337-ffb2-3fac-8b3a-fcd81b8bb8fb", + }, + + // Invalid + "invalid/_authToken/variable": { + input: `//npm.pkg.github.com/:_authToken=${GITHUB_PACKAGES_AUTH_TOKEN}`, + }, + "invalid/default": { + input: `assert(registry, 'registry not set, example: "https://nexus.foo.com/repository/mynpm/"') +const tokenErrorMsg = 'npm token invalid, example: "NpmToken.00000000-0000-0000-0000-000000000000" before base64 encoded'`, + }, + "invalid/not_uuid": { + input: `# .npmrc +# @ngiq:registry=https://registry.corp.net/repository/npm-group +# //registry.corp.net/repository/:_authToken=NpmToken.xxxx`, + }, + "invalid/wrong_case": { + input: `{\"name\":\"NPmLslOudNeTLpfg\",\"correlationId\":\"9cdc2447-3eaa-4191-b6ed-43e9b6b1b3c3\"}`, + }, + } + + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) + for name, test := range cases { + t.Run(name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return + } + + if len(results) == 0 { + if test.expected != "" { + t.Error("did not receive result") + } + return + } + + actual := string(results[0].Raw) + if test.expected != actual { + t.Errorf("expected '%s' != actual '%s'", test.expected, actual) + } + }) + } +} diff --git a/pkg/detectors/npm/version.go b/pkg/detectors/npm/version.go new file mode 100644 index 
000000000000..a843c126b38e --- /dev/null +++ b/pkg/detectors/npm/version.go @@ -0,0 +1,31 @@ +package npm + +// DetectorVersion assigns semantic meaning to detector "versions", +// as there are several independent formats. +type DetectorVersion int + +const ( + /* + * TokenUuid: the original NPM token format, also implemented by tools like Nexus Repository 3. + * + * Examples: + * ``` + * //registry.npmjs.org/:_authToken=a5f022f6-71b6-4402-82ca-f7842c12ede8 + * echo //nexus.contoso.com/repository/npm-registry/:_authToken=NpmToken.47174bc4-45b5-4266-9ab9-3f930f03ed04 >> .npmrc + * ``` + */ + TokenUuid DetectorVersion = iota + 1 + /* + * TokenNew: the new NPM token format announced by GitHub (https://github.blog/changelog/2021-09-23-npm-has-a-new-access-token-format/). + * + * Example: `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` + */ + TokenNew + // TODO: these are placeholders for future development. + // TokenGeneric + // https://yarnpkg.com/configuration/yarnrc#npmRegistries + // TokenYarn + // https://forum.unity.com/threads/npm-registry-authentication.836308/ + // https://github.com/openupm/openupm-cli/blob/0b70a3a6f2917888186706ca6838df2ea55ee066/docs/cmd-search.md?plain=1#L5 + // TokenUnity +) diff --git a/pkg/detectors/npmtoken/npmtoken.go b/pkg/detectors/npmtoken/npmtoken.go deleted file mode 100644 index fdd4992f8ccd..000000000000 --- a/pkg/detectors/npmtoken/npmtoken.go +++ /dev/null @@ -1,78 +0,0 @@ -package npmtoken - -import ( - "context" - "fmt" - regexp "github.com/wasilibs/go-re2" - "net/http" - "strings" - - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" -) - -type Scanner struct{} - -// Ensure the Scanner satisfies the interfaces at compile time. 
-var _ detectors.Detector = (*Scanner)(nil) -var _ detectors.Versioner = (*Scanner)(nil) - -func (s Scanner) Version() int { return 1 } - -var ( - client = common.SaneHttpClient() - - // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. - keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"npm"}) + `\b([0-9Aa-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\b`) -) - -// Keywords are used for efficiently pre-filtering chunks. -// Use identifiers in the secret preferably, or the provider name. -func (s Scanner) Keywords() []string { - return []string{"npm"} -} - -// FromData will find and optionally verify NpmToken secrets in a given set of bytes. -func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { - dataStr := string(data) - matches := keyPat.FindAllStringSubmatch(dataStr, -1) - for _, match := range matches { - resMatch := strings.TrimSpace(match[1]) - - s1 := detectors.Result{ - DetectorType: detectorspb.DetectorType_NpmToken, - Raw: []byte(resMatch), - } - s1.ExtraData = map[string]string{ - "rotation_guide": "https://howtorotate.com/docs/tutorials/npm/", - } - - if verify { - req, err := http.NewRequestWithContext(ctx, "GET", "https://registry.npmjs.org/-/whoami", nil) - if err != nil { - continue - } - req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", resMatch)) - res, err := client.Do(req) - if err == nil { - defer res.Body.Close() - if res.StatusCode >= 200 && res.StatusCode < 300 { - s1.Verified = true - } - } - } - - results = append(results, s1) - } - - return -} - -func (s Scanner) Type() detectorspb.DetectorType { - return detectorspb.DetectorType_NpmToken -} - -func (s Scanner) Description() string { - return "NPM tokens are used to authenticate and publish packages to the npm registry." 
-} diff --git a/pkg/detectors/npmtoken/npmtoken_test.go b/pkg/detectors/npmtoken/npmtoken_test.go deleted file mode 100644 index c85bd3418acc..000000000000 --- a/pkg/detectors/npmtoken/npmtoken_test.go +++ /dev/null @@ -1,91 +0,0 @@ -package npmtoken - -import ( - "context" - "fmt" - "testing" - - "github.com/google/go-cmp/cmp" - - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" -) - -var ( - validPattern = "3aAcac6c-9847-23d9-ce65-917590b81cf0" - invalidPattern = "3aAcac6c?9847-23d9-ce65-917590b81cf0" - keyword = "npmtoken" -) - -func TestNpmToken_Pattern(t *testing.T) { - d := Scanner{} - ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - tests := []struct { - name string - input string - want []string - }{ - { - name: "valid pattern - with keyword npmtoken", - input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), - want: []string{validPattern}, - }, - { - name: "valid pattern - ignore duplicate", - input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), - want: []string{validPattern}, - }, - { - name: "valid pattern - key out of prefix range", - input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), - want: []string{}, - }, - { - name: "invalid pattern", - input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), - want: []string{}, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) - if len(matchedDetectors) == 0 { - t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) - return - } - - results, err := d.FromData(context.Background(), false, []byte(test.input)) - if err != nil { - t.Errorf("error = %v", err) - return - } - - if len(results) != len(test.want) { - if len(results) == 0 { - t.Errorf("did not receive result") - } else { - 
t.Errorf("expected %d results, only received %d", len(test.want), len(results)) - } - return - } - - actual := make(map[string]struct{}, len(results)) - for _, r := range results { - if len(r.RawV2) > 0 { - actual[string(r.RawV2)] = struct{}{} - } else { - actual[string(r.Raw)] = struct{}{} - } - } - expected := make(map[string]struct{}, len(test.want)) - for _, v := range test.want { - expected[v] = struct{}{} - } - - if diff := cmp.Diff(expected, actual); diff != "" { - t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) - } - }) - } -} diff --git a/pkg/detectors/npmtokenv2/npmtokenv2.go b/pkg/detectors/npmtokenv2/npmtokenv2.go deleted file mode 100644 index 3e2eb83b7406..000000000000 --- a/pkg/detectors/npmtokenv2/npmtokenv2.go +++ /dev/null @@ -1,79 +0,0 @@ -package npmtokenv2 - -import ( - "context" - "fmt" - regexp "github.com/wasilibs/go-re2" - "net/http" - - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" -) - -type Scanner struct{} - -// Ensure the Scanner satisfies the interfaces at compile time. -var _ detectors.Detector = (*Scanner)(nil) -var _ detectors.Versioner = (*Scanner)(nil) - -func (s Scanner) Version() int { return 2 } - -var ( - client = common.SaneHttpClient() - - // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. - keyPat = regexp.MustCompile(`(npm_[0-9a-zA-Z]{36})`) -) - -// Keywords are used for efficiently pre-filtering chunks. -// Use identifiers in the secret preferably, or the provider name. -func (s Scanner) Keywords() []string { - return []string{"npm_"} -} - -// FromData will find and optionally verify NpmTokenV2 secrets in a given set of bytes. 
-func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { - dataStr := string(data) - - matches := keyPat.FindAllStringSubmatch(dataStr, -1) - - for _, match := range matches { - resMatch := match[1] - - s1 := detectors.Result{ - DetectorType: detectorspb.DetectorType_NpmToken, - Raw: []byte(resMatch), - } - s1.ExtraData = map[string]string{ - "rotation_guide": "https://howtorotate.com/docs/tutorials/npm/", - } - - if verify { - req, err := http.NewRequestWithContext(ctx, "GET", "https://registry.npmjs.org/-/whoami", nil) - if err != nil { - continue - } - req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", resMatch)) - res, err := client.Do(req) - if err == nil { - defer res.Body.Close() - if res.StatusCode >= 200 && res.StatusCode < 300 { - s1.Verified = true - } - } - } - - results = append(results, s1) - } - - return -} - -func (s Scanner) Type() detectorspb.DetectorType { - return detectorspb.DetectorType_NpmToken -} - -func (s Scanner) Description() string { - return "NPM tokens are used to authenticate and publish packages to the NPM registry." 
-} diff --git a/pkg/detectors/npmtokenv2/npmtokenv2_test.go b/pkg/detectors/npmtokenv2/npmtokenv2_test.go deleted file mode 100644 index e74b5c15ea16..000000000000 --- a/pkg/detectors/npmtokenv2/npmtokenv2_test.go +++ /dev/null @@ -1,81 +0,0 @@ -package npmtokenv2 - -import ( - "context" - "fmt" - "testing" - - "github.com/google/go-cmp/cmp" - - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" -) - -var ( - validPattern = "npm_hK0FJXBYCkejhEMY4Kp6bOOZn1DlfBOmtbJY" - invalidPattern = "npm_hK0FJXBYCkejhEMY?Kp6bOOZn1DlfBOmtbJY" - keyword = "npmtokenv2" -) - -func TestNpmToken_New_Pattern(t *testing.T) { - d := Scanner{} - ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - tests := []struct { - name string - input string - want []string - }{ - { - name: "valid pattern - with keyword npmtokenv2", - input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), - want: []string{validPattern}, - }, - { - name: "invalid pattern", - input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), - want: []string{}, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) - if len(matchedDetectors) == 0 { - t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) - return - } - - results, err := d.FromData(context.Background(), false, []byte(test.input)) - if err != nil { - t.Errorf("error = %v", err) - return - } - - if len(results) != len(test.want) { - if len(results) == 0 { - t.Errorf("did not receive result") - } else { - t.Errorf("expected %d results, only received %d", len(test.want), len(results)) - } - return - } - - actual := make(map[string]struct{}, len(results)) - for _, r := range results { - if len(r.RawV2) > 0 { - actual[string(r.RawV2)] = struct{}{} - } else { - actual[string(r.Raw)] = struct{}{} - } - } - expected := make(map[string]struct{}, 
len(test.want)) - for _, v := range test.want { - expected[v] = struct{}{} - } - - if diff := cmp.Diff(expected, actual); diff != "" { - t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) - } - }) - } -} diff --git a/pkg/engine/defaults/defaults.go b/pkg/engine/defaults/defaults.go index 14613ab174bb..da6a1517a7f3 100644 --- a/pkg/engine/defaults/defaults.go +++ b/pkg/engine/defaults/defaults.go @@ -476,8 +476,8 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/noticeable" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/notion" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/nozbeteams" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npmtoken" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npmtokenv2" + npm_token_new "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npm/token/new" + npm_token_uuid "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npm/token/uuid" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/nugetapikey" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/numverify" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/nutritionix" @@ -1317,8 +1317,8 @@ func buildDetectorList() []detectors.Detector { ¬iceable.Scanner{}, ¬ion.Scanner{}, &nozbeteams.Scanner{}, - &npmtoken.Scanner{}, - &npmtokenv2.Scanner{}, + &npm_token_new.Scanner{}, + &npm_token_uuid.Scanner{}, &nugetapikey.Scanner{}, &numverify.Scanner{}, &nutritionix.Scanner{}, diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 0ee4080f49c2..77898db3df81 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -1045,7 +1045,7 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) { } defer common.Recover(ctx) - ctx = context.WithValue(ctx, "detector", data.detector.Key.Loggable()) + ctx = context.WithValues(ctx, "decoder", data.decoder.String(), "detector", data.detector.Key.Loggable()) isFalsePositive := detectors.GetFalsePositiveCheck(data.detector.Detector)