From 6e5692427de525b3063a7ae6699b5f8cfed45447 Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Fri, 31 Jan 2025 09:44:18 -0500 Subject: [PATCH 01/17] nit --- pkg/sources/source_manager.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/sources/source_manager.go b/pkg/sources/source_manager.go index 43784141f4a8..2e03be1d8127 100644 --- a/pkg/sources/source_manager.go +++ b/pkg/sources/source_manager.go @@ -406,8 +406,7 @@ func (s *SourceManager) enumerate(ctx context.Context, source Source, report *Jo // Check if source units are supported and configured. canUseSourceUnits := s.useSourceUnitsFunc != nil if enumChunker, ok := source.(SourceUnitEnumerator); ok && canUseSourceUnits && s.useSourceUnitsFunc() { - ctx.Logger().Info("running source", - "with_units", true) + ctx.Logger().Info("running source", "with_units", true) return s.enumerateWithUnits(ctx, enumChunker, report, reporter) } return fmt.Errorf("Enumeration not supported or configured for source: %s", source.Type().String()) From e233332fd466a881013655187329b7db2341fa30 Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Fri, 31 Jan 2025 10:02:45 -0500 Subject: [PATCH 02/17] update rate limit handler to use reporter --- pkg/sources/github/github.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 4dce437fba47..67babc8764b7 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -739,13 +739,14 @@ var ( rateLimitResumeTime time.Time ) -// handleRateLimit returns true if a rate limit was handled +// handleRateLimit handles GitHub API rate limiting with an optional reporter for unit errors. +// Returns true if a rate limit was handled. // // Unauthenticated users have a rate limit of 60 requests per hour. // Authenticated users have a rate limit of 5,000 requests per hour, // however, certain actions are subject to a stricter "secondary" limit. // https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api -func (s *Source) handleRateLimit(ctx context.Context, errIn error) bool { +func (s *Source) handleRateLimit(ctx context.Context, errIn error, reporter ...sources.UnitReporter) bool { if errIn == nil { return false } @@ -757,7 +758,6 @@ func (s *Source) handleRateLimit(ctx context.Context, errIn error) bool { var retryAfter time.Duration if resumeTime.IsZero() || time.Now().After(resumeTime) { rateLimitMu.Lock() - var ( now = time.Now() @@ -785,6 +785,12 @@ func (s *Source) handleRateLimit(ctx context.Context, errIn error) bool { retryAfter = retryAfter + jitter rateLimitResumeTime = now.Add(retryAfter) ctx.Logger().Info(fmt.Sprintf("exceeded %s rate limit", limitType), "retry_after", retryAfter.String(), "resume_time", rateLimitResumeTime.Format(time.RFC3339)) + // Only report the error if a reporter was provided + if len(reporter) > 0 { + if err := reporter[0].UnitErr(ctx, fmt.Errorf("exceeded %s rate limit", limitType)); err != nil { + ctx.Logger().Error(err, "failed to report rate limit error") + } + } } else { retryAfter = (5 * time.Minute) + jitter rateLimitResumeTime = now.Add(retryAfter) @@ -803,6 +809,11 @@ func (s *Source) handleRateLimit(ctx context.Context, errIn error) bool { return true } +// handleRateLimitWithUnitReporter is a wrapper around handleRateLimit that includes unit reporting +func (s *Source) handleRateLimitWithUnitReporter(ctx context.Context, reporter sources.UnitReporter, errIn error) bool { + return s.handleRateLimit(ctx, errIn, reporter) +} + func (s *Source) addReposForMembers(ctx context.Context, reporter sources.UnitReporter) { ctx.Logger().Info("Fetching repos from members", "members", len(s.memberCache)) for member := range s.memberCache { From a139a71bcaccdbdb95a69985390f51d10c496bcd Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Fri, 31 Jan 2025 10:04:41 -0500 Subject: [PATCH 03/17] update process repos to use rate limit handler with unit reporter --- pkg/sources/github/repo.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/sources/github/repo.go b/pkg/sources/github/repo.go index c5e813578e7c..2f40ee904185 100644 --- a/pkg/sources/github/repo.go +++ b/pkg/sources/github/repo.go @@ -181,6 +181,7 @@ func isGitHub404Error(err error) bool { return ghErr.Response.StatusCode == http.StatusNotFound } +// processRepos is the main function for getting repositories from a source. func (s *Source) processRepos(ctx context.Context, target string, reporter sources.UnitReporter, listRepos repoLister, listOpts repoListOptions) error { logger := ctx.Logger().WithValues("target", target) opts := listOpts.getListOptions() @@ -192,7 +193,7 @@ func (s *Source) processRepos(ctx context.Context, target string, reporter sourc for { someRepos, res, err := listRepos(ctx, target, listOpts) - if s.handleRateLimit(ctx, err) { + if s.handleRateLimitWithUnitReporter(ctx, reporter, err) { continue } if err != nil { From bcf1840fcb105a55642608ecc36f176551e53798 Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Fri, 31 Jan 2025 10:05:19 -0500 Subject: [PATCH 04/17] update getReposByOrgOrUser to report err --- pkg/sources/github/repo.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/sources/github/repo.go b/pkg/sources/github/repo.go index 2f40ee904185..660dfd95551e 100644 --- a/pkg/sources/github/repo.go +++ b/pkg/sources/github/repo.go @@ -154,6 +154,9 @@ func (s *Source) getReposByOrgOrUser(ctx context.Context, name string, reporter if err == nil { return organization, nil } else if !isGitHub404Error(err) { + if err := reporter.UnitErr(ctx, fmt.Errorf("error getting repos by org: %w", err)); err != nil { + return unknown, err + } return unknown, err } From 95191c8c504e8e46888d8cdde00e7b08712a4a64 Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Fri, 31 Jan 2025 10:08:18 -0500 Subject: [PATCH 05/17] update dedupreporter to report err --- pkg/sources/github/github.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 67babc8764b7..df91845b2d2a 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -420,6 +420,9 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e repo, err := s.ensureRepoInfoCache(ctx, repo) if err != nil { ctx.Logger().Error(err, "error caching repo info") + if err := dedupeReporter.UnitErr(ctx, fmt.Errorf("error caching repo info: %w", err)); err != nil { + ctx.Logger().Error(err, "failed to report unit error") + } } s.repos = append(s.repos, repo) } From edf3ac7a81d4da3bd4837524eb022740c0d546a7 Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Fri, 31 Jan 2025 10:28:24 -0500 Subject: [PATCH 06/17] add errReporter interface to handle both types of reporters --- pkg/sources/github/github.go | 54 +++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index df91845b2d2a..80f93d8f316c 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -381,7 +381,7 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e // See: https://github.com/trufflesecurity/trufflehog/pull/2379#discussion_r1487454788 for _, name := range s.filteredRepoCache.Keys() { url, _ := s.filteredRepoCache.Get(name) - url, err := s.ensureRepoInfoCache(ctx, url) + url, err := s.ensureRepoInfoCache(ctx, url, reporter) if err != nil { if err := dedupeReporter.UnitErr(ctx, err); err != nil { return err @@ -417,7 +417,7 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e for _, repo := range s.filteredRepoCache.Values() { ctx := context.WithValue(ctx, "repo", repo) - repo, err := s.ensureRepoInfoCache(ctx, repo) + repo, err := s.ensureRepoInfoCache(ctx, repo, reporter) if err != nil { ctx.Logger().Error(err, "error caching repo info") if err := dedupeReporter.UnitErr(ctx, fmt.Errorf("error caching repo info: %w", err)); err != nil { @@ -437,7 +437,7 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e // provided repository URL. If not, it fetches and stores the metadata for the // repository. In some cases, the gist URL needs to be normalized, which is // returned by this function. -func (s *Source) ensureRepoInfoCache(ctx context.Context, repo string) (string, error) { +func (s *Source) ensureRepoInfoCache(ctx context.Context, repo string, reporter errorReporter) (string, error) { if _, ok := s.repoInfoCache.get(repo); ok { return repo, nil } @@ -453,15 +453,15 @@ func (s *Source) ensureRepoInfoCache(ctx context.Context, repo string) (string, for { gistID := extractGistID(urlParts) gist, _, err := s.connector.APIClient().Gists.Get(ctx, gistID) - // Normalize the URL to the Gist's pull URL. - // See https://github.com/trufflesecurity/trufflehog/pull/2625#issuecomment-2025507937 - repo = gist.GetGitPullURL() - if s.handleRateLimit(ctx, err) { + if s.handleRateLimit(ctx, err, reporter) { continue } if err != nil { return repo, fmt.Errorf("failed to fetch gist") } + // Normalize the URL to the Gist's pull URL. + // See https://github.com/trufflesecurity/trufflehog/pull/2625#issuecomment-2025507937 + repo = gist.GetGitPullURL() s.cacheGistInfo(gist) break } @@ -469,7 +469,7 @@ func (s *Source) ensureRepoInfoCache(ctx context.Context, repo string) (string, // Cache repository info. for { ghRepo, _, err := s.connector.APIClient().Repositories.Get(ctx, urlParts[1], urlParts[2]) - if s.handleRateLimit(ctx, err) { + if s.handleRateLimit(ctx, err, reporter) { continue } if err != nil { @@ -529,7 +529,7 @@ func (s *Source) enumerateWithToken(ctx context.Context, isGithubEnterprise bool var err error for { ghUser, _, err = s.connector.APIClient().Users.Get(ctx, "") - if s.handleRateLimit(ctx, err) { + if s.handleRateLimitWithUnitReporter(ctx, reporter, err) { continue } if err != nil { @@ -742,14 +742,37 @@ var ( rateLimitResumeTime time.Time ) -// handleRateLimit handles GitHub API rate limiting with an optional reporter for unit errors. +// errorReporter is an interface that captures just the error reporting functionality +type errorReporter interface { + Err(ctx context.Context, err error) error +} + +// wrapper to adapt UnitReporter to errorReporter +type unitErrorReporter struct { + reporter sources.UnitReporter +} + +func (u unitErrorReporter) Err(ctx context.Context, err error) error { + return u.reporter.UnitErr(ctx, err) +} + +// wrapper to adapt ChunkReporter to errorReporter +type chunkErrorReporter struct { + reporter sources.ChunkReporter +} + +func (c chunkErrorReporter) Err(ctx context.Context, err error) error { + return c.reporter.ChunkErr(ctx, err) +} + +// handleRateLimit handles GitHub API rate limiting with an optional error reporter. // Returns true if a rate limit was handled. // // Unauthenticated users have a rate limit of 60 requests per hour. // Authenticated users have a rate limit of 5,000 requests per hour, // however, certain actions are subject to a stricter "secondary" limit. // https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api -func (s *Source) handleRateLimit(ctx context.Context, errIn error, reporter ...sources.UnitReporter) bool { +func (s *Source) handleRateLimit(ctx context.Context, errIn error, reporter ...errorReporter) bool { if errIn == nil { return false } @@ -790,7 +813,7 @@ func (s *Source) handleRateLimit(ctx context.Context, errIn error, reporter ...s ctx.Logger().Info(fmt.Sprintf("exceeded %s rate limit", limitType), "retry_after", retryAfter.String(), "resume_time", rateLimitResumeTime.Format(time.RFC3339)) // Only report the error if a reporter was provided if len(reporter) > 0 { - if err := reporter[0].UnitErr(ctx, fmt.Errorf("exceeded %s rate limit", limitType)); err != nil { + if err := reporter[0].Err(ctx, fmt.Errorf("exceeded %s rate limit", limitType)); err != nil { ctx.Logger().Error(err, "failed to report rate limit error") } } @@ -814,7 +837,7 @@ func (s *Source) handleRateLimit(ctx context.Context, errIn error, reporter ...s // handleRateLimitWithUnitReporter is a wrapper around handleRateLimit that includes unit reporting func (s *Source) handleRateLimitWithUnitReporter(ctx context.Context, reporter sources.UnitReporter, errIn error) bool { - return s.handleRateLimit(ctx, errIn, reporter) + return s.handleRateLimit(ctx, errIn, &unitErrorReporter{reporter: reporter}) } func (s *Source) addReposForMembers(ctx context.Context, reporter sources.UnitReporter) { @@ -837,7 +860,7 @@ func (s *Source) addUserGistsToCache(ctx context.Context, user string, reporter for { gists, res, err := s.connector.APIClient().Gists.List(ctx, user, gistOpts) - if s.handleRateLimit(ctx, err) { + if s.handleRateLimitWithUnitReporter(ctx, reporter, err) { continue } if err != nil { @@ -1201,7 +1224,6 @@ func (s *Source) processRepoComments(ctx context.Context, repoInfo repoInfo, rep } return nil - } func (s *Source) processIssues(ctx context.Context, repoInfo repoInfo, reporter sources.ChunkReporter) error { @@ -1542,7 +1564,7 @@ func (s *Source) ChunkUnit(ctx context.Context, unit sources.SourceUnit, reporte ctx = context.WithValue(ctx, "repo", repoURL) // ChunkUnit is not guaranteed to be called from Enumerate, so we must // check and fetch the repoInfoCache for this repo. - repoURL, err := s.ensureRepoInfoCache(ctx, repoURL) + repoURL, err := s.ensureRepoInfoCache(ctx, repoURL, &chunkErrorReporter{reporter: reporter}) if err != nil { return err } From a21d0a37efa84af266006fc0c84bd02f679df4b2 Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Fri, 31 Jan 2025 10:48:02 -0500 Subject: [PATCH 07/17] convert to error reporter types --- pkg/sources/github/github.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 80f93d8f316c..703a7f2d6e0b 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -381,7 +381,7 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e // See: https://github.com/trufflesecurity/trufflehog/pull/2379#discussion_r1487454788 for _, name := range s.filteredRepoCache.Keys() { url, _ := s.filteredRepoCache.Get(name) - url, err := s.ensureRepoInfoCache(ctx, url, reporter) + url, err := s.ensureRepoInfoCache(ctx, url, &unitErrorReporter{reporter}) if err != nil { if err := dedupeReporter.UnitErr(ctx, err); err != nil { return err @@ -417,7 +417,7 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e for _, repo := range s.filteredRepoCache.Values() { ctx := context.WithValue(ctx, "repo", repo) - repo, err := s.ensureRepoInfoCache(ctx, repo, reporter) + repo, err := s.ensureRepoInfoCache(ctx, repo, &unitErrorReporter{reporter}) if err != nil { ctx.Logger().Error(err, "error caching repo info") if err := dedupeReporter.UnitErr(ctx, fmt.Errorf("error caching repo info: %w", err)); err != nil { From 3a55b35ecc1dfbe1d340231db3cbe24f8c5112d4 Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Fri, 31 Jan 2025 10:48:29 -0500 Subject: [PATCH 08/17] update handleRateLimit signature --- pkg/sources/github/connector_token.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/sources/github/connector_token.go b/pkg/sources/github/connector_token.go index 88ef8f8f1ec0..469f50b98c2f 100644 --- a/pkg/sources/github/connector_token.go +++ b/pkg/sources/github/connector_token.go @@ -17,14 +17,14 @@ type tokenConnector struct { apiClient *github.Client token string isGitHubEnterprise bool - handleRateLimit func(context.Context, error) bool + handleRateLimit func(context.Context, error, ...errorReporter) bool user string userMu sync.Mutex } var _ connector = (*tokenConnector)(nil) -func newTokenConnector(apiEndpoint string, token string, handleRateLimit func(context.Context, error) bool) (*tokenConnector, error) { +func newTokenConnector(apiEndpoint string, token string, handleRateLimit func(context.Context, error, ...errorReporter) bool) (*tokenConnector, error) { const httpTimeoutSeconds = 60 httpClient := common.RetryableHTTPClientTimeout(int64(httpTimeoutSeconds)) tokenSource := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token}) From e37fa8768ae05685c7b4e17250d0bc9d47c17607 Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Fri, 31 Jan 2025 11:41:24 -0500 Subject: [PATCH 09/17] use reporters for all rate limit handlers in github --- pkg/sources/github/github.go | 41 ++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 703a7f2d6e0b..d3aca1ccbd90 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -494,7 +494,7 @@ func (s *Source) enumerateBasicAuth(ctx context.Context, reporter sources.UnitRe // TODO: This modifies s.memberCache but it doesn't look like // we do anything with it. if userType == organization && s.conn.ScanUsers { - if err := s.addMembersByOrg(ctx, org); err != nil { + if err := s.addMembersByOrg(ctx, org, reporter); err != nil { orgCtx.Logger().Error(err, "Unable to add members by org") } } @@ -549,11 +549,11 @@ func (s *Source) enumerateWithToken(ctx context.Context, isGithubEnterprise bool } if isGithubEnterprise { - s.addAllVisibleOrgs(ctx) + s.addAllVisibleOrgs(ctx, reporter) } else { // Scan for orgs is default with a token. // GitHub App enumerates the repos that were assigned to it in GitHub App settings. - s.addOrgsByUser(ctx, ghUser.GetLogin()) + s.addOrgsByUser(ctx, ghUser.GetLogin(), reporter) } } @@ -567,7 +567,7 @@ func (s *Source) enumerateWithToken(ctx context.Context, isGithubEnterprise bool } if userType == organization && s.conn.ScanUsers { - if err := s.addMembersByOrg(ctx, org); err != nil { + if err := s.addMembersByOrg(ctx, org, reporter); err != nil { orgCtx.Logger().Error(err, "Unable to add members for org") } } @@ -591,7 +591,7 @@ func (s *Source) enumerateWithApp(ctx context.Context, installationClient *githu // Check if we need to find user repos. if s.conn.ScanUsers { - err := s.addMembersByApp(ctx, installationClient) + err := s.addMembersByApp(ctx, installationClient, reporter) if err != nil { return err } @@ -840,6 +840,11 @@ func (s *Source) handleRateLimitWithUnitReporter(ctx context.Context, reporter s return s.handleRateLimit(ctx, errIn, &unitErrorReporter{reporter: reporter}) } +// handleRateLimitWithChunkReporter is a wrapper around handleRateLimit that includes chunk reporting +func (s *Source) handleRateLimitWithChunkReporter(ctx context.Context, reporter sources.ChunkReporter, errIn error) bool { + return s.handleRateLimit(ctx, errIn, &chunkErrorReporter{reporter: reporter}) +} + func (s *Source) addReposForMembers(ctx context.Context, reporter sources.UnitReporter) { ctx.Logger().Info("Fetching repos from members", "members", len(s.memberCache)) for member := range s.memberCache { @@ -884,7 +889,7 @@ func (s *Source) addUserGistsToCache(ctx context.Context, user string, reporter return nil } -func (s *Source) addMembersByApp(ctx context.Context, installationClient *github.Client) error { +func (s *Source) addMembersByApp(ctx context.Context, installationClient *github.Client, reporter sources.UnitReporter) error { opts := &github.ListOptions{ PerPage: membersAppPagination, } @@ -899,7 +904,7 @@ func (s *Source) addMembersByApp(ctx context.Context, installationClient *github if org.Account.GetType() != "Organization" { continue } - if err := s.addMembersByOrg(ctx, *org.Account.Login); err != nil { + if err := s.addMembersByOrg(ctx, *org.Account.Login, reporter); err != nil { return err } } @@ -907,7 +912,7 @@ func (s *Source) addMembersByApp(ctx context.Context, installationClient *github return nil } -func (s *Source) addAllVisibleOrgs(ctx context.Context) { +func (s *Source) addAllVisibleOrgs(ctx context.Context, reporter sources.UnitReporter) { ctx.Logger().V(2).Info("enumerating all visible organizations on GHE") // Enumeration on this endpoint does not use pages it uses a since ID. // The endpoint will return organizations with an ID greater than the given since ID. @@ -920,7 +925,7 @@ func (s *Source) addAllVisibleOrgs(ctx context.Context) { } for { orgs, _, err := s.connector.APIClient().Organizations.ListAll(ctx, orgOpts) - if s.handleRateLimit(ctx, err) { + if s.handleRateLimitWithUnitReporter(ctx, reporter, err) { continue } if err != nil { @@ -952,14 +957,14 @@ func (s *Source) addAllVisibleOrgs(ctx context.Context) { } } -func (s *Source) addOrgsByUser(ctx context.Context, user string) { +func (s *Source) addOrgsByUser(ctx context.Context, user string, reporter sources.UnitReporter) { orgOpts := &github.ListOptions{ PerPage: defaultPagination, } logger := ctx.Logger().WithValues("user", user) for { orgs, resp, err := s.connector.APIClient().Organizations.List(ctx, "", orgOpts) - if s.handleRateLimit(ctx, err) { + if s.handleRateLimitWithUnitReporter(ctx, reporter, err) { continue } if err != nil { @@ -981,7 +986,7 @@ func (s *Source) addOrgsByUser(ctx context.Context, user string) { } } -func (s *Source) addMembersByOrg(ctx context.Context, org string) error { +func (s *Source) addMembersByOrg(ctx context.Context, org string, reporter sources.UnitReporter) error { opts := &github.ListMembersOptions{ PublicOnly: false, ListOptions: github.ListOptions{ @@ -992,7 +997,7 @@ func (s *Source) addMembersByOrg(ctx context.Context, org string) error { logger := ctx.Logger().WithValues("org", org) for { members, res, err := s.connector.APIClient().Organizations.ListMembers(ctx, org, opts) - if s.handleRateLimit(ctx, err) { + if s.handleRateLimitWithUnitReporter(ctx, reporter, err) { continue } if err != nil { @@ -1124,7 +1129,7 @@ func (s *Source) processGistComments(ctx context.Context, gistURL string, urlPar } for { comments, _, err := s.connector.APIClient().Gists.ListComments(ctx, gistID, options) - if s.handleRateLimit(ctx, err) { + if s.handleRateLimitWithChunkReporter(ctx, reporter, err) { continue } if err != nil { @@ -1239,7 +1244,7 @@ func (s *Source) processIssues(ctx context.Context, repoInfo repoInfo, reporter for { issues, _, err := s.connector.APIClient().Issues.ListByRepo(ctx, repoInfo.owner, repoInfo.name, bodyTextsOpts) - if s.handleRateLimit(ctx, err) { + if s.handleRateLimitWithChunkReporter(ctx, reporter, err) { continue } @@ -1308,7 +1313,7 @@ func (s *Source) processIssueComments(ctx context.Context, repoInfo repoInfo, re for { issueComments, _, err := s.connector.APIClient().Issues.ListComments(ctx, repoInfo.owner, repoInfo.name, allComments, issueOpts) - if s.handleRateLimit(ctx, err) { + if s.handleRateLimitWithChunkReporter(ctx, reporter, err) { continue } if err != nil { @@ -1376,7 +1381,7 @@ func (s *Source) processPRs(ctx context.Context, repoInfo repoInfo, reporter sou for { prs, _, err := s.connector.APIClient().PullRequests.List(ctx, repoInfo.owner, repoInfo.name, prOpts) - if s.handleRateLimit(ctx, err) { + if s.handleRateLimitWithChunkReporter(ctx, reporter, err) { continue } if err != nil { @@ -1408,7 +1413,7 @@ func (s *Source) processPRComments(ctx context.Context, repoInfo repoInfo, repor for { prComments, _, err := s.connector.APIClient().PullRequests.ListComments(ctx, repoInfo.owner, repoInfo.name, allComments, prOpts) - if s.handleRateLimit(ctx, err) { + if s.handleRateLimitWithChunkReporter(ctx, reporter, err) { continue } if err != nil { From 7e77026c2f5c6792d7c160a0e2351d4071d6bcd2 Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Mon, 3 Feb 2025 10:51:29 -0500 Subject: [PATCH 10/17] get repo url before err check --- pkg/sources/github/github.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index d3aca1ccbd90..8b8c1d1d5427 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -453,15 +453,18 @@ func (s *Source) ensureRepoInfoCache(ctx context.Context, repo string, reporter for { gistID := extractGistID(urlParts) gist, _, err := s.connector.APIClient().Gists.Get(ctx, gistID) + // Normalize the URL to the Gist's pull URL. + // See https://github.com/trufflesecurity/trufflehog/pull/2625#issuecomment-2025507937 + repo = gist.GetGitPullURL() + if s.handleRateLimit(ctx, err, reporter) { continue } + if err != nil { return repo, fmt.Errorf("failed to fetch gist") } - // Normalize the URL to the Gist's pull URL. - // See https://github.com/trufflesecurity/trufflehog/pull/2625#issuecomment-2025507937 - repo = gist.GetGitPullURL() + s.cacheGistInfo(gist) break } From 71cdefa07b57b27b5f0babbeb0a05fe38a66c5a1 Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Mon, 3 Feb 2025 10:53:46 -0500 Subject: [PATCH 11/17] use iterator --- pkg/sources/github/github.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 8b8c1d1d5427..0a10454d1cf2 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -815,8 +815,8 @@ func (s *Source) handleRateLimit(ctx context.Context, errIn error, reporter ...e rateLimitResumeTime = now.Add(retryAfter) ctx.Logger().Info(fmt.Sprintf("exceeded %s rate limit", limitType), "retry_after", retryAfter.String(), "resume_time", rateLimitResumeTime.Format(time.RFC3339)) // Only report the error if a reporter was provided - if len(reporter) > 0 { - if err := reporter[0].Err(ctx, fmt.Errorf("exceeded %s rate limit", limitType)); err != nil { + for _, reporter := range reporter { + if err := reporter.Err(ctx, fmt.Errorf("exceeded %s rate limit", limitType)); err != nil { ctx.Logger().Error(err, "failed to report rate limit error") } } From 034fc53a01d915d2d08ea505dab832d70fd87ed7 Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Mon, 3 Feb 2025 11:01:12 -0500 Subject: [PATCH 12/17] remove err log --- pkg/sources/github/github.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 0a10454d1cf2..46389d187ba1 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -420,9 +420,7 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e repo, err := s.ensureRepoInfoCache(ctx, repo, &unitErrorReporter{reporter}) if err != nil { ctx.Logger().Error(err, "error caching repo info") - if err := dedupeReporter.UnitErr(ctx, fmt.Errorf("error caching repo info: %w", err)); err != nil { - ctx.Logger().Error(err, "failed to report unit error") - } + dedupeReporter.UnitErr(ctx, fmt.Errorf("error caching repo info: %w", err)) } s.repos = append(s.repos, repo) } From a3f64f0b773379cef871ff418416d85c340080fc Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Fri, 31 Jan 2025 09:44:18 -0500 Subject: [PATCH 13/17] nit --- pkg/sources/source_manager.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/sources/source_manager.go b/pkg/sources/source_manager.go index 43784141f4a8..2e03be1d8127 100644 --- a/pkg/sources/source_manager.go +++ b/pkg/sources/source_manager.go @@ -406,8 +406,7 @@ func (s *SourceManager) enumerate(ctx context.Context, source Source, report *Jo // Check if source units are supported and configured. canUseSourceUnits := s.useSourceUnitsFunc != nil if enumChunker, ok := source.(SourceUnitEnumerator); ok && canUseSourceUnits && s.useSourceUnitsFunc() { - ctx.Logger().Info("running source", - "with_units", true) + ctx.Logger().Info("running source", "with_units", true) return s.enumerateWithUnits(ctx, enumChunker, report, reporter) } return fmt.Errorf("Enumeration not supported or configured for source: %s", source.Type().String()) From d8bcd2d92bc4355dd0ed40752759c811e09859fb Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Mon, 3 Feb 2025 13:38:38 -0500 Subject: [PATCH 14/17] pluralize --- pkg/sources/github/github.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 46389d187ba1..50f574ae7193 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -773,7 +773,7 @@ func (c chunkErrorReporter) Err(ctx context.Context, err error) error { // Authenticated users have a rate limit of 5,000 requests per hour, // however, certain actions are subject to a stricter "secondary" limit. // https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api -func (s *Source) handleRateLimit(ctx context.Context, errIn error, reporter ...errorReporter) bool { +func (s *Source) handleRateLimit(ctx context.Context, errIn error, reporters ...errorReporter) bool { if errIn == nil { return false } @@ -813,7 +813,7 @@ func (s *Source) handleRateLimit(ctx context.Context, errIn error, reporter ...e rateLimitResumeTime = now.Add(retryAfter) ctx.Logger().Info(fmt.Sprintf("exceeded %s rate limit", limitType), "retry_after", retryAfter.String(), "resume_time", rateLimitResumeTime.Format(time.RFC3339)) // Only report the error if a reporter was provided - for _, reporter := range reporter { + for _, reporter := range reporters { if err := reporter.Err(ctx, fmt.Errorf("exceeded %s rate limit", limitType)); err != nil { ctx.Logger().Error(err, "failed to report rate limit error") } From 01e2f3e637d5e82705f6301afd5247dd0a074305 Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Mon, 3 Feb 2025 16:04:03 -0500 Subject: [PATCH 15/17] remove err log --- pkg/sources/github/github.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 50f574ae7193..e0e51969ce7f 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -814,9 +814,7 @@ func (s *Source) handleRateLimit(ctx context.Context, errIn error, reporters ... ctx.Logger().Info(fmt.Sprintf("exceeded %s rate limit", limitType), "retry_after", retryAfter.String(), "resume_time", rateLimitResumeTime.Format(time.RFC3339)) // Only report the error if a reporter was provided for _, reporter := range reporters { - if err := reporter.Err(ctx, fmt.Errorf("exceeded %s rate limit", limitType)); err != nil { - ctx.Logger().Error(err, "failed to report rate limit error") - } + reporter.Err(ctx, fmt.Errorf("exceeded %s rate limit", limitType)) } } else { retryAfter = (5 * time.Minute) + jitter From 56a30aadcc0d9a66dae145db0afc48a385b51bde Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Mon, 3 Feb 2025 16:08:24 -0500 Subject: [PATCH 16/17] update tests --- pkg/sources/github/github_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/sources/github/github_test.go b/pkg/sources/github/github_test.go index 3516017dbe08..01401f7d0945 100644 --- a/pkg/sources/github/github_test.go +++ b/pkg/sources/github/github_test.go @@ -197,7 +197,7 @@ func TestAddMembersByOrg(t *testing.T) { }) s := initTestSource(&sourcespb.GitHub{Credential: &sourcespb.GitHub_Unauthenticated{}}) - err := s.addMembersByOrg(context.Background(), "org1") + err := s.addMembersByOrg(context.Background(), "org1", noopReporter()) assert.Nil(t, err) assert.Equal(t, 2, len(s.memberCache)) _, ok := s.memberCache["testman1"] @@ -221,7 +221,7 @@ func TestAddMembersByOrg_AuthFailure(t *testing.T) { }}) s := initTestSource(&sourcespb.GitHub{Credential: &sourcespb.GitHub_Unauthenticated{}}) - err := s.addMembersByOrg(context.Background(), "org1") + err := s.addMembersByOrg(context.Background(), "org1", noopReporter()) assert.True(t, strings.HasPrefix(err.Error(), "could not list organization")) assert.False(t, gock.HasUnmatchedRequest()) assert.True(t, gock.IsDone()) @@ -236,7 +236,7 @@ func TestAddMembersByOrg_NoMembers(t *testing.T) { JSON([]map[string]string{}) s := initTestSource(&sourcespb.GitHub{Credential: &sourcespb.GitHub_Unauthenticated{}}) - err := s.addMembersByOrg(context.Background(), "org1") + err := s.addMembersByOrg(context.Background(), "org1", noopReporter()) assert.Equal(t, fmt.Sprintf("organization (%q) had 0 members: account may not have access to list organization members", "org1"), err.Error()) assert.False(t, gock.HasUnmatchedRequest()) @@ -276,7 +276,7 @@ func TestAddMembersByApp(t *testing.T) { AppId: "4141", }, }}) - err := s.addMembersByApp(context.Background(), s.connector.(*appConnector).InstallationClient()) + err := s.addMembersByApp(context.Background(), s.connector.(*appConnector).InstallationClient(), noopReporter()) assert.Nil(t, err) assert.Equal(t, 3, len(s.memberCache)) _, ok := s.memberCache["ssm1"] @@ -327,7 +327,7 @@ func TestAddOrgsByUser(t *testing.T) { }) s := initTestSource(&sourcespb.GitHub{Credential: &sourcespb.GitHub_Unauthenticated{}}) - s.addOrgsByUser(context.Background(), "super-secret-user") + s.addOrgsByUser(context.Background(), "super-secret-user", noopReporter()) assert.Equal(t, 1, s.orgsCache.Count()) ok := s.orgsCache.Exists("sso2") assert.True(t, ok) From 8bd764da21277af254c751fa09f22aaeb14a6f68 Mon Sep 17 00:00:00 2001 From: ahmed <ahmed.zahran@trufflesec.com> Date: Mon, 3 Feb 2025 16:56:27 -0500 Subject: [PATCH 17/17] make linter happy --- pkg/sources/github/github.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index e0e51969ce7f..b0b77f3f6ee4 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -420,7 +420,7 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e repo, err := s.ensureRepoInfoCache(ctx, repo, &unitErrorReporter{reporter}) if err != nil { ctx.Logger().Error(err, "error caching repo info") - dedupeReporter.UnitErr(ctx, fmt.Errorf("error caching repo info: %w", err)) + _ = dedupeReporter.UnitErr(ctx, fmt.Errorf("error caching repo info: %w", err)) } s.repos = append(s.repos, repo) } @@ -814,7 +814,7 @@ func (s *Source) handleRateLimit(ctx context.Context, errIn error, reporters ... ctx.Logger().Info(fmt.Sprintf("exceeded %s rate limit", limitType), "retry_after", retryAfter.String(), "resume_time", rateLimitResumeTime.Format(time.RFC3339)) // Only report the error if a reporter was provided for _, reporter := range reporters { - reporter.Err(ctx, fmt.Errorf("exceeded %s rate limit", limitType)) + _ = reporter.Err(ctx, fmt.Errorf("exceeded %s rate limit", limitType)) } } else { retryAfter = (5 * time.Minute) + jitter