Skip to content

Commit

Permalink
refactor: fetch branches before shallow fetch to reduce the total commits collected (#7760)
Browse files Browse the repository at this point in the history
  • Loading branch information
klesh authored and action_bot committed Jul 29, 2024
1 parent e196ad3 commit 4afe516
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 11 deletions.
8 changes: 5 additions & 3 deletions backend/plugins/gitextractor/impl/impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,11 @@ func (p GitExtractor) PrepareTaskData(taskCtx plugin.TaskContext, options map[st

func (p GitExtractor) Close(taskCtx plugin.TaskContext) errors.Error {
if taskData, ok := taskCtx.GetData().(*parser.GitExtractorTaskData); ok {
if taskData.GitRepo != nil {
if err := taskData.GitRepo.Close(taskCtx.GetContext()); err != nil {
return errors.Convert(err)
if !taskCtx.GetConfigReader().GetBool("GIT_EXTRACTOR_KEEP_REPO") {
if taskData.GitRepo != nil {
if err := taskData.GitRepo.Close(taskCtx.GetContext()); err != nil {
return errors.Convert(err)
}
}
}
}
Expand Down
20 changes: 12 additions & 8 deletions backend/plugins/gitextractor/parser/clone_gitcli.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,11 @@ func (g *GitcliCloner) execGitCloneCommand(ctx plugin.SubTaskContext, localDir s
// https://stackoverflow.com/questions/23708231/git-shallow-clone-clone-depth-misses-remote-branches

// 1. clone the repo with depth 1
if err := g.execGitCommand(ctx, "clone", taskData.Options.Url, localDir, "--depth=1", "--bare"); err != nil {
cloneArgs := append([]string{"clone", taskData.Options.Url, localDir, "--depth=1", "--bare"}, args...)
if err := g.execGitCommand(ctx, cloneArgs...); err != nil {
return err
}
// 2. set remote for all branches
// if err := g.execGitCommandIn(ctx, localDir, "remote", "set-branches", "origin", "'*'"); err != nil {
// return err
// } // somehow it fails silently on my local machine, don't know why
// 2. configure to fetch all branches from the remote server so we can collect new commits from them
gitConfig, err := os.OpenFile(path.Join(localDir, "config"), os.O_APPEND|os.O_WRONLY, 0644)
if err != nil {
return errors.Default.Wrap(err, "failed to open git config file")
Expand All @@ -139,14 +137,20 @@ func (g *GitcliCloner) execGitCloneCommand(ctx plugin.SubTaskContext, localDir s
if err != nil {
return errors.Default.Wrap(err, "failed to write to git config file")
}
// 3. fetch all new commits from all branches since the given time
args = append([]string{"fetch", "--progress", fmt.Sprintf("--shallow-since=%s", since.Format(time.RFC3339))}, args...)
// 3. fetch all branches with depth=1 so the next step would collect fewer commits
// (I don't know why, but it reduced total number of commits from 18k to 7k on https://gitlab.com/gitlab-org/gitlab-foss.git with the same parameters)
fetchBranchesArgs := append([]string{"fetch", "--depth=1", "origin"}, args...)
if err := g.execGitCommandIn(ctx, localDir, fetchBranchesArgs...); err != nil {
return errors.Default.Wrap(err, "failed to fetch all branches from the remote server")
}
// 4. fetch all new commits from all branches since the given time
args = append([]string{"fetch", fmt.Sprintf("--shallow-since=%s", since.Format(time.RFC3339))}, args...)
if err := g.execGitCommandIn(ctx, localDir, args...); err != nil {
g.logger.Warn(err, "shallow fetch failed")
}
return nil
} else {
args = append([]string{"clone", taskData.Options.Url, localDir, "--progress", "--bare"}, args...)
args = append([]string{"clone", taskData.Options.Url, localDir, "--bare"}, args...)
return g.execGitCommand(ctx, args...)
}
}
Expand Down

0 comments on commit 4afe516

Please sign in to comment.