From 42fde012d8f0171aef3c3b569cb29b98e90547e7 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Fri, 5 Apr 2019 13:50:49 +0200 Subject: [PATCH] perf: improve the way we check if refs are not pointing to commits Fixes #775 Fixes #777 Before, each time we got a new reference, we were checking if it pointed to a commit. This incurred in a performance penalty and an increase of 2x in memory usage and 4x in allocations, as more objects were being read from the repository. Instead, now we use the already present resolveCommit to check it the moment we get the object, since we have to get it anyway. This way, we have the exact same behaviour without having any performance penalty or memory usage increase. Signed-off-by: Miguel Molina --- commits.go | 50 ++++++++++++++++++++++++--------------------- integration_test.go | 2 +- ref_commits.go | 17 ++------------- references.go | 25 +++-------------------- squash_iterator.go | 37 +++++---------------------------- 5 files changed, 38 insertions(+), 93 deletions(-) diff --git a/commits.go b/commits.go index 2ff12caac..d3d4cd21a 100644 --- a/commits.go +++ b/commits.go @@ -229,46 +229,50 @@ func (i *commitIter) loadNextRef() (err error) { return err } - ignored, err := isIgnoredReference(i.repo.Repository, i.ref) - if err != nil { - if i.skipGitErrors { - continue - } - - return err - } - - if ignored { + if isIgnoredReference(i.ref) { continue } - i.queue = append(i.queue, i.ref.Hash()) - return nil } } func (i *commitIter) Next() (*object.Commit, error) { for { + var commit *object.Commit + var err error + if i.ref == nil { if err := i.loadNextRef(); err != nil { return nil, err } - } - if len(i.queue) == 0 { - i.ref = nil - continue - } + if _, ok := i.seen[i.ref.Hash()]; ok { + continue + } + i.seen[i.ref.Hash()] = struct{}{} - hash := i.queue[0] - i.queue = i.queue[1:] - if _, ok := i.seen[hash]; ok { - continue + commit, err = resolveCommit(i.repo, i.ref.Hash()) + if errInvalidCommit.Is(err) { + i.ref = nil + continue + } + } else { + if len(i.queue) == 0 { + i.ref = nil + continue + } + + hash := i.queue[0] + i.queue = i.queue[1:] + if _, ok := i.seen[hash]; ok { + continue + } + i.seen[hash] = struct{}{} + + commit, err = i.repo.CommitObject(hash) } - i.seen[hash] = struct{}{} - commit, err := i.repo.CommitObject(hash) if err != nil { if i.skipGitErrors { continue diff --git a/integration_test.go b/integration_test.go index 2454f3629..7313d3bdb 100644 --- a/integration_test.go +++ b/integration_test.go @@ -594,7 +594,7 @@ func TestMissingHeadRefs(t *testing.T) { rows, err := sql.RowIterToRows(iter) require.NoError(err) - require.Len(rows, 54) + require.Len(rows, 56) } func BenchmarkQueries(b *testing.B) { diff --git a/ref_commits.go b/ref_commits.go index 0c329dde2..d6a0f72b7 100644 --- a/ref_commits.go +++ b/ref_commits.go @@ -305,32 +305,19 @@ func (i *refCommitsRowIter) next() (sql.Row, error) { i.repo.Close() return nil, err } - - ignored, err := isIgnoredReference(i.repo.Repository, ref) - if err != nil { - if i.skipGitErrors { - continue - } - - return nil, err - } - - if ignored { - continue - } } else { ref = plumbing.NewHashReference(plumbing.ReferenceName("HEAD"), i.head.Hash()) i.head = nil } i.ref = ref - if !i.shouldVisitRef(ref) { + if !i.shouldVisitRef(ref) || isIgnoredReference(ref) { continue } commit, err := resolveCommit(i.repo, ref.Hash()) if err != nil { - if i.skipGitErrors { + if errInvalidCommit.Is(err) || i.skipGitErrors { continue } diff --git a/references.go b/references.go index 506c30514..ae278ee17 100644 --- a/references.go +++ b/references.go @@ -6,7 +6,6 @@ import ( "io" "strings" - git "gopkg.in/src-d/go-git.v4" "gopkg.in/src-d/go-mysql-server.v0/sql" "gopkg.in/src-d/go-git.v4/plumbing" @@ -300,16 +299,7 @@ func (i *refRowIter) next() (sql.Row, error) { return nil, err } - ignored, err := isIgnoredReference(i.repo.Repository, o) - if err != nil { - if i.skipGitErrors { - continue - } - - return nil, err - } - - if ignored { + if isIgnoredReference(o) { continue } @@ -351,15 +341,6 @@ func referenceToRow(repositoryID string, c *plumbing.Reference) sql.Row { ) } -func isIgnoredReference(repo *git.Repository, ref *plumbing.Reference) (bool, error) { - if ref.Type() != plumbing.HashReference { - return true, nil - } - - obj, err := repo.Object(plumbing.AnyObject, ref.Hash()) - if err != nil { - return false, err - } - - return obj.Type() != plumbing.CommitObject, nil +func isIgnoredReference(r *plumbing.Reference) bool { + return r.Type() != plumbing.HashReference } diff --git a/squash_iterator.go b/squash_iterator.go index b9acd2eba..7b6a4ba1e 100644 --- a/squash_iterator.go +++ b/squash_iterator.go @@ -489,16 +489,7 @@ func (i *squashRefIter) Advance() error { } } - ignored, err := isIgnoredReference(i.repo.Repository, ref) - if err != nil { - if i.skipGitErrors { - continue - } - - return err - } - - if ignored { + if isIgnoredReference(ref) { continue } @@ -743,16 +734,7 @@ func (i *squashRepoRefsIter) Advance() error { } } - ignored, err := isIgnoredReference(i.repos.Repository().Repository, ref) - if err != nil { - if i.skipGitErrors { - continue - } - - return err - } - - if ignored { + if isIgnoredReference(ref) { continue } @@ -890,16 +872,7 @@ func (i *squashRemoteRefsIter) Advance() error { } } - ignored, err := isIgnoredReference(i.Repository().Repository, ref) - if err != nil { - if i.skipGitErrors { - continue - } - - return err - } - - if ignored { + if isIgnoredReference(ref) { continue } @@ -1010,7 +983,7 @@ func (i *squashRefRefCommitsIter) Advance() error { "error": err, }).Error("unable to get commit") - if i.skipGitErrors { + if errInvalidCommit.Is(err) || i.skipGitErrors { continue } @@ -1116,7 +1089,7 @@ func (i *squashRefHeadRefCommitsIter) Advance() error { "error": err, }).Error("unable to get commit") - if i.skipGitErrors { + if errInvalidCommit.Is(err) || i.skipGitErrors { continue }