Skip to content

Commit

Permalink
Switch file_to_blob() and blob_to_file() to work without spawning Git…
Browse files Browse the repository at this point in the history
… subprocesses

Replace `git cat-file` with Odb.Read() and `git hash-object -w` with
Odb.Write().

Timing for restoring only files from lab.nexedi.com backup:

before: ~95s
after:   ~8s

Timings for the file part of making a backup should improve similarly.
  • Loading branch information
navytux committed Jul 29, 2016
1 parent 87283e4 commit fbd72c0
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 16 deletions.
35 changes: 19 additions & 16 deletions git-backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
pathpkg "path"
"path/filepath"
Expand Down Expand Up @@ -108,9 +109,8 @@ func debugf(format string, a ...interface{}) {
// -------- git operations (like create/extract blob, commit tree ...) --------

// file -> blob_sha1, mode
func file_to_blob(path string) (Sha1, uint32) {
argv := []string{"hash-object", "-w", "--no-filters"}
stdin := ""
func file_to_blob(g *git.Repository, path string) (Sha1, uint32) {
var blob_content []byte

// because we want to pass mode to outside world (to e.g. `git update-index`)
// we need to get native OS mode, not translated one as os.Lstat() would give us.
Expand All @@ -121,32 +121,35 @@ func file_to_blob(path string) (Sha1, uint32) {
}

if st.Mode&syscall.S_IFMT == syscall.S_IFLNK {
// git hash-object does not handle symlinks
argv = append(argv, "--stdin")
stdin, err = os.Readlink(path)
__, err := os.Readlink(path)
blob_content = Bytes(__)
raiseif(err)
} else {
argv = append(argv, "--", path)
// stdin = "" already
blob_content, err = ioutil.ReadFile(path)
raiseif(err)
}

blob_sha1 := xgit2Sha1(argv, RunWith{stdin: stdin})
blob_sha1, err := WriteObject(g, blob_content, git.ObjectBlob)
raiseif(err)

return blob_sha1, st.Mode
}

// blob_sha1, mode -> file
func blob_to_file(blob_sha1 Sha1, mode uint32, path string) {
blob_content := xgit("cat-file", "blob", blob_sha1, RunWith{raw: true})
func blob_to_file(g *git.Repository, blob_sha1 Sha1, mode uint32, path string) {
blob, err := ReadObject(g, blob_sha1, git.ObjectBlob)
raiseif(err)
blob_content := blob.Data()

err := os.MkdirAll(pathpkg.Dir(path), 0777)
err = os.MkdirAll(pathpkg.Dir(path), 0777)
raiseif(err)

if mode&syscall.S_IFMT == syscall.S_IFLNK {
err = os.Symlink(blob_content, path)
err = os.Symlink(String(blob_content), path)
raiseif(err)
} else {
// NOTE mode is native - we cannot use ioutil.WriteFile() directly
err = writefile(path, Bytes(blob_content), mode)
err = writefile(path, blob_content, mode)
raiseif(err)
}
}
Expand Down Expand Up @@ -434,7 +437,7 @@ func cmd_pull_(gb *git.Repository, pullspecv []PullSpec) {
// files -> add directly to index to commit later
if !info.IsDir() {
infof("# file %s\t<- %s", prefix, path)
blob, mode := file_to_blob(path)
blob, mode := file_to_blob(gb, path)
xgit("update-index", "--add", "--cacheinfo",
fmt.Sprintf("%o,%s,%s", mode, blob, reprefix(dir, prefix, path)))
return nil
Expand Down Expand Up @@ -792,7 +795,7 @@ func cmd_restore_(gb *git.Repository, HEAD_ string, restorespecv []RestoreSpec)

filename = reprefix(prefix, dir, filename)
infof("# file %s\t-> %s", prefix, filename)
blob_to_file(sha1, mode, filename)
blob_to_file(gb, sha1, mode, filename)

// make sure git will recognize *.git as repo:
// - it should have refs/{heads,tags}/ and objects/pack/ inside.
Expand Down
59 changes: 59 additions & 0 deletions gitobjects.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,67 @@ package main
import (
"errors"
"fmt"

git "github.com/libgit2/git2go"
)

// read/write raw objects
// ReadObject loads the object named by sha1 from the object database of g
// and verifies that it has type objtype.
//
// Any error reported by ReadObject2 is passed through unchanged. If the
// object was read but its type differs from objtype, a nil object and an
// *UnexpectedObjType error are returned.
func ReadObject(g *git.Repository, sha1 Sha1, objtype git.ObjectType) (*git.OdbObject, error) {
	obj, err := ReadObject2(g, sha1)
	switch {
	case err != nil:
		return nil, err
	case obj.Type() != objtype:
		// the object exists, but is not of the kind the caller asked for
		return nil, &UnexpectedObjType{obj, objtype}
	}
	return obj, nil
}

// ReadObject2 loads the object named by sha1 from the object database of g
// without checking its type.
//
// If the object database of g cannot be obtained, the failure is wrapped in
// *OdbNotReady. An error from the read itself is returned as is.
func ReadObject2(g *git.Repository, sha1 Sha1) (*git.OdbObject, error) {
	db, dbErr := g.Odb()
	if dbErr != nil {
		return nil, &OdbNotReady{g, dbErr}
	}

	raw, readErr := db.Read(sha1.AsOid())
	if readErr != nil {
		return nil, readErr
	}
	return raw, nil
}

// WriteObject stores content as an object of type objtype in the object
// database of g and returns the Sha1 of the written object.
//
// If the object database of g cannot be obtained, the failure is wrapped in
// *OdbNotReady. An error from the write itself is returned as is. In both
// cases the returned Sha1 is the zero value.
func WriteObject(g *git.Repository, content []byte, objtype git.ObjectType) (Sha1, error) {
	db, dbErr := g.Odb()
	if dbErr != nil {
		return Sha1{}, &OdbNotReady{g, dbErr}
	}

	id, writeErr := db.Write(content, objtype)
	if writeErr != nil {
		// writeErr is e.g. "Failed to create temporary file '.../objects/tmp_object_git2_G045iN': Permission denied"
		return Sha1{}, writeErr
	}
	return Sha1FromOid(id), nil
}

// OdbNotReady is the error used when the object database of a repository
// could not be obtained.
type OdbNotReady struct {
	g   *git.Repository // repository whose object database was requested
	err error           // underlying error explaining the failure
}

// Error formats the error as `git("<repository path>"): odb not ready: <cause>`.
func (e *OdbNotReady) Error() string {
	repoPath := e.g.Path()
	return fmt.Sprintf("git(%q): odb not ready: %s", repoPath, e.err)
}

// UnexpectedObjType is the error used when an object was read successfully
// but its type is not the one the caller asked for.
type UnexpectedObjType struct {
	obj      *git.OdbObject // the object that was actually read
	wantType git.ObjectType // the type the caller expected
}

// Error formats the error as `<object id>: type is <actual> (expected <wanted>)`.
func (e *UnexpectedObjType) Error() string {
	id, gotType := e.obj.Id(), e.obj.Type()
	return fmt.Sprintf("%s: type is %s (expected %s)", id, gotType, e.wantType)
}



type Tag struct {
tagged_type string
tagged_sha1 Sha1
Expand Down

0 comments on commit fbd72c0

Please sign in to comment.