Skip to content

Commit

Permalink
option to skip hashing
Browse files Browse the repository at this point in the history
Signed-off-by: Avi Deitcher <avi@deitcher.net>
  • Loading branch information
deitch committed Oct 29, 2020
1 parent 4cae1db commit 2ba70c0
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 12 deletions.
9 changes: 7 additions & 2 deletions pkg/content/gunzip.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,16 @@ import (
)

// NewGunzipWriter wraps a writer with a gunzip, so that the stream is gunzipped
func NewGunzipWriter(writer content.Writer, blocksize int) content.Writer {
//
// By default, it calculates the hash when writing. If the option `skipHash` is true,
// it will skip doing the hash. Skipping the hash is intended to be used only
// if you are confident about the validity of the data being passed to the writer,
// and wish to save on the hashing time.
func NewGunzipWriter(writer content.Writer, skipHash bool, blocksize int) content.Writer {
if blocksize == 0 {
blocksize = DefaultBlocksize
}
return NewPassthroughWriter(writer, func(r io.Reader, w io.Writer, done chan<- error) {
return NewPassthroughWriter(writer, skipHash, func(r io.Reader, w io.Writer, done chan<- error) {
gr, err := gzip.NewReader(r)
if err != nil {
done <- fmt.Errorf("error creating gzip reader: %v", err)
Expand Down
16 changes: 12 additions & 4 deletions pkg/content/iowriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,18 @@ type IoContentWriter struct {
writer io.Writer
digester digest.Digester
size int64
skipHash bool
}

// NewIoContentWriter creates a new IoContentWriter. blocksize is the size of the block to copy,
// in bytes, between the parent and child. The default, when 0, is to simply use
// whatever golang defaults to with io.Copy
func NewIoContentWriter(writer io.Writer, blocksize int) content.Writer {
// whatever golang defaults to with io.Copy.
//
// By default, it calculates the hash when writing. If the option `skipHash` is true,
// it will skip doing the hash. Skipping the hash is intended to be used only
// if you are confident about the validity of the data being passed to the writer,
// and wish to save on the hashing time.
func NewIoContentWriter(writer io.Writer, skipHash bool, blocksize int) content.Writer {
w := writer
if w == nil {
w = ioutil.Discard
Expand All @@ -29,7 +35,7 @@ func NewIoContentWriter(writer io.Writer, blocksize int) content.Writer {
writer: w,
digester: digest.Canonical.Digester(),
}
return NewPassthroughWriter(ioc, func(r io.Reader, w io.Writer, done chan<- error) {
return NewPassthroughWriter(ioc, skipHash, func(r io.Reader, w io.Writer, done chan<- error) {
// write out the data to the io writer
var (
err error
Expand All @@ -49,8 +55,10 @@ func (w *IoContentWriter) Write(p []byte) (n int, err error) {
if err != nil {
return 0, err
}
w.digester.Hash().Write(p[:n])
w.size += int64(n)
if !w.skipHash {
w.digester.Hash().Write(p[:n])
}
return
}

Expand Down
15 changes: 12 additions & 3 deletions pkg/content/passthrough.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,23 @@ type PassthroughWriter struct {
underlyingDigester digest.Digester
underlyingSize int64
reader *io.PipeReader
skipHash bool
done chan error
}

// NewPassthroughWriter creates a pass-through writer that allows for processing
// the content via an arbitrary function. The function should do whatever processing it
// wants, reading from the Reader to the Writer. When done, it must indicate
// completion by sending an error or nil on the done channel.
func NewPassthroughWriter(writer content.Writer, f func(r io.Reader, w io.Writer, done chan<- error)) content.Writer {
func NewPassthroughWriter(writer content.Writer, skipHash bool, f func(r io.Reader, w io.Writer, done chan<- error)) content.Writer {
r, w := io.Pipe()
pw := &PassthroughWriter{
writer: writer,
pipew: w,
digester: digest.Canonical.Digester(),
underlyingDigester: digest.Canonical.Digester(),
reader: r,
skipHash: skipHash,
done: make(chan error, 1),
}
uw := &underlyingWriter{
Expand All @@ -44,7 +46,9 @@ func NewPassthroughWriter(writer content.Writer, f func(r io.Reader, w io.Writer

// Write feeds p into the pipe consumed by the processing goroutine and,
// unless skipHash was requested at construction, folds the written bytes
// into the digest of the incoming (pre-processing) stream.
//
// It returns the number of bytes accepted by the pipe and any pipe error.
func (pw *PassthroughWriter) Write(p []byte) (n int, err error) {
	n, err = pw.pipew.Write(p)
	// Hash only the bytes actually written (p[:n]). When skipHash is set the
	// caller has vouched for the data's validity and we save the hashing cost.
	// NOTE: hashing must happen exactly once per byte — a duplicated
	// unconditional Hash().Write here would corrupt the digest.
	if !pw.skipHash {
		pw.digester.Hash().Write(p[:n])
	}
	pw.size += int64(n)
	return
}
Expand All @@ -71,6 +75,9 @@ func (pw *PassthroughWriter) Commit(ctx context.Context, size int64, expected di
if err != nil && err != io.EOF {
return err
}

// Some underlying writers will validate an expected digest, so we need the option to pass it
// that digest. That is why we calculate the digest of the underlying writer throughout the write process.
return pw.writer.Commit(ctx, pw.underlyingSize, pw.underlyingDigester.Digest(), opts...)
}

Expand All @@ -97,7 +104,9 @@ func (u *underlyingWriter) Write(p []byte) (int, error) {
return 0, err
}

if !u.pw.skipHash {
u.pw.underlyingDigester.Hash().Write(p)
}
u.pw.underlyingSize += int64(len(p))
u.pw.underlyingDigester.Hash().Write(p)
return n, nil
}
2 changes: 1 addition & 1 deletion pkg/content/passthrough_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func TestPassthroughWriter(t *testing.T) {
if err != nil {
t.Fatalf("unexpected error getting the memory store writer: %v", err)
}
writer := content.NewPassthroughWriter(memw, f)
writer := content.NewPassthroughWriter(memw, false, f)
n, err := writer.Write(testContent)
if err != nil {
t.Fatalf("unexpected error on Write: %v", err)
Expand Down
9 changes: 7 additions & 2 deletions pkg/content/untar.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,16 @@ import (
)

// NewUntarWriter wraps a writer with an untar, so that the stream is untarred
func NewUntarWriter(writer content.Writer, blocksize int) content.Writer {
//
// By default, it calculates the hash when writing. If the option `skipHash` is true,
// it will skip doing the hash. Skipping the hash is intended to be used only
// if you are confident about the validity of the data being passed to the writer,
// and wish to save on the hashing time.
func NewUntarWriter(writer content.Writer, skipHash bool, blocksize int) content.Writer {
if blocksize == 0 {
blocksize = DefaultBlocksize
}
return NewPassthroughWriter(writer, func(r io.Reader, w io.Writer, done chan<- error) {
return NewPassthroughWriter(writer, skipHash, func(r io.Reader, w io.Writer, done chan<- error) {
tr := tar.NewReader(r)
var err error
for {
Expand Down

0 comments on commit 2ba70c0

Please sign in to comment.