diff --git a/go.mod b/go.mod index 8503b101b2c89..3cf2128f5bc2d 100644 --- a/go.mod +++ b/go.mod @@ -38,7 +38,7 @@ require ( github.com/morikuni/aec v1.0.0 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f github.com/opentracing/opentracing-go v1.1.1-0.20200124165624-2876d2018785 - github.com/pierrec/lz4 v2.3.1-0.20191115212037-9085dacd1e1e+incompatible + github.com/pierrec/lz4 v2.5.3-0.20200429092203-e876bbd321b3+incompatible github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.5.0 github.com/prometheus/client_model v0.2.0 diff --git a/go.sum b/go.sum index 4abfb118076f0..7d5a7b3132ff7 100644 --- a/go.sum +++ b/go.sum @@ -680,8 +680,8 @@ github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144T github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pierrec/lz4 v2.3.1-0.20191115212037-9085dacd1e1e+incompatible h1:5isCJDRADbeSlWx6KVXAYwrcihyCGVXr7GNCdLEVDr8= -github.com/pierrec/lz4 v2.3.1-0.20191115212037-9085dacd1e1e+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= +github.com/pierrec/lz4 v2.5.3-0.20200429092203-e876bbd321b3+incompatible h1:wPraQD8xUZ14zNJcKn9cz/+n3r6H2NklrGqq7J+c5qY= +github.com/pierrec/lz4 v2.5.3-0.20200429092203-e876bbd321b3+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/vendor/github.com/pierrec/lz4/block.go b/vendor/github.com/pierrec/lz4/block.go index ee178a992b11a..664d9be580d41 100644 --- a/vendor/github.com/pierrec/lz4/block.go +++ b/vendor/github.com/pierrec/lz4/block.go @@ -2,8 +2,8 @@ package lz4 import ( "encoding/binary" - "fmt" "math/bits" + "sync" ) // blockHash hashes the lower 6 bytes into a value < htSize. @@ -35,24 +35,31 @@ func UncompressBlock(src, dst []byte) (int, error) { // CompressBlock compresses the source buffer into the destination one. // This is the fast version of LZ4 compression and also the default one. -// The size of hashTable must be at least 64Kb. // -// The size of the compressed data is returned. If it is 0 and no error, then the data is incompressible. +// The argument hashTable is scratch space for a hash table used by the +// compressor. If provided, it should have length at least 1<<16. If it is +// shorter (or nil), CompressBlock allocates its own hash table. +// +// The size of the compressed data is returned. +// +// If the destination buffer size is lower than CompressBlockBound and +// the compressed size is 0 and no error, then the data is incompressible. // // An error is returned if the destination buffer is too small. func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) { - if len(hashTable) < htSize { - return 0, fmt.Errorf("hash table too small, should be at least %d in size", htSize) - } defer recoverBlock(&err) + // Return 0, nil only if the destination buffer size is < CompressBlockBound. + isNotCompressible := len(dst) < CompressBlockBound(len(src)) + // adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible. 
- // This significantly speeds up incompressible data and usually has very small impact on compresssion. + // This significantly speeds up incompressible data and usually has very small impact on compression. // bytes to skip = 1 + (bytes since last match >> adaptSkipLog) const adaptSkipLog = 7 - sn, dn := len(src)-mfLimit, len(dst) - if sn <= 0 || dn == 0 { - return 0, nil + if len(hashTable) < htSize { + htIface := htPool.Get() + defer htPool.Put(htIface) + hashTable = (*(htIface).(*[htSize]int))[:] } // Prove to the compiler the table has at least htSize elements. // The compiler can see that "uint32() >> hashShift" cannot be out of bounds. @@ -61,6 +68,10 @@ func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) { // si: Current position of the search. // anchor: Position of the current literals. var si, di, anchor int + sn := len(src) - mfLimit + if sn <= 0 { + goto lastLiterals + } // Fast scan strategy: the hash table only stores the last 4 bytes sequences. for si < sn { @@ -124,7 +135,7 @@ func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) { si, mLen = si+mLen, si+minMatch // Find the longest match by looking by batches of 8 bytes. - for si < sn { + for si+8 < sn { x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:]) if x == 0 { si += 8 @@ -184,7 +195,8 @@ func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) { hashTable[h] = si - 2 } - if anchor == 0 { +lastLiterals: + if isNotCompressible && anchor == 0 { // Incompressible. return 0, nil } @@ -205,7 +217,7 @@ func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) { di++ // Write the last literals. - if di >= anchor { + if isNotCompressible && di >= anchor { // Incompressible. return 0, nil } @@ -213,6 +225,13 @@ func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) { return di, nil } +// Pool of hash tables for CompressBlock. +var htPool = sync.Pool{ + New: func() interface{} { + return new([htSize]int) + }, +} + // blockHash hashes 4 bytes into a value < winSize. func blockHashHC(x uint32) uint32 { const hasher uint32 = 2654435761 // Knuth multiplicative hash. @@ -224,22 +243,24 @@ func blockHashHC(x uint32) uint32 { // // CompressBlockHC compression ratio is better than CompressBlock but it is also slower. // -// The size of the compressed data is returned. If it is 0 and no error, then the data is not compressible. +// The size of the compressed data is returned. +// +// If the destination buffer size is lower than CompressBlockBound and +// the compressed size is 0 and no error, then the data is incompressible. // // An error is returned if the destination buffer is too small. func CompressBlockHC(src, dst []byte, depth int) (_ int, err error) { defer recoverBlock(&err) + // Return 0, nil only if the destination buffer size is < CompressBlockBound. + isNotCompressible := len(dst) < CompressBlockBound(len(src)) + // adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible. - // This significantly speeds up incompressible data and usually has very small impact on compresssion. + // This significantly speeds up incompressible data and usually has very small impact on compression. 
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog) const adaptSkipLog = 7 - sn, dn := len(src)-mfLimit, len(dst) - if sn <= 0 || dn == 0 { - return 0, nil - } - var si, di int + var si, di, anchor int // hashTable: stores the last position found for a given hash // chainTable: stores previous positions for a given hash @@ -249,7 +270,11 @@ func CompressBlockHC(src, dst []byte, depth int) (_ int, err error) { depth = winSize } - anchor := si + sn := len(src) - mfLimit + if sn <= 0 { + goto lastLiterals + } + for si < sn { // Hash the next 4 bytes (sequence). match := binary.LittleEndian.Uint32(src[si:]) @@ -356,12 +381,13 @@ func CompressBlockHC(src, dst []byte, depth int) (_ int, err error) { } } - if anchor == 0 { + if isNotCompressible && anchor == 0 { // Incompressible. return 0, nil } // Last literals. +lastLiterals: lLen := len(src) - anchor if lLen < 0xF { dst[di] = byte(lLen << 4) @@ -378,7 +404,7 @@ func CompressBlockHC(src, dst []byte, depth int) (_ int, err error) { di++ // Write the last literals. - if di >= anchor { + if isNotCompressible && di >= anchor { // Incompressible. return 0, nil } diff --git a/vendor/github.com/pierrec/lz4/lz4.go b/vendor/github.com/pierrec/lz4/lz4.go index 21dcfaeb93d16..6c73539a34396 100644 --- a/vendor/github.com/pierrec/lz4/lz4.go +++ b/vendor/github.com/pierrec/lz4/lz4.go @@ -38,7 +38,7 @@ const ( hashLog = 16 htSize = 1 << hashLog - mfLimit = 8 + minMatch // The last match cannot start within the last 12 bytes. + mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes. ) // map the block max size id with its value in bytes: 64Kb, 256Kb, 1Mb and 4Mb. @@ -98,7 +98,7 @@ func blockSizeValueToIndex(size int) byte { // (http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html). // // NB. in a Reader, in case of concatenated frames, the Header values may change between Read() calls. -// It is the caller responsibility to check them if necessary. +// It is the caller's responsibility to check them if necessary. type Header struct { BlockChecksum bool // Compressed blocks checksum flag. NoChecksum bool // Frame checksum flag. diff --git a/vendor/github.com/pierrec/lz4/writer.go b/vendor/github.com/pierrec/lz4/writer.go index 324f1386b8ad8..6a60a9a6a57c8 100644 --- a/vendor/github.com/pierrec/lz4/writer.go +++ b/vendor/github.com/pierrec/lz4/writer.go @@ -3,9 +3,10 @@ package lz4 import ( "encoding/binary" "fmt" - "github.com/pierrec/lz4/internal/xxh32" "io" "runtime" + + "github.com/pierrec/lz4/internal/xxh32" ) // zResult contains the results of compressing a block. @@ -370,6 +371,10 @@ func (z *Writer) Reset(w io.Writer) { z.checksum.Reset() z.idx = 0 z.err = nil + // reset hashtable to ensure deterministic output. + for i := range z.hashtable { + z.hashtable[i] = 0 + } z.WithConcurrency(n) } diff --git a/vendor/modules.txt b/vendor/modules.txt index e2179fa00d37a..0c7503a4d41ac 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -542,7 +542,7 @@ github.com/opentracing-contrib/go-stdlib/nethttp github.com/opentracing/opentracing-go github.com/opentracing/opentracing-go/ext github.com/opentracing/opentracing-go/log -# github.com/pierrec/lz4 v2.3.1-0.20191115212037-9085dacd1e1e+incompatible +# github.com/pierrec/lz4 v2.5.3-0.20200429092203-e876bbd321b3+incompatible github.com/pierrec/lz4 github.com/pierrec/lz4/internal/xxh32 # github.com/pkg/errors v0.9.1
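
For reference, a minimal sketch (not part of the diff) of caller code under the CompressBlock contract introduced by this bump: the hashTable argument may now be nil or too short, in which case a table is taken from the library's internal sync.Pool, and a return of 0 with a nil error only signals incompressible data when dst is sized with CompressBlockBound. The sample input and buffer sizes below are illustrative only.

package main

import (
	"bytes"
	"fmt"

	"github.com/pierrec/lz4"
)

func main() {
	data := bytes.Repeat([]byte("hello lz4 "), 32)

	// Size dst with CompressBlockBound so that a 0 return unambiguously
	// means "incompressible" rather than "destination too small".
	dst := make([]byte, lz4.CompressBlockBound(len(data)))

	// Passing nil is now allowed: CompressBlock borrows a hash table from
	// its internal pool instead of returning a "hash table too small" error.
	n, err := lz4.CompressBlock(data, dst, nil)
	if err != nil {
		panic(err)
	}
	if n == 0 {
		fmt.Println("data is incompressible")
		return
	}

	// On hot paths a caller-owned table (length at least 1<<16) can still be
	// supplied to bypass the pool entirely.
	var ht [1 << 16]int
	if _, err := lz4.CompressBlock(data, dst, ht[:]); err != nil {
		panic(err)
	}

	// Round-trip to verify the compressed block.
	out := make([]byte, len(data))
	m, err := lz4.UncompressBlock(dst[:n], out)
	if err != nil {
		panic(err)
	}
	fmt.Printf("compressed %d -> %d bytes, round-trip ok: %v\n",
		len(data), n, bytes.Equal(out[:m], data))
}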