diff --git a/README.md b/README.md index 2d3c88282d..e2682be6fe 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,11 @@ This package provides various compression algorithms. # changelog +* June 1, 2020 (v1.10.7): Added zstd decompression [dictionary support](https://github.com/klauspost/compress/tree/master/zstd#dictionaries). +* June 1, 2020: Increase zstd decompression speed up to 1.19x. [#259](https://github.com/klauspost/compress/pull/259) +* June 1, 2020: Remove internal reset call in zstd compression and reduce allocations. [#263](https://github.com/klauspost/compress/pull/263) * May 21, 2020: (v1.10.6) zstd: Reduce allocations while decoding. [#258](https://github.com/klauspost/compress/pull/258), [#252](https://github.com/klauspost/compress/pull/252) -* May 21, 2020: zstd: Stricter decoding checks. +* May 21, 2020: zstd: Stricter decompression checks. * April 12, 2020: (v1.10.5) s2-commands: Flush output when receiving SIGINT. [#239](https://github.com/klauspost/compress/pull/239) * Apr 8, 2020: (v1.10.4) zstd: Minor/special case optimizations. [#251](https://github.com/klauspost/compress/pull/251), [#250](https://github.com/klauspost/compress/pull/250), [#249](https://github.com/klauspost/compress/pull/249), [#247](https://github.com/klauspost/compress/pull/247) * Mar 11, 2020: (v1.10.3) s2: Use S2 encoder in pure Go mode for Snappy output as well. [#245](https://github.com/klauspost/compress/pull/245) diff --git a/huff0/decompress.go b/huff0/decompress.go index 97ae66a4ac..fb42a398be 100644 --- a/huff0/decompress.go +++ b/huff0/decompress.go @@ -155,20 +155,70 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { // The length of the supplied input must match the end of a block exactly. // Before this is called, the table must be initialized with ReadTable unless // the encoder re-used the table. +// deprecated: Use the stateless Decoder() to get a concurrent version. func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) { - if len(s.dt.single) == 0 { + if cap(s.Out) < s.MaxDecodedSize { + s.Out = make([]byte, s.MaxDecodedSize) + } + s.Out = s.Out[:0:s.MaxDecodedSize] + s.Out, err = s.Decoder().Decompress1X(s.Out, in) + return s.Out, err +} + +// Decompress4X will decompress a 4X encoded stream. +// Before this is called, the table must be initialized with ReadTable unless +// the encoder re-used the table. +// The length of the supplied input must match the end of a block exactly. +// The destination size of the uncompressed data must be known and provided. +// deprecated: Use the stateless Decoder() to get a concurrent version. +func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) { + if dstSize > s.MaxDecodedSize { + return nil, ErrMaxDecodedSizeExceeded + } + if cap(s.Out) < dstSize { + s.Out = make([]byte, s.MaxDecodedSize) + } + s.Out = s.Out[:0:dstSize] + s.Out, err = s.Decoder().Decompress4X(s.Out, in) + return s.Out, err +} + +// Decoder will return a stateless decoder that can be used by multiple +// decompressors concurrently. +// Before this is called, the table must be initialized with ReadTable. +// The Decoder is still linked to the scratch buffer so that cannot be reused. +// However, it is safe to discard the scratch. +func (s *Scratch) Decoder() *Decoder { + return &Decoder{ + dt: s.dt, + actualTableLog: s.actualTableLog, + } +} + +// Decoder provides stateless decoding. +type Decoder struct { + dt dTable + actualTableLog uint8 +} + +// Decompress1X will decompress a 1X encoded stream. +// The cap of the output buffer will be the maximum decompressed size. +// The length of the supplied input must match the end of a block exactly. +func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { + if len(d.dt.single) == 0 { return nil, errors.New("no table loaded") } var br bitReader - err = br.init(in) + err := br.init(src) if err != nil { - return nil, err + return dst, err } - s.Out = s.Out[:0] + maxDecodedSize := cap(dst) + dst = dst[:0] decode := func() byte { - val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */ - v := s.dt.single[val] + val := br.peekBitsFast(d.actualTableLog) /* note : actualTableLog >= 1 */ + v := d.dt.single[val] br.bitsRead += uint8(v.entry) return uint8(v.entry >> 8) } @@ -180,88 +230,80 @@ func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) { // Avoid bounds check by always having full sized table. const tlSize = 1 << tableLogMax const tlMask = tlSize - 1 - dt := s.dt.single[:tlSize] + dt := d.dt.single[:tlSize] // Use temp table to avoid bound checks/append penalty. - var tmp = s.huffWeight[:256] + var buf [256]byte var off uint8 for br.off >= 8 { br.fillFast() - tmp[off+0] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) - tmp[off+1] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) + buf[off+0] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask]) + buf[off+1] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask]) br.fillFast() - tmp[off+2] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) - tmp[off+3] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) + buf[off+2] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask]) + buf[off+3] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask]) off += 4 if off == 0 { - if len(s.Out)+256 > s.MaxDecodedSize { + if len(dst)+256 > maxDecodedSize { br.close() return nil, ErrMaxDecodedSizeExceeded } - s.Out = append(s.Out, tmp...) + dst = append(dst, buf[:]...) } } - if len(s.Out)+int(off) > s.MaxDecodedSize { + if len(dst)+int(off) > maxDecodedSize { br.close() return nil, ErrMaxDecodedSizeExceeded } - s.Out = append(s.Out, tmp[:off]...) + dst = append(dst, buf[:off]...) for !br.finished() { br.fill() - if len(s.Out) >= s.MaxDecodedSize { + if len(dst) >= maxDecodedSize { br.close() return nil, ErrMaxDecodedSizeExceeded } - s.Out = append(s.Out, decode()) + dst = append(dst, decode()) } - return s.Out, br.close() + return dst, br.close() } // Decompress4X will decompress a 4X encoded stream. -// Before this is called, the table must be initialized with ReadTable unless -// the encoder re-used the table. // The length of the supplied input must match the end of a block exactly. -// The destination size of the uncompressed data must be known and provided. -func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) { +// The *capacity* of the dst slice must match the destination size of +// the uncompressed data exactly. +func (s *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { if len(s.dt.single) == 0 { return nil, errors.New("no table loaded") } - if len(in) < 6+(4*1) { + if len(src) < 6+(4*1) { return nil, errors.New("input too small") } - if dstSize > s.MaxDecodedSize { - return nil, ErrMaxDecodedSizeExceeded - } - // TODO: We do not detect when we overrun a buffer, except if the last one does. var br [4]bitReader start := 6 for i := 0; i < 3; i++ { - length := int(in[i*2]) | (int(in[i*2+1]) << 8) - if start+length >= len(in) { + length := int(src[i*2]) | (int(src[i*2+1]) << 8) + if start+length >= len(src) { return nil, errors.New("truncated input (or invalid offset)") } - err = br[i].init(in[start : start+length]) + err := br[i].init(src[start : start+length]) if err != nil { return nil, err } start += length } - err = br[3].init(in[start:]) + err := br[3].init(src[start:]) if err != nil { return nil, err } - // Prepare output - if cap(s.Out) < dstSize { - s.Out = make([]byte, 0, dstSize) - } - s.Out = s.Out[:dstSize] // destination, offset to match first output - dstOut := s.Out + dstSize := cap(dst) + dst = dst[:dstSize] + out := dst dstEvery := (dstSize + 3) / 4 const tlSize = 1 << tableLogMax @@ -276,7 +318,7 @@ func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) { } // Use temp table to avoid bound checks/append penalty. - var tmp = s.huffWeight[:256] + var buf [256]byte var off uint8 var decoded int @@ -300,8 +342,8 @@ bigloop: val2 := br[stream].peekBitsFast(s.actualTableLog) v2 := single[val2&tlMask] - tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) - tmp[off+bufoff*stream] = uint8(v.entry >> 8) + buf[off+bufoff*stream+1] = uint8(v2.entry >> 8) + buf[off+bufoff*stream] = uint8(v.entry >> 8) br[stream].bitsRead += uint8(v2.entry) } @@ -313,8 +355,8 @@ bigloop: val2 := br[stream].peekBitsFast(s.actualTableLog) v2 := single[val2&tlMask] - tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) - tmp[off+bufoff*stream] = uint8(v.entry >> 8) + buf[off+bufoff*stream+1] = uint8(v2.entry >> 8) + buf[off+bufoff*stream] = uint8(v.entry >> 8) br[stream].bitsRead += uint8(v2.entry) } @@ -326,8 +368,8 @@ bigloop: val2 := br[stream].peekBitsFast(s.actualTableLog) v2 := single[val2&tlMask] - tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) - tmp[off+bufoff*stream] = uint8(v.entry >> 8) + buf[off+bufoff*stream+1] = uint8(v2.entry >> 8) + buf[off+bufoff*stream] = uint8(v.entry >> 8) br[stream].bitsRead += uint8(v2.entry) } @@ -339,8 +381,8 @@ bigloop: val2 := br[stream].peekBitsFast(s.actualTableLog) v2 := single[val2&tlMask] - tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) - tmp[off+bufoff*stream] = uint8(v.entry >> 8) + buf[off+bufoff*stream+1] = uint8(v2.entry >> 8) + buf[off+bufoff*stream] = uint8(v.entry >> 8) br[stream].bitsRead += uint8(v2.entry) } @@ -350,30 +392,30 @@ bigloop: if bufoff > dstEvery { return nil, errors.New("corruption detected: stream overrun 1") } - copy(dstOut, tmp[:bufoff]) - copy(dstOut[dstEvery:], tmp[bufoff:bufoff*2]) - copy(dstOut[dstEvery*2:], tmp[bufoff*2:bufoff*3]) - copy(dstOut[dstEvery*3:], tmp[bufoff*3:bufoff*4]) + copy(out, buf[:bufoff]) + copy(out[dstEvery:], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) off = 0 - dstOut = dstOut[bufoff:] + out = out[bufoff:] decoded += 256 // There must at least be 3 buffers left. - if len(dstOut) < dstEvery*3 { + if len(out) < dstEvery*3 { return nil, errors.New("corruption detected: stream overrun 2") } } } if off > 0 { ioff := int(off) - if len(dstOut) < dstEvery*3+ioff { + if len(out) < dstEvery*3+ioff { return nil, errors.New("corruption detected: stream overrun 3") } - copy(dstOut, tmp[:off]) - copy(dstOut[dstEvery:dstEvery+ioff], tmp[bufoff:bufoff*2]) - copy(dstOut[dstEvery*2:dstEvery*2+ioff], tmp[bufoff*2:bufoff*3]) - copy(dstOut[dstEvery*3:dstEvery*3+ioff], tmp[bufoff*3:bufoff*4]) + copy(out, buf[:off]) + copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) decoded += int(off) * 4 - dstOut = dstOut[off:] + out = out[off:] } // Decode remaining. @@ -382,10 +424,10 @@ bigloop: br := &br[i] for !br.finished() { br.fill() - if offset >= len(dstOut) { + if offset >= len(out) { return nil, errors.New("corruption detected: stream overrun 4") } - dstOut[offset] = decode(br) + out[offset] = decode(br) offset++ } decoded += offset - dstEvery*i @@ -397,7 +439,7 @@ bigloop: if dstSize != decoded { return nil, errors.New("corruption detected: short output block") } - return s.Out, nil + return dst, nil } // matches will compare a decoding table to a coding table. diff --git a/zstd/README.md b/zstd/README.md index bc977a3023..f2a80b5d06 100644 --- a/zstd/README.md +++ b/zstd/README.md @@ -309,6 +309,20 @@ The decoder can be used for *concurrent* decompression of multiple buffers. It will only allow a certain number of concurrent operations to run. To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder. +### Dictionaries + +Data compressed with [dictionaries](https://github.com/facebook/zstd#the-case-for-small-data-compression) can be decompressed. + +Dictionaries are added individually to Decoders. +Dictionaries are generated by the `zstd --train` command and contains an initial state for the decoder. +To add a dictionary use the `RegisterDict(data)` with the dictionary data before starting any decompression. + +The dictionary will be used automatically for the data that specifies them. + +A re-used Decoder will still contain the dictionaries registered. + +When registering a dictionary with the same ID it will override the existing. + ### Allocation-less operation The decoder has been designed to operate without allocations after a warmup. diff --git a/zstd/blockdec.go b/zstd/blockdec.go index 19181caea1..4a14242c76 100644 --- a/zstd/blockdec.go +++ b/zstd/blockdec.go @@ -461,26 +461,22 @@ func (b *blockDec) decodeCompressed(hist *history) error { if huff == nil { huff = &huff0.Scratch{} } - huff.Out = b.literalBuf[:0] huff, literals, err = huff0.ReadTable(literals, huff) if err != nil { println("reading huffman table:", err) return err } // Use our out buffer. - huff.Out = b.literalBuf[:0] - huff.MaxDecodedSize = litRegenSize if fourStreams { - literals, err = huff.Decompress4X(literals, litRegenSize) + literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) } else { - literals, err = huff.Decompress1X(literals) + literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) } if err != nil { println("decoding compressed literals:", err) return err } // Make sure we don't leak our literals buffer - huff.Out = nil if len(literals) != litRegenSize { return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) } @@ -631,15 +627,12 @@ func (b *blockDec) decodeCompressed(hist *history) error { var err error // Use our out buffer. huff = hist.huffTree - huff.Out = b.literalBuf[:0] - huff.MaxDecodedSize = litRegenSize if fourStreams { - literals, err = huff.Decompress4X(literals, litRegenSize) + literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) } else { - literals, err = huff.Decompress1X(literals) + literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) } // Make sure we don't leak our literals buffer - huff.Out = nil if err != nil { println("decompressing literals:", err) return err @@ -649,12 +642,13 @@ func (b *blockDec) decodeCompressed(hist *history) error { } } else { if hist.huffTree != nil && huff != nil { - huffDecoderPool.Put(hist.huffTree) + if hist.dict == nil || hist.dict.litDec != hist.huffTree { + huffDecoderPool.Put(hist.huffTree) + } hist.huffTree = nil } } if huff != nil { - huff.Out = nil hist.huffTree = huff } if debug { @@ -687,14 +681,20 @@ func (b *blockDec) decodeCompressed(hist *history) error { // If only recent offsets were not transferred, this would be an obvious win. // Also, if first 3 sequences don't reference recent offsets, all sequences can be decoded. - if err := seqs.initialize(br, hist, literals, b.dst); err != nil { - println("initializing sequences:", err) - return err - } hbytes := hist.b if len(hbytes) > hist.windowSize { hbytes = hbytes[len(hbytes)-hist.windowSize:] + // We do not need history any more. + if hist.dict != nil { + hist.dict.content = nil + } } + + if err := seqs.initialize(br, hist, literals, b.dst); err != nil { + println("initializing sequences:", err) + return err + } + err = seqs.decode(nSeqs, br, hbytes) if err != nil { return err diff --git a/zstd/bytereader.go b/zstd/bytereader.go index dc4378b640..f708df1c4c 100644 --- a/zstd/bytereader.go +++ b/zstd/bytereader.go @@ -4,6 +4,8 @@ package zstd +import "encoding/binary" + // byteReader provides a byte reader that reads // little endian values from a byte stream. // The input stream is manually advanced. @@ -55,12 +57,7 @@ func (b byteReader) Uint32() uint32 { } return v } - b2 := b.b[b.off : b.off+4 : b.off+4] - v3 := uint32(b2[3]) - v2 := uint32(b2[2]) - v1 := uint32(b2[1]) - v0 := uint32(b2[0]) - return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) + return binary.LittleEndian.Uint32(b.b[b.off : b.off+4]) } // unread returns the unread portion of the input. diff --git a/zstd/decoder.go b/zstd/decoder.go index 324347623c..8e34479ff8 100644 --- a/zstd/decoder.go +++ b/zstd/decoder.go @@ -32,8 +32,9 @@ type Decoder struct { // Current read position used for Reader functionality. current decoderState - // Custom dictionaries - dicts map[uint32]struct{} + // Custom dictionaries. + // Always uses copies. + dicts map[uint32]dict // streamWg is the waitgroup for all streams streamWg sync.WaitGroup @@ -295,10 +296,18 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { frame.bBuf = input for { + frame.history.reset() err := frame.reset(&frame.bBuf) if err == io.EOF { return dst, nil } + if frame.DictionaryID != nil { + dict, ok := d.dicts[*frame.DictionaryID] + if !ok { + return nil, ErrUnknownDictionary + } + frame.history.setDict(&dict) + } if err != nil { return dst, err } @@ -393,6 +402,19 @@ func (d *Decoder) Close() { d.current.err = ErrDecoderClosed } +// RegisterDict will load a dictionary +func (d *Decoder) RegisterDict(b []byte) error { + dc, err := loadDict(b) + if err != nil { + return err + } + if d.dicts == nil { + d.dicts = make(map[uint32]dict, 1) + } + d.dicts[dc.id] = *dc + return nil +} + // IOReadCloser returns the decoder as an io.ReadCloser for convenience. // Any changes to the decoder will be reflected, so the returned ReadCloser // can be reused along with the decoder. @@ -466,6 +488,14 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) { if debug && err != nil { println("Frame decoder returned", err) } + if err == nil && frame.DictionaryID != nil { + dict, ok := d.dicts[*frame.DictionaryID] + if !ok { + err = ErrUnknownDictionary + } else { + frame.history.setDict(&dict) + } + } if err != nil { stream.output <- decodeOutput{ err: err, diff --git a/zstd/decoder_test.go b/zstd/decoder_test.go index 7e886d3715..bd6e189a14 100644 --- a/zstd/decoder_test.go +++ b/zstd/decoder_test.go @@ -1154,6 +1154,9 @@ func testDecoderDecodeAll(t *testing.T, fn string, dec *Decoder) { if err != nil { t.Error(err) } + if len(got) < 10 { + t.Fatal("didn't get input back") + } got = got[10:] if !bytes.Equal(wantB, got) { if len(wantB)+len(got) < 1000 { diff --git a/zstd/dict.go b/zstd/dict.go new file mode 100644 index 0000000000..8eb6f6ba33 --- /dev/null +++ b/zstd/dict.go @@ -0,0 +1,104 @@ +package zstd + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + + "github.com/klauspost/compress/huff0" +) + +type dict struct { + id uint32 + + litDec *huff0.Scratch + llDec, ofDec, mlDec sequenceDec + offsets [3]int + content []byte +} + +var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec} + +// Load a dictionary as described in +// https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format +func loadDict(b []byte) (*dict, error) { + // Check static field size. + if len(b) <= 8+(3*4) { + return nil, io.ErrUnexpectedEOF + } + d := dict{ + llDec: sequenceDec{fse: &fseDecoder{}}, + ofDec: sequenceDec{fse: &fseDecoder{}}, + mlDec: sequenceDec{fse: &fseDecoder{}}, + } + if !bytes.Equal(b[:4], dictMagic[:]) { + return nil, ErrMagicMismatch + } + d.id = binary.LittleEndian.Uint32(b[4:8]) + if d.id == 0 { + return nil, errors.New("dictionaries cannot have ID 0") + } + + // Read literal table + var err error + d.litDec, b, err = huff0.ReadTable(b[8:], nil) + if err != nil { + return nil, err + } + + br := byteReader{ + b: b, + off: 0, + } + readDec := func(i tableIndex, dec *fseDecoder) error { + if err := dec.readNCount(&br, uint16(maxTableSymbol[i])); err != nil { + return err + } + if br.overread() { + return io.ErrUnexpectedEOF + } + err = dec.transform(symbolTableX[i]) + if err != nil { + println("Transform table error:", err) + return err + } + if debug { + println("Read table ok", "symbolLen:", dec.symbolLen) + } + // Set decoders as predefined so they aren't reused. + dec.preDefined = true + return nil + } + + if err := readDec(tableOffsets, d.ofDec.fse); err != nil { + return nil, err + } + if err := readDec(tableMatchLengths, d.mlDec.fse); err != nil { + return nil, err + } + if err := readDec(tableLiteralLengths, d.llDec.fse); err != nil { + return nil, err + } + if br.remain() < 12 { + return nil, io.ErrUnexpectedEOF + } + + d.offsets[0] = int(br.Uint32()) + br.advance(4) + d.offsets[1] = int(br.Uint32()) + br.advance(4) + d.offsets[2] = int(br.Uint32()) + br.advance(4) + if d.offsets[0] <= 0 || d.offsets[1] <= 0 || d.offsets[2] <= 0 { + return nil, errors.New("invalid offset in dictionary") + } + d.content = make([]byte, br.remain()) + copy(d.content, br.unread()) + if d.offsets[0] > len(d.content) || d.offsets[1] > len(d.content) || d.offsets[2] > len(d.content) { + return nil, fmt.Errorf("initial offset bigger than dictionary content size %d, offsets: %v", len(d.content), d.offsets) + } + + return &d, nil +} diff --git a/zstd/dict_test.go b/zstd/dict_test.go new file mode 100644 index 0000000000..2f4bbc6185 --- /dev/null +++ b/zstd/dict_test.go @@ -0,0 +1,137 @@ +package zstd + +import ( + "bytes" + "io/ioutil" + "strings" + "testing" + + "github.com/klauspost/compress/zip" +) + +func TestDecoder_SmallDict(t *testing.T) { + // All files have CRC + fn := "testdata/dict-tests-small.zip" + data, err := ioutil.ReadFile(fn) + if err != nil { + t.Fatal(err) + } + zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + t.Fatal(err) + } + dec, err := NewReader(nil, WithDecoderConcurrency(1)) + if err != nil { + t.Fatal(err) + return + } + for _, tt := range zr.File { + if !strings.HasSuffix(tt.Name, ".dict") { + continue + } + func() { + r, err := tt.Open() + if err != nil { + t.Fatal(err) + } + defer r.Close() + in, err := ioutil.ReadAll(r) + if err != nil { + t.Fatal(err) + } + err = dec.RegisterDict(in) + if err != nil { + t.Fatal(tt.Name, err) + } + }() + } + defer dec.Close() + for _, tt := range zr.File { + if !strings.HasSuffix(tt.Name, ".zst") { + continue + } + t.Run("decodeall-"+tt.Name, func(t *testing.T) { + r, err := tt.Open() + if err != nil { + t.Fatal(err) + } + defer r.Close() + in, err := ioutil.ReadAll(r) + if err != nil { + t.Fatal(err) + } + got, err := dec.DecodeAll(in, nil) + if err != nil { + t.Fatal(err) + } + _, err = dec.DecodeAll(in, got[:0]) + if err != nil { + t.Fatal(err) + } + }) + } +} + +func TestDecoder_MoreDicts(t *testing.T) { + // All files have CRC + // https://files.klauspost.com/compress/zstd-dict-tests.zip + fn := "testdata/zstd-dict-tests.zip" + data, err := ioutil.ReadFile(fn) + if err != nil { + t.Skip("extended dict test not found.") + } + zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + t.Fatal(err) + } + dec, err := NewReader(nil, WithDecoderConcurrency(1)) + if err != nil { + t.Fatal(err) + return + } + for _, tt := range zr.File { + if !strings.HasSuffix(tt.Name, ".dict") { + continue + } + func() { + r, err := tt.Open() + if err != nil { + t.Fatal(err) + } + defer r.Close() + in, err := ioutil.ReadAll(r) + if err != nil { + t.Fatal(err) + } + err = dec.RegisterDict(in) + if err != nil { + t.Fatal(tt.Name, err) + } + }() + } + defer dec.Close() + for _, tt := range zr.File { + if !strings.HasSuffix(tt.Name, ".zst") { + continue + } + t.Run("decodeall-"+tt.Name, func(t *testing.T) { + r, err := tt.Open() + if err != nil { + t.Fatal(err) + } + defer r.Close() + in, err := ioutil.ReadAll(r) + if err != nil { + t.Fatal(err) + } + got, err := dec.DecodeAll(in, nil) + if err != nil { + t.Fatal(err) + } + _, err = dec.DecodeAll(in, got[:0]) + if err != nil { + t.Fatal(err) + } + }) + } +} diff --git a/zstd/framedec.go b/zstd/framedec.go index 780880ebe4..fc4a566d39 100644 --- a/zstd/framedec.go +++ b/zstd/framedec.go @@ -40,7 +40,7 @@ type frameDec struct { FrameContentSize uint64 frameDone sync.WaitGroup - DictionaryID uint32 + DictionaryID *uint32 HasCheckSum bool SingleSegment bool @@ -142,7 +142,7 @@ func (d *frameDec) reset(br byteBuffer) error { // Read Dictionary_ID // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id - d.DictionaryID = 0 + d.DictionaryID = nil if size := fhd & 3; size != 0 { if size == 3 { size = 4 @@ -154,19 +154,22 @@ func (d *frameDec) reset(br byteBuffer) error { } return io.ErrUnexpectedEOF } + var id uint32 switch size { case 1: - d.DictionaryID = uint32(b[0]) + id = uint32(b[0]) case 2: - d.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) + id = uint32(b[0]) | (uint32(b[1]) << 8) case 4: - d.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) + id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) } if debug { - println("Dict size", size, "ID:", d.DictionaryID) + println("Dict size", size, "ID:", id) } - if d.DictionaryID != 0 { - return ErrUnknownDictionary + if id > 0 { + // ID 0 means "sorry, no dictionary anyway". + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format + d.DictionaryID = &id } } @@ -351,8 +354,6 @@ func (d *frameDec) initAsync() { // When the frame has finished decoding the *bufio.Reader // containing the remaining input will be sent on frameDec.frameDone. func (d *frameDec) startDecoder(output chan decodeOutput) { - // TODO: Init to dictionary - d.history.reset() written := int64(0) defer func() { @@ -445,8 +446,6 @@ func (d *frameDec) startDecoder(output chan decodeOutput) { // runDecoder will create a sync decoder that will decode a block of data. func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) { - // TODO: Init to dictionary - d.history.reset() saved := d.history.b // We use the history for output to avoid copying it. diff --git a/zstd/fse_decoder.go b/zstd/fse_decoder.go index e002be98b9..957cfeb79c 100644 --- a/zstd/fse_decoder.go +++ b/zstd/fse_decoder.go @@ -19,7 +19,7 @@ const ( * Increasing memory usage improves compression ratio * Reduced memory usage can improve speed, due to cache effect * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ - maxMemoryUsage = 11 + maxMemoryUsage = tablelogAbsoluteMax + 2 maxTableLog = maxMemoryUsage - 2 maxTablesize = 1 << maxTableLog diff --git a/zstd/history.go b/zstd/history.go index e8c419bd53..f418f50fcd 100644 --- a/zstd/history.go +++ b/zstd/history.go @@ -17,6 +17,7 @@ type history struct { windowSize int maxSize int error bool + dict *dict } // reset will reset the history to initial state of a frame. @@ -36,12 +37,27 @@ func (h *history) reset() { } h.decoders = sequenceDecs{} if h.huffTree != nil { - huffDecoderPool.Put(h.huffTree) + if h.dict == nil || h.dict.litDec != h.huffTree { + huffDecoderPool.Put(h.huffTree) + } } h.huffTree = nil + h.dict = nil //printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b)) } +func (h *history) setDict(dict *dict) { + if dict == nil { + return + } + h.dict = dict + h.decoders.litLengths = dict.llDec + h.decoders.offsets = dict.ofDec + h.decoders.matchLengths = dict.mlDec + h.recentOffsets = dict.offsets + h.huffTree = dict.litDec +} + // append bytes to history. // This function will make sure there is space for it, // if the buffer has been allocated with enough extra space. diff --git a/zstd/seqdec.go b/zstd/seqdec.go index 634e6497ad..7ff870400d 100644 --- a/zstd/seqdec.go +++ b/zstd/seqdec.go @@ -62,6 +62,7 @@ type sequenceDecs struct { matchLengths sequenceDec prevOffset [3]int hist []byte + dict []byte literals []byte out []byte windowSize int @@ -85,6 +86,10 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out [] s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits s.windowSize = hist.windowSize s.out = out + s.dict = nil + if hist.dict != nil { + s.dict = hist.dict.content + } return nil } @@ -185,20 +190,38 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { if ml > maxMatchLen { return fmt.Errorf("match len (%d) bigger than max allowed length", ml) } - if mo > len(s.out)+len(hist)+ll { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)+ll) - } - if mo > s.windowSize { - return fmt.Errorf("match offset (%d) bigger than window size (%d)", mo, s.windowSize) - } - if mo == 0 && ml > 0 { - return fmt.Errorf("zero matchoff and matchlen > 0") - } + // Add literals s.out = append(s.out, s.literals[:ll]...) s.literals = s.literals[ll:] out := s.out + if mo > len(s.out)+len(hist) || mo > s.windowSize { + if len(s.dict) == 0 { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)) + } + + // we may be in dictionary. + dictO := len(s.dict) - (mo - (len(s.out) + len(hist))) + if dictO < 0 || dictO >= len(s.dict) { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)) + } + end := dictO + ml + if end > len(s.dict) { + out = append(out, s.dict[dictO:]...) + mo -= len(s.dict) - dictO + ml -= len(s.dict) - dictO + } else { + out = append(out, s.dict[dictO:end]...) + mo = 0 + ml = 0 + } + } + + if mo == 0 && ml > 0 { + return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) + } + // Copy from history. // TODO: Blocks without history could be made to ignore this completely. if v := mo - len(s.out); v > 0 { diff --git a/zstd/testdata/dict-tests-small.zip b/zstd/testdata/dict-tests-small.zip new file mode 100644 index 0000000000..89fbf165a8 Binary files /dev/null and b/zstd/testdata/dict-tests-small.zip differ