Skip to content

Commit

Permalink
zstd: Increment buffer offset (#263)
Browse files Browse the repository at this point in the history
Increment buffer offset when encoding without history.

Since it does not store history, we must offset `e.cur` to avoid false matches for next user.

Fixes the need for double resets to clear history.

Only allocate history when we need to encode more than 1 block.

Replaces #262
  • Loading branch information
klauspost authored May 27, 2020
1 parent 5389f1e commit 057ea20
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 10 deletions.
4 changes: 4 additions & 0 deletions zstd/enc_dfast.go
Original file line number Diff line number Diff line change
Expand Up @@ -671,4 +671,8 @@ encodeLoop:
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}

// We do not store history, so we must offset e.cur to avoid false matches for next user.
if e.cur < bufferReset {
e.cur += int32(len(src))
}
}
15 changes: 13 additions & 2 deletions zstd/enc_fast.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
panic("src too big")
}
}

// Protect against e.cur wraparound.
if e.cur >= bufferReset {
for i := range e.table[:] {
Expand Down Expand Up @@ -516,6 +517,9 @@ encodeLoop:
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugAsserts && t < 0 {
panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
}
break
}

Expand Down Expand Up @@ -548,6 +552,9 @@ encodeLoop:
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}

if debugAsserts && t < 0 {
panic(fmt.Sprintf("t (%d) < 0 ", t))
}
// Extend the 4-byte match as long as possible.
//l := e.matchlenNoHist(s+4, t+4, src) + 4
// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
Expand Down Expand Up @@ -647,6 +654,10 @@ encodeLoop:
if debug {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
// We do not store history, so we must offset e.cur to avoid false matches for next user.
if e.cur < bufferReset {
e.cur += int32(len(src))
}
}

func (e *fastBase) addBlock(src []byte) int32 {
Expand Down Expand Up @@ -714,7 +725,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
}

// Reset the encoding table.
func (e *fastBase) Reset() {
func (e *fastBase) Reset(singleBlock bool) {
if e.blk == nil {
e.blk = &blockEnc{}
e.blk.init()
Expand All @@ -727,7 +738,7 @@ func (e *fastBase) Reset() {
} else {
e.crc.Reset()
}
if cap(e.hist) < int(e.maxMatchOff*2) {
if !singleBlock && cap(e.hist) < int(e.maxMatchOff*2) {
l := e.maxMatchOff * 2
// Make it at least 1MB.
if l < 1<<20 {
Expand Down
21 changes: 13 additions & 8 deletions zstd/encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ type encoder interface {
AppendCRC([]byte) []byte
WindowSize(size int) int32
UseBlock(*blockEnc)
Reset()
Reset(singleBlock bool)
}

type encoderState struct {
Expand Down Expand Up @@ -82,7 +82,10 @@ func (e *Encoder) initialize() {
}
e.encoders = make(chan encoder, e.o.concurrent)
for i := 0; i < e.o.concurrent; i++ {
e.encoders <- e.o.encoder()
enc := e.o.encoder()
// If not single block, history will be allocated on first use.
enc.Reset(true)
e.encoders <- enc
}
}

Expand Down Expand Up @@ -112,7 +115,7 @@ func (e *Encoder) Reset(w io.Writer) {
s.filling = s.filling[:0]
s.current = s.current[:0]
s.previous = s.previous[:0]
s.encoder.Reset()
s.encoder.Reset(false)
s.headerWritten = false
s.eofWritten = false
s.fullFrameWritten = false
Expand Down Expand Up @@ -445,11 +448,10 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
enc := <-e.encoders
defer func() {
// Release encoder reference to last block.
enc.Reset()
// If a non-single block is needed the encoder will reset again.
enc.Reset(true)
e.encoders <- enc
}()
enc.Reset()
blk := enc.Block()
// Use single segments when above minimum window and below 1MB.
single := len(src) < 1<<20 && len(src) > MinWindowSize
if e.o.single != nil {
Expand All @@ -472,12 +474,13 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
panic(err)
}

if len(src) <= e.o.blockSize && len(src) <= maxBlockSize {
// If we can do everything in one block, prefer that.
if len(src) <= maxCompressedBlockSize {
// Slightly faster with no history and everything in one block.
if e.o.crc {
_, _ = enc.CRC().Write(src)
}
blk.reset(nil)
blk := enc.Block()
blk.last = true
enc.EncodeNoHist(blk, src)

Expand All @@ -504,6 +507,8 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
}
blk.output = oldout
} else {
enc.Reset(false)
blk := enc.Block()
for len(src) > 0 {
todo := src
if len(todo) > e.o.blockSize {
Expand Down

0 comments on commit 057ea20

Please sign in to comment.