Merge pull request #206 from klauspost/flate-fix-huffman-bit-estimates

Flate: Fix/tweak huffman bit estimates
klauspost · Jan 18, 2020 · 1cf5cb2 · 1cf5cb2
2 parents 81b3ddd + 9ff01f5
commit 1cf5cb2
Show file tree

Hide file tree

Showing 9 changed files with 108 additions and 53 deletions.
diff --git a/flate/deflate.go b/flate/deflate.go
@@ -644,21 +644,21 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
 		d.fill = (*compressor).fillBlock
 		d.step = (*compressor).store
 	case level == ConstantCompression:
-		d.w.logReusePenalty = uint(4)
+		d.w.logNewTablePenalty = 4
 		d.window = make([]byte, maxStoreBlockSize)
 		d.fill = (*compressor).fillBlock
 		d.step = (*compressor).storeHuff
 	case level == DefaultCompression:
 		level = 5
 		fallthrough
 	case level >= 1 && level <= 6:
-		d.w.logReusePenalty = uint(level + 1)
+		d.w.logNewTablePenalty = 6
 		d.fast = newFastEnc(level)
 		d.window = make([]byte, maxStoreBlockSize)
 		d.fill = (*compressor).fillBlock
 		d.step = (*compressor).storeFast
 	case 7 <= level && level <= 9:
-		d.w.logReusePenalty = uint(level)
+		d.w.logNewTablePenalty = 10
 		d.state = &advancedState{}
 		d.compressionLevel = levels[level]
 		d.initDeflate()

diff --git a/flate/huffman_bit_writer.go b/flate/huffman_bit_writer.go
@@ -93,12 +93,12 @@ type huffmanBitWriter struct {
 	err             error
 	lastHeader      int
 	// Set between 0 (reused block can be up to 2x the size)
-	logReusePenalty uint
-	lastHuffMan     bool
-	bytes           [256]byte
-	literalFreq     [lengthCodesStart + 32]uint16
-	offsetFreq      [32]uint16
-	codegenFreq     [codegenCodeCount]uint16
+	logNewTablePenalty uint
+	lastHuffMan        bool
+	bytes              [256]byte
+	literalFreq        [lengthCodesStart + 32]uint16
+	offsetFreq         [32]uint16
+	codegenFreq        [codegenCodeCount]uint16
 
 	// codegen must have an extra space for the final symbol.
 	codegen [literalCount + offsetCodeCount + 1]uint8
@@ -119,7 +119,7 @@ type huffmanBitWriter struct {
 // If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid.
 //
 // An incoming block estimates the output size of a new table using a 'fresh' by calculating the
-// optimal size and adding a penalty in 'logReusePenalty'.
+// optimal size and adding a penalty in 'logNewTablePenalty'.
 // A Huffman table is not optimal, which is why we add a penalty, and generating a new table
 // is slower both for compression and decompression.
 
@@ -349,6 +349,13 @@ func (w *huffmanBitWriter) headerSize() (size, numCodegens int) {
 		int(w.codegenFreq[18])*7, numCodegens
 }
 
+// dynamicReuseSize returns the size in bits of the current literal and offset histograms when coded with the supplied encoders; no header size is included.
+func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) {
+	size = litEnc.bitLength(w.literalFreq[:]) + // bits for literal/length symbols
+		offEnc.bitLength(w.offsetFreq[:]) // bits for offset symbols
+	return size
+}
+
 // dynamicSize returns the size of dynamically encoded data in bits.
 func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
 	header, numCodegens := w.headerSize()
@@ -451,12 +458,12 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n
 
 	i := 0
 	for {
-		var codeWord int = int(w.codegen[i])
+		var codeWord = uint32(w.codegen[i])
 		i++
 		if codeWord == badCode {
 			break
 		}
-		w.writeCode(w.codegenEncoding.codes[uint32(codeWord)])
+		w.writeCode(w.codegenEncoding.codes[codeWord])
 
 		switch codeWord {
 		case 16:
@@ -602,14 +609,14 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
 	var size int
 	// Check if we should reuse.
 	if w.lastHeader > 0 {
-		// Estimate size for using a new table
+		// Estimate size for using a new table.
+		// Use the previous header size as the best estimate.
 		newSize := w.lastHeader + tokens.EstimatedBits()
+		newSize += newSize >> w.logNewTablePenalty
 
 		// The estimated size is calculated as an optimal table.
 		// We add a penalty to make it more realistic and re-use a bit more.
-		newSize += newSize >> (w.logReusePenalty & 31)
-		extra := w.extraBitSize()
-		reuseSize, _ := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extra)
+		reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + w.extraBitSize()
 
 		// Check if a new table is better.
 		if newSize < reuseSize {
@@ -801,21 +808,30 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
 	}
 
 	// Add everything as literals
-	estBits := histogramSize(input, w.literalFreq[:], !eof && !sync) + 15
+	// We have to estimate the header size.
+	// Assume header is around 70 bytes:
+	// https://stackoverflow.com/a/25454430
+	const guessHeaderSizeBits = 70 * 8
+	estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync)
+	estBits += w.lastHeader + 15
+	if w.lastHeader == 0 {
+		estBits += guessHeaderSizeBits
+	}
+	estBits += estBits >> w.logNewTablePenalty
 
 	// Store bytes, if we don't get a reasonable improvement.
 	ssize, storable := w.storedSize(input)
-	if storable && ssize < (estBits+estBits>>4) {
+	if storable && ssize < estBits {
 		w.writeStoredHeader(len(input), eof)
 		w.writeBytes(input)
 		return
 	}
 
 	if w.lastHeader > 0 {
-		size, _ := w.dynamicSize(w.literalEncoding, huffOffset, w.lastHeader)
-		estBits += estBits >> (w.logReusePenalty)
+		reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256])
+		estBits += estExtra
 
-		if estBits < size {
+		if estBits < reuseSize {
 			// We owe an EOB
 			w.writeCode(w.literalEncoding.codes[endBlockMarker])
 			w.lastHeader = 0

diff --git a/flate/huffman_bit_writer_test.go b/flate/huffman_bit_writer_test.go
@@ -33,7 +33,9 @@ func TestBlockHuff(t *testing.T) {
 		if strings.HasSuffix(in, ".in") {
 			out = in[:len(in)-len(".in")] + ".golden"
 		}
-		testBlockHuff(t, in, out)
+		t.Run(in, func(t *testing.T) {
+			testBlockHuff(t, in, out)
+		})
 	}
 }
 
@@ -45,6 +47,7 @@ func testBlockHuff(t *testing.T, in, out string) {
 	}
 	var buf bytes.Buffer
 	bw := newHuffmanBitWriter(&buf)
+	bw.logNewTablePenalty = 8
 	bw.writeBlockHuff(false, all, false)
 	bw.flush()
 	got := buf.Bytes()

diff --git a/flate/huffman_code.go b/flate/huffman_code.go
@@ -320,26 +320,44 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
 	h.assignEncodingAndSize(bitCount, list)
 }
 
+func atLeastOne(v float32) float32 { // atLeastOne returns v, raised to 1 when v is below 1.
+	if v < 1 {
+		return 1 // clamp: callers use this as a per-symbol bit cost floor
+	}
+	return v
+}
+
 // histogramSize accumulates a histogram of b in h.
 // The estimated size in bits is returned, followed by the estimated extra bits for filled-in values (0 unless fill is set).
 // Unassigned values are assigned '1' in the histogram.
 // len(h) must be >= 256, and h's elements must be all zeroes.
-func histogramSize(b []byte, h []uint16, fill bool) int {
+func histogramSize(b []byte, h []uint16, fill bool) (int, int) {
 	h = h[:256]
 	for _, t := range b {
 		h[t]++
 	}
-	invTotal := 1.0 / float64(len(b))
-	shannon := 0.0
-	single := math.Ceil(-math.Log2(invTotal))
-	for i, v := range h[:] {
-		if v > 0 {
-			n := float64(v)
-			shannon += math.Ceil(-math.Log2(n*invTotal) * n)
-		} else if fill {
-			shannon += single
-			h[i] = 1
+	invTotal := 1.0 / float32(len(b))
+	shannon := float32(0.0)
+	var extra float32
+	if fill {
+		oneBits := atLeastOne(-mFastLog2(invTotal))
+		for i, v := range h[:] {
+			if v > 0 {
+				n := float32(v)
+				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
+			} else {
+				h[i] = 1
+				extra += oneBits
+			}
+		}
+	} else {
+		for _, v := range h[:] {
+			if v > 0 {
+				n := float32(v)
+				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
+			}
 		}
 	}
-	return int(shannon + 0.99)
+
+	return int(shannon + 0.99), int(extra + 0.99)
 }
diff --git a/flate/testdata/huffman-rand-limit.golden b/flate/testdata/huffman-rand-limit.golden
diff --git a/flate/testdata/huffman-text-shift.golden b/flate/testdata/huffman-text-shift.golden
diff --git a/flate/testdata/huffman-text.golden b/flate/testdata/huffman-text.golden
diff --git a/flate/token.go b/flate/token.go
@@ -184,9 +184,7 @@ func (t *tokens) indexTokens(in []token) {
 	t.Reset()
 	for _, tok := range in {
 		if tok < matchType {
-			t.tokens[t.n] = tok
-			t.litHist[tok]++
-			t.n++
+			t.AddLiteral(tok.literal())
 			continue
 		}
 		t.AddMatch(uint32(tok.length()), tok.offset())
@@ -211,43 +209,53 @@ func (t *tokens) AddLiteral(lit byte) {
 	t.nLits++
 }
 
+// mFastLog2 returns a fast approximation of log2(val); from https://stackoverflow.com/a/28730362
+func mFastLog2(val float32) float32 {
+	ux := int32(math.Float32bits(val)) // raw IEEE-754 bit pattern of val
+	log2 := (float32)(((ux >> 23) & 255) - 128) // 8-bit exponent field (bits 23-30) minus 128
+	ux &= -0x7f800001 // same as &^ 0x7f800000: clears the exponent field
+	ux += 127 << 23 // set exponent field to 127, so uval is in [1, 2) for positive, normal val
+	uval := math.Float32frombits(uint32(ux))
+	log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759 // quadratic correction from the mantissa value
+	return log2
+}
+
 // EstimatedBits will return a minimum size estimated by an *optimal*
 // compression of the block.
 // The size of the block header is not included in the estimate.
 func (t *tokens) EstimatedBits() int {
-	shannon := float64(0)
+	shannon := float32(0)
 	bits := int(0)
 	nMatches := 0
 	if t.nLits > 0 {
-		invTotal := 1.0 / float64(t.nLits)
+		invTotal := 1.0 / float32(t.nLits)
 		for _, v := range t.litHist[:] {
 			if v > 0 {
-				n := float64(v)
-				shannon += math.Ceil(-math.Log2(n*invTotal) * n)
+				n := float32(v)
+				shannon += -mFastLog2(n*invTotal) * n
 			}
 		}
 		// Just add 15 for EOB
 		shannon += 15
-		for _, v := range t.extraHist[1 : literalCount-256] {
+		for i, v := range t.extraHist[1 : literalCount-256] {
 			if v > 0 {
-				n := float64(v)
-				shannon += math.Ceil(-math.Log2(n*invTotal) * n)
-				bits += int(lengthExtraBits[v&31]) * int(v)
+				n := float32(v)
+				shannon += -mFastLog2(n*invTotal) * n
+				bits += int(lengthExtraBits[i&31]) * int(v)
 				nMatches += int(v)
 			}
 		}
 	}
 	if nMatches > 0 {
-		invTotal := 1.0 / float64(nMatches)
-		for _, v := range t.offHist[:offsetCodeCount] {
+		invTotal := 1.0 / float32(nMatches)
+		for i, v := range t.offHist[:offsetCodeCount] {
 			if v > 0 {
-				n := float64(v)
-				shannon += math.Ceil(-math.Log2(n*invTotal) * n)
-				bits += int(offsetExtraBits[v&31]) * int(n)
+				n := float32(v)
+				shannon += -mFastLog2(n*invTotal) * n
+				bits += int(offsetExtraBits[i&31]) * int(v)
 			}
 		}
 	}
-
 	return int(shannon) + bits
 }
 

diff --git a/flate/token_test.go b/flate/token_test.go
@@ -1,6 +1,7 @@
 package flate
 
 import (
+	"bytes"
 	"io/ioutil"
 	"testing"
 )
@@ -27,8 +28,17 @@ func loadTestTokens(t testFatal) *tokens {
 func Test_tokens_EstimatedBits(t *testing.T) {
 	tok := loadTestTokens(t)
 	// The estimated size, update if method changes.
-	const expect = 199380
-	if n := tok.EstimatedBits(); n != expect {
+	const expect = 221057
+	n := tok.EstimatedBits()
+	var buf bytes.Buffer
+	wr := newHuffmanBitWriter(&buf)
+	wr.writeBlockDynamic(tok, true, nil, true)
+	if wr.err != nil {
+		t.Fatal(wr.err)
+	}
+	wr.flush()
+	t.Log("got:", n, "actual:", buf.Len()*8, "(header not part of estimate)")
+	if n != expect {
 		t.Error("want:", expect, "bits, got:", n)
 	}
 }