From da41adc7bdce767853f6d2c6f4029aa03c39920a Mon Sep 17 00:00:00 2001 From: aimuz Date: Mon, 6 Nov 2023 16:19:35 +0800 Subject: [PATCH] internal/zstd: avoid panic when the output buffer is too small MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit refactor literal decoding into decodeSymbol function. Added output buffer checking to avoid panic caused by too small output buffer. Benchmarking hasn't changed much goos: darwin goarch: amd64 pkg: internal/zstd cpu: Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ Large-12 7.823m ± 2% 7.854m ± 6% ~ (p=0.247 n=10) │ old.txt │ new.txt │ │ B/s │ B/s vs base │ Large-12 33.17Mi ± 2% 33.04Mi ± 6% ~ (p=0.217 n=10) │ old.txt │ new.txt │ │ B/op │ B/op vs base │ Large-12 73.62Ki ± 4% 74.32Ki ± 3% ~ (p=0.424 n=10) │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ Large-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ ¹ all samples are equal Fixes: #63824 --- src/internal/zstd/fuzz_test.go | 2 ++ src/internal/zstd/literals.go | 62 ++++++++++++++-------------------- 2 files changed, 28 insertions(+), 36 deletions(-) diff --git a/src/internal/zstd/fuzz_test.go b/src/internal/zstd/fuzz_test.go index bb6f0a9721f803..b849e124b4d46b 100644 --- a/src/internal/zstd/fuzz_test.go +++ b/src/internal/zstd/fuzz_test.go @@ -22,6 +22,8 @@ var badStrings = []string{ "(\xb5/\xfd\x1002000$\x05\x0010\xcc0\xa8100000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "(\xb5/\xfd\x1002000$\x05\x0000\xcc0\xa8100d\x0000001000000000000000000000000000000000000000000000000000000000000000000000000\x000000000000000000000000000000000000000000000000000000000000000000000000000000", "(\xb5/\xfd001\x00\x0000000000000000000", + "(\xb5/\xfd00\xec\x00\x00&@\x05\x05A7002\x02\x00\x02\x00\x02\x0000000000000000", + "(\xb5/\xfd00\xec\x00\x00V@\x05\x0517002\x02\x00\x02\x00\x02\x0000000000000000", } // This is a simple fuzzer to see if the decompressor panics. diff --git a/src/internal/zstd/literals.go b/src/internal/zstd/literals.go index b46d668f262e0e..4ab0922100ccc7 100644 --- a/src/internal/zstd/literals.go +++ b/src/internal/zstd/literals.go @@ -187,17 +187,13 @@ func (r *Reader) readLiteralsOneStream(data block, off, compressedSize, regenera huffTable := r.huffmanTable huffBits := uint32(r.huffmanTableBits) huffMask := (uint32(1) << huffBits) - 1 - + out := len(outbuf) + outbuf = append(outbuf, make([]byte, regeneratedSize)...) for i := 0; i < regeneratedSize; i++ { - if !rbr.fetch(uint8(huffBits)) { - return nil, rbr.makeError("literals Huffman stream out of bits") + out, err = decodeSymbol(huffBits, huffMask, huffTable, &rbr, outbuf, out) + if err != nil { + return nil, err } - - var t uint16 - idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask - t = huffTable[idx] - outbuf = append(outbuf, byte(t>>8)) - rbr.cnt -= uint32(t & 0xff) } return outbuf, nil @@ -271,60 +267,54 @@ func (r *Reader) readLiteralsFourStreams(data block, off, totalStreamsSize, rege regeneratedStreamSize4 := regeneratedSize - regeneratedStreamSize*3 + // grow the buffer outbuf = append(outbuf, make([]byte, regeneratedSize)...) huffTable := r.huffmanTable huffBits := uint32(r.huffmanTableBits) huffMask := (uint32(1) << huffBits) - 1 + // Iterate over the stream, decoding one symbol at a time for i := 0; i < regeneratedStreamSize; i++ { use4 := i < regeneratedStreamSize4 - fetchHuff := func(rbr *reverseBitReader) (uint16, error) { - if !rbr.fetch(uint8(huffBits)) { - return 0, rbr.makeError("literals Huffman stream out of bits") - } - idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask - return huffTable[idx], nil - } + // TODO(aimuz): Parallel execution is possible here, maybe optimised? - t1, err := fetchHuff(&rbr1) + out1, err = decodeSymbol(huffBits, huffMask, huffTable, &rbr1, outbuf, out1) if err != nil { return nil, err } - - t2, err := fetchHuff(&rbr2) + out2, err = decodeSymbol(huffBits, huffMask, huffTable, &rbr2, outbuf, out2) if err != nil { return nil, err } - - t3, err := fetchHuff(&rbr3) + out3, err = decodeSymbol(huffBits, huffMask, huffTable, &rbr3, outbuf, out3) if err != nil { return nil, err } - if use4 { - t4, err := fetchHuff(&rbr4) + out4, err = decodeSymbol(huffBits, huffMask, huffTable, &rbr4, outbuf, out4) if err != nil { return nil, err } - outbuf[out4] = byte(t4 >> 8) - out4++ - rbr4.cnt -= uint32(t4 & 0xff) } + } - outbuf[out1] = byte(t1 >> 8) - out1++ - rbr1.cnt -= uint32(t1 & 0xff) + return outbuf, nil +} - outbuf[out2] = byte(t2 >> 8) - out2++ - rbr2.cnt -= uint32(t2 & 0xff) +func decodeSymbol(huffBits, huffMask uint32, huffTable []uint16, rbr *reverseBitReader, outbuf []byte, off int) (int, error) { + if off >= len(outbuf) { + return 0, rbr.makeError("output buffer too small for output") + } - outbuf[out3] = byte(t3 >> 8) - out3++ - rbr3.cnt -= uint32(t3 & 0xff) + if !rbr.fetch(uint8(huffBits)) { + return 0, rbr.makeError("literals Huffman stream out of bits") } + idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask + t := huffTable[idx] - return outbuf, nil + outbuf[off] = byte(t >> 8) + rbr.cnt -= uint32(t & 0xff) + return off + 1, nil }