Skip to content

Commit

Permalink
bzip2: reject missing repeater when decoding RLE1
Browse files Browse the repository at this point in the history
The last repeater value in RLE1 can technically be omitted and still be
non-ambiguous since the end of the buffer implies that the data is complete.
However, the C implementation does not handle this case, so we must reject
it as well.
  • Loading branch information
dsnet committed Dec 31, 2016
1 parent ca9a1d2 commit f98e3a5
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 14 deletions.
6 changes: 4 additions & 2 deletions bzip2/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ func (zr *Reader) Reset(r io.Reader) {

func (zr *Reader) Read(buf []byte) (int, error) {
for {
cnt, _ := zr.rle.Read(buf)
cnt, err := zr.rle.Read(buf)
if err != rleDone && zr.err == nil {
zr.err = err
}
if cnt > 0 {
zr.wantBlkCRC = zr.crc.update(zr.wantBlkCRC, buf[:cnt])
zr.OutputOffset += int64(cnt)
Expand Down Expand Up @@ -105,7 +108,6 @@ func (zr *Reader) Read(buf []byte) (int, error) {
buf := zr.decodeBlock()
zr.rle.Init(buf)
}()
var err error
if zr.InputOffset, err = zr.rd.Flush(); zr.err == nil {
zr.err = err
}
Expand Down
5 changes: 1 addition & 4 deletions bzip2/reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,6 @@ func TestReader(t *testing.T) {
output: db(`>>> X:41*4`),
}, {
// RLE1 stage with missing repeater value.
//
// NOTE: The C library rejects this file. Currently, our implementation
// liberally allows this since it simplifies the implementation.
// This may change in the future.
name: "RLE3",
input: db(`>>>
"BZh1"
Expand All @@ -196,6 +192,7 @@ func TestReader(t *testing.T) {
> H48:177245385090 H32:e16e6571
`),
output: db(`>>> X:41*4`),
errf: "IsCorrupted",
}, {
// RLE1 stage with sub-optimal repeater usage.
name: "RLE4",
Expand Down
8 changes: 4 additions & 4 deletions bzip2/rle1.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ import (
"github.com/dsnet/compress/internal/errors"
)

// rleDone is a special "error" to indicate that the RLE buffer is depleted.
var rleDone = errorf(errors.Unknown, "RLE1 buffer is full")
// rleDone is a special "error" to indicate that the RLE stage is done.
var rleDone = errorf(errors.Unknown, "RLE1 stage is completed")

// runLengthEncoding implements the first RLE stage of bzip2. Every sequence
// of 4..255 duplicated bytes is replaced by only the first 4 bytes, and a
// single byte representing the repeat length. Similar to the C bzip2
// implementation, the encoder will always terminate repeat sequences with a
// count (even if it is the end of the buffer), and it will also never produce
// run lengths of 256..259. The decoder, however, will handle both cases.
// run lengths of 256..259. The decoder can handle the latter case.
//
// For example, if the input was:
// input: "AAAAAAABBBBCCCD"
Expand Down Expand Up @@ -75,7 +75,7 @@ func (rle *runLengthEncoding) Read(buf []byte) (int, error) {
switch {
case rle.lastCnt == -4:
if rle.idx >= len(rle.buf) {
return i, rleDone
return i, errorf(errors.Corrupted, "missing terminating run-length repeater")
}
rle.lastCnt = int(rle.buf[rle.idx])
rle.idx++
Expand Down
13 changes: 10 additions & 3 deletions bzip2/rle1_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,10 @@ func TestRunLengthEncoder(t *testing.T) {

buf := make([]byte, 3)
for i, v := range vectors {
rd := strings.NewReader(v.input)
rle := new(runLengthEncoding)
rle.Init(make([]byte, v.size))
_, err := io.CopyBuffer(rle, strings.NewReader(v.input), buf)
_, err := io.CopyBuffer(rle, struct{ io.Reader }{rd}, buf)
output := string(rle.Bytes())

if output != v.output {
Expand All @@ -89,6 +90,7 @@ func TestRunLengthDecoder(t *testing.T) {
vectors := []struct {
input string
output string
fail bool
}{{
input: "",
output: "",
Expand All @@ -98,9 +100,11 @@ func TestRunLengthDecoder(t *testing.T) {
}, {
input: "aaaa",
output: "aaaa",
fail: true,
}, {
input: "baaaa\x00aaaa",
output: "baaaaaaaa",
fail: true,
}, {
input: "abcccc\x00",
output: "abcccc",
Expand Down Expand Up @@ -141,14 +145,17 @@ func TestRunLengthDecoder(t *testing.T) {

buf := make([]byte, 3)
for i, v := range vectors {
rle := new(runLengthEncoding)
wr := new(bytes.Buffer)
rle := new(runLengthEncoding)
rle.Init([]byte(v.input))
io.CopyBuffer(wr, rle, buf)
_, err := io.CopyBuffer(struct{ io.Writer }{wr}, rle, buf)
output := wr.String()

if output != v.output {
t.Errorf("test %d, output mismatch:\ngot %q\nwant %q", i, output, v.output)
}
if fail := err != rleDone; fail != v.fail {
t.Errorf("test %d, failure mismatch: got %t, want %t", i, fail, v.fail)
}
}
}
5 changes: 4 additions & 1 deletion bzip2/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ func (zw *Writer) Write(buf []byte) (int, error) {

cnt := len(buf)
for {
wrCnt, _ := zw.rle.Write(buf)
wrCnt, err := zw.rle.Write(buf)
if err != rleDone && zw.err == nil {
zw.err = err
}
zw.blkCRC = zw.crc.update(zw.blkCRC, buf[:wrCnt])
buf = buf[wrCnt:]
if len(buf) == 0 {
Expand Down

0 comments on commit f98e3a5

Please sign in to comment.