Skip to content

Commit

Permalink
Add String() methods to parsed types (#48)
Browse files Browse the repository at this point in the history
This enables clients to move back and forth between parsed objects and
text patches. The generated patches are semantically equal to the parsed
object and should re-parse to the same object, but may not be
byte-for-byte identical to the original input.

In my testing, formatted text patches are usually identical to the
input, but there may be cases where this is not true. Binary patches
always differ. This is because Go's 'compress/flate' package ends
streams with an empty block instead of adding the end-of-stream flag to
the last non-empty block, as Git's C implementation does. Since the
streams will always be different for this reason, I chose to also enable
default compression (the test patches I generated with Git used no
compression).

The main tests for this feature involve parsing, formatting, and then
re-parsing a patch to make sure we get equal objects.

Formatting is handled by a new internal formatter type, which allows
writing all data to the same stream. This isn't exposed publicly right
now, but will be useful if there's a need for more flexible formatting
functions in the future, like formatting to a user-provided io.Writer.
  • Loading branch information
bluekeyes authored Aug 11, 2024
1 parent 9e0997e commit 8584cd5
Show file tree
Hide file tree
Showing 20 changed files with 746 additions and 4 deletions.
6 changes: 6 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ issues:
exclude-use-default: false

linters-settings:
errcheck:
exclude-functions:
- (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write
- (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString
- (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte
- fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter)
goimports:
local-prefixes: github.com/bluekeyes/go-gitdiff
revive:
Expand Down
43 changes: 41 additions & 2 deletions gitdiff/base85.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ func init() {
}

// base85Decode decodes Base85-encoded data from src into dst. It uses the
// alphabet defined by base85.c in the Git source tree, which appears to be
// unique. src must contain at least len(dst) bytes of encoded data.
// alphabet defined by base85.c in the Git source tree. src must contain at
// least len(dst) bytes of encoded data.
func base85Decode(dst, src []byte) error {
var v uint32
var n, ndst int
Expand Down Expand Up @@ -50,3 +50,42 @@ func base85Decode(dst, src []byte) error {
}
return nil
}

// base85Encode writes the Base85 encoding of src into dst using the alphabet
// from base85.c in the Git source tree. Each group of four input bytes is
// read as a big-endian 32-bit word and emitted as five characters; a trailing
// group of one to three bytes is zero-padded on the right and still emitted
// as a full five characters. dst must be large enough to hold the output
// (see base85Len), otherwise the function panics on an out-of-range index.
func base85Encode(dst, src []byte) {
	out := 0

	// put writes the five Base85 digits of word, most significant first,
	// starting at dst[out], then advances the output position.
	put := func(word uint32) {
		divisor := uint32(85 * 85 * 85 * 85)
		for k := 0; k < 5; k++ {
			dst[out+k] = b85Alpha[(word/divisor)%85]
			divisor /= 85
		}
		out += 5
	}

	// Encode every complete four-byte group.
	in := 0
	for ; len(src)-in >= 4; in += 4 {
		put(uint32(src[in])<<24 | uint32(src[in+1])<<16 | uint32(src[in+2])<<8 | uint32(src[in+3]))
	}

	// Encode the remaining one to three bytes, if any, packed into the high
	// bytes of the word so the missing low bytes read as zero.
	rest := src[in:]
	if len(rest) == 0 {
		return
	}
	var word uint32
	for k, b := range rest {
		word |= uint32(b) << uint(24-8*k)
	}
	put(word)
}

// base85Len returns the number of bytes produced by Base85-encoding n input
// bytes: the input is split into groups of up to four bytes, rounding up, and
// each group takes five output bytes.
func base85Len(n int) int {
	groups := (n + 3) / 4
	return groups * 5
}
58 changes: 58 additions & 0 deletions gitdiff/base85_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package gitdiff

import (
"bytes"
"testing"
)

Expand Down Expand Up @@ -58,3 +59,60 @@ func TestBase85Decode(t *testing.T) {
})
}
}

// TestBase85Encode checks base85Encode against a table of known encodings:
// empty input, a lone partial group, one full group, and a full group
// followed by a partial group. Each mismatching output byte is reported
// individually.
func TestBase85Encode(t *testing.T) {
	type testCase struct {
		Input  []byte
		Output string
	}

	cases := map[string]testCase{
		"zeroBytes": {Input: []byte{}, Output: ""},
		"twoBytes":  {Input: []byte{0xCA, 0xFE}, Output: "%KiWV"},
		"fourBytes": {Input: []byte{0x0, 0x0, 0xCA, 0xFE}, Output: "007GV"},
		"sixBytes":  {Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE}, Output: "007GV%KiWV"},
	}

	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			want := []byte(tc.Output)
			got := make([]byte, len(want))
			base85Encode(got, tc.Input)

			for i := range want {
				if got[i] == want[i] {
					continue
				}
				t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, want[i], got[i])
			}
		})
	}
}

// FuzzBase85Roundtrip checks that encoding arbitrary bytes with base85Encode
// and decoding the result with base85Decode yields the original input. The
// seed corpus contains inputs of 2, 3, 8, and 21 bytes.
func FuzzBase85Roundtrip(f *testing.F) {
	seeds := [][]byte{
		{0x2b, 0x0d},
		{0xbc, 0xb4, 0x3f},
		{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25},
		{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8},
	}
	for _, seed := range seeds {
		f.Add(seed)
	}

	f.Fuzz(func(t *testing.T, in []byte) {
		// The encoded buffer is sized with base85Len; the decode target is
		// the same length as the input.
		encoded := make([]byte, base85Len(len(in)))
		decoded := make([]byte, len(in))

		base85Encode(encoded, in)

		err := base85Decode(decoded, encoded)
		if err != nil {
			t.Fatalf("unexpected error decoding base85 data: %v", err)
		}
		if bytes.Equal(in, decoded) {
			return
		}
		t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, decoded, string(encoded))
	})
}
Loading

0 comments on commit 8584cd5

Please sign in to comment.