-
Notifications
You must be signed in to change notification settings - Fork 4.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Reuse a byte buffer for holding XML (#3118)
Previously the data was read into a []byte encoded as UTF16. Then that data was converted to []uint16 so that we can use utf16.Decode(). Then the []rune slice was converted to a string which did another data copy. The XML was unmarshalled from the string. This PR changes the code to convert the UTF16 []byte directly to UTF8 and puts the result into a reusable bytes.Buffer. The XML is then unmarshalled directly from the data in buffer. ``` BenchmarkUTF16ToUTF8-4 2000000 1044 ns/op 4 B/op 1 allocs/op ``` ``` git checkout 6ba7700 PS > go test github.com/elastic/beats/winlogbeat/eventlog -run TestBenc -benchtest -benchtime 10s -v === RUN TestBenchmarkBatchReadSize --- PASS: TestBenchmarkBatchReadSize (67.89s) bench_test.go:100: batch_size=10, total_events=30000, batch_time=5.119626ms, events_per_sec=1953.2676801000696, bytes_alloced_per_event=44 kB, total_allocs=7385952 bench_test.go:100: batch_size=100, total_events=30000, batch_time=51.366271ms, events_per_sec=1946.802795943665, bytes_alloced_per_event=44 kB, total_allocs=7354448 bench_test.go:100: batch_size=500, total_events=25000, batch_time=250.974356ms, events_per_sec=1992.2354138842775, bytes_alloced_per_event=43 kB, total_allocs=6125812 bench_test.go:100: batch_size=1000, total_events=30000, batch_time=514.796113ms, events_per_sec=1942.5166094834128, bytes_alloced_per_event=43 kB, total_allocs=7350550 PASS ok github.com/elastic/beats/winlogbeat/eventlog 67.950s git checkout 833a806 (#3113) PS > go test github.com/elastic/beats/winlogbeat/eventlog -run TestBenc -benchtest -benchtime 10s -v === RUN TestBenchmarkBatchReadSize --- PASS: TestBenchmarkBatchReadSize (65.69s) bench_test.go:100: batch_size=10, total_events=30000, batch_time=4.858277ms, events_per_sec=2058.3429063431336, bytes_alloced_per_event=25 kB, total_allocs=7385847 bench_test.go:100: batch_size=100, total_events=30000, batch_time=51.612952ms, events_per_sec=1937.49816906423, bytes_alloced_per_event=24 kB, total_allocs=7354362 bench_test.go:100: 
batch_size=500, total_events=25000, batch_time=241.713826ms, events_per_sec=2068.561853801445, bytes_alloced_per_event=24 kB, total_allocs=6125757 bench_test.go:100: batch_size=1000, total_events=30000, batch_time=494.961643ms, events_per_sec=2020.3585755431961, bytes_alloced_per_event=24 kB, total_allocs=7350474 PASS ok github.com/elastic/beats/winlogbeat/eventlog 65.747s This PR (#3118) PS > go test github.com/elastic/beats/winlogbeat/eventlog -run TestBenc -benchtest -benchtime 10s -v === RUN TestBenchmarkBatchReadSize --- PASS: TestBenchmarkBatchReadSize (65.80s) bench_test.go:100: batch_size=10, total_events=30000, batch_time=4.925281ms, events_per_sec=2030.341009985014, bytes_alloced_per_event=14 kB, total_allocs=7295817 bench_test.go:100: batch_size=100, total_events=30000, batch_time=48.976134ms, events_per_sec=2041.8108134055658, bytes_alloced_per_event=14 kB, total_allocs=7264329 bench_test.go:100: batch_size=500, total_events=25000, batch_time=250.314316ms, events_per_sec=1997.4886294557757, bytes_alloced_per_event=14 kB, total_allocs=6050719 bench_test.go:100: batch_size=1000, total_events=30000, batch_time=499.861923ms, events_per_sec=2000.5524605641945, bytes_alloced_per_event=14 kB, total_allocs=7260400 PASS ok github.com/elastic/beats/winlogbeat/eventlog 65.856s ```
- Loading branch information
1 parent
7a9f1bf
commit 88d68dc
Showing
7 changed files
with
272 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
package sys | ||
|
||
// ByteBuffer is an expandable buffer backed by a byte slice.
//
// The backing slice is kept at its full usable length; the number of bytes
// actually written is tracked separately in offset. The zero value is an
// empty buffer that grows on the first Write.
type ByteBuffer struct {
	buf    []byte // backing storage; len(buf) is the space available without growing
	offset int    // number of bytes written so far (index of the next write)
}

// NewByteBuffer creates a new ByteBuffer with an initial capacity of
// initialSize bytes. initialSize must not be negative.
func NewByteBuffer(initialSize int) *ByteBuffer {
	return &ByteBuffer{buf: make([]byte, initialSize)}
}

// Write appends the contents of p to the buffer, growing the buffer as needed.
// The return value is the length of p; err is always nil.
//
// Growing allocates a new backing slice, so slices previously returned by
// Bytes keep referring to the old storage after a Write that grows.
func (b *ByteBuffer) Write(p []byte) (int, error) {
	if required := b.offset + len(p); required > len(b.buf) {
		// Grow to twice the current size plus the shortfall so that repeated
		// small writes don't spend lots of time allocating and copying.
		shortfall := required - len(b.buf)
		largerBuf := make([]byte, 2*len(b.buf)+shortfall)
		copy(largerBuf, b.buf[:b.offset])
		b.buf = largerBuf
	}
	n := copy(b.buf[b.offset:], p)
	b.offset += n
	return n, nil
}

// Reset resets the buffer to be empty. It retains the same underlying storage
// so later writes can reuse it.
func (b *ByteBuffer) Reset() {
	b.offset = 0
	// Re-extend to the full capacity so all of the storage is writable again.
	b.buf = b.buf[:cap(b.buf)]
}

// Bytes returns a slice of length b.Len() holding the bytes that have been
// written to the buffer. The slice aliases the buffer's storage; it is not a
// copy.
func (b *ByteBuffer) Bytes() []byte {
	return b.buf[:b.offset]
}

// Len returns the number of bytes that have been written to the buffer.
func (b *ByteBuffer) Len() int {
	return b.offset
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
package sys | ||
|
||
import ( | ||
"bytes" | ||
"io" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
var _ io.Writer = &ByteBuffer{} | ||
|
||
func TestByteBuffer(t *testing.T) { | ||
input := "hello" | ||
length := len(input) | ||
buf := NewByteBuffer(1024) | ||
|
||
n, err := buf.Write([]byte(input)) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
assert.Equal(t, length, n) | ||
|
||
assert.Equal(t, input, string(buf.Bytes())) | ||
assert.Equal(t, length, len(buf.Bytes())) | ||
assert.Equal(t, length, buf.Len()) | ||
} | ||
|
||
func TestByteBufferGrow(t *testing.T) { | ||
input := "hello" | ||
length := len(input) | ||
buf := NewByteBuffer(0) | ||
|
||
n, err := buf.Write([]byte(input)) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
assert.Equal(t, length, n) | ||
|
||
assert.Equal(t, input, string(buf.Bytes())) | ||
assert.Equal(t, length, len(buf.Bytes())) | ||
assert.Equal(t, length, buf.Len()) | ||
assert.Equal(t, length, len(buf.buf)) | ||
|
||
n, err = buf.Write([]byte(input)) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
assert.Equal(t, length, n) | ||
|
||
assert.Equal(t, input+input, string(buf.Bytes())) | ||
assert.Equal(t, 2*length, len(buf.Bytes())) | ||
assert.Equal(t, 2*length, buf.Len()) | ||
} | ||
|
||
func BenchmarkByteBuffer(b *testing.B) { | ||
input := []byte("test writing this sentence to a buffer") | ||
|
||
b.Run("byteBuffer", func(b *testing.B) { | ||
buf := NewByteBuffer(1024) | ||
b.ResetTimer() | ||
|
||
for i := 0; i < b.N; i++ { | ||
buf.Write(input) | ||
buf.Bytes() | ||
buf.Reset() | ||
} | ||
}) | ||
|
||
b.Run("bytes.Buffer", func(b *testing.B) { | ||
buf := bytes.NewBuffer(make([]byte, 0, 1024)) | ||
b.ResetTimer() | ||
|
||
for i := 0; i < b.N; i++ { | ||
buf.Write(input) | ||
buf.Bytes() | ||
buf.Reset() | ||
} | ||
}) | ||
} | ||
|
||
func BenchmarkByteBufferGrow(b *testing.B) { | ||
b.Run("byteBuffer", func(b *testing.B) { | ||
buf := NewByteBuffer(0) | ||
b.ResetTimer() | ||
|
||
for i := 0; i < b.N; i++ { | ||
buf.Write([]byte("a")) | ||
buf.Bytes() | ||
} | ||
}) | ||
|
||
b.Run("bytes.Buffer", func(b *testing.B) { | ||
buf := bytes.NewBuffer(make([]byte, 0)) | ||
b.ResetTimer() | ||
|
||
for i := 0; i < b.N; i++ { | ||
buf.Write([]byte("a")) | ||
buf.Bytes() | ||
} | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.