Skip to content

Commit

Permalink
fix: Use buffered writing mode in parquet (#203)
Browse files Browse the repository at this point in the history
Closes #137 

#### Benchmarks
Run with the following command from the `parquet` directory:
```sh
go test \
  -test.run=BenchmarkWrite \
  -test.bench=BenchmarkWrite \
  -test.count 10 -test.benchmem -test.benchtime 10000x
```

<details><summary>Before this update</summary>

```
goos: darwin
goarch: arm64
pkg: github.com/cloudquery/filetypes/v3/parquet
BenchmarkWrite-10          10000           4628263 ns/op         5796480 B/op      44245 allocs/op
BenchmarkWrite-10          10000           4480788 ns/op         5796474 B/op      44245 allocs/op
BenchmarkWrite-10          10000           4591783 ns/op         5796471 B/op      44245 allocs/op
BenchmarkWrite-10          10000           4610580 ns/op         5796477 B/op      44245 allocs/op
BenchmarkWrite-10          10000           4524806 ns/op         5796473 B/op      44245 allocs/op
BenchmarkWrite-10          10000           4557667 ns/op         5796466 B/op      44245 allocs/op
BenchmarkWrite-10          10000           4869530 ns/op         5796476 B/op      44245 allocs/op
BenchmarkWrite-10          10000           4894571 ns/op         5796474 B/op      44245 allocs/op
BenchmarkWrite-10          10000           4700499 ns/op         5796468 B/op      44245 allocs/op
BenchmarkWrite-10          10000           4793868 ns/op         5796473 B/op      44245 allocs/op
PASS
ok      github.com/cloudquery/filetypes/v3/parquet      539.889s
```

</details> 

<details><summary>After this update</summary>

```
goos: darwin
goarch: arm64
pkg: github.com/cloudquery/filetypes/v3/parquet
BenchmarkWrite-10          10000            923740 ns/op         1146573 B/op      15695 allocs/op
BenchmarkWrite-10          10000            970047 ns/op         1146193 B/op      15695 allocs/op
BenchmarkWrite-10          10000            920979 ns/op         1146542 B/op      15695 allocs/op
BenchmarkWrite-10          10000            923738 ns/op         1146486 B/op      15695 allocs/op
BenchmarkWrite-10          10000            918581 ns/op         1146055 B/op      15694 allocs/op
BenchmarkWrite-10          10000            906547 ns/op         1146690 B/op      15695 allocs/op
BenchmarkWrite-10          10000            912946 ns/op         1146381 B/op      15695 allocs/op
BenchmarkWrite-10          10000            921024 ns/op         1146378 B/op      15695 allocs/op
BenchmarkWrite-10          10000            905637 ns/op         1146371 B/op      15695 allocs/op
BenchmarkWrite-10          10000            919410 ns/op         1146494 B/op      15695 allocs/op
PASS
ok      github.com/cloudquery/filetypes/v3/parquet      158.831s
```

</details>
  • Loading branch information
candiduslynx authored Jun 14, 2023
1 parent a94a865 commit 5ad67f3
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 11 deletions.
2 changes: 1 addition & 1 deletion parquet/write.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func (h *Handle) WriteFooter() error {

func (h *Handle) WriteContent(records []arrow.Record) error {
for _, rec := range records {
if err := h.w.Write(transformRecord(h.s, rec)); err != nil {
if err := h.w.WriteBuffered(transformRecord(h.s, rec)); err != nil {
return err
}
}
Expand Down
18 changes: 8 additions & 10 deletions parquet/write_read_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func BenchmarkWrite(b *testing.B) {
opts := schema.GenTestDataOptions{
SourceName: sourceName,
SyncTime: syncTime,
MaxRows: 1000,
MaxRows: b.N,
}
records := schema.GenTestData(table, opts)

Expand All @@ -85,14 +85,12 @@ func BenchmarkWrite(b *testing.B) {
var buf bytes.Buffer
writer := bufio.NewWriter(&buf)
b.ResetTimer()
for i := 0; i < b.N; i++ {
if err := types.WriteAll(cl, writer, table, records); err != nil {
b.Fatal(err)
}
err = writer.Flush()
if err != nil {
b.Fatal(err)
}
buf.Reset()

if err := types.WriteAll(cl, writer, table, records); err != nil {
b.Fatal(err)
}
err = writer.Flush()
if err != nil {
b.Fatal(err)
}
}

0 comments on commit 5ad67f3

Please sign in to comment.