-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Just like #50 but uses https://github.com/xitongsys/parquet-go instead of https://github.com/segmentio/parquet-go
- Loading branch information
Showing 15 changed files with 1,526 additions and 10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package parquet | ||
|
||
type Options func(*Client) | ||
|
||
// Client is a parquet client. | ||
type Client struct { | ||
spec Spec | ||
} | ||
|
||
func NewClient(options ...Options) (*Client, error) { | ||
c := &Client{} | ||
for _, option := range options { | ||
option(c) | ||
} | ||
|
||
return c, nil | ||
} | ||
|
||
func WithSpec(spec Spec) Options { | ||
return func(c *Client) { | ||
c.spec = spec | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package parquet | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
|
||
"github.com/xitongsys/parquet-go/source" | ||
) | ||
|
||
// pqReader is an in-memory byte source. Its read and seek methods are
// promoted from the embedded *bytes.Reader.
type pqReader struct {
	// data is the complete buffer that Reader reads from; it is kept so that
	// further readers over the same content can be created.
	data []byte

	*bytes.Reader
}
|
||
var _ source.ParquetFile = (*pqReader)(nil) | ||
|
||
func newPQReader(data []byte) *pqReader { | ||
bu := make([]byte, len(data)) | ||
copy(bu, data) | ||
|
||
return &pqReader{ | ||
Reader: bytes.NewReader(bu), | ||
data: bu, | ||
} | ||
} | ||
func (pq *pqReader) Open(string) (source.ParquetFile, error) { | ||
return newPQReader(pq.data), nil | ||
} | ||
|
||
func (*pqReader) Close() error { | ||
return nil | ||
} | ||
|
||
func (*pqReader) Write([]byte) (n int, err error) { | ||
return 0, fmt.Errorf("not implemented") | ||
} | ||
|
||
func (*pqReader) Create(string) (source.ParquetFile, error) { | ||
return nil, fmt.Errorf("not implemented") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package parquet | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
"io" | ||
|
||
"github.com/cloudquery/plugin-sdk/schema" | ||
"github.com/xitongsys/parquet-go/reader" | ||
) | ||
|
||
func (*Client) Read(f io.Reader, table *schema.Table, sourceName string, res chan<- []any) error { | ||
sourceNameIndex := int64(table.Columns.Index(schema.CqSourceNameColumn.Name)) | ||
if sourceNameIndex == -1 { | ||
return fmt.Errorf("could not find column %s in table %s", schema.CqSourceNameColumn.Name, table.Name) | ||
} | ||
|
||
buf := &bytes.Buffer{} | ||
if _, err := io.Copy(buf, f); err != nil { | ||
return err | ||
} | ||
|
||
s := makeSchema(table.Columns) | ||
r, err := reader.NewParquetReader(newPQReader(buf.Bytes()), s, 2) | ||
if err != nil { | ||
return fmt.Errorf("can't create parquet reader: %w", err) | ||
} | ||
defer r.ReadStop() | ||
|
||
for row := int64(0); row < r.GetNumRows(); row++ { | ||
record := make([]any, len(table.Columns)) | ||
for col := 0; col < len(table.Columns); col++ { | ||
vals, _, _, err := r.ReadColumnByIndex(int64(col), 1) | ||
if err != nil { | ||
return err | ||
} | ||
if len(vals) == 1 { | ||
record[col] = vals[0] | ||
} else { | ||
record[col] = vals | ||
} | ||
} | ||
|
||
if record[sourceNameIndex] == sourceName { | ||
res <- record | ||
} | ||
} | ||
|
||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
package parquet | ||
|
||
import ( | ||
"encoding/json" | ||
"strings" | ||
|
||
"github.com/cloudquery/plugin-sdk/schema" | ||
pschema "github.com/xitongsys/parquet-go/schema" | ||
) | ||
|
||
func makeSchema(cols schema.ColumnList) string { | ||
s := pschema.JSONSchemaItemType{ | ||
Tag: `name=parquet_go_root, repetitiontype=REQUIRED`, | ||
} | ||
|
||
for i := range cols { | ||
tag := `name=` + cols[i].Name | ||
if opts := structOptsForColumn(cols[i]); len(opts) > 0 { | ||
tag += ", " + strings.Join(opts, ", ") | ||
} | ||
s.Fields = append(s.Fields, &pschema.JSONSchemaItemType{Tag: tag}) | ||
} | ||
|
||
b, _ := json.Marshal(s) | ||
return string(b) | ||
} | ||
|
||
func structOptsForColumn(col schema.Column) []string { | ||
opts := []string{} | ||
|
||
switch col.Type { | ||
case schema.TypeJSON: | ||
opts = append(opts, "type=BYTE_ARRAY", "convertedtype=UTF8") | ||
case schema.TypeTimestamp: | ||
opts = append(opts, "type=INT64", "convertedtype=TIMESTAMP_MILLIS") | ||
case schema.TypeString, schema.TypeUUID, schema.TypeCIDR, schema.TypeInet, schema.TypeMacAddr, | ||
schema.TypeStringArray, schema.TypeUUIDArray, schema.TypeCIDRArray, schema.TypeInetArray, schema.TypeMacAddrArray: | ||
opts = append(opts, "type=BYTE_ARRAY", "convertedtype=UTF8") | ||
case schema.TypeFloat: | ||
opts = append(opts, "type=DOUBLE") | ||
case schema.TypeInt, schema.TypeIntArray: | ||
opts = append(opts, "type=INT64") | ||
case schema.TypeByteArray: | ||
opts = append(opts, "type=BYTE_ARRAY") | ||
case schema.TypeBool: | ||
opts = append(opts, "type=BOOLEAN") | ||
default: | ||
panic("unhandled type: " + col.Type.String()) | ||
} | ||
|
||
switch col.Type { | ||
case schema.TypeStringArray, schema.TypeIntArray, schema.TypeUUIDArray, schema.TypeCIDRArray, schema.TypeInetArray, schema.TypeMacAddrArray: | ||
opts = append(opts, "repetitiontype=REPEATED") | ||
default: | ||
if col.CreationOptions.PrimaryKey || col.CreationOptions.IncrementalKey { | ||
opts = append(opts, "repetitiontype=REQUIRED") | ||
} else { | ||
opts = append(opts, "repetitiontype=OPTIONAL") | ||
} | ||
} | ||
|
||
return opts | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
package parquet | ||
|
||
// Spec is the configuration for the parquet Client. It currently has no
// fields.
type Spec struct {
}
|
||
func (*Spec) SetDefaults() { | ||
} | ||
|
||
func (*Spec) Validate() error { | ||
return nil | ||
} |
Oops, something went wrong.