diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 785e1c9..b968f68 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -4,3 +4,4 @@ LICENSE NOTICE go.sum build +rat-results.txt diff --git a/errors.go b/errors.go index 801f5c5..c5bd870 100644 --- a/errors.go +++ b/errors.go @@ -24,4 +24,5 @@ var ( ErrNotImplemented = errors.New("not implemented") ErrInvalidArgument = errors.New("invalid argument") ErrInvalidSchema = errors.New("invalid schema") + ErrInvalidTransform = errors.New("invalid transform syntax") ) diff --git a/go.mod b/go.mod index 030d65e..6bea61d 100644 --- a/go.mod +++ b/go.mod @@ -21,11 +21,13 @@ go 1.20 require ( github.com/stretchr/testify v1.8.4 - golang.org/x/exp v0.0.0-20230811145659-89c5cff77bcb + golang.org/x/exp v0.0.0-20230905200255-921286631fa9 ) require ( github.com/davecgh/go-spew v1.1.1 // indirect + github.com/kr/pretty v0.3.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 172fe17..460287c 100644 --- a/go.sum +++ b/go.sum @@ -1,12 +1,24 @@ +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/pkg/diff 
v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -golang.org/x/exp v0.0.0-20230811145659-89c5cff77bcb h1:mIKbk8weKhSeLH2GmUTrvx8CjkyJmnU1wFmg59CUjFA= -golang.org/x/exp v0.0.0-20230811145659-89c5cff77bcb/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/partitions.go b/partitions.go new file mode 100644 index 0000000..c24f082 --- /dev/null +++ b/partitions.go @@ -0,0 +1,232 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package iceberg + +import ( + "encoding/json" + "fmt" + "strings" + + "golang.org/x/exp/slices" +) + +const ( + partitionDataIDStart = 1000 + InitialPartitionSpecID = 0 +) + +// UnpartitionedSpec is the default unpartitioned spec which can +// be used for comparisons or to just provide a convenience for referencing +// the same unpartitioned spec object. +var UnpartitionedSpec = &PartitionSpec{id: 0} + +// PartitionField represents how one partition value is derived from the +// source column by transformation. 
+type PartitionField struct { + // SourceID is the source column id of the table's schema + SourceID int `json:"source-id"` + // FieldID is the partition field id across all the table partition specs + FieldID int `json:"field-id"` + // Name is the name of the partition field itself + Name string `json:"name"` + // Transform is the transform used to produce the partition value + Transform Transform `json:"transform"` +} + +func (p *PartitionField) String() string { + return fmt.Sprintf("%d: %s: %s(%d)", p.FieldID, p.Name, p.Transform, p.SourceID) +} + +func (p *PartitionField) UnmarshalJSON(b []byte) error { + type Alias PartitionField + aux := struct { + TransformString string `json:"transform"` + *Alias + }{ + Alias: (*Alias)(p), + } + + err := json.Unmarshal(b, &aux) + if err != nil { + return err + } + + if p.Transform, err = ParseTransform(aux.TransformString); err != nil { + return err + } + + return nil +} + +// PartitionSpec captures the transformation from table data to partition values +type PartitionSpec struct { + // any change to a PartitionSpec will produce a new spec id + id int + fields []PartitionField + + // this is populated by initialize after creation + sourceIdToFields map[int][]PartitionField +} + +func NewPartitionSpec(fields ...PartitionField) PartitionSpec { + return NewPartitionSpecID(InitialPartitionSpecID, fields...) +} + +func NewPartitionSpecID(id int, fields ...PartitionField) PartitionSpec { + ret := PartitionSpec{id: id, fields: fields} + ret.initialize() + return ret +} + +// CompatibleWith returns true if this partition spec is considered +// compatible with the passed in partition spec. This means that the two +// specs have equivalent field lists regardless of the spec id. 
+func (ps *PartitionSpec) CompatibleWith(other *PartitionSpec) bool { + if ps == other { + return true + } + + if len(ps.fields) != len(other.fields) { + return false + } + + return slices.EqualFunc(ps.fields, other.fields, func(left, right PartitionField) bool { + return left.SourceID == right.SourceID && left.Name == right.Name && + left.Transform == right.Transform + }) +} + +// Equals returns true iff the field lists are the same AND the spec id +// is the same between this partition spec and the provided one. +func (ps *PartitionSpec) Equals(other PartitionSpec) bool { + return ps.id == other.id && slices.Equal(ps.fields, other.fields) +} + +func (ps PartitionSpec) MarshalJSON() ([]byte, error) { + if ps.fields == nil { + ps.fields = []PartitionField{} + } + return json.Marshal(struct { + ID int `json:"spec-id"` + Fields []PartitionField `json:"fields"` + }{ps.id, ps.fields}) +} + +func (ps *PartitionSpec) UnmarshalJSON(b []byte) error { + aux := struct { + ID int `json:"spec-id"` + Fields []PartitionField `json:"fields"` + }{ID: ps.id, Fields: ps.fields} + + if err := json.Unmarshal(b, &aux); err != nil { + return err + } + + ps.id, ps.fields = aux.ID, aux.Fields + ps.initialize() + return nil +} + +func (ps *PartitionSpec) initialize() { + ps.sourceIdToFields = make(map[int][]PartitionField) + for _, f := range ps.fields { + ps.sourceIdToFields[f.SourceID] = + append(ps.sourceIdToFields[f.SourceID], f) + } +} + +func (ps *PartitionSpec) ID() int { return ps.id } +func (ps *PartitionSpec) NumFields() int { return len(ps.fields) } +func (ps *PartitionSpec) Field(i int) PartitionField { return ps.fields[i] } + +func (ps *PartitionSpec) IsUnpartitioned() bool { + if len(ps.fields) == 0 { + return true + } + + for _, f := range ps.fields { + if _, ok := f.Transform.(VoidTransform); !ok { + return false + } + } + + return true +} + +func (ps *PartitionSpec) FieldsBySourceID(fieldID int) []PartitionField { + return slices.Clone(ps.sourceIdToFields[fieldID]) +} + 
+func (ps PartitionSpec) String() string { + var b strings.Builder + b.WriteByte('[') + for i, f := range ps.fields { + if i == 0 { + b.WriteString("\n") + } + b.WriteString("\t") + b.WriteString(f.String()) + b.WriteString("\n") + } + b.WriteByte(']') + + return b.String() +} + +func (ps *PartitionSpec) LastAssignedFieldID() int { + if len(ps.fields) == 0 { + return partitionDataIDStart - 1 + } + + id := ps.fields[0].FieldID + for _, f := range ps.fields[1:] { + if f.FieldID > id { + id = f.FieldID + } + } + return id +} + +// PartitionType produces a struct of the partition spec. +// +// The partition fields should be optional: +// - All partition transforms are required to produce null if the input value +// is null. This can happen when the source column is optional. +// - Partition fields may be added later, in which case not all files would +// have the result field and it may be null. +// +// There is a case where we can guarantee that a partition field in the first +// and only partition spec that uses a required source column will never be +// null, but it doesn't seem worth tracking this case. +func (ps *PartitionSpec) PartitionType(schema *Schema) *StructType { + nestedFields := []NestedField{} + for _, field := range ps.fields { + sourceType, ok := schema.FindTypeByID(field.SourceID) + if !ok { + continue + } + resultType := field.Transform.ResultType(sourceType) + nestedFields = append(nestedFields, NestedField{ + ID: field.FieldID, + Name: field.Name, + Type: resultType, + Required: false, + }) + } + return &StructType{FieldList: nestedFields} +} diff --git a/partitions_test.go b/partitions_test.go new file mode 100644 index 0000000..fd29190 --- /dev/null +++ b/partitions_test.go @@ -0,0 +1,143 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package iceberg_test + +import ( + "encoding/json" + "testing" + + "github.com/apache/iceberg-go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestPartitionSpec(t *testing.T) { + assert.Equal(t, 999, iceberg.UnpartitionedSpec.LastAssignedFieldID()) + + bucket := iceberg.BucketTransform{NumBuckets: 4} + idField1 := iceberg.PartitionField{ + SourceID: 3, FieldID: 1001, Name: "id", Transform: bucket} + spec1 := iceberg.NewPartitionSpec(idField1) + + assert.Zero(t, spec1.ID()) + assert.Equal(t, 1, spec1.NumFields()) + assert.Equal(t, idField1, spec1.Field(0)) + assert.NotEqual(t, idField1, spec1) + assert.False(t, spec1.IsUnpartitioned()) + assert.True(t, spec1.CompatibleWith(&spec1)) + assert.True(t, spec1.Equals(spec1)) + assert.Equal(t, 1001, spec1.LastAssignedFieldID()) + assert.Equal(t, "[\n\t1001: id: bucket[4](3)\n]", spec1.String()) + + // only differs by PartitionField FieldID + idField2 := iceberg.PartitionField{ + SourceID: 3, FieldID: 1002, Name: "id", Transform: bucket} + spec2 := iceberg.NewPartitionSpec(idField2) + + assert.False(t, spec1.Equals(spec2)) + assert.True(t, spec1.CompatibleWith(&spec2)) + assert.Equal(t, []iceberg.PartitionField{idField1}, spec1.FieldsBySourceID(3)) + assert.Empty(t, spec1.FieldsBySourceID(1925)) + + spec3 := iceberg.NewPartitionSpec(idField1, idField2) + assert.False(t, 
spec1.CompatibleWith(&spec3)) + assert.Equal(t, 1002, spec3.LastAssignedFieldID()) +} + +func TestUnpartitionedWithVoidField(t *testing.T) { + spec := iceberg.NewPartitionSpec(iceberg.PartitionField{ + SourceID: 3, FieldID: 1001, Name: "void", Transform: iceberg.VoidTransform{}, + }) + + assert.True(t, spec.IsUnpartitioned()) + + spec2 := iceberg.NewPartitionSpec(iceberg.PartitionField{ + SourceID: 3, FieldID: 1001, Name: "void", Transform: iceberg.VoidTransform{}, + }, iceberg.PartitionField{ + SourceID: 3, FieldID: 1002, Name: "bucket", Transform: iceberg.BucketTransform{NumBuckets: 2}, + }) + + assert.False(t, spec2.IsUnpartitioned()) +} + +func TestSerializeUnpartitionedSpec(t *testing.T) { + data, err := json.Marshal(iceberg.UnpartitionedSpec) + require.NoError(t, err) + + assert.JSONEq(t, `{"spec-id": 0, "fields": []}`, string(data)) + assert.True(t, iceberg.UnpartitionedSpec.IsUnpartitioned()) +} + +func TestSerializePartitionSpec(t *testing.T) { + spec := iceberg.NewPartitionSpecID(3, + iceberg.PartitionField{SourceID: 1, FieldID: 1000, + Transform: iceberg.TruncateTransform{Width: 19}, Name: "str_truncate"}, + iceberg.PartitionField{SourceID: 2, FieldID: 1001, + Transform: iceberg.BucketTransform{NumBuckets: 25}, Name: "int_bucket"}, + ) + + data, err := json.Marshal(spec) + require.NoError(t, err) + + assert.JSONEq(t, `{ + "spec-id": 3, + "fields": [ + { + "source-id": 1, + "field-id": 1000, + "transform": "truncate[19]", + "name": "str_truncate" + }, + { + "source-id": 2, + "field-id": 1001, + "transform": "bucket[25]", + "name": "int_bucket" + } + ] + }`, string(data)) + + var outspec iceberg.PartitionSpec + require.NoError(t, json.Unmarshal(data, &outspec)) + + assert.True(t, spec.Equals(outspec)) +} + +func TestPartitionType(t *testing.T) { + spec := iceberg.NewPartitionSpecID(3, + iceberg.PartitionField{SourceID: 1, FieldID: 1000, + Transform: iceberg.TruncateTransform{Width: 19}, Name: "str_truncate"}, + iceberg.PartitionField{SourceID: 2, FieldID: 
1001, + Transform: iceberg.BucketTransform{NumBuckets: 25}, Name: "int_bucket"}, + iceberg.PartitionField{SourceID: 3, FieldID: 1002, + Transform: iceberg.IdentityTransform{}, Name: "bool_identity"}, + iceberg.PartitionField{SourceID: 1, FieldID: 1003, + Transform: iceberg.VoidTransform{}, Name: "str_void"}, + ) + + expected := &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 1000, Name: "str_truncate", Type: iceberg.PrimitiveTypes.String}, + {ID: 1001, Name: "int_bucket", Type: iceberg.PrimitiveTypes.Int32}, + {ID: 1002, Name: "bool_identity", Type: iceberg.PrimitiveTypes.Bool}, + {ID: 1003, Name: "str_void", Type: iceberg.PrimitiveTypes.String}, + }, + } + actual := spec.PartitionType(tableSchemaSimple) + assert.Truef(t, expected.Equals(actual), "expected: %s, got: %s", expected, actual) +} diff --git a/schema.go b/schema.go index e9e559b..8edce1f 100644 --- a/schema.go +++ b/schema.go @@ -214,7 +214,7 @@ func (s *Schema) FindFieldByNameCaseInsensitive(name string) (NestedField, bool) return s.FindFieldByID(id) } -// FindFieldByID is like [*Schema.FindColumnByName], but returns the whole +// FindFieldByID is like [*Schema.FindColumnName], but returns the whole // field rather than just the field name. func (s *Schema) FindFieldByID(id int) (NestedField, bool) { idx, _ := s.lazyIDToField() diff --git a/transforms.go b/transforms.go new file mode 100644 index 0000000..58d2011 --- /dev/null +++ b/transforms.go @@ -0,0 +1,173 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package iceberg + +import ( + "encoding" + "fmt" + "strconv" + "strings" +) + +// ParseTransform takes the string representation of a transform as +// defined in the iceberg spec, and produces the appropriate Transform +// object or an error if the string is not a valid transform string. +func ParseTransform(s string) (Transform, error) { + s = strings.ToLower(s) + switch { + case strings.HasPrefix(s, "bucket"): + matches := regexFromBrackets.FindStringSubmatch(s) + if len(matches) != 2 { + break + } + + n, _ := strconv.Atoi(matches[1]) + return BucketTransform{NumBuckets: n}, nil + case strings.HasPrefix(s, "truncate"): + matches := regexFromBrackets.FindStringSubmatch(s) + if len(matches) != 2 { + break + } + + n, _ := strconv.Atoi(matches[1]) + return TruncateTransform{Width: n}, nil + default: + switch s { + case "identity": + return IdentityTransform{}, nil + case "void": + return VoidTransform{}, nil + case "year": + return YearTransform{}, nil + case "month": + return MonthTransform{}, nil + case "day": + return DayTransform{}, nil + case "hour": + return HourTransform{}, nil + } + } + + return nil, fmt.Errorf("%w: %s", ErrInvalidTransform, s) +} + +// Transform is an interface for the various Transformation types +// in partition specs. Currently, they do not yet provide actual +// transformation functions or implementation. That will come later as +// data reading gets implemented. 
+type Transform interface { + fmt.Stringer + encoding.TextMarshaler + ResultType(t Type) Type +} + +// IdentityTransform uses the identity function, performing no transformation +// but instead partitioning on the value itself. +type IdentityTransform struct{} + +func (t IdentityTransform) MarshalText() ([]byte, error) { + return []byte(t.String()), nil +} + +func (IdentityTransform) String() string { return "identity" } + +func (IdentityTransform) ResultType(t Type) Type { return t } + +// VoidTransform is a transformation that always returns nil. +type VoidTransform struct{} + +func (t VoidTransform) MarshalText() ([]byte, error) { + return []byte(t.String()), nil +} + +func (VoidTransform) String() string { return "void" } + +func (VoidTransform) ResultType(t Type) Type { return t } + +// BucketTransform transforms values into a bucket partition value. It is +// parameterized by a number of buckets. Bucket partition transforms use +// a 32-bit hash of the source value to produce a positive value by mod +// the bucket number. +type BucketTransform struct { + NumBuckets int +} + +func (t BucketTransform) MarshalText() ([]byte, error) { + return []byte(t.String()), nil +} + +func (t BucketTransform) String() string { return fmt.Sprintf("bucket[%d]", t.NumBuckets) } + +func (BucketTransform) ResultType(Type) Type { return PrimitiveTypes.Int32 } + +// TruncateTransform is a transformation for truncating a value to a specified width. +type TruncateTransform struct { + Width int +} + +func (t TruncateTransform) MarshalText() ([]byte, error) { + return []byte(t.String()), nil +} + +func (t TruncateTransform) String() string { return fmt.Sprintf("truncate[%d]", t.Width) } + +func (TruncateTransform) ResultType(t Type) Type { return t } + +// YearTransform transforms a datetime value into a year value. 
+type YearTransform struct{} + +func (t YearTransform) MarshalText() ([]byte, error) { + return []byte(t.String()), nil +} + +func (YearTransform) String() string { return "year" } + +func (YearTransform) ResultType(Type) Type { return PrimitiveTypes.Int32 } + +// MonthTransform transforms a datetime value into a month value. +type MonthTransform struct{} + +func (t MonthTransform) MarshalText() ([]byte, error) { + return []byte(t.String()), nil +} + +func (MonthTransform) String() string { return "month" } + +func (MonthTransform) ResultType(Type) Type { return PrimitiveTypes.Int32 } + +// DayTransform transforms a datetime value into a date value. +type DayTransform struct{} + +func (t DayTransform) MarshalText() ([]byte, error) { + return []byte(t.String()), nil +} + +func (DayTransform) String() string { return "day" } + +func (DayTransform) ResultType(Type) Type { return PrimitiveTypes.Date } + +// HourTransform transforms a datetime value into an hour value. +type HourTransform struct{} + +func (t HourTransform) MarshalText() ([]byte, error) { + return []byte(t.String()), nil +} + +func (HourTransform) String() string { return "hour" } + +func (HourTransform) ResultType(Type) Type { return PrimitiveTypes.Int32 } diff --git a/transforms_test.go b/transforms_test.go new file mode 100644 index 0000000..a455ede --- /dev/null +++ b/transforms_test.go @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package iceberg_test + +import ( + "strings" + "testing" + + "github.com/apache/iceberg-go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseTransform(t *testing.T) { + tests := []struct { + toparse string + expected iceberg.Transform + }{ + {"identity", iceberg.IdentityTransform{}}, + {"IdEnTiTy", iceberg.IdentityTransform{}}, + {"void", iceberg.VoidTransform{}}, + {"VOId", iceberg.VoidTransform{}}, + {"year", iceberg.YearTransform{}}, + {"yEAr", iceberg.YearTransform{}}, + {"month", iceberg.MonthTransform{}}, + {"MONtH", iceberg.MonthTransform{}}, + {"day", iceberg.DayTransform{}}, + {"DaY", iceberg.DayTransform{}}, + {"hour", iceberg.HourTransform{}}, + {"hOuR", iceberg.HourTransform{}}, + {"bucket[5]", iceberg.BucketTransform{NumBuckets: 5}}, + {"bucket[100]", iceberg.BucketTransform{NumBuckets: 100}}, + {"BUCKET[5]", iceberg.BucketTransform{NumBuckets: 5}}, + {"bUCKeT[100]", iceberg.BucketTransform{NumBuckets: 100}}, + {"truncate[10]", iceberg.TruncateTransform{Width: 10}}, + {"truncate[255]", iceberg.TruncateTransform{Width: 255}}, + {"TRUNCATE[10]", iceberg.TruncateTransform{Width: 10}}, + {"tRuNCATe[255]", iceberg.TruncateTransform{Width: 255}}, + } + + for _, tt := range tests { + t.Run(tt.toparse, func(t *testing.T) { + transform, err := iceberg.ParseTransform(tt.toparse) + require.NoError(t, err) + assert.Equal(t, tt.expected, transform) + + txt, err := transform.MarshalText() + assert.NoError(t, err) + assert.Equal(t, strings.ToLower(tt.toparse), string(txt)) + }) + } + + errorTests 
:= []struct { + name string + toparse string + }{ + {"foobar", "foobar"}, + {"bucket no brackets", "bucket"}, + {"truncate no brackets", "truncate"}, + {"bucket no val", "bucket[]"}, + {"truncate no val", "truncate[]"}, + {"bucket neg", "bucket[-1]"}, + {"truncate neg", "truncate[-1]"}, + } + + for _, tt := range errorTests { + t.Run(tt.name, func(t *testing.T) { + tr, err := iceberg.ParseTransform(tt.toparse) + assert.Nil(t, tr) + assert.ErrorIs(t, err, iceberg.ErrInvalidTransform) + assert.ErrorContains(t, err, tt.toparse) + }) + } +} diff --git a/utils.go b/utils.go index ccb6bbc..907a35f 100644 --- a/utils.go +++ b/utils.go @@ -19,6 +19,7 @@ package iceberg import ( "runtime/debug" + "strings" "golang.org/x/exp/constraints" ) @@ -29,9 +30,9 @@ func init() { version = "(unknown version)" if info, ok := debug.ReadBuildInfo(); ok { for _, dep := range info.Deps { - switch { - case dep.Path == "github.com/apache/iceberg/go/iceberg": + if strings.HasPrefix(dep.Path, "github.com/apache/iceberg-go") { version = dep.Version + break } } }