From 245404e2d462f0eb214f42787cdfc4bf5956cd9f Mon Sep 17 00:00:00 2001 From: Chris Hoff Date: Mon, 12 Jun 2023 15:28:53 -0400 Subject: [PATCH] GH-32832: [Go] support building with tinygo (#35723) ### Rationale for this change To support compiling with tinygo which enables use of arrow in environments where binary size is important, like web assembly. ### What changes are included in this PR? Using an internal JSON package that uses `goccy/go-json` for regular builds as it does currently, but uses the native `encoding/json` for tinygo builds. This is necessary because go-json has a lot of code that is incompatible with tinygo. Remove dependency on `parquet` package from non-parquet code since it is also incompatible with tinygo. Other minor tweaks for compatibility with tinygo. ### Are these changes tested? Should we add a build step that compiles the example with tinygo? ### Are there any user-facing changes? None. * Closes: #32832 Lead-authored-by: Chris Casola Co-authored-by: Adam Gaynor Signed-off-by: Matt Topol --- .github/workflows/go.yml | 17 ++ ci/scripts/go_tinygo_example.sh | 27 +++ go/arrow/_examples/helloworld/main.go | 28 ++- go/arrow/_tools/tmpl/main.go | 3 +- go/arrow/array.go | 18 +- go/arrow/array/binary.go | 2 +- go/arrow/array/binarybuilder.go | 2 +- go/arrow/array/boolean.go | 2 +- go/arrow/array/booleanbuilder.go | 2 +- go/arrow/array/builder.go | 2 +- go/arrow/array/decimal128.go | 2 +- go/arrow/array/decimal256.go | 2 +- go/arrow/array/dictionary.go | 2 +- go/arrow/array/diff_test.go | 6 +- go/arrow/array/encoded.go | 30 +-- go/arrow/array/encoded_test.go | 5 +- go/arrow/array/extension.go | 25 ++- go/arrow/array/fixed_size_list.go | 2 +- go/arrow/array/fixedsize_binary.go | 2 +- go/arrow/array/fixedsize_binarybuilder.go | 2 +- go/arrow/array/float16.go | 2 +- go/arrow/array/float16_builder.go | 2 +- go/arrow/array/interval.go | 2 +- go/arrow/array/json_reader.go | 2 +- go/arrow/array/list.go | 2 +- go/arrow/array/map.go | 2 +- 
go/arrow/array/null.go | 2 +- go/arrow/array/numeric.gen.go | 2 +- go/arrow/array/numeric.gen.go.tmpl | 8 +- go/arrow/array/numeric_test.go | 2 +- go/arrow/array/numericbuilder.gen.go | 2 +- go/arrow/array/numericbuilder.gen.go.tmpl | 8 +- go/arrow/array/record.go | 2 +- go/arrow/array/string.go | 28 +-- go/arrow/array/struct.go | 2 +- go/arrow/array/timestamp.go | 2 +- go/arrow/array/union.go | 2 +- go/arrow/array/util.go | 43 ++-- go/arrow/array/util_test.go | 2 +- go/arrow/bitutil/bitutil.go | 13 +- go/arrow/compute/arithmetic_test.go | 6 +- go/arrow/compute/cast_test.go | 8 +- go/arrow/datatype_fixedwidth.go | 3 +- go/arrow/doc.go | 2 + go/arrow/internal/arrjson/arrjson.go | 2 +- go/arrow/internal/arrjson/reader.go | 2 +- go/arrow/internal/arrjson/writer.go | 2 +- go/arrow/memory/checked_allocator.go | 3 + go/arrow/record.go | 2 +- go/arrow/type_traits_decimal128.go | 16 +- go/arrow/type_traits_decimal256.go | 16 +- go/arrow/type_traits_float16.go | 18 +- go/arrow/type_traits_interval.go | 48 +---- go/arrow/type_traits_numeric.gen.go | 240 +++------------------- go/arrow/type_traits_numeric.gen.go.tmpl | 16 +- go/arrow/type_traits_timestamp.go | 16 +- go/internal/hashing/hash_string.go | 2 +- go/internal/hashing/hash_string_go1.19.go | 2 +- go/internal/hashing/xxh3_memo_table.go | 28 ++- go/internal/json/json.go | 51 +++++ go/internal/json/json_tinygo.go | 51 +++++ go/internal/types/extension_types.go | 3 +- go/internal/types/extension_types_test.go | 2 +- go/parquet/cmd/parquet_reader/main.go | 3 +- go/parquet/schema/logical_types.go | 2 +- go/parquet/schema/logical_types_test.go | 2 +- go/parquet/types.go | 8 + 67 files changed, 382 insertions(+), 483 deletions(-) create mode 100755 ci/scripts/go_tinygo_example.sh create mode 100644 go/internal/json/json.go create mode 100644 go/internal/json/json_tinygo.go diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index eee2c8cdfc97d..5edc579e69866 100644 --- a/.github/workflows/go.yml +++ 
b/.github/workflows/go.yml @@ -394,3 +394,20 @@ jobs: - name: Test shell: bash run: ci/scripts/go_test.sh $(pwd) + + tinygo: + name: TinyGo + runs-on: ubuntu-latest + if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + env: + TINYGO_VERSION: 0.27.0 + timeout-minutes: 60 + steps: + - name: Checkout Arrow + uses: actions/checkout@v3 + with: + fetch-depth: 0 + submodules: recursive + - name: Build and Run Example + run: | + docker run --rm -v $(pwd)/go:/src -v $(pwd)/ci/scripts:/ci-scripts "tinygo/tinygo:$TINYGO_VERSION" /ci-scripts/go_tinygo_example.sh diff --git a/ci/scripts/go_tinygo_example.sh b/ci/scripts/go_tinygo_example.sh new file mode 100755 index 0000000000000..7bde56226db7b --- /dev/null +++ b/ci/scripts/go_tinygo_example.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +cd ~ +pushd /src +tinygo build -tags noasm -o ~/example_tinygo arrow/_examples/helloworld/main.go +popd + +./example_tinygo diff --git a/go/arrow/_examples/helloworld/main.go b/go/arrow/_examples/helloworld/main.go index 3c940ad2fe977..358214464659d 100644 --- a/go/arrow/_examples/helloworld/main.go +++ b/go/arrow/_examples/helloworld/main.go @@ -17,16 +17,36 @@ package main import ( + "os" + + "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/math" "github.com/apache/arrow/go/v13/arrow/memory" ) func main() { - fb := array.NewFloat64Builder(memory.DefaultAllocator) + schema := arrow.NewSchema([]arrow.Field{ + {Name: "intField", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, + {Name: "stringField", Type: arrow.BinaryTypes.String, Nullable: false}, + {Name: "floatField", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, + }, nil) + + builder := array.NewRecordBuilder(memory.DefaultAllocator, schema) + defer builder.Release() + + builder.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2, 3, 4, 5}, nil) + builder.Field(1).(*array.StringBuilder).AppendValues([]string{"a", "b", "c", "d", "e"}, nil) + builder.Field(2).(*array.Float64Builder).AppendValues([]float64{1, 0, 3, 0, 5}, []bool{true, false, true, false, true}) + + rec := builder.NewRecord() + defer rec.Release() - fb.AppendValues([]float64{1, 3, 5, 7, 9, 11}, nil) + tbl := array.NewTableFromRecords(schema, []arrow.Record{rec}) + defer tbl.Release() - vec := fb.NewFloat64Array() - math.Float64.Sum(vec) + sum := math.Float64.Sum(tbl.Column(2).Data().Chunk(0).(*array.Float64)) + if sum != 9 { + defer os.Exit(1) + } } diff --git a/go/arrow/_tools/tmpl/main.go b/go/arrow/_tools/tmpl/main.go index 436d48fb63701..ddbbfb78a035b 100644 --- a/go/arrow/_tools/tmpl/main.go +++ b/go/arrow/_tools/tmpl/main.go @@ -18,7 +18,6 @@ package main import ( "bytes" - "encoding/json" "flag" "fmt" "go/format" @@ -28,6 +27,8 @@ import ( 
"path/filepath" "strings" "text/template" + + "github.com/apache/arrow/go/v13/internal/json" ) const Ext = ".tmpl" diff --git a/go/arrow/array.go b/go/arrow/array.go index beeaa5ed6472d..c08c568fdf4b7 100644 --- a/go/arrow/array.go +++ b/go/arrow/array.go @@ -17,10 +17,10 @@ package arrow import ( - "encoding/json" "fmt" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/apache/arrow/go/v13/internal/json" ) // ArrayData is the underlying memory and metadata of an Arrow array, corresponding @@ -32,21 +32,21 @@ import ( // which allows for manipulating the internal data and casting. For example, // one could cast the raw bytes from int64 to float64 like so: // -// arrdata := GetMyInt64Data().Data() -// newdata := array.NewData(arrow.PrimitiveTypes.Float64, arrdata.Len(), -// arrdata.Buffers(), nil, arrdata.NullN(), arrdata.Offset()) -// defer newdata.Release() -// float64arr := array.NewFloat64Data(newdata) -// defer float64arr.Release() +// arrdata := GetMyInt64Data().Data() +// newdata := array.NewData(arrow.PrimitiveTypes.Float64, arrdata.Len(), +// arrdata.Buffers(), nil, arrdata.NullN(), arrdata.Offset()) +// defer newdata.Release() +// float64arr := array.NewFloat64Data(newdata) +// defer float64arr.Release() // // This is also useful in an analytics setting where memory may be reused. 
For // example, if we had a group of operations all returning float64 such as: // -// Log(Sqrt(Expr(arr))) +// Log(Sqrt(Expr(arr))) // // The low-level implementations could have signatures such as: // -// func Log(values arrow.ArrayData) arrow.ArrayData +// func Log(values arrow.ArrayData) arrow.ArrayData // // Another example would be a function that consumes one or more memory buffers // in an input array and replaces them with newly-allocated data, changing the diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go index 787059f0718dd..3ed9ff3cd00ac 100644 --- a/go/arrow/array/binary.go +++ b/go/arrow/array/binary.go @@ -24,7 +24,7 @@ import ( "unsafe" "github.com/apache/arrow/go/v13/arrow" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) type BinaryLike interface { diff --git a/go/arrow/array/binarybuilder.go b/go/arrow/array/binarybuilder.go index f86dae8504681..31bc35240bc10 100644 --- a/go/arrow/array/binarybuilder.go +++ b/go/arrow/array/binarybuilder.go @@ -27,7 +27,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // A BinaryBuilder is used to build a Binary array using the Append methods. diff --git a/go/arrow/array/boolean.go b/go/arrow/array/boolean.go index 0ad9c9b06715e..7584aec98fd6b 100644 --- a/go/arrow/array/boolean.go +++ b/go/arrow/array/boolean.go @@ -24,7 +24,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/bitutil" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // A type which represents an immutable sequence of boolean values. 
diff --git a/go/arrow/array/booleanbuilder.go b/go/arrow/array/booleanbuilder.go index c81e44cad4879..1264c53542d01 100644 --- a/go/arrow/array/booleanbuilder.go +++ b/go/arrow/array/booleanbuilder.go @@ -27,7 +27,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/bitutil" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) type BooleanBuilder struct { diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go index ca8fbe99106d7..1a24e35be2ec1 100644 --- a/go/arrow/array/builder.go +++ b/go/arrow/array/builder.go @@ -23,7 +23,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/bitutil" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) const ( diff --git a/go/arrow/array/decimal128.go b/go/arrow/array/decimal128.go index f943e0c3da442..23d9f5b69d857 100644 --- a/go/arrow/array/decimal128.go +++ b/go/arrow/array/decimal128.go @@ -30,7 +30,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/decimal128" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // A type which represents an immutable sequence of 128-bit decimal values. diff --git a/go/arrow/array/decimal256.go b/go/arrow/array/decimal256.go index 8cd4476030958..9890c280a02cb 100644 --- a/go/arrow/array/decimal256.go +++ b/go/arrow/array/decimal256.go @@ -30,7 +30,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/decimal256" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // Decimal256 is a type that represents an immutable sequence of 256-bit decimal values. 
diff --git a/go/arrow/array/dictionary.go b/go/arrow/array/dictionary.go index 48f37e91d1012..12223a39431d1 100644 --- a/go/arrow/array/dictionary.go +++ b/go/arrow/array/dictionary.go @@ -33,8 +33,8 @@ import ( "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" "github.com/apache/arrow/go/v13/internal/hashing" + "github.com/apache/arrow/go/v13/internal/json" "github.com/apache/arrow/go/v13/internal/utils" - "github.com/goccy/go-json" ) // Dictionary represents the type for dictionary-encoded data with a data diff --git a/go/arrow/array/diff_test.go b/go/arrow/array/diff_test.go index 058dd89ca4315..eeec38d476259 100644 --- a/go/arrow/array/diff_test.go +++ b/go/arrow/array/diff_test.go @@ -17,7 +17,6 @@ package array_test import ( - "encoding/json" "fmt" "math/rand" "reflect" @@ -27,6 +26,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/apache/arrow/go/v13/internal/json" "github.com/apache/arrow/go/v13/internal/types" ) @@ -640,13 +640,13 @@ func TestEdits_UnifiedDiff(t *testing.T) { dataType: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32), baseJSON: `[ [{"key": "foo", "value": 2}, {"key": "bar", "value": 3}, {"key": "baz", "value": 1}], - [{"key": "quux", "value": 13}] + [{"key": "quux", "value": 13}], [] ]`, targetJSON: `[ [{"key": "foo", "value": 2}, {"key": "bar", "value": 3}, {"key": "baz", "value": 1}], [{"key": "ytho", "value": 11}], - [{"key": "quux", "value": 13}] + [{"key": "quux", "value": 13}], [] ]`, want: `@@ -1, +1 @@ diff --git a/go/arrow/array/encoded.go b/go/arrow/array/encoded.go index 5e9fa652590b4..bd2933695c9e3 100644 --- a/go/arrow/array/encoded.go +++ b/go/arrow/array/encoded.go @@ -27,8 +27,8 @@ import ( "github.com/apache/arrow/go/v13/arrow/encoded" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" + 
"github.com/apache/arrow/go/v13/internal/json" "github.com/apache/arrow/go/v13/internal/utils" - "github.com/goccy/go-json" ) // RunEndEncoded represents an array containing two children: @@ -74,22 +74,24 @@ func (r *RunEndEncoded) Release() { // run, only over the range of run values inside the logical offset/length // range of the parent array. // -// Example +// # Example // // For this array: -// RunEndEncoded: { Offset: 150, Length: 1500 } -// RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ] -// Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ] +// +// RunEndEncoded: { Offset: 150, Length: 1500 } +// RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ] +// Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ] // // LogicalValuesArray will return the following array: -// [ "f", "g" ] +// +// [ "f", "g" ] // // This is because the offset of 150 tells it to skip the values until // "f" which corresponds with the logical offset (the run from 10 - 1000), // and stops after "g" because the length + offset goes to 1650 which is // within the run from 1000 - 1750, corresponding to the "g" value. // -// Note +// # Note // // The return from this needs to be Released. func (r *RunEndEncoded) LogicalValuesArray() arrow.Array { @@ -109,15 +111,17 @@ func (r *RunEndEncoded) LogicalValuesArray() arrow.Array { // that are adjusted so the new array can have an offset of 0. As a result // this method can be expensive to call for an array with a non-zero offset. 
// -// Example +// # Example // // For this array: -// RunEndEncoded: { Offset: 150, Length: 1500 } -// RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ] -// Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ] +// +// RunEndEncoded: { Offset: 150, Length: 1500 } +// RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ] +// Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ] // // LogicalRunEndsArray will return the following array: -// [ 850, 1500 ] +// +// [ 850, 1500 ] // // This is because the offset of 150 tells us to skip all run-ends less // than 150 (by finding the physical offset), and we adjust the run-ends @@ -125,7 +129,7 @@ func (r *RunEndEncoded) LogicalValuesArray() arrow.Array { // so we know we don't want to go past the 1750 run end. Thus the last // run-end is determined by doing: min(1750 - 150, 1500) = 1500. // -// Note +// # Note // // The return from this needs to be Released func (r *RunEndEncoded) LogicalRunEndsArray(mem memory.Allocator) arrow.Array { diff --git a/go/arrow/array/encoded_test.go b/go/arrow/array/encoded_test.go index c8be6d193ae64..1043af40e9732 100644 --- a/go/arrow/array/encoded_test.go +++ b/go/arrow/array/encoded_test.go @@ -17,13 +17,14 @@ package array_test import ( - "encoding/json" "strings" "testing" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/apache/arrow/go/v13/internal/json" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -435,7 +436,7 @@ func TestRunEndEncodedUnmarshalNestedJSON(t *testing.T) { defer bldr.Release() const testJSON = ` - [null, [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, null, 3], [4, 5, null], null, null, + [null, [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, null, 3], [4, 5, null], null, null, [4, 5, null], [4, 5, null], [4, 5, null]] ` diff --git a/go/arrow/array/extension.go b/go/arrow/array/extension.go index 91c45c740497c..7ed4d79c99d7c 100644 --- a/go/arrow/array/extension.go +++ 
b/go/arrow/array/extension.go @@ -22,7 +22,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // ExtensionArray is the interface that needs to be implemented to handle @@ -104,10 +104,9 @@ func NewExtensionData(data arrow.ArrayData) ExtensionArray { // ExtensionArrayBase is the base struct for user-defined Extension Array types // and must be embedded in any user-defined extension arrays like so: // -// type UserDefinedArray struct { -// array.ExtensionArrayBase -// } -// +// type UserDefinedArray struct { +// array.ExtensionArrayBase +// } type ExtensionArrayBase struct { array storage arrow.Array @@ -196,18 +195,18 @@ type ExtensionBuilder struct { // the appropriate extension array type and set the storage correctly, resetting the builder for // reuse. // -// Example +// # Example // // Simple example assuming an extension type of a UUID defined as a FixedSizeBinary(16) was registered // using the type name "uuid": // -// uuidType := arrow.GetExtensionType("uuid") -// bldr := array.NewExtensionBuilder(memory.DefaultAllocator, uuidType) -// defer bldr.Release() -// uuidBldr := bldr.StorageBuilder().(*array.FixedSizeBinaryBuilder) -// /* build up the fixed size binary array as usual via Append/AppendValues */ -// uuidArr := bldr.NewExtensionArray() -// defer uuidArr.Release() +// uuidType := arrow.GetExtensionType("uuid") +// bldr := array.NewExtensionBuilder(memory.DefaultAllocator, uuidType) +// defer bldr.Release() +// uuidBldr := bldr.StorageBuilder().(*array.FixedSizeBinaryBuilder) +// /* build up the fixed size binary array as usual via Append/AppendValues */ +// uuidArr := bldr.NewExtensionArray() +// defer uuidArr.Release() // // Because the storage builder is embedded in the Extension builder it also means // that any of the functions available on the Builder interface can be called on diff --git 
a/go/arrow/array/fixed_size_list.go b/go/arrow/array/fixed_size_list.go index cadcfeb6038ed..c049a11ddadac 100644 --- a/go/arrow/array/fixed_size_list.go +++ b/go/arrow/array/fixed_size_list.go @@ -26,7 +26,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/bitutil" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // FixedSizeList represents an immutable sequence of N array values. diff --git a/go/arrow/array/fixedsize_binary.go b/go/arrow/array/fixedsize_binary.go index c7f0b6f47913e..c55cd24152253 100644 --- a/go/arrow/array/fixedsize_binary.go +++ b/go/arrow/array/fixedsize_binary.go @@ -23,7 +23,7 @@ import ( "strings" "github.com/apache/arrow/go/v13/arrow" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // A type which represents an immutable sequence of fixed-length binary strings. diff --git a/go/arrow/array/fixedsize_binarybuilder.go b/go/arrow/array/fixedsize_binarybuilder.go index 5939905306bb2..ba7a8dd794ebc 100644 --- a/go/arrow/array/fixedsize_binarybuilder.go +++ b/go/arrow/array/fixedsize_binarybuilder.go @@ -26,7 +26,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // A FixedSizeBinaryBuilder is used to build a FixedSizeBinary array using the Append methods. diff --git a/go/arrow/array/float16.go b/go/arrow/array/float16.go index 9ccecd3dcd121..93045857e1bf4 100644 --- a/go/arrow/array/float16.go +++ b/go/arrow/array/float16.go @@ -22,7 +22,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/float16" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // A type which represents an immutable sequence of Float16 values. 
diff --git a/go/arrow/array/float16_builder.go b/go/arrow/array/float16_builder.go index 01c04f04e088d..ae65add9d63d6 100644 --- a/go/arrow/array/float16_builder.go +++ b/go/arrow/array/float16_builder.go @@ -28,7 +28,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/float16" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) type Float16Builder struct { diff --git a/go/arrow/array/interval.go b/go/arrow/array/interval.go index c105e04f8cc09..a6ae678fb313b 100644 --- a/go/arrow/array/interval.go +++ b/go/arrow/array/interval.go @@ -27,7 +27,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/bitutil" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) func NewIntervalData(data arrow.ArrayData) arrow.Array { diff --git a/go/arrow/array/json_reader.go b/go/arrow/array/json_reader.go index 393c0094beca8..25f8b5db68ed5 100644 --- a/go/arrow/array/json_reader.go +++ b/go/arrow/array/json_reader.go @@ -25,7 +25,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) type Option func(config) diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go index ddec6c5307e90..e7b0d3f132caf 100644 --- a/go/arrow/array/list.go +++ b/go/arrow/array/list.go @@ -26,7 +26,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/bitutil" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) type ListLike interface { diff --git a/go/arrow/array/map.go b/go/arrow/array/map.go index 904ebd5088fbf..e2d074a48d252 100644 --- 
a/go/arrow/array/map.go +++ b/go/arrow/array/map.go @@ -22,7 +22,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // Map represents an immutable sequence of Key/Value structs. It is a diff --git a/go/arrow/array/null.go b/go/arrow/array/null.go index 80cef00777da9..e207b1f122424 100644 --- a/go/arrow/array/null.go +++ b/go/arrow/array/null.go @@ -26,7 +26,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // Null represents an immutable, degenerate array with no physical storage. diff --git a/go/arrow/array/numeric.gen.go b/go/arrow/array/numeric.gen.go index 4ceb63d2c4926..dd23f63cc31dc 100644 --- a/go/arrow/array/numeric.gen.go +++ b/go/arrow/array/numeric.gen.go @@ -24,7 +24,7 @@ import ( "strings" "github.com/apache/arrow/go/v13/arrow" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // A type which represents an immutable sequence of int64 values. 
diff --git a/go/arrow/array/numeric.gen.go.tmpl b/go/arrow/array/numeric.gen.go.tmpl index 44b353069fd05..cd5d920f12dfe 100644 --- a/go/arrow/array/numeric.gen.go.tmpl +++ b/go/arrow/array/numeric.gen.go.tmpl @@ -21,8 +21,8 @@ import ( "strings" "time" - "github.com/apache/arrow/go/v13/arrow" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/internal/json" ) {{range .In}} @@ -92,7 +92,7 @@ func (a *{{.Name}}) ValueStr(i int) string { return a.values[i].FormattedString(a.DataType().(*{{.QualifiedType}}Type).Unit) {{else if (eq .Name "Duration") -}} // return value and suffix as a string such as "12345ms" - return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*{{.QualifiedType}}Type).Unit) + return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*{{.QualifiedType}}Type).Unit) {{else if or (eq .Name "Int8") (eq .Name "Int16") (eq .Name "Int32") (eq .Name "Int64") -}} return strconv.FormatInt(int64(a.Value(i)), 10) {{else if or (eq .Name "Uint8") (eq .Name "Uint16") (eq .Name "Uint32") (eq .Name "Uint64") -}} @@ -116,7 +116,7 @@ func (a *{{.Name}}) GetOneForMarshal(i int) interface{} { return a.values[i].ToTime(a.DataType().(*{{.QualifiedType}}Type).Unit).Format("15:04:05.999999999") {{else if (eq .Name "Duration") -}} // return value and suffix as a string such as "12345ms" - return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*{{.QualifiedType}}Type).Unit.String()) + return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*{{.QualifiedType}}Type).Unit.String()) {{else if (eq .Size "1")}} return float64(a.values[i]) // prevent uint8 from being seen as binary data {{else}} diff --git a/go/arrow/array/numeric_test.go b/go/arrow/array/numeric_test.go index 962b4eb6a598f..dceb00c7ba287 100644 --- a/go/arrow/array/numeric_test.go +++ b/go/arrow/array/numeric_test.go @@ -17,7 +17,6 @@ package array_test import ( - "encoding/json" "math" "reflect" "testing" @@ -25,6 +24,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" 
"github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/apache/arrow/go/v13/internal/json" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/numericbuilder.gen.go b/go/arrow/array/numericbuilder.gen.go index 4f9e68e3d366c..fa89fe2275920 100644 --- a/go/arrow/array/numericbuilder.gen.go +++ b/go/arrow/array/numericbuilder.gen.go @@ -31,7 +31,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/bitutil" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) type Int64Builder struct { diff --git a/go/arrow/array/numericbuilder.gen.go.tmpl b/go/arrow/array/numericbuilder.gen.go.tmpl index c4c12e9b3d176..8e4f123364aa4 100644 --- a/go/arrow/array/numericbuilder.gen.go.tmpl +++ b/go/arrow/array/numericbuilder.gen.go.tmpl @@ -20,8 +20,8 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/bitutil" "github.com/apache/arrow/go/v13/arrow/internal/debug" - "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/apache/arrow/go/v13/internal/json" ) {{range .In}} @@ -206,7 +206,7 @@ func (b *{{.Name}}Builder) AppendValueFromString(s string) error { b.AppendNull() return err } - b.Append(arrow.Date64FromTime(tm)) + b.Append(arrow.Date64FromTime(tm)) {{else if or (eq .Name "Time32") -}} val, err := arrow.Time32FromString(s, b.dtype.Unit) if err != nil { @@ -339,7 +339,7 @@ func (b *{{.Name}}Builder) UnmarshalOne(dec *json.Decoder) error { break } } - + return &json.UnmarshalTypeError{ Value: v, Type: reflect.TypeOf({{.QualifiedType}}(0)), diff --git a/go/arrow/array/record.go b/go/arrow/array/record.go index be32348e9fa0f..67c652cb99c02 100644 --- a/go/arrow/array/record.go +++ b/go/arrow/array/record.go @@ -25,7 +25,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" 
"github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // RecordReader reads a stream of records. diff --git a/go/arrow/array/string.go b/go/arrow/array/string.go index dacf66572e93d..8187edd612f72 100644 --- a/go/arrow/array/string.go +++ b/go/arrow/array/string.go @@ -25,7 +25,7 @@ import ( "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // String represents an immutable sequence of variable-length UTF-8 strings. @@ -79,16 +79,13 @@ func (a *String) ValueOffsets() []int32 { return a.offsets[beg:end] } -func (a *String) ValueBytes() (ret []byte) { +func (a *String) ValueBytes() []byte { beg := a.array.data.offset end := beg + a.array.data.length - data := a.values[a.offsets[beg]:a.offsets[end]] - - s := (*reflect.SliceHeader)(unsafe.Pointer(&ret)) - s.Data = (*reflect.StringHeader)(unsafe.Pointer(&data)).Data - s.Len = len(data) - s.Cap = len(data) - return + if a.array.data.buffers[2] != nil { + return a.array.data.buffers[2].Bytes()[a.offsets[beg]:a.offsets[end]] + } + return nil } func (a *String) String() string { @@ -221,16 +218,13 @@ func (a *LargeString) ValueOffsets() []int64 { return a.offsets[beg:end] } -func (a *LargeString) ValueBytes() (ret []byte) { +func (a *LargeString) ValueBytes() []byte { beg := a.array.data.offset end := beg + a.array.data.length - data := a.values[a.offsets[beg]:a.offsets[end]] - - s := (*reflect.SliceHeader)(unsafe.Pointer(&ret)) - s.Data = (*reflect.StringHeader)(unsafe.Pointer(&data)).Data - s.Len = len(data) - s.Cap = len(data) - return + if a.array.data.buffers[2] != nil { + return a.array.data.buffers[2].Bytes()[a.offsets[beg]:a.offsets[end]] + } + return nil } func (a *LargeString) String() string { diff --git a/go/arrow/array/struct.go b/go/arrow/array/struct.go index 
9ebc794006ad5..1ce7dc487f853 100644 --- a/go/arrow/array/struct.go +++ b/go/arrow/array/struct.go @@ -27,7 +27,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/bitutil" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // Struct represents an ordered sequence of relative types. diff --git a/go/arrow/array/timestamp.go b/go/arrow/array/timestamp.go index b508006274128..cb7ad89e0d129 100644 --- a/go/arrow/array/timestamp.go +++ b/go/arrow/array/timestamp.go @@ -28,7 +28,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/bitutil" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // Timestamp represents an immutable sequence of arrow.Timestamp values. diff --git a/go/arrow/array/union.go b/go/arrow/array/union.go index c868ca2599681..cbc09951012f6 100644 --- a/go/arrow/array/union.go +++ b/go/arrow/array/union.go @@ -30,7 +30,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/memory" "github.com/apache/arrow/go/v13/internal/bitutils" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) // Union is a convenience interface to encompass both Sparse and Dense diff --git a/go/arrow/array/util.go b/go/arrow/array/util.go index b8521e12faf57..bad9897b5910e 100644 --- a/go/arrow/array/util.go +++ b/go/arrow/array/util.go @@ -26,7 +26,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/bitutil" "github.com/apache/arrow/go/v13/arrow/memory" "github.com/apache/arrow/go/v13/internal/hashing" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" ) func min(a, b int) int { @@ -82,15 +82,16 @@ func WithUseNumber() FromJSONOption { // using the json.Marshal function // // The JSON provided must be formatted in one of two 
ways: -// Default: the top level of the json must be a list which matches the type specified exactly -// Example: `[1, 2, 3, 4, 5]` for any integer type or `[[...], null, [], .....]` for a List type -// Struct arrays are represented a list of objects: `[{"foo": 1, "bar": "moo"}, {"foo": 5, "bar": "baz"}]` // -// Using WithMultipleDocs: -// If the JSON provided is multiple newline separated json documents, then use this option -// and each json document will be treated as a single row of the array. This is most useful for record batches -// and interacting with other processes that use json. For example: -// `{"col1": 1, "col2": "row1", "col3": ...}\n{"col1": 2, "col2": "row2", "col3": ...}\n.....` +// Default: the top level of the json must be a list which matches the type specified exactly +// Example: `[1, 2, 3, 4, 5]` for any integer type or `[[...], null, [], .....]` for a List type +// Struct arrays are represented a list of objects: `[{"foo": 1, "bar": "moo"}, {"foo": 5, "bar": "baz"}]` +// +// Using WithMultipleDocs: +// If the JSON provided is multiple newline separated json documents, then use this option +// and each json document will be treated as a single row of the array. This is most useful for record batches +// and interacting with other processes that use json. For example: +// `{"col1": 1, "col2": "row1", "col3": ...}\n{"col1": 2, "col2": "row2", "col3": ...}\n.....` // // Duration values get formated upon marshalling as a string consisting of their numeric // value followed by the unit suffix such as "10s" for a value of 10 and unit of Seconds. @@ -100,23 +101,25 @@ func WithUseNumber() FromJSONOption { // to the same values which are output. 
// // Interval types are marshalled / unmarshalled as follows: -// MonthInterval is marshalled as an object with the format: -// { "months": #} -// DayTimeInterval is marshalled using Go's regular marshalling of structs: -// { "days": #, "milliseconds": # } -// MonthDayNanoInterval values are marshalled the same as DayTime using Go's struct marshalling: -// { "months": #, "days": #, "nanoseconds": # } +// +// MonthInterval is marshalled as an object with the format: +// { "months": #} +// DayTimeInterval is marshalled using Go's regular marshalling of structs: +// { "days": #, "milliseconds": # } +// MonthDayNanoInterval values are marshalled the same as DayTime using Go's struct marshalling: +// { "months": #, "days": #, "nanoseconds": # } // // Times use a format of HH:MM or HH:MM:SS[.zzz] where the fractions of a second cannot // exceed the precision allowed by the time unit, otherwise unmarshalling will error. // -// Dates use YYYY-MM-DD format +// Dates use YYYY-MM-DD format. // // Timestamps use RFC3339Nano format except without a timezone, all of the following are valid: -// YYYY-MM-DD -// YYYY-MM-DD[T]HH -// YYYY-MM-DD[T]HH:MM -// YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzzzz] +// +// YYYY-MM-DD +// YYYY-MM-DD[T]HH +// YYYY-MM-DD[T]HH:MM +// YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzzzz] // // The fractions of a second cannot exceed the precision allowed by the timeunit of the datatype.
// diff --git a/go/arrow/array/util_test.go b/go/arrow/array/util_test.go index 8822c13bf5525..da11d175e4ad3 100644 --- a/go/arrow/array/util_test.go +++ b/go/arrow/array/util_test.go @@ -31,7 +31,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/decimal256" "github.com/apache/arrow/go/v13/arrow/internal/arrdata" "github.com/apache/arrow/go/v13/arrow/memory" - "github.com/goccy/go-json" + "github.com/apache/arrow/go/v13/internal/json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/bitutil/bitutil.go b/go/arrow/bitutil/bitutil.go index 92be9d1a54565..a5d74f2b325a2 100644 --- a/go/arrow/bitutil/bitutil.go +++ b/go/arrow/bitutil/bitutil.go @@ -150,15 +150,12 @@ const ( ) func bytesToUint64(b []byte) []uint64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - var res []uint64 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / uint64SizeBytes - s.Cap = h.Cap / uint64SizeBytes + if cap(b) < uint64SizeBytes { + return nil + } - return res + h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + return unsafe.Slice((*uint64)(unsafe.Pointer(h.Data)), cap(b)/uint64SizeBytes)[:len(b)/uint64SizeBytes] } var ( diff --git a/go/arrow/compute/arithmetic_test.go b/go/arrow/compute/arithmetic_test.go index 7117ff8c3de96..c4e0c591cbf54 100644 --- a/go/arrow/compute/arithmetic_test.go +++ b/go/arrow/compute/arithmetic_test.go @@ -2621,7 +2621,7 @@ func (us *UnaryArithmeticSigned[T]) TestAbsoluteValue() { fn(`[]`, `[]`) // scalar/arrays with nulls fn(`[null]`, `[null]`) - fn(`[1, null -10]`, `[1, null, 10]`) + fn(`[1, null, -10]`, `[1, null, 10]`) us.assertUnaryOpScalars(compute.AbsoluteValue, us.makeNullScalar(), us.makeNullScalar()) // scalar/arrays with zeros fn(`[0, -0]`, `[0, 0]`) @@ -2663,7 +2663,7 @@ func (us *UnaryArithmeticSigned[T]) TestNegate() { fn(`[]`, `[]`) // scalar/arrays with nulls fn(`[null]`, `[null]`) - fn(`[1, null -10]`, `[-1, null, 10]`) + fn(`[1, null, -10]`, `[-1, null, 
10]`) // ordinary scalars/arrays (positive inputs) fn(`[1, 10, 127]`, `[-1, -10, -127]`) us.assertUnaryOpVals(compute.Negate, 1, -1) @@ -2781,7 +2781,7 @@ func (us *UnaryArithmeticFloating[T]) TestNegate() { fn(`[]`, `[]`) // scalar/arrays with nulls fn(`[null]`, `[null]`) - fn(`[1.5, null -10.25]`, `[-1.5, null, 10.25]`) + fn(`[1.5, null, -10.25]`, `[-1.5, null, 10.25]`) // ordinary scalars/arrays (positive inputs) fn(`[0.5, 10.123, 127.321]`, `[-0.5, -10.123, -127.321]`) us.assertUnaryOpVals(compute.Negate, 1.25, -1.25) diff --git a/go/arrow/compute/cast_test.go b/go/arrow/compute/cast_test.go index f79286a9220da..e1051a33dfa1d 100644 --- a/go/arrow/compute/cast_test.go +++ b/go/arrow/compute/cast_test.go @@ -657,7 +657,7 @@ func (c *CastSuite) TestDecimal128ToInt() { overflowNoTrunc, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, strings.NewReader(`[ - "12345678901234567890000.0000000000", + "12345678901234567890000.0000000000", "99999999999999999999999.0000000000", null]`), array.WithUseNumber()) defer overflowNoTrunc.Release() @@ -2589,7 +2589,7 @@ func (c *CastSuite) TestStructToDifferentNullabilityStruct() { } srcNonNull, _, err := array.FromJSON(c.mem, arrow.StructOf(fieldsSrcNonNullable...), strings.NewReader(`[ - {"a": 11, "b": 32, "c", 95}, + {"a": 11, "b": 32, "c": 95}, {"a": 23, "b": 46, "c": 11}, {"a": 56, "b": 37, "c": 44} ]`)) @@ -2603,7 +2603,7 @@ func (c *CastSuite) TestStructToDifferentNullabilityStruct() { } destNullable, _, err := array.FromJSON(c.mem, arrow.StructOf(fieldsDest1Nullable...), strings.NewReader(`[ - {"a": 11, "b": 32, "c", 95}, + {"a": 11, "b": 32, "c": 95}, {"a": 23, "b": 46, "c": 11}, {"a": 56, "b": 37, "c": 44} ]`)) @@ -2644,7 +2644,7 @@ func (c *CastSuite) TestStructToDifferentNullabilityStruct() { } srcNullable, _, err := array.FromJSON(c.mem, arrow.StructOf(fieldsSrcNullable...), strings.NewReader(`[ - {"a": 1, "b": 3, "c", 9}, + {"a": 1, "b": 3, "c": 9}, {"a": null, "b": 4, "c": 11}, {"a": 
5, "b": null, "c": 44} ]`)) diff --git a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go index 9f60149616296..d6550c1cf896d 100644 --- a/go/arrow/datatype_fixedwidth.go +++ b/go/arrow/datatype_fixedwidth.go @@ -17,11 +17,12 @@ package arrow import ( - "encoding/json" "fmt" "strconv" "time" + "github.com/apache/arrow/go/v13/internal/json" + "golang.org/x/xerrors" ) diff --git a/go/arrow/doc.go b/go/arrow/doc.go index 0df91ea447012..0d8272e7df33f 100644 --- a/go/arrow/doc.go +++ b/go/arrow/doc.go @@ -32,6 +32,8 @@ Requirements Despite the go.mod stating go1.18, everything except for the compute package is able to be built with go1.17 (and most is also compatible with go1.16). + +To build with tinygo include the noasm build tag. */ package arrow diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go index f25691f0cf681..2748120b0c4fc 100644 --- a/go/arrow/internal/arrjson/arrjson.go +++ b/go/arrow/internal/arrjson/arrjson.go @@ -21,7 +21,6 @@ package arrjson import ( "bytes" "encoding/hex" - "encoding/json" "fmt" "math/big" "strconv" @@ -36,6 +35,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/internal/dictutils" "github.com/apache/arrow/go/v13/arrow/ipc" "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/apache/arrow/go/v13/internal/json" ) type Schema struct { diff --git a/go/arrow/internal/arrjson/reader.go b/go/arrow/internal/arrjson/reader.go index f5c788ff5d7cd..6ae397782f4a0 100644 --- a/go/arrow/internal/arrjson/reader.go +++ b/go/arrow/internal/arrjson/reader.go @@ -17,7 +17,6 @@ package arrjson import ( - "encoding/json" "io" "sync/atomic" @@ -25,6 +24,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/arrio" "github.com/apache/arrow/go/v13/arrow/internal/debug" "github.com/apache/arrow/go/v13/arrow/internal/dictutils" + "github.com/apache/arrow/go/v13/internal/json" ) type Reader struct { diff --git a/go/arrow/internal/arrjson/writer.go b/go/arrow/internal/arrjson/writer.go index 
743fa366d071e..7800483a9d0e1 100644 --- a/go/arrow/internal/arrjson/writer.go +++ b/go/arrow/internal/arrjson/writer.go @@ -17,7 +17,6 @@ package arrjson import ( - "encoding/json" "fmt" "io" @@ -25,6 +24,7 @@ import ( "github.com/apache/arrow/go/v13/arrow/array" "github.com/apache/arrow/go/v13/arrow/arrio" "github.com/apache/arrow/go/v13/arrow/internal/dictutils" + "github.com/apache/arrow/go/v13/internal/json" ) const ( diff --git a/go/arrow/memory/checked_allocator.go b/go/arrow/memory/checked_allocator.go index b5b66b1cd9a5b..78a09a57d74ba 100644 --- a/go/arrow/memory/checked_allocator.go +++ b/go/arrow/memory/checked_allocator.go @@ -14,6 +14,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !tinygo +// +build !tinygo + package memory import ( diff --git a/go/arrow/record.go b/go/arrow/record.go index b93f1015b9688..1d9a1368f46c6 100644 --- a/go/arrow/record.go +++ b/go/arrow/record.go @@ -16,7 +16,7 @@ package arrow -import "encoding/json" +import "github.com/apache/arrow/go/v13/internal/json" // Record is a collection of equal-length arrays matching a particular Schema. // Also known as a RecordBatch in the spec and in some implementations. 
diff --git a/go/arrow/type_traits_decimal128.go b/go/arrow/type_traits_decimal128.go index 0d81cfdfd8ba1..dbe88ae007624 100644 --- a/go/arrow/type_traits_decimal128.go +++ b/go/arrow/type_traits_decimal128.go @@ -49,26 +49,14 @@ func (decimal128Traits) PutValue(b []byte, v decimal128.Num) { func (decimal128Traits) CastFromBytes(b []byte) []decimal128.Num { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []decimal128.Num - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Decimal128SizeBytes - s.Cap = h.Cap / Decimal128SizeBytes - - return res + return unsafe.Slice((*decimal128.Num)(unsafe.Pointer(h.Data)), cap(b)/Decimal128SizeBytes)[:len(b)/Decimal128SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (decimal128Traits) CastToBytes(b []decimal128.Num) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Decimal128SizeBytes - s.Cap = h.Cap * Decimal128SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Decimal128SizeBytes)[:len(b)*Decimal128SizeBytes] } // Copy copies src to dst. 
diff --git a/go/arrow/type_traits_decimal256.go b/go/arrow/type_traits_decimal256.go index 38d26ce1002a2..e2b35a7df22ec 100644 --- a/go/arrow/type_traits_decimal256.go +++ b/go/arrow/type_traits_decimal256.go @@ -46,25 +46,13 @@ func (decimal256Traits) PutValue(b []byte, v decimal256.Num) { func (decimal256Traits) CastFromBytes(b []byte) []decimal256.Num { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []decimal256.Num - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Decimal256SizeBytes - s.Cap = h.Cap / Decimal256SizeBytes - - return res + return unsafe.Slice((*decimal256.Num)(unsafe.Pointer(h.Data)), cap(b)/Decimal256SizeBytes)[:len(b)/Decimal256SizeBytes] } func (decimal256Traits) CastToBytes(b []decimal256.Num) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Decimal256SizeBytes - s.Cap = h.Cap * Decimal256SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Decimal256SizeBytes)[:len(b)*Decimal256SizeBytes] } func (decimal256Traits) Copy(dst, src []decimal256.Num) { copy(dst, src) } diff --git a/go/arrow/type_traits_float16.go b/go/arrow/type_traits_float16.go index 40e3d2e7466bf..cb7afc2e0c25b 100644 --- a/go/arrow/type_traits_float16.go +++ b/go/arrow/type_traits_float16.go @@ -20,8 +20,8 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v13/arrow/float16" "github.com/apache/arrow/go/v13/arrow/endian" + "github.com/apache/arrow/go/v13/arrow/float16" ) // Float16 traits @@ -48,26 +48,14 @@ func (float16Traits) PutValue(b []byte, v float16.Num) { func (float16Traits) CastFromBytes(b []byte) []float16.Num { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []float16.Num - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Float16SizeBytes - s.Cap = h.Cap / Float16SizeBytes - - return res + return 
unsafe.Slice((*float16.Num)(unsafe.Pointer(h.Data)), cap(b)/Float16SizeBytes)[:len(b)/Float16SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (float16Traits) CastToBytes(b []float16.Num) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Float16SizeBytes - s.Cap = h.Cap * Float16SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Float16SizeBytes)[:len(b)*Float16SizeBytes] } // Copy copies src to dst. diff --git a/go/arrow/type_traits_interval.go b/go/arrow/type_traits_interval.go index 12d298f6e8f56..aff39effe5c1b 100644 --- a/go/arrow/type_traits_interval.go +++ b/go/arrow/type_traits_interval.go @@ -59,26 +59,14 @@ func (monthTraits) PutValue(b []byte, v MonthInterval) { func (monthTraits) CastFromBytes(b []byte) []MonthInterval { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []MonthInterval - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / MonthIntervalSizeBytes - s.Cap = h.Cap / MonthIntervalSizeBytes - - return res + return unsafe.Slice((*MonthInterval)(unsafe.Pointer(h.Data)), cap(b)/MonthIntervalSizeBytes)[:len(b)/MonthIntervalSizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (monthTraits) CastToBytes(b []MonthInterval) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * MonthIntervalSizeBytes - s.Cap = h.Cap * MonthIntervalSizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*MonthIntervalSizeBytes)[:len(b)*MonthIntervalSizeBytes] } // Copy copies src to dst. 
@@ -108,26 +96,14 @@ func (daytimeTraits) PutValue(b []byte, v DayTimeInterval) { func (daytimeTraits) CastFromBytes(b []byte) []DayTimeInterval { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []DayTimeInterval - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / DayTimeIntervalSizeBytes - s.Cap = h.Cap / DayTimeIntervalSizeBytes - - return res + return unsafe.Slice((*DayTimeInterval)(unsafe.Pointer(h.Data)), cap(b)/DayTimeIntervalSizeBytes)[:len(b)/DayTimeIntervalSizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (daytimeTraits) CastToBytes(b []DayTimeInterval) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * DayTimeIntervalSizeBytes - s.Cap = h.Cap * DayTimeIntervalSizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*DayTimeIntervalSizeBytes)[:len(b)*DayTimeIntervalSizeBytes] } // Copy copies src to dst. @@ -158,26 +134,14 @@ func (monthDayNanoTraits) PutValue(b []byte, v MonthDayNanoInterval) { func (monthDayNanoTraits) CastFromBytes(b []byte) []MonthDayNanoInterval { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []MonthDayNanoInterval - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / MonthDayNanoIntervalSizeBytes - s.Cap = h.Cap / MonthDayNanoIntervalSizeBytes - - return res + return unsafe.Slice((*MonthDayNanoInterval)(unsafe.Pointer(h.Data)), cap(b)/MonthDayNanoIntervalSizeBytes)[:len(b)/MonthDayNanoIntervalSizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. 
func (monthDayNanoTraits) CastToBytes(b []MonthDayNanoInterval) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * MonthDayNanoIntervalSizeBytes - s.Cap = h.Cap * MonthDayNanoIntervalSizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*MonthDayNanoIntervalSizeBytes)[:len(b)*MonthDayNanoIntervalSizeBytes] } // Copy copies src to dst. diff --git a/go/arrow/type_traits_numeric.gen.go b/go/arrow/type_traits_numeric.gen.go index ba394b67e8303..3330e2616bbe7 100644 --- a/go/arrow/type_traits_numeric.gen.go +++ b/go/arrow/type_traits_numeric.gen.go @@ -67,26 +67,14 @@ func (int64Traits) PutValue(b []byte, v int64) { func (int64Traits) CastFromBytes(b []byte) []int64 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []int64 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Int64SizeBytes - s.Cap = h.Cap / Int64SizeBytes - - return res + return unsafe.Slice((*int64)(unsafe.Pointer(h.Data)), cap(b)/Int64SizeBytes)[:len(b)/Int64SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (int64Traits) CastToBytes(b []int64) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Int64SizeBytes - s.Cap = h.Cap * Int64SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int64SizeBytes)[:len(b)*Int64SizeBytes] } // Copy copies src to dst. 
@@ -115,26 +103,14 @@ func (uint64Traits) PutValue(b []byte, v uint64) { func (uint64Traits) CastFromBytes(b []byte) []uint64 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []uint64 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Uint64SizeBytes - s.Cap = h.Cap / Uint64SizeBytes - - return res + return unsafe.Slice((*uint64)(unsafe.Pointer(h.Data)), cap(b)/Uint64SizeBytes)[:len(b)/Uint64SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint64Traits) CastToBytes(b []uint64) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Uint64SizeBytes - s.Cap = h.Cap * Uint64SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint64SizeBytes)[:len(b)*Uint64SizeBytes] } // Copy copies src to dst. @@ -163,26 +139,14 @@ func (float64Traits) PutValue(b []byte, v float64) { func (float64Traits) CastFromBytes(b []byte) []float64 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []float64 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Float64SizeBytes - s.Cap = h.Cap / Float64SizeBytes - - return res + return unsafe.Slice((*float64)(unsafe.Pointer(h.Data)), cap(b)/Float64SizeBytes)[:len(b)/Float64SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (float64Traits) CastToBytes(b []float64) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Float64SizeBytes - s.Cap = h.Cap * Float64SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Float64SizeBytes)[:len(b)*Float64SizeBytes] } // Copy copies src to dst. 
@@ -211,26 +175,14 @@ func (int32Traits) PutValue(b []byte, v int32) { func (int32Traits) CastFromBytes(b []byte) []int32 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []int32 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Int32SizeBytes - s.Cap = h.Cap / Int32SizeBytes - - return res + return unsafe.Slice((*int32)(unsafe.Pointer(h.Data)), cap(b)/Int32SizeBytes)[:len(b)/Int32SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (int32Traits) CastToBytes(b []int32) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Int32SizeBytes - s.Cap = h.Cap * Int32SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int32SizeBytes)[:len(b)*Int32SizeBytes] } // Copy copies src to dst. @@ -259,26 +211,14 @@ func (uint32Traits) PutValue(b []byte, v uint32) { func (uint32Traits) CastFromBytes(b []byte) []uint32 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []uint32 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Uint32SizeBytes - s.Cap = h.Cap / Uint32SizeBytes - - return res + return unsafe.Slice((*uint32)(unsafe.Pointer(h.Data)), cap(b)/Uint32SizeBytes)[:len(b)/Uint32SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint32Traits) CastToBytes(b []uint32) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Uint32SizeBytes - s.Cap = h.Cap * Uint32SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint32SizeBytes)[:len(b)*Uint32SizeBytes] } // Copy copies src to dst. 
@@ -307,26 +247,14 @@ func (float32Traits) PutValue(b []byte, v float32) { func (float32Traits) CastFromBytes(b []byte) []float32 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []float32 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Float32SizeBytes - s.Cap = h.Cap / Float32SizeBytes - - return res + return unsafe.Slice((*float32)(unsafe.Pointer(h.Data)), cap(b)/Float32SizeBytes)[:len(b)/Float32SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (float32Traits) CastToBytes(b []float32) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Float32SizeBytes - s.Cap = h.Cap * Float32SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Float32SizeBytes)[:len(b)*Float32SizeBytes] } // Copy copies src to dst. @@ -355,26 +283,14 @@ func (int16Traits) PutValue(b []byte, v int16) { func (int16Traits) CastFromBytes(b []byte) []int16 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []int16 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Int16SizeBytes - s.Cap = h.Cap / Int16SizeBytes - - return res + return unsafe.Slice((*int16)(unsafe.Pointer(h.Data)), cap(b)/Int16SizeBytes)[:len(b)/Int16SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (int16Traits) CastToBytes(b []int16) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Int16SizeBytes - s.Cap = h.Cap * Int16SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int16SizeBytes)[:len(b)*Int16SizeBytes] } // Copy copies src to dst. 
@@ -403,26 +319,14 @@ func (uint16Traits) PutValue(b []byte, v uint16) { func (uint16Traits) CastFromBytes(b []byte) []uint16 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []uint16 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Uint16SizeBytes - s.Cap = h.Cap / Uint16SizeBytes - - return res + return unsafe.Slice((*uint16)(unsafe.Pointer(h.Data)), cap(b)/Uint16SizeBytes)[:len(b)/Uint16SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint16Traits) CastToBytes(b []uint16) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Uint16SizeBytes - s.Cap = h.Cap * Uint16SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint16SizeBytes)[:len(b)*Uint16SizeBytes] } // Copy copies src to dst. @@ -451,26 +355,14 @@ func (int8Traits) PutValue(b []byte, v int8) { func (int8Traits) CastFromBytes(b []byte) []int8 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []int8 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Int8SizeBytes - s.Cap = h.Cap / Int8SizeBytes - - return res + return unsafe.Slice((*int8)(unsafe.Pointer(h.Data)), cap(b)/Int8SizeBytes)[:len(b)/Int8SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (int8Traits) CastToBytes(b []int8) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Int8SizeBytes - s.Cap = h.Cap * Int8SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int8SizeBytes)[:len(b)*Int8SizeBytes] } // Copy copies src to dst. 
@@ -499,26 +391,14 @@ func (uint8Traits) PutValue(b []byte, v uint8) { func (uint8Traits) CastFromBytes(b []byte) []uint8 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []uint8 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Uint8SizeBytes - s.Cap = h.Cap / Uint8SizeBytes - - return res + return unsafe.Slice((*uint8)(unsafe.Pointer(h.Data)), cap(b)/Uint8SizeBytes)[:len(b)/Uint8SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint8Traits) CastToBytes(b []uint8) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Uint8SizeBytes - s.Cap = h.Cap * Uint8SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint8SizeBytes)[:len(b)*Uint8SizeBytes] } // Copy copies src to dst. @@ -547,26 +427,14 @@ func (time32Traits) PutValue(b []byte, v Time32) { func (time32Traits) CastFromBytes(b []byte) []Time32 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []Time32 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Time32SizeBytes - s.Cap = h.Cap / Time32SizeBytes - - return res + return unsafe.Slice((*Time32)(unsafe.Pointer(h.Data)), cap(b)/Time32SizeBytes)[:len(b)/Time32SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (time32Traits) CastToBytes(b []Time32) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Time32SizeBytes - s.Cap = h.Cap * Time32SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Time32SizeBytes)[:len(b)*Time32SizeBytes] } // Copy copies src to dst. 
@@ -595,26 +463,14 @@ func (time64Traits) PutValue(b []byte, v Time64) { func (time64Traits) CastFromBytes(b []byte) []Time64 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []Time64 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Time64SizeBytes - s.Cap = h.Cap / Time64SizeBytes - - return res + return unsafe.Slice((*Time64)(unsafe.Pointer(h.Data)), cap(b)/Time64SizeBytes)[:len(b)/Time64SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (time64Traits) CastToBytes(b []Time64) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Time64SizeBytes - s.Cap = h.Cap * Time64SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Time64SizeBytes)[:len(b)*Time64SizeBytes] } // Copy copies src to dst. @@ -643,26 +499,14 @@ func (date32Traits) PutValue(b []byte, v Date32) { func (date32Traits) CastFromBytes(b []byte) []Date32 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []Date32 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Date32SizeBytes - s.Cap = h.Cap / Date32SizeBytes - - return res + return unsafe.Slice((*Date32)(unsafe.Pointer(h.Data)), cap(b)/Date32SizeBytes)[:len(b)/Date32SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (date32Traits) CastToBytes(b []Date32) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Date32SizeBytes - s.Cap = h.Cap * Date32SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Date32SizeBytes)[:len(b)*Date32SizeBytes] } // Copy copies src to dst. 
@@ -691,26 +535,14 @@ func (date64Traits) PutValue(b []byte, v Date64) { func (date64Traits) CastFromBytes(b []byte) []Date64 { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []Date64 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / Date64SizeBytes - s.Cap = h.Cap / Date64SizeBytes - - return res + return unsafe.Slice((*Date64)(unsafe.Pointer(h.Data)), cap(b)/Date64SizeBytes)[:len(b)/Date64SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (date64Traits) CastToBytes(b []Date64) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * Date64SizeBytes - s.Cap = h.Cap * Date64SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Date64SizeBytes)[:len(b)*Date64SizeBytes] } // Copy copies src to dst. @@ -739,26 +571,14 @@ func (durationTraits) PutValue(b []byte, v Duration) { func (durationTraits) CastFromBytes(b []byte) []Duration { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []Duration - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / DurationSizeBytes - s.Cap = h.Cap / DurationSizeBytes - - return res + return unsafe.Slice((*Duration)(unsafe.Pointer(h.Data)), cap(b)/DurationSizeBytes)[:len(b)/DurationSizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func (durationTraits) CastToBytes(b []Duration) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * DurationSizeBytes - s.Cap = h.Cap * DurationSizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*DurationSizeBytes)[:len(b)*DurationSizeBytes] } // Copy copies src to dst. 
diff --git a/go/arrow/type_traits_numeric.gen.go.tmpl b/go/arrow/type_traits_numeric.gen.go.tmpl index f5f6a53d74bce..b2af18579af93 100644 --- a/go/arrow/type_traits_numeric.gen.go.tmpl +++ b/go/arrow/type_traits_numeric.gen.go.tmpl @@ -68,26 +68,14 @@ func ({{.name}}Traits) PutValue(b []byte, v {{.Type}}) { func ({{.name}}Traits) CastFromBytes(b []byte) []{{.Type}} { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []{{.Type}} - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len/{{.Name}}SizeBytes - s.Cap = h.Cap/{{.Name}}SizeBytes - - return res + return unsafe.Slice((*{{.Type}})(unsafe.Pointer(h.Data)), cap(b)/{{.Name}}SizeBytes)[:len(b)/{{.Name}}SizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. func ({{.name}}Traits) CastToBytes(b []{{.Type}}) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len*{{.Name}}SizeBytes - s.Cap = h.Cap*{{.Name}}SizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*{{.Name}}SizeBytes)[:len(b)*{{.Name}}SizeBytes] } // Copy copies src to dst. diff --git a/go/arrow/type_traits_timestamp.go b/go/arrow/type_traits_timestamp.go index ade215e629be5..e39867309c06b 100644 --- a/go/arrow/type_traits_timestamp.go +++ b/go/arrow/type_traits_timestamp.go @@ -45,26 +45,14 @@ func (timestampTraits) PutValue(b []byte, v Timestamp) { func (timestampTraits) CastFromBytes(b []byte) []Timestamp { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []Timestamp - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len / TimestampSizeBytes - s.Cap = h.Cap / TimestampSizeBytes - - return res + return unsafe.Slice((*Timestamp)(unsafe.Pointer(h.Data)), cap(b)/TimestampSizeBytes)[:len(b)/TimestampSizeBytes] } // CastToBytes reinterprets the slice b to a slice of bytes. 
func (timestampTraits) CastToBytes(b []Timestamp) []byte { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - var res []byte - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = h.Len * TimestampSizeBytes - s.Cap = h.Cap * TimestampSizeBytes - - return res + return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*TimestampSizeBytes)[:len(b)*TimestampSizeBytes] } // Copy copies src to dst. diff --git a/go/internal/hashing/hash_string.go b/go/internal/hashing/hash_string.go index 6cd49517184c3..b772c7d7f8998 100644 --- a/go/internal/hashing/hash_string.go +++ b/go/internal/hashing/hash_string.go @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build go1.20 +//go:build go1.20 || tinygo package hashing diff --git a/go/internal/hashing/hash_string_go1.19.go b/go/internal/hashing/hash_string_go1.19.go index a421d28409b5e..c496f43abdcc6 100644 --- a/go/internal/hashing/hash_string_go1.19.go +++ b/go/internal/hashing/hash_string_go1.19.go @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !go1.20 +//go:build !go1.20 && !tinygo package hashing diff --git a/go/internal/hashing/xxh3_memo_table.go b/go/internal/hashing/xxh3_memo_table.go index 5ec4d80d4bea4..67e2aef380488 100644 --- a/go/internal/hashing/xxh3_memo_table.go +++ b/go/internal/hashing/xxh3_memo_table.go @@ -24,8 +24,6 @@ import ( "math" "reflect" "unsafe" - - "github.com/apache/arrow/go/v13/parquet" ) //go:generate go run ../../arrow/_tools/tmpl/main.go -i -data=types.tmpldata xxh3_memo_table.gen.go.tmpl @@ -34,6 +32,10 @@ type TypeTraits interface { BytesRequired(n int) int } +type ByteSlice interface { + Bytes() []byte +} + // MemoTable interface for hash tables and dictionary encoding. 
// // Values will remember the order they are inserted to generate a valid @@ -166,16 +168,14 @@ func (s *BinaryMemoTable) Size() int { } // helper function to easily return a byte slice for any given value -// regardless of the type if it's a []byte, parquet.ByteArray, -// parquet.FixedLenByteArray or string. +// regardless of the type if it's a []byte, string, or fulfills the +// ByteSlice interface. func (BinaryMemoTable) valAsByteSlice(val interface{}) []byte { switch v := val.(type) { case []byte: return v - case parquet.ByteArray: - return *(*[]byte)(unsafe.Pointer(&v)) - case parquet.FixedLenByteArray: - return *(*[]byte)(unsafe.Pointer(&v)) + case ByteSlice: + return v.Bytes() case string: var out []byte h := (*reflect.StringHeader)(unsafe.Pointer(&v)) @@ -196,10 +196,8 @@ func (BinaryMemoTable) getHash(val interface{}) uint64 { return hashString(v, 0) case []byte: return Hash(v, 0) - case parquet.ByteArray: - return Hash(*(*[]byte)(unsafe.Pointer(&v)), 0) - case parquet.FixedLenByteArray: - return Hash(*(*[]byte)(unsafe.Pointer(&v)), 0) + case ByteSlice: + return Hash(v.Bytes(), 0) default: panic("invalid type for binarymemotable") } @@ -213,10 +211,8 @@ func (b *BinaryMemoTable) appendVal(val interface{}) { b.builder.AppendString(v) case []byte: b.builder.Append(v) - case parquet.ByteArray: - b.builder.Append(*(*[]byte)(unsafe.Pointer(&v))) - case parquet.FixedLenByteArray: - b.builder.Append(*(*[]byte)(unsafe.Pointer(&v))) + case ByteSlice: + b.builder.Append(v.Bytes()) } } diff --git a/go/internal/json/json.go b/go/internal/json/json.go new file mode 100644 index 0000000000000..319b12c5549c2 --- /dev/null +++ b/go/internal/json/json.go @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !tinygo +// +build !tinygo + +package json + +import ( + "io" + + "github.com/goccy/go-json" +) + +type Decoder = json.Decoder +type Encoder = json.Encoder +type Marshaler = json.Marshaler +type Delim = json.Delim +type UnmarshalTypeError = json.UnmarshalTypeError +type Number = json.Number +type Unmarshaler = json.Unmarshaler +type RawMessage = json.RawMessage + +func Marshal(v interface{}) ([]byte, error) { + return json.Marshal(v) +} + +func Unmarshal(data []byte, v interface{}) error { + return json.Unmarshal(data, v) +} + +func NewDecoder(r io.Reader) *Decoder { + return json.NewDecoder(r) +} + +func NewEncoder(w io.Writer) *Encoder { + return json.NewEncoder(w) +} diff --git a/go/internal/json/json_tinygo.go b/go/internal/json/json_tinygo.go new file mode 100644 index 0000000000000..8e4f447b3c385 --- /dev/null +++ b/go/internal/json/json_tinygo.go @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build tinygo +// +build tinygo + +package json + +import ( + "io" + + "encoding/json" +) + +type Decoder = json.Decoder +type Encoder = json.Encoder +type Marshaler = json.Marshaler +type Delim = json.Delim +type UnmarshalTypeError = json.UnmarshalTypeError +type Number = json.Number +type Unmarshaler = json.Unmarshaler +type RawMessage = json.RawMessage + +func Marshal(v interface{}) ([]byte, error) { + return json.Marshal(v) +} + +func Unmarshal(data []byte, v interface{}) error { + return json.Unmarshal(data, v) +} + +func NewDecoder(r io.Reader) *Decoder { + return json.NewDecoder(r) +} + +func NewEncoder(w io.Writer) *Encoder { + return json.NewEncoder(w) +} diff --git a/go/internal/types/extension_types.go b/go/internal/types/extension_types.go index ee3b2ddd9f6fa..ee7349523404e 100644 --- a/go/internal/types/extension_types.go +++ b/go/internal/types/extension_types.go @@ -24,10 +24,9 @@ import ( "reflect" "strings" - "github.com/goccy/go-json" - "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" + "github.com/apache/arrow/go/v13/internal/json" "github.com/google/uuid" "golang.org/x/xerrors" ) diff --git a/go/internal/types/extension_types_test.go b/go/internal/types/extension_types_test.go index 51764b0e8fab0..9d89c9aa7d05c 100644 --- a/go/internal/types/extension_types_test.go +++ b/go/internal/types/extension_types_test.go @@ -18,12 +18,12 @@ package types_test import ( "bytes" - "encoding/json" "testing" "github.com/apache/arrow/go/v13/arrow" "github.com/apache/arrow/go/v13/arrow/array" 
"github.com/apache/arrow/go/v13/arrow/memory" + "github.com/apache/arrow/go/v13/internal/json" "github.com/apache/arrow/go/v13/internal/types" "github.com/google/uuid" "github.com/stretchr/testify/assert" diff --git a/go/parquet/cmd/parquet_reader/main.go b/go/parquet/cmd/parquet_reader/main.go index b06ee29c28c58..d0d99f35af0ee 100644 --- a/go/parquet/cmd/parquet_reader/main.go +++ b/go/parquet/cmd/parquet_reader/main.go @@ -18,7 +18,6 @@ package main import ( "bufio" - "encoding/json" "fmt" "io" "log" @@ -26,10 +25,12 @@ import ( "strconv" "strings" + "github.com/apache/arrow/go/v13/internal/json" "github.com/apache/arrow/go/v13/parquet" "github.com/apache/arrow/go/v13/parquet/file" "github.com/apache/arrow/go/v13/parquet/metadata" "github.com/apache/arrow/go/v13/parquet/schema" + "github.com/docopt/docopt-go" ) diff --git a/go/parquet/schema/logical_types.go b/go/parquet/schema/logical_types.go index 92832541c353f..ade6e750adacb 100644 --- a/go/parquet/schema/logical_types.go +++ b/go/parquet/schema/logical_types.go @@ -17,10 +17,10 @@ package schema import ( - "encoding/json" "fmt" "math" + "github.com/apache/arrow/go/v13/internal/json" "github.com/apache/arrow/go/v13/parquet" "github.com/apache/arrow/go/v13/parquet/internal/debug" format "github.com/apache/arrow/go/v13/parquet/internal/gen-go/parquet" diff --git a/go/parquet/schema/logical_types_test.go b/go/parquet/schema/logical_types_test.go index 7b68ead4af09d..540899d79a02a 100644 --- a/go/parquet/schema/logical_types_test.go +++ b/go/parquet/schema/logical_types_test.go @@ -17,9 +17,9 @@ package schema_test import ( - "encoding/json" "testing" + "github.com/apache/arrow/go/v13/internal/json" "github.com/apache/arrow/go/v13/parquet" "github.com/apache/arrow/go/v13/parquet/schema" "github.com/stretchr/testify/assert" diff --git a/go/parquet/types.go b/go/parquet/types.go index e0a9e13697573..d1e90a3a01161 100644 --- a/go/parquet/types.go +++ b/go/parquet/types.go @@ -131,6 +131,10 @@ func (b ByteArray) 
String() string { return *(*string)(unsafe.Pointer(&b)) } +func (b ByteArray) Bytes() []byte { + return b +} + type byteArrayTraits struct{} func (byteArrayTraits) BytesRequired(n int) int { @@ -162,6 +166,10 @@ func (b FixedLenByteArray) String() string { return *(*string)(unsafe.Pointer(&b)) } +func (b FixedLenByteArray) Bytes() []byte { + return b +} + type fixedLenByteArrayTraits struct{} func (fixedLenByteArrayTraits) BytesRequired(n int) int {