Skip to content

Commit

Permalink
GH-32832: [Go] support building with tinygo (#35723)
Browse files Browse the repository at this point in the history
### Rationale for this change

This change makes it possible to compile Arrow with TinyGo, which enables the use of Arrow in environments where binary size is important, such as WebAssembly.

### What changes are included in this PR?

Introduce an internal JSON package that uses `goccy/go-json` for regular builds, as the code does today, but uses the native `encoding/json` for TinyGo builds. This is necessary because go-json has a lot of code that is incompatible with TinyGo.

Remove the dependency on the `parquet` package from non-parquet code, since that package is also incompatible with TinyGo.

Other minor tweaks for compatibility with tinygo.

### Are these changes tested?

Should we add a build step that compiles the example with tinygo?

### Are there any user-facing changes?

None.

* Closes: #32832

Lead-authored-by: Chris Casola <ccasola@factset.com>
Co-authored-by: Adam Gaynor <adam.gaynor@factset.com>
Signed-off-by: Matt Topol <zotthewizard@gmail.com>
  • Loading branch information
chriscasola and Adam Gaynor authored Jun 12, 2023
1 parent b642707 commit 245404e
Show file tree
Hide file tree
Showing 67 changed files with 382 additions and 483 deletions.
17 changes: 17 additions & 0 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -394,3 +394,20 @@ jobs:
- name: Test
shell: bash
run: ci/scripts/go_test.sh $(pwd)

# TinyGo job: builds and runs the Go hello-world example with the TinyGo
# toolchain inside the official tinygo/tinygo Docker image.
tinygo:
name: TinyGo
runs-on: ubuntu-latest
# Skip the job for pull requests whose title contains "WIP".
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
env:
# Used below as the tag of the tinygo/tinygo Docker image.
TINYGO_VERSION: 0.27.0
timeout-minutes: 60
steps:
- name: Checkout Arrow
uses: actions/checkout@v3
with:
fetch-depth: 0
submodules: recursive
# Mounts the repository's go/ directory at /src and ci/scripts at /ci-scripts
# inside the container, then runs go_tinygo_example.sh there.
- name: Build and Run Example
run: |
docker run --rm -v $(pwd)/go:/src -v $(pwd)/ci/scripts:/ci-scripts "tinygo/tinygo:$TINYGO_VERSION" /ci-scripts/go_tinygo_example.sh
27 changes: 27 additions & 0 deletions ci/scripts/go_tinygo_example.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Builds the Arrow Go hello-world example with TinyGo and runs the result.
# -e aborts on the first failing command; -x echoes each command as it runs.
set -ex

# Start in the home directory: the binary is written to ~/example_tinygo and
# is later invoked by a relative path after popd returns here.
cd ~
# /src is expected to contain the Go sources; the build path below
# (arrow/_examples/...) is relative to it.
pushd /src
# NOTE(review): the "noasm" build tag presumably selects the pure-Go code
# paths instead of the assembly ones — confirm against the arrow packages.
tinygo build -tags noasm -o ~/example_tinygo arrow/_examples/helloworld/main.go
popd

# Run the freshly built binary; with set -e a non-zero exit status fails the script.
./example_tinygo
28 changes: 24 additions & 4 deletions go/arrow/_examples/helloworld/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,36 @@
package main

import (
"os"

"github.com/apache/arrow/go/v13/arrow"
"github.com/apache/arrow/go/v13/arrow/array"
"github.com/apache/arrow/go/v13/arrow/math"
"github.com/apache/arrow/go/v13/arrow/memory"
)

// main builds a three-column record (int64, string, nullable float64), wraps
// it in a table, and sums the float column. It exits with status 1 when the
// sum is not 9.
//
// NOTE(review): this listing comes from a rendered diff without +/- markers.
// The fb/vec statements look like the pre-change body interleaved with the
// new one — confirm against the repository before relying on them.
func main() {
fb := array.NewFloat64Builder(memory.DefaultAllocator)
// Schema of the example record: two non-nullable fields and one nullable
// float64 field.
schema := arrow.NewSchema([]arrow.Field{
{Name: "intField", Type: arrow.PrimitiveTypes.Int64, Nullable: false},
{Name: "stringField", Type: arrow.BinaryTypes.String, Nullable: false},
{Name: "floatField", Type: arrow.PrimitiveTypes.Float64, Nullable: true},
}, nil)

builder := array.NewRecordBuilder(memory.DefaultAllocator, schema)
defer builder.Release()

// Populate the three columns with five rows each. A nil validity slice is
// passed for the first two columns; the float column marks rows 1 and 3 as
// not valid, and those slots carry the value 0.
builder.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2, 3, 4, 5}, nil)
builder.Field(1).(*array.StringBuilder).AppendValues([]string{"a", "b", "c", "d", "e"}, nil)
builder.Field(2).(*array.Float64Builder).AppendValues([]float64{1, 0, 3, 0, 5}, []bool{true, false, true, false, true})

rec := builder.NewRecord()
defer rec.Release()

fb.AppendValues([]float64{1, 3, 5, 7, 9, 11}, nil)
tbl := array.NewTableFromRecords(schema, []arrow.Record{rec})
defer tbl.Release()

vec := fb.NewFloat64Array()
// The result of this call is discarded.
math.Float64.Sum(vec)
// Sum the first chunk of column 2 (floatField). The invalid slots hold 0, so
// the expected total is 1 + 3 + 5 = 9 whether or not Sum skips them.
sum := math.Float64.Sum(tbl.Column(2).Data().Chunk(0).(*array.Float64))
if sum != 9 {
// NOTE(review): deferred calls run in last-in-first-out order, so this
// os.Exit is the first deferred call to run when main returns. os.Exit
// terminates the process without running the remaining deferred functions,
// so the Release calls registered above are skipped on this path.
defer os.Exit(1)
}
}
3 changes: 2 additions & 1 deletion go/arrow/_tools/tmpl/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package main

import (
"bytes"
"encoding/json"
"flag"
"fmt"
"go/format"
Expand All @@ -28,6 +27,8 @@ import (
"path/filepath"
"strings"
"text/template"

"github.com/apache/arrow/go/v13/internal/json"
)

const Ext = ".tmpl"
Expand Down
18 changes: 9 additions & 9 deletions go/arrow/array.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
package arrow

import (
"encoding/json"
"fmt"

"github.com/apache/arrow/go/v13/arrow/memory"
"github.com/apache/arrow/go/v13/internal/json"
)

// ArrayData is the underlying memory and metadata of an Arrow array, corresponding
Expand All @@ -32,21 +32,21 @@ import (
// which allows for manipulating the internal data and casting. For example,
// one could cast the raw bytes from int64 to float64 like so:
//
// arrdata := GetMyInt64Data().Data()
// newdata := array.NewData(arrow.PrimitiveTypes.Float64, arrdata.Len(),
// arrdata.Buffers(), nil, arrdata.NullN(), arrdata.Offset())
// defer newdata.Release()
// float64arr := array.NewFloat64Data(newdata)
// defer float64arr.Release()
// arrdata := GetMyInt64Data().Data()
// newdata := array.NewData(arrow.PrimitiveTypes.Float64, arrdata.Len(),
// arrdata.Buffers(), nil, arrdata.NullN(), arrdata.Offset())
// defer newdata.Release()
// float64arr := array.NewFloat64Data(newdata)
// defer float64arr.Release()
//
// This is also useful in an analytics setting where memory may be reused. For
// example, if we had a group of operations all returning float64 such as:
//
// Log(Sqrt(Expr(arr)))
// Log(Sqrt(Expr(arr)))
//
// The low-level implementations could have signatures such as:
//
// func Log(values arrow.ArrayData) arrow.ArrayData
// func Log(values arrow.ArrayData) arrow.ArrayData
//
// Another example would be a function that consumes one or more memory buffers
// in an input array and replaces them with newly-allocated data, changing the
Expand Down
2 changes: 1 addition & 1 deletion go/arrow/array/binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
"unsafe"

"github.com/apache/arrow/go/v13/arrow"
"github.com/goccy/go-json"
"github.com/apache/arrow/go/v13/internal/json"
)

type BinaryLike interface {
Expand Down
2 changes: 1 addition & 1 deletion go/arrow/array/binarybuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
"github.com/apache/arrow/go/v13/arrow"
"github.com/apache/arrow/go/v13/arrow/internal/debug"
"github.com/apache/arrow/go/v13/arrow/memory"
"github.com/goccy/go-json"
"github.com/apache/arrow/go/v13/internal/json"
)

// A BinaryBuilder is used to build a Binary array using the Append methods.
Expand Down
2 changes: 1 addition & 1 deletion go/arrow/array/boolean.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
"github.com/apache/arrow/go/v13/arrow"
"github.com/apache/arrow/go/v13/arrow/bitutil"
"github.com/apache/arrow/go/v13/arrow/memory"
"github.com/goccy/go-json"
"github.com/apache/arrow/go/v13/internal/json"
)

// A type which represents an immutable sequence of boolean values.
Expand Down
2 changes: 1 addition & 1 deletion go/arrow/array/booleanbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
"github.com/apache/arrow/go/v13/arrow/bitutil"
"github.com/apache/arrow/go/v13/arrow/internal/debug"
"github.com/apache/arrow/go/v13/arrow/memory"
"github.com/goccy/go-json"
"github.com/apache/arrow/go/v13/internal/json"
)

type BooleanBuilder struct {
Expand Down
2 changes: 1 addition & 1 deletion go/arrow/array/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (
"github.com/apache/arrow/go/v13/arrow"
"github.com/apache/arrow/go/v13/arrow/bitutil"
"github.com/apache/arrow/go/v13/arrow/memory"
"github.com/goccy/go-json"
"github.com/apache/arrow/go/v13/internal/json"
)

const (
Expand Down
2 changes: 1 addition & 1 deletion go/arrow/array/decimal128.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
"github.com/apache/arrow/go/v13/arrow/decimal128"
"github.com/apache/arrow/go/v13/arrow/internal/debug"
"github.com/apache/arrow/go/v13/arrow/memory"
"github.com/goccy/go-json"
"github.com/apache/arrow/go/v13/internal/json"
)

// A type which represents an immutable sequence of 128-bit decimal values.
Expand Down
2 changes: 1 addition & 1 deletion go/arrow/array/decimal256.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
"github.com/apache/arrow/go/v13/arrow/decimal256"
"github.com/apache/arrow/go/v13/arrow/internal/debug"
"github.com/apache/arrow/go/v13/arrow/memory"
"github.com/goccy/go-json"
"github.com/apache/arrow/go/v13/internal/json"
)

// Decimal256 is a type that represents an immutable sequence of 256-bit decimal values.
Expand Down
2 changes: 1 addition & 1 deletion go/arrow/array/dictionary.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ import (
"github.com/apache/arrow/go/v13/arrow/internal/debug"
"github.com/apache/arrow/go/v13/arrow/memory"
"github.com/apache/arrow/go/v13/internal/hashing"
"github.com/apache/arrow/go/v13/internal/json"
"github.com/apache/arrow/go/v13/internal/utils"
"github.com/goccy/go-json"
)

// Dictionary represents the type for dictionary-encoded data with a data
Expand Down
6 changes: 3 additions & 3 deletions go/arrow/array/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package array_test

import (
"encoding/json"
"fmt"
"math/rand"
"reflect"
Expand All @@ -27,6 +26,7 @@ import (
"github.com/apache/arrow/go/v13/arrow"
"github.com/apache/arrow/go/v13/arrow/array"
"github.com/apache/arrow/go/v13/arrow/memory"
"github.com/apache/arrow/go/v13/internal/json"
"github.com/apache/arrow/go/v13/internal/types"
)

Expand Down Expand Up @@ -640,13 +640,13 @@ func TestEdits_UnifiedDiff(t *testing.T) {
dataType: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32),
baseJSON: `[
[{"key": "foo", "value": 2}, {"key": "bar", "value": 3}, {"key": "baz", "value": 1}],
[{"key": "quux", "value": 13}]
[{"key": "quux", "value": 13}],
[]
]`,
targetJSON: `[
[{"key": "foo", "value": 2}, {"key": "bar", "value": 3}, {"key": "baz", "value": 1}],
[{"key": "ytho", "value": 11}],
[{"key": "quux", "value": 13}]
[{"key": "quux", "value": 13}],
[]
]`,
want: `@@ -1, +1 @@
Expand Down
30 changes: 17 additions & 13 deletions go/arrow/array/encoded.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ import (
"github.com/apache/arrow/go/v13/arrow/encoded"
"github.com/apache/arrow/go/v13/arrow/internal/debug"
"github.com/apache/arrow/go/v13/arrow/memory"
"github.com/apache/arrow/go/v13/internal/json"
"github.com/apache/arrow/go/v13/internal/utils"
"github.com/goccy/go-json"
)

// RunEndEncoded represents an array containing two children:
Expand Down Expand Up @@ -74,22 +74,24 @@ func (r *RunEndEncoded) Release() {
// run, only over the range of run values inside the logical offset/length
// range of the parent array.
//
// Example
// # Example
//
// For this array:
// RunEndEncoded: { Offset: 150, Length: 1500 }
// RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ]
// Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ]
//
// RunEndEncoded: { Offset: 150, Length: 1500 }
// RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ]
// Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ]
//
// LogicalValuesArray will return the following array:
// [ "f", "g" ]
//
// [ "f", "g" ]
//
// This is because the offset of 150 tells it to skip the values until
// "f" which corresponds with the logical offset (the run from 10 - 1000),
// and stops after "g" because the length + offset goes to 1650 which is
// within the run from 1000 - 1750, corresponding to the "g" value.
//
// Note
// # Note
//
// The return from this needs to be Released.
func (r *RunEndEncoded) LogicalValuesArray() arrow.Array {
Expand All @@ -109,23 +111,25 @@ func (r *RunEndEncoded) LogicalValuesArray() arrow.Array {
// that are adjusted so the new array can have an offset of 0. As a result
// this method can be expensive to call for an array with a non-zero offset.
//
// Example
// # Example
//
// For this array:
// RunEndEncoded: { Offset: 150, Length: 1500 }
// RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ]
// Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ]
//
// RunEndEncoded: { Offset: 150, Length: 1500 }
// RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ]
// Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ]
//
// LogicalRunEndsArray will return the following array:
// [ 850, 1500 ]
//
// [ 850, 1500 ]
//
// This is because the offset of 150 tells us to skip all run-ends less
// than 150 (by finding the physical offset), and we adjust the run-ends
// accordingly (1000 - 150 = 850). The logical length of the array is 1500,
// so we know we don't want to go past the 1750 run end. Thus the last
// run-end is determined by doing: min(1750 - 150, 1500) = 1500.
//
// Note
// # Note
//
// The return from this needs to be Released
func (r *RunEndEncoded) LogicalRunEndsArray(mem memory.Allocator) arrow.Array {
Expand Down
5 changes: 3 additions & 2 deletions go/arrow/array/encoded_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
package array_test

import (
"encoding/json"
"strings"
"testing"

"github.com/apache/arrow/go/v13/arrow"
"github.com/apache/arrow/go/v13/arrow/array"
"github.com/apache/arrow/go/v13/arrow/memory"
"github.com/apache/arrow/go/v13/internal/json"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -435,7 +436,7 @@ func TestRunEndEncodedUnmarshalNestedJSON(t *testing.T) {
defer bldr.Release()

const testJSON = `
[null, [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, null, 3], [4, 5, null], null, null,
[null, [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, null, 3], [4, 5, null], null, null,
[4, 5, null], [4, 5, null], [4, 5, null]]
`

Expand Down
Loading

0 comments on commit 245404e

Please sign in to comment.