-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
GH-13: Set up JNI build (dataset, etc.)
Fixes #13.
- Loading branch information
Showing
5 changed files
with
342 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
# Workflow that builds the JNI C++ libraries (see the jobs section). | ||
name: Test (JNI) | ||
|
||
# Triggers: pushes to any branch except those matching dependabot/**, | ||
# pushes of any tag, and every pull request. | ||
on: | ||
push: | ||
branches: | ||
- '**' | ||
- '!dependabot/**' | ||
tags: | ||
- '**' | ||
pull_request: | ||
|
||
# Runs sharing the same repository, PR head ref (or commit SHA when there is | ||
# no head ref) and workflow name form one group; a newer run in a group | ||
# cancels the one still in progress. | ||
concurrency: | ||
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} | ||
cancel-in-progress: true | ||
|
||
# The workflow token only gets read access to repository contents. | ||
permissions: | ||
contents: read | ||
|
||
# NOTE(review): presumably consumed by the Docker Compose file as the prefix | ||
# for volume paths - confirm against the compose configuration. | ||
env: | ||
DOCKER_VOLUME_PREFIX: ".docker/" | ||
|
||
jobs:
  # Builds the JNI C++ libraries by running the `vcpkg-jni` Docker Compose
  # service, then uploads the resulting dist/ tree as one tarball artifact.
  cpp-ubuntu:
    name: Build C++ libraries ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }}
    runs-on: ${{ matrix.platform.runs_on }}
    strategy:
      fail-fast: false
      matrix:
        platform:
          - runs_on: ubuntu-latest
            arch: "x86_64"
            archery_arch: "amd64"
            archery_arch_alias: "x86_64"
            archery_arch_short: "amd64"
    env:
      # architecture name used for archery build
      ARCH: ${{ matrix.platform.archery_arch }}
      ARCH_ALIAS: ${{ matrix.platform.archery_arch_alias }}
      ARCH_SHORT: ${{ matrix.platform.archery_arch_short }}
    steps:
      - name: Checkout apache/arrow-java
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
        with:
          fetch-depth: 0
          submodules: recursive
      # The Arrow C++ sources are checked out into ./arrow next to this repo.
      - name: Checkout apache/arrow
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
        with:
          repository: apache/arrow
          fetch-depth: 0
          path: arrow
          submodules: recursive
      - name: Build C++ libraries
        env:
          VCPKG_BINARY_SOURCES: "clear;nuget,GitHub,readwrite"
        # Use the Compose v2 plugin (`docker compose`): the standalone
        # `docker-compose` v1 binary is no longer shipped on GitHub-hosted
        # Ubuntu runners, so the hyphenated command fails with
        # "command not found".
        run: |
          docker compose run vcpkg-jni
      - name: Compress into single artifact to keep directory structure
        run: tar -cvzf arrow-shared-libs-linux-${{ matrix.platform.arch }}.tar.gz dist/
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: ubuntu-shared-lib-${{ matrix.platform.arch }}
          path: arrow-shared-libs-linux-${{ matrix.platform.arch }}.tar.gz
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
# The base image is chosen by the caller through the `base` build argument;
# no default is provided.
ARG base
FROM ${base}

# Install the libraries required by Gandiva to run.
# llvm[enable-rtti] is enabled in vcpkg.json to avoid link problems in Gandiva.
RUN vcpkg install \
      --clean-after-build \
      --x-install-root=${VCPKG_ROOT}/installed \
      --x-manifest-root=/arrow/ci/vcpkg \
      --x-feature=dev \
      --x-feature=flight \
      --x-feature=gcs \
      --x-feature=json \
      --x-feature=parquet \
      --x-feature=gandiva \
      --x-feature=s3

# Install Java.
# Only the JDK is needed (for the JNI headers); Maven is not invoked in this
# build. The package cache is cleaned in the same layer that populated it.
ARG java=11
RUN yum install -y java-${java}-openjdk-devel \
    && yum clean all

# Settings read by ci/scripts/{cpp,java}_*.sh
ENV ARROW_HOME=/tmp/local \
    ARROW_JAVA_CDATA=ON \
    ARROW_JAVA_JNI=ON \
    ARROW_USE_CCACHE=ON
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
#!/usr/bin/env bash | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
# This script is like java_jni_build.sh, but is meant for release artifacts | ||
# and hardcodes assumptions about the environment it is being run in. | ||
|
||
# Stop at the first failing command, including failures inside a pipeline.
set -eo pipefail

# Positional arguments:
#   $1  arrow_dir  Arrow source checkout (provides dev/archery, cpp/, ci/scripts/)
#   $2  build_dir  scratch build directory; it is deleted and recreated
#   $3  dist_dir   directory where the final binaries are stored; also deleted first
#
# Fail fast when one is missing or empty. Otherwise an empty build_dir makes
# the build run in /cpp, and an empty dist_dir is only noticed by the final
# `pushd` after the complete build has already run.
if [ -z "${1:-}" ] || [ -z "${2:-}" ] || [ -z "${3:-}" ]; then
  echo "Usage: $0 <arrow_dir> <build_dir> <dist_dir>" >&2
  exit 1
fi

arrow_dir=${1}
build_dir=${2}
# The directory where the final binaries will be stored when scripts finish
dist_dir=${3}

# Architecture component of the per-library output directories.
# `arch` prints "aarch64", but the directory layout uses "aarch_64".
normalized_arch=$(arch)
case ${normalized_arch} in
  aarch64)
    normalized_arch=aarch_64
    ;;
esac
|
||
echo "=== Install Archery ==="
# Editable install of Archery from the Arrow checkout; the `archery` command
# is used at the end of this script to check shared-library dependencies.
pip install -e "${arrow_dir}/dev/archery[all]"

echo "=== Clear output directories and leftovers ==="
# Remove the results of any previous run. Both paths are quoted so a directory
# name containing whitespace or glob characters is removed as a single path
# instead of being split into several arguments.
rm -rf "${build_dir}"
rm -rf "${dist_dir}"
|
||
echo "=== Building Arrow C++ libraries ==="
# Major version of the installed devtoolset GCC package: the leading digits of
# the RPM VERSION field.
devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat '%{VERSION}' | \
                       grep -o "^[0-9]*")
devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
# Name of the target-specific libstdc++ header directory. It is derived from
# `arch` rather than fixed to x86_64 so the default GANDIVA_CXX_FLAGS below is
# also correct on aarch64, which the rest of this script already handles.
devtoolset_target="$(arch)-redhat-linux"

# Every setting below is only a default: a value already present in the
# environment wins. The exported ones are visible to child processes.
# NOTE(review): presumably exported for java_jni_build.sh - confirm there.
: "${ARROW_ACERO:=ON}"
export ARROW_ACERO
: "${ARROW_BUILD_TESTS:=ON}"
: "${ARROW_DATASET:=ON}"
export ARROW_DATASET
: "${ARROW_GANDIVA:=ON}"
export ARROW_GANDIVA
: "${ARROW_GCS:=ON}"
: "${ARROW_JEMALLOC:=ON}"
: "${ARROW_RPATH_ORIGIN:=ON}"
: "${ARROW_ORC:=ON}"
export ARROW_ORC
: "${ARROW_PARQUET:=ON}"
: "${ARROW_S3:=ON}"
: "${ARROW_USE_CCACHE:=OFF}"
: "${CMAKE_BUILD_TYPE:=release}"
: "${CMAKE_UNITY_BUILD:=ON}"
: "${VCPKG_ROOT:=/opt/vcpkg}"
: "${VCPKG_FEATURE_FLAGS:=-manifests}"
: "${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}"
# Semicolon-separated list (CMake list syntax) handed to the Arrow build as
# ARROW_GANDIVA_PC_CXX_FLAGS.
: "${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/${devtoolset_target};-lpthread}"
|
||
# Print the ccache counters before compiling. The verbose form is tried first;
# if that invocation fails, fall back to the plain statistics output.
case "${ARROW_USE_CCACHE}" in
  ON)
    echo "=== ccache statistics before build ==="
    if ! ccache -sv 2>/dev/null; then
      ccache -s
    fi
    ;;
esac

# Locations of the test data inside the Arrow checkout, plus a switch that
# disables EC2 metadata lookups by the AWS SDK.
export \
  ARROW_TEST_DATA="${arrow_dir}/testing/data" \
  PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" \
  AWS_EC2_METADATA_DISABLED=TRUE
|
||
# Configure and build Arrow C++ as static libraries in ${build_dir}/cpp and
# install them into ${ARROW_HOME}. The directory stays on the pushd stack; it
# is popped later in the script, after the tests have run from it.
mkdir -p "${build_dir}/cpp"
pushd "${build_dir}/cpp"

# ARROW_BUILD_TESTS follows the variable of the same name, so setting
# ARROW_BUILD_TESTS=OFF skips compiling the test binaries as well as running
# them. CSV and Substrait are toggled together with the dataset module.
# Every expansion is quoted so values are passed to CMake as single arguments.
cmake \
  -DARROW_ACERO="${ARROW_ACERO}" \
  -DARROW_BUILD_SHARED=OFF \
  -DARROW_BUILD_TESTS="${ARROW_BUILD_TESTS}" \
  -DARROW_CSV="${ARROW_DATASET}" \
  -DARROW_DATASET="${ARROW_DATASET}" \
  -DARROW_SUBSTRAIT="${ARROW_DATASET}" \
  -DARROW_DEPENDENCY_SOURCE="VCPKG" \
  -DARROW_DEPENDENCY_USE_SHARED=OFF \
  -DARROW_GANDIVA_PC_CXX_FLAGS="${GANDIVA_CXX_FLAGS}" \
  -DARROW_GANDIVA="${ARROW_GANDIVA}" \
  -DARROW_GCS="${ARROW_GCS}" \
  -DARROW_JEMALLOC="${ARROW_JEMALLOC}" \
  -DARROW_ORC="${ARROW_ORC}" \
  -DARROW_PARQUET="${ARROW_PARQUET}" \
  -DARROW_RPATH_ORIGIN="${ARROW_RPATH_ORIGIN}" \
  -DARROW_S3="${ARROW_S3}" \
  -DARROW_USE_CCACHE="${ARROW_USE_CCACHE}" \
  -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
  -DCMAKE_INSTALL_PREFIX="${ARROW_HOME}" \
  -DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \
  -DGTest_SOURCE=BUNDLED \
  -DORC_SOURCE=BUNDLED \
  -DORC_PROTOBUF_EXECUTABLE="${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc" \
  -DPARQUET_BUILD_EXAMPLES=OFF \
  -DPARQUET_BUILD_EXECUTABLES=OFF \
  -DPARQUET_REQUIRE_ENCRYPTION=OFF \
  -DVCPKG_MANIFEST_MODE=OFF \
  -DVCPKG_TARGET_TRIPLET="${VCPKG_TARGET_TRIPLET}" \
  -GNinja \
  "${arrow_dir}/cpp"
ninja install
|
||
# Run the C++ unit tests unless ARROW_BUILD_TESTS was switched off.
# The skipped tests are collected in an array and joined with "|" into the
# regular expression passed to ctest.
if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then
  # MinIO is required
  skipped_tests=("arrow-s3fs-test")
  if [ "$(arch)" = "aarch64" ]; then
    # GCS testbench is crashed on aarch64:
    # ImportError: ../grpc/_cython/cygrpc.cpython-38-aarch64-linux-gnu.so:
    # undefined symbol: vtable for std::__cxx11::basic_ostringstream<
    # char, std::char_traits<char>, std::allocator<char> >
    skipped_tests+=("arrow-gcsfs-test")
  fi
  skipped_tests+=(
    # unstable
    "arrow-acero-asof-join-node-test"
    "arrow-acero-hash-join-node-test"
    # external dependency
    "arrow-gcsfs-test"
    # strptime
    "arrow-utility-test"
  )
  exclude_tests=$(IFS='|'; printf '%s' "${skipped_tests[*]}")
  ctest \
    --exclude-regex "${exclude_tests}" \
    --label-regex unittest \
    --output-on-failure \
    --parallel $(nproc) \
    --timeout 300
fi
|
||
# Return from the C++ build directory entered earlier with pushd.
popd

# Extra CMake arguments for the JNI build: the vcpkg toolchain file and the
# same target triplet that was used for the C++ build. The variable is
# exported so that java_jni_build.sh can read it from its environment.
export JAVA_JNI_CMAKE_ARGS=""
JAVA_JNI_CMAKE_ARGS+=" -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake"
JAVA_JNI_CMAKE_ARGS+=" -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET}"

# Every path is quoted so it reaches the helper script as exactly one
# argument, even if it contains whitespace or glob characters.
"${arrow_dir}/ci/scripts/java_jni_build.sh" \
  "${arrow_dir}" \
  "${ARROW_HOME}" \
  "${build_dir}" \
  "${dist_dir}"

# Print the ccache counters again once everything has been compiled.
if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
  echo "=== ccache statistics after build ==="
  ccache -sv 2>/dev/null || ccache -s
fi
|
||
|
||
echo "=== Checking shared dependencies for libraries ==="
# Run Archery's dependency check on the four JNI libraries for this
# architecture, from inside the distribution directory.
# NOTE(review): presumably the check fails when a library depends on a shared
# object that is not named by an --allow option - confirm in Archery.
# The directory is quoted so a path with whitespace stays a single argument.
pushd "${dist_dir}"
archery linking check-dependencies \
  --allow ld-linux-aarch64 \
  --allow ld-linux-x86-64 \
  --allow libc \
  --allow libdl \
  --allow libgcc_s \
  --allow libm \
  --allow libpthread \
  --allow librt \
  --allow libstdc++ \
  --allow libz \
  --allow linux-vdso \
  "arrow_cdata_jni/${normalized_arch}/libarrow_cdata_jni.so" \
  "arrow_dataset_jni/${normalized_arch}/libarrow_dataset_jni.so" \
  "arrow_orc_jni/${normalized_arch}/libarrow_orc_jni.so" \
  "gandiva_jni/${normalized_arch}/libgandiva_jni.so"
popd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters