Skip to content

Commit

Permalink
fix XLA CUDA plugin build (#440)
Browse files (browse the repository at this point in the history)
* pin OS versions (macOS 14, Ubuntu 24.04)
* pin CUDA 12.6.2, cuDNN 9.6.0
* use custom image to build OpenXLA targets
* add custom XLA CUDA runtime Dockerfile
  • Loading branch information
joelberkeley authored Jan 12, 2025
1 parent fb808c1 commit 7cbcd2d
Show file tree
Hide file tree
Showing 17 changed files with 193 additions and 113 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/c-xla-version.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:

jobs:
backend-version:
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
with:
Expand Down
154 changes: 96 additions & 58 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@ on:
pull_request:
branches: master

concurrency:
group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
cancel-in-progress: true

jobs:
shellcheck:
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
- name: Install shellcheck
Expand All @@ -14,26 +18,43 @@ jobs:
run: |
shopt -s extglob nullglob globstar
shellcheck **/*.sh
openxla-dev-docker:
runs-on: ubuntu-24.04
steps:
- uses: docker/setup-buildx-action@v3
- name: Build and export
uses: docker/build-push-action@v6
with:
file: openxla-dev.Dockerfile
tags: openxla-dev
outputs: type=docker,dest=/tmp/openxla-dev.tar
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: openxla-dev
path: /tmp/openxla-dev.tar
pjrt-linux-x86_64:
runs-on: ubuntu-latest
needs:
- openxla-dev-docker
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Download bazel Docker image
uses: actions/download-artifact@v4
with:
name: openxla-dev
path: /tmp
- name: Build PJRT + XLA binary
run: |
if [ ! "$(git diff --exit-code HEAD^ spidr/backend/VERSION)" ]; then
curl -LO --fail-with-body "https://github.com/joelberkeley/spidr/releases/download/c-xla-v$(cat spidr/backend/VERSION)/libc_xla-linux-x86_64.so"
else
# free up disk space: /opt/hostedtoolcache is not used when building in Docker, see
# https://github.com/orgs/community/discussions/25678#discussioncomment-5242449
rm -rf /opt/hostedtoolcache
docker run \
-v $(pwd):/spidr -w /spidr \
tensorflow/build:latest-python3.9 \
sh -c "spidr/backend/build.sh"
docker load --input /tmp/openxla-dev.tar
docker run -v $(pwd):/spidr -w /spidr openxla-dev sh -c "./spidr/backend/build.sh"
fi
mv libc_xla-linux-x86_64.so libc_xla.so
- name: Upload binary
uses: actions/upload-artifact@v4
Expand All @@ -42,7 +63,7 @@ jobs:
path: libc_xla.so
if-no-files-found: error
pjrt-darwin-aarch64:
runs-on: macos-latest
runs-on: macos-14
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -54,7 +75,7 @@ jobs:
else
./spidr/backend/build.sh
fi
mv libc_xla-darwin-aarch64.dylib libc_xla.dylib
- name: Upload binary
uses: actions/upload-artifact@v4
Expand All @@ -63,25 +84,29 @@ jobs:
path: libc_xla.dylib
if-no-files-found: error
pjrt-plugin-xla-cpu-linux-x86_64:
runs-on: ubuntu-latest
needs:
- openxla-dev-docker
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Download bazel Docker image
uses: actions/download-artifact@v4
with:
name: openxla-dev
path: /tmp
- name: Build or fetch XLA CPU PJRT plugin
run: |
if [ ! "$(git diff --exit-code HEAD^ XLA_VERSION)" ]; then
. ./dev.sh
rev=$(cat XLA_VERSION)
curl -LO --fail-with-body "https://github.com/joelberkeley/spidr/releases/download/xla-$(short_revision $rev)/pjrt_plugin_xla_cpu-linux-x86_64.so"
else
rm -rf /opt/hostedtoolcache
docker run \
-v $(pwd):/spidr -w /spidr \
tensorflow/build:latest-python3.9 \
sh -c "pjrt-plugins/xla-cpu/build.sh"
docker load --input /tmp/openxla-dev.tar
docker run -v $(pwd):/spidr -w /spidr openxla-dev sh -c "./pjrt-plugins/xla-cpu/build.sh"
fi
mv pjrt_plugin_xla_cpu-linux-x86_64.so pjrt_plugin_xla_cpu.so
- name: Upload binary
uses: actions/upload-artifact@v4
Expand All @@ -90,7 +115,7 @@ jobs:
path: pjrt_plugin_xla_cpu.so
if-no-files-found: error
pjrt-plugin-xla-cpu-darwin-aarch64:
runs-on: macos-latest
runs-on: macos-14
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -104,7 +129,7 @@ jobs:
else
./pjrt-plugins/xla-cpu/build.sh
fi
mv pjrt_plugin_xla_cpu-darwin-aarch64.dylib pjrt_plugin_xla_cpu.dylib
- name: Upload binary
uses: actions/upload-artifact@v4
Expand All @@ -113,24 +138,27 @@ jobs:
path: pjrt_plugin_xla_cpu.dylib
if-no-files-found: error
pjrt-plugin-xla-cuda-linux-x86_64:
runs-on: ubuntu-latest
needs:
- openxla-dev-docker
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Download bazel Docker image
uses: actions/download-artifact@v4
with:
name: openxla-dev
path: /tmp
- name: Build or fetch XLA CUDA PJRT plugin
run: |
if [ ! "$(git diff --exit-code HEAD^ XLA_VERSION)" ]; then
. ./dev.sh
rev=$(cat XLA_VERSION)
curl -LO --fail-with-body "https://github.com/joelberkeley/spidr/releases/download/xla-$(short_revision $rev)/pjrt_plugin_xla_cuda-linux-x86_64.so"
else
rm -rf /opt/hostedtoolcache
# note this implies specific versions of CUDA and cuDNN
docker run \
-v $(pwd):/spidr -w /spidr \
tensorflow/build:latest-python3.9 \
sh -c "pjrt-plugins/xla-cuda/build.sh"
docker load --input /tmp/openxla-dev.tar
docker run -v $(pwd):/spidr -w /spidr openxla-dev sh -c "./pjrt-plugins/xla-cuda/build.sh"
fi
mv pjrt_plugin_xla_cuda-linux-x86_64.so pjrt_plugin_xla_cuda.so
Expand All @@ -141,17 +169,14 @@ jobs:
path: pjrt_plugin_xla_cuda.so
if-no-files-found: error
build-tests-xla-cpu-linux-x86_64:
runs-on: ubuntu-latest
container: ghcr.io/stefan-hoeck/idris2-pack
runs-on: ubuntu-24.04
container: ghcr.io/stefan-hoeck/idris2-pack:noble
steps:
- uses: actions/checkout@v4
- name: Install build dependencies
run: |
apt-get update && apt-get install -y curl
pack switch HEAD
- name: Build tests
working-directory: test/xla-cpu
run: |
pack switch HEAD
SPIDR_INSTALL_SUPPORT_LIBS=false pack --no-prompt build xla-cpu.ipkg
tar cfz tests-xla-cpu.tar.gz -C build/exec .
- name: Upload tests
Expand All @@ -161,7 +186,7 @@ jobs:
path: test/xla-cpu/tests-xla-cpu.tar.gz
if-no-files-found: error
build-tests-xla-cpu-darwin-aarch64:
runs-on: macos-latest
runs-on: macos-14
steps:
- uses: actions/checkout@v4
- name: Install build dependencies
Expand All @@ -182,31 +207,28 @@ jobs:
path: test/xla-cpu/tests-xla-cpu.tar.gz
if-no-files-found: error
build-tests-xla-cuda-linux-x86_64:
runs-on: ubuntu-latest
container: ghcr.io/stefan-hoeck/idris2-pack
runs-on: ubuntu-24.04
container: ghcr.io/stefan-hoeck/idris2-pack:noble
steps:
- uses: actions/checkout@v4
- name: Install build dependencies
run: |
apt-get update && apt-get install -y curl
pack switch HEAD
- name: Build tests
working-directory: test/xla-cuda
run: |
pack switch HEAD
SPIDR_INSTALL_SUPPORT_LIBS=false pack --no-prompt build xla-cuda.ipkg
tar cfz tests-xla-cuda-linux-x86_64.tar.gz -C build/exec .
tar cfz tests-xla-cuda.tar.gz -C build/exec .
- name: Upload tests
uses: actions/upload-artifact@v4
with:
name: tests-xla-cuda-linux-x86_64
path: test/xla-cuda/tests-xla-cuda-linux-x86_64.tar.gz
path: test/xla-cuda/tests-xla-cuda.tar.gz
if-no-files-found: error
test-xla-cpu-linux-x86_64:
needs:
- pjrt-linux-x86_64
- pjrt-plugin-xla-cpu-linux-x86_64
- build-tests-xla-cpu-linux-x86_64
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
Expand All @@ -225,7 +247,7 @@ jobs:
- pjrt-darwin-aarch64
- pjrt-plugin-xla-cpu-darwin-aarch64
- build-tests-xla-cpu-darwin-aarch64
runs-on: macos-latest
runs-on: macos-14
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
Expand All @@ -239,44 +261,60 @@ jobs:
run: |
tar xfz tests-xla-cpu.tar.gz && rm tests-xla-cpu.tar.gz
./test
build-xla-cuda-linux-x86_64-runtime:
runs-on: ubuntu-24.04
steps:
- uses: docker/setup-buildx-action@v3
- name: Build and export
uses: docker/build-push-action@v6
with:
file: pjrt-plugins/xla-cuda/Dockerfile
tags: xla-cuda
outputs: type=docker,dest=/tmp/xla-cuda.tar
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: xla-cuda
path: /tmp/xla-cuda.tar
test-xla-cuda-linux-x86_64:
needs:
- pjrt-linux-x86_64
- pjrt-plugin-xla-cuda-linux-x86_64
- build-tests-xla-cuda-linux-x86_64
runs-on: ubuntu-latest # needs a CUDA runner
container: nvcr.io/nvidia/tensorrt:23.11-py3
- build-xla-cuda-linux-x86_64-runtime
runs-on: ubuntu-24.04 # needs a CUDA runner
steps:
- name: Download artifacts
- name: Download XLA CUDA runtime Docker image
uses: actions/download-artifact@v4
with:
name: xla-cuda
path: /tmp
- name: Download app artifacts
uses: actions/download-artifact@v4
with:
pattern: "{libc_xla-linux-x86_64,*cuda-linux-x86_64}"
merge-multiple: true
- name: Install runtime dependencies
run: |
apt-get update && apt-get install chezscheme
- name: Run tests
run: |
tar xfz tests-xla-cuda.tar.gz . && rm tests-xla-cuda.tar.gz
exit 0 # we can't run tests without a GPU
docker load --input /tmp/xla-cuda.tar
docker run -v $(pwd):/xla-cuda -w /xla-cuda xla-cuda sh -c "exit 0" # needs a CUDA runner
readme:
runs-on: ubuntu-latest
container: ghcr.io/stefan-hoeck/idris2-pack
runs-on: ubuntu-24.04
container: ghcr.io/stefan-hoeck/idris2-pack:noble
steps:
- uses: actions/checkout@v4
- name: Type-check README
run: |
apt-get update && apt-get install -y curl
pack switch HEAD
SPIDR_INSTALL_SUPPORT_LIBS=false pack --no-prompt typecheck readme.ipkg
tutorials:
runs-on: ubuntu-latest
container: ghcr.io/stefan-hoeck/idris2-pack
runs-on: ubuntu-24.04
container: ghcr.io/stefan-hoeck/idris2-pack:noble
steps:
- uses: actions/checkout@v4
- name: Type-check tutorials
run: |
apt-get update && apt-get install -y curl
pack switch HEAD
export SPIDR_INSTALL_SUPPORT_LIBS=false
res=0; for f in tutorials/*.ipkg; do pack --no-prompt typecheck $f || res=$?; done; $(exit $res)
10 changes: 6 additions & 4 deletions .github/workflows/release-c-xla.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:

jobs:
create-release:
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
outputs:
upload_url: ${{ steps.create-release.outputs.upload_url }}
steps:
Expand All @@ -25,12 +25,14 @@ jobs:
body: "C XLA release ${{ env.C_XLA_VERSION }}"
c-xla-linux-x86_64:
needs: create-release
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
container: tensorflow/build:latest-python3.9
steps:
- uses: actions/checkout@v4
- name: Build C XLA lib
run: ./spidr/backend/build.sh
run: |
docker build -t openxla-dev -f openxla-dev.Dockerfile .
docker run -v $(pwd):/spidr -w /spidr openxla-dev sh -c "./spidr/backend/build.sh"
- name: Publish C XLA lib
uses: actions/upload-release-asset@v1
env:
Expand All @@ -42,7 +44,7 @@ jobs:
asset_content_type: application/x-sharedlib
c-xla-darwin-aarch64:
needs: create-release
runs-on: macos-latest
runs-on: macos-14
steps:
- uses: actions/checkout@v4
- name: Build C XLA lib
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ env:

jobs:
docs:
runs-on: ubuntu-latest
container: ghcr.io/stefan-hoeck/idris2-pack
runs-on: ubuntu-24.04
container: ghcr.io/stefan-hoeck/idris2-pack:noble
steps:
- uses: actions/checkout@v4
- name: Build docs
Expand Down
Loading

0 comments on commit 7cbcd2d

Please sign in to comment.