From 525560c0a3a8855dc22577b23e2d98c189a41752 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Thu, 12 Jan 2023 01:16:52 -0500 Subject: [PATCH 01/26] add file to download nccl --- .github/workflows/helpers/download_nccl.sh | 51 ++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100755 .github/workflows/helpers/download_nccl.sh diff --git a/.github/workflows/helpers/download_nccl.sh b/.github/workflows/helpers/download_nccl.sh new file mode 100755 index 0000000..8692dd9 --- /dev/null +++ b/.github/workflows/helpers/download_nccl.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -euo pipefail +set -x + +ubuntu_version=$(lsb_release -rs) +ubuntu_version=${ubuntu_version//./} +wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${ubuntu_version}/x86_64/cuda-keyring_1.0-1_all.deb +sudo dpkg -i cuda-keyring_1.0-1_all.deb +sudo apt-get update -y +rm -f cuda-keyring_1.0-1_all.deb + +if [[ "$ubuntu_version" == "2004" ]]; then + sudo apt download libnccl2=2.15.5-1+cuda11.0 libnccl-dev=2.15.5-1+cuda11.0 + sudo apt download libnccl2=2.8.4-1+cuda11.1 libnccl-dev=2.8.4-1+cuda11.1 + sudo apt download libnccl2=2.8.4-1+cuda11.2 libnccl-dev=2.8.4-1+cuda11.2 + sudo apt download libnccl2=2.9.9-1+cuda11.3 libnccl-dev=2.9.9-1+cuda11.3 + sudo apt download libnccl2=2.11.4-1+cuda11.4 libnccl-dev=2.11.4-1+cuda11.4 + sudo apt download libnccl2=2.11.4-1+cuda11.5 libnccl-dev=2.11.4-1+cuda11.5 + sudo apt download libnccl2=2.12.12-1+cuda11.6 libnccl-dev=2.12.12-1+cuda11.6 + sudo apt download libnccl2=2.14.3-1+cuda11.7 libnccl-dev=2.14.3-1+cuda11.7 +elif [[ "$ubuntu_version" == "1804" ]]; then + sudo apt download libnccl2=2.8.3-1+cuda10.1 libnccl-dev=2.8.3-1+cuda10.1 + sudo apt download libnccl2=2.15.5-1+cuda10.2 libnccl-dev=2.15.5-1+cuda10.2 + sudo apt download libnccl2=2.15.5-1+cuda11.0 libnccl-dev=2.15.5-1+cuda11.0 + sudo apt download libnccl2=2.8.4-1+cuda11.1 libnccl-dev=2.8.4-1+cuda11.1 + sudo apt download libnccl2=2.8.4-1+cuda11.2 libnccl-dev=2.8.4-1+cuda11.2 + sudo apt download libnccl2=2.9.9-1+cuda11.3 libnccl-dev=2.9.9-1+cuda11.3 + sudo apt download libnccl2=2.11.4-1+cuda11.4 libnccl-dev=2.11.4-1+cuda11.4 + sudo apt download libnccl2=2.11.4-1+cuda11.5 libnccl-dev=2.11.4-1+cuda11.5 + sudo apt download libnccl2=2.12.12-1+cuda11.6 libnccl-dev=2.12.12-1+cuda11.6 + sudo apt download libnccl2=2.14.3-1+cuda11.7 libnccl-dev=2.14.3-1+cuda11.7 +fi + +for debfile in *.deb; do + temp_str=${debfile#*+} + temp_str=${temp_str%_*} + cuda_version=${temp_str:4} + mkdir -p $cuda_version/nccl + dpkg-deb -xv $debfile ./$cuda_version/nccl + cd $cuda_version/nccl + [ -d ./usr/include ] && mv ./usr/include ./ + mkdir -p lib + files_to_move=(./usr/lib/x86_64-linux-gnu/*.a) + [ -f ${files_to_move[0]} ] && mv ./usr/lib/x86_64-linux-gnu/*.a ./lib/ + files_to_move=(./usr/lib/x86_64-linux-gnu/*.so) + [ -f ${files_to_move[0]} ] && mv ./usr/lib/x86_64-linux-gnu/*.so ./lib/ + files_to_move=(./usr/lib/x86_64-linux-gnu/*.so.*) + [ -f ${files_to_move[0]} ] && mv ./usr/lib/x86_64-linux-gnu/*.so.* ./lib/ + rm -rf usr + cd ../../ +done From a92eb1e8dc7d2a65d0850ba5f83fec0c9899293c Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Thu, 12 Jan 2023 01:42:23 -0500 Subject: [PATCH 02/26] first version --- .github/workflows/build.yml | 72 ++++++++++++++-------- .github/workflows/helpers/download_nccl.sh | 5 ++ 2 files changed, 50 insertions(+), 27 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4a5b62a..2a9a256 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,8 +12,47 @@ concurrency: group: build-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: - build-dependencies: - name: Build the FlexFlow dependencies + download-nccl: + name: Download and Package NCCL + strategy: + matrix: + os: [ubuntu-18.04, ubuntu-20.04] + fail-fast: false + runs-on: ${{ matrix.os }} + steps: + - name: Checkout Git Repository + uses: actions/checkout@v3 + + - name: Free additional space on runner + run: .github/workflows/helpers/free_space_on_runner.sh + + - name: Download NCCL + run: .github/workflows/helpers/download_nccl.sh + + - name: Prepare library files + working-directory: nccl_downloads + run: | + for folder in */ ; do + cd $folder + export NCCL_TARBALL="nccl_${{ matrix.os }}_${folder}.tar.gz" + echo "Creating archive $NCCL_TARBALL" + tar -zcvf $NCCL_TARBALL nccl + echo "Checking the size of the NCCL tarball..." + du -h $NCCL_TARBALL + mv $NCCL_TARBALL ../ + cd .. + done + + - name: Archive compiled NCCL libraries + uses: actions/upload-artifact@v3 + if: ${{ matrix.gpu_backend == 'cuda' }} + with: + name: nccl_${{ matrix.os }} + path: nccl_downloads + + build-legion: + name: Build Legion + needs: download-nccl strategy: matrix: os: [ubuntu-18.04, ubuntu-20.04] @@ -84,7 +123,7 @@ jobs: FF_GPU_BACKEND: ${{ matrix.gpu_backend }} run: .github/workflows/helpers/install_dependencies.sh - - name: Build NCCL/Legion + - name: Build Legion env: #DEPENDENCY: ${{ matrix.dependency }} CUDA_VERSION: ${{ matrix.cuda_version }} @@ -95,9 +134,7 @@ jobs: export CUDA_DIR=/usr/local/cuda export FF_BUILD_LEGION=ON - if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then - export FF_USE_NCCL=ON - fi + export FF_USE_NCCL=OFF cores_available=$(nproc --all) n_build_cores=$(( cores_available -1 )) @@ -113,20 +150,8 @@ jobs: env: FF_GPU_BACKEND: ${{ matrix.gpu_backend }} run: | - # Remove unnecessary files - echo "Removing unnecessary files..." - rm -rf build/deps/nccl/obj build/deps/nccl/src build/deps/nccl/tmp - rm -f build/export/legion/lib/libflexflow.so - if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then - export NCCL_TARBALL="nccl_${{ matrix.os }}_${{ matrix.cuda_version }}.tar.gz" export LEGION_TARBALL="legion_${{ matrix.os }}_${{ matrix.cuda_version }}.tar.gz" - - # Only build NCCL tarball for CUDA backends - echo "Creating archive $NCCL_TARBALL" - tar -zcvf $NCCL_TARBALL build/deps/nccl/ - echo "Checking the size of the NCCL tarball..." - du -h $NCCL_TARBALL else export LEGION_TARBALL="legion_${{ matrix.os }}_${{ matrix.gpu_backend }}.tar.gz" fi @@ -150,17 +175,10 @@ jobs: name: legion_${{ matrix.os }}_${{ matrix.gpu_backend }} path: legion_${{ matrix.os }}_${{ matrix.gpu_backend }}.tar.gz - - name: Archive compiled NCCL library (CUDA) - uses: actions/upload-artifact@v3 - if: ${{ matrix.gpu_backend == 'cuda' }} - with: - name: nccl_${{ matrix.os }}_${{ matrix.cuda_version }} - path: nccl_${{ matrix.os }}_${{ matrix.cuda_version }}.tar.gz - notify-slack: name: Notify Slack in case of failure runs-on: ubuntu-20.04 - needs: build-dependencies + needs: build-legion if: ${{ failure() && github.event_name == 'schedule' }} steps: - name: Send Slack message @@ -172,7 +190,7 @@ jobs: create-release: name: Create new release runs-on: ubuntu-20.04 - needs: build-dependencies + needs: build-legion steps: - name: Checkout Git Repository uses: actions/checkout@v3 diff --git a/.github/workflows/helpers/download_nccl.sh b/.github/workflows/helpers/download_nccl.sh index 8692dd9..9bf63b8 100755 --- a/.github/workflows/helpers/download_nccl.sh +++ b/.github/workflows/helpers/download_nccl.sh @@ -2,6 +2,9 @@ set -euo pipefail set -x +mkdir -p nccl_downloads +cd nccl_downloads + ubuntu_version=$(lsb_release -rs) ubuntu_version=${ubuntu_version//./} wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${ubuntu_version}/x86_64/cuda-keyring_1.0-1_all.deb @@ -49,3 +52,5 @@ for debfile in *.deb; do rm -rf usr cd ../../ done + +rm -rf *.deb From 3b05c5d3fac185271c2ceedb972ded6e70ea09ba Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Thu, 12 Jan 2023 01:50:18 -0500 Subject: [PATCH 03/26] updates --- .github/workflows/build.yml | 20 +++++++++++--------- .github/workflows/helpers/download_nccl.sh | 5 +++++ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2a9a256..48338c9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -32,15 +32,17 @@ jobs: - name: Prepare library files working-directory: nccl_downloads run: | - for folder in */ ; do - cd $folder - export NCCL_TARBALL="nccl_${{ matrix.os }}_${folder}.tar.gz" - echo "Creating archive $NCCL_TARBALL" - tar -zcvf $NCCL_TARBALL nccl - echo "Checking the size of the NCCL tarball..." - du -h $NCCL_TARBALL - mv $NCCL_TARBALL ../ - cd .. + for folder in *; do + if [ -d "$folder" ]; then + cd $folder + export NCCL_TARBALL="nccl_${{ matrix.os }}_${folder}.tar.gz" + echo "Creating archive $NCCL_TARBALL" + tar -zcvf $NCCL_TARBALL nccl + echo "Checking the size of the NCCL tarball..." + du -h $NCCL_TARBALL + mv $NCCL_TARBALL ../ + cd .. + fi done - name: Archive compiled NCCL libraries diff --git a/.github/workflows/helpers/download_nccl.sh b/.github/workflows/helpers/download_nccl.sh index 9bf63b8..b15551d 100755 --- a/.github/workflows/helpers/download_nccl.sh +++ b/.github/workflows/helpers/download_nccl.sh @@ -22,6 +22,11 @@ if [[ "$ubuntu_version" == "2004" ]]; then sudo apt download libnccl2=2.12.12-1+cuda11.6 libnccl-dev=2.12.12-1+cuda11.6 sudo apt download libnccl2=2.14.3-1+cuda11.7 libnccl-dev=2.14.3-1+cuda11.7 elif [[ "$ubuntu_version" == "1804" ]]; then + # Additional key required to download the CUDA 10.1 version + wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo dpkg -i nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb + sudo apt-get update -y + rm -f nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb sudo apt download libnccl2=2.8.3-1+cuda10.1 libnccl-dev=2.8.3-1+cuda10.1 sudo apt download libnccl2=2.15.5-1+cuda10.2 libnccl-dev=2.15.5-1+cuda10.2 sudo apt download libnccl2=2.15.5-1+cuda11.0 libnccl-dev=2.15.5-1+cuda11.0 From d02a11573a3a4327c6d2d8957862584b9a5365e7 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Thu, 12 Jan 2023 01:52:07 -0500 Subject: [PATCH 04/26] update --- .github/workflows/helpers/download_nccl.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/helpers/download_nccl.sh b/.github/workflows/helpers/download_nccl.sh index b15551d..13b2420 100755 --- a/.github/workflows/helpers/download_nccl.sh +++ b/.github/workflows/helpers/download_nccl.sh @@ -23,6 +23,7 @@ if [[ "$ubuntu_version" == "2004" ]]; then sudo apt download libnccl2=2.14.3-1+cuda11.7 libnccl-dev=2.14.3-1+cuda11.7 elif [[ "$ubuntu_version" == "1804" ]]; then # Additional key required to download the CUDA 10.1 version + sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb sudo dpkg -i nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb sudo apt-get update -y From a5a1223d9d31c0718aa351f0ae2fc24488e88ce0 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Thu, 12 Jan 2023 02:01:23 -0500 Subject: [PATCH 05/26] update --- .github/workflows/build.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 48338c9..525344a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -47,10 +47,9 @@ jobs: - name: Archive compiled NCCL libraries uses: actions/upload-artifact@v3 - if: ${{ matrix.gpu_backend == 'cuda' }} with: name: nccl_${{ matrix.os }} - path: nccl_downloads + path: nccl_downloads/*.tar.gz build-legion: name: Build Legion From 8f9efc9edfa0331388d04837106a109adc52fd8b Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Fri, 27 Jan 2023 00:45:24 -0500 Subject: [PATCH 06/26] add cuda to path --- .github/workflows/helpers/download_nccl.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/helpers/download_nccl.sh b/.github/workflows/helpers/download_nccl.sh index 13b2420..6a6fda6 100755 --- a/.github/workflows/helpers/download_nccl.sh +++ b/.github/workflows/helpers/download_nccl.sh @@ -44,9 +44,9 @@ for debfile in *.deb; do temp_str=${debfile#*+} temp_str=${temp_str%_*} cuda_version=${temp_str:4} - mkdir -p $cuda_version/nccl - dpkg-deb -xv $debfile ./$cuda_version/nccl - cd $cuda_version/nccl + mkdir -p cuda-$cuda_version/nccl + dpkg-deb -xv $debfile ./cuda-$cuda_version/nccl + cd cuda-$cuda_version/nccl [ -d ./usr/include ] && mv ./usr/include ./ mkdir -p lib files_to_move=(./usr/lib/x86_64-linux-gnu/*.a) From 824894335ee17e23df894b5785aea4ce6c63f5ed Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Fri, 27 Jan 2023 00:49:05 -0500 Subject: [PATCH 07/26] use lower versions of cuda when building legion for best compatibility --- .github/workflows/build.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 525344a..bb51e04 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -63,11 +63,11 @@ jobs: "10.2.89", "11.0.3", "11.1.1", - "11.2.2", - "11.3.1", - "11.4.3", - "11.5.2", - "11.6.2", + "11.2.0", + "11.3.0", + "11.4.0", + "11.5.0", + "11.6.0", "11.7.0", ] gpu_backend: [cuda, hip_rocm] @@ -88,15 +88,15 @@ jobs: gpu_backend: "hip_rocm" - cuda_version: "11.0.3" gpu_backend: "hip_rocm" - - cuda_version: "11.2.2" + - cuda_version: "11.2.0" gpu_backend: "hip_rocm" - - cuda_version: "11.3.1" + - cuda_version: "11.3.0" gpu_backend: "hip_rocm" - - cuda_version: "11.4.3" + - cuda_version: "11.4.0" gpu_backend: "hip_rocm" - - cuda_version: "11.5.2" + - cuda_version: "11.5.0" gpu_backend: "hip_rocm" - - cuda_version: "11.6.2" + - cuda_version: "11.6.0" gpu_backend: "hip_rocm" - cuda_version: "11.7.0" gpu_backend: "hip_rocm" From 7a02cb349c13323ea21bd1a0c39772c6f8517b09 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Fri, 27 Jan 2023 00:56:06 -0500 Subject: [PATCH 08/26] use first three digits of cuda version in legion path --- .github/workflows/build.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bb51e04..fc0c544 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -150,9 +150,11 @@ jobs: - name: Prepare library files env: FF_GPU_BACKEND: ${{ matrix.gpu_backend }} + CUDA_VERSION: ${{ matrix.cuda_version }} + CUDA_VERSION_MAJOR: ${CUDA_VERSION:0:4} run: | if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then - export LEGION_TARBALL="legion_${{ matrix.os }}_${{ matrix.cuda_version }}.tar.gz" + export LEGION_TARBALL="legion_${{ matrix.os }}_cuda-${CUDA_VERSION_MAJOR}.tar.gz" else export LEGION_TARBALL="legion_${{ matrix.os }}_${{ matrix.gpu_backend }}.tar.gz" fi @@ -163,11 +165,14 @@ jobs: du -h $LEGION_TARBALL - name: Archive compiled Legion library (CUDA) + env: + CUDA_VERSION: ${{ matrix.cuda_version }} + CUDA_VERSION_MAJOR: ${CUDA_VERSION:0:4} uses: actions/upload-artifact@v3 if: ${{ matrix.gpu_backend == 'cuda' }} with: - name: legion_${{ matrix.os }}_${{ matrix.cuda_version }} - path: legion_${{ matrix.os }}_${{ matrix.cuda_version }}.tar.gz + name: legion_${{ matrix.os }}_cuda-${CUDA_VERSION_MAJOR} + path: legion_${{ matrix.os }}_cuda-${CUDA_VERSION_MAJOR}.tar.gz - name: Archive compiled Legion library (HIP) uses: actions/upload-artifact@v3 From 152d9ebd02f8f8049512d4120cfd4207d0e870cb Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Fri, 27 Jan 2023 00:58:50 -0500 Subject: [PATCH 09/26] fixed legion path within tarball --- .github/workflows/build.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fc0c544..b8106db 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -160,9 +160,12 @@ jobs: fi echo "Creating archive $LEGION_TARBALL" - tar -zcvf $LEGION_TARBALL build/export/legion/ + cd build/export + tar -zcvf $LEGION_TARBALL legion echo "Checking the size of the Legion tarball..." du -h $LEGION_TARBALL + mv $LEGION_TARBALL ../../ + cd ../../ - name: Archive compiled Legion library (CUDA) env: From 6e3e9135470faffad5c07a0c26981619b4a909b0 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Fri, 27 Jan 2023 00:59:48 -0500 Subject: [PATCH 10/26] linting --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b8106db..1f62e8e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -50,7 +50,7 @@ jobs: with: name: nccl_${{ matrix.os }} path: nccl_downloads/*.tar.gz - + build-legion: name: Build Legion needs: download-nccl From 1425ee03411a985119f973c3acbb500772638dc9 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Fri, 27 Jan 2023 01:03:01 -0500 Subject: [PATCH 11/26] parallelize builds --- .github/workflows/build.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1f62e8e..85ba828 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -53,7 +53,6 @@ jobs: build-legion: name: Build Legion - needs: download-nccl strategy: matrix: os: [ubuntu-18.04, ubuntu-20.04] @@ -187,7 +186,7 @@ jobs: notify-slack: name: Notify Slack in case of failure runs-on: ubuntu-20.04 - needs: build-legion + needs: [download-nccl, build-legion] if: ${{ failure() && github.event_name == 'schedule' }} steps: - name: Send Slack message @@ -199,7 +198,7 @@ jobs: create-release: name: Create new release runs-on: ubuntu-20.04 - needs: build-legion + needs: [download-nccl, build-legion] steps: - name: Checkout Git Repository uses: actions/checkout@v3 From 1241fb17eda34b9200d3d3b338a31dfe77055423 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Fri, 27 Jan 2023 01:06:08 -0500 Subject: [PATCH 12/26] update legion version --- deps/legion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/legion b/deps/legion index 15b23cf..7f8df4e 160000 --- a/deps/legion +++ b/deps/legion @@ -1 +1 @@ -Subproject commit 15b23cf0d95f186297f05c76611ddd3e2cbbe9f9 +Subproject commit 7f8df4ee66896acf1c1f5ac8f43808596046f54b From 5687a8b0014f7b9bc49c932435c051b6a50f0d87 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Fri, 27 Jan 2023 02:02:19 -0500 Subject: [PATCH 13/26] hopefully fix --- .github/workflows/build.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 85ba828..c1dbbaf 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -150,8 +150,10 @@ jobs: env: FF_GPU_BACKEND: ${{ matrix.gpu_backend }} CUDA_VERSION: ${{ matrix.cuda_version }} - CUDA_VERSION_MAJOR: ${CUDA_VERSION:0:4} run: | + echo "CUDA_VERSION=$CUDA_VERSION" >> $GITHUB_ENV + echo "CUDA_VERSION_MAJOR=${CUDA_VERSION:0:4}" >> $GITHUB_ENV + if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then export LEGION_TARBALL="legion_${{ matrix.os }}_cuda-${CUDA_VERSION_MAJOR}.tar.gz" else @@ -167,9 +169,6 @@ jobs: cd ../../ - name: Archive compiled Legion library (CUDA) - env: - CUDA_VERSION: ${{ matrix.cuda_version }} - CUDA_VERSION_MAJOR: ${CUDA_VERSION:0:4} uses: actions/upload-artifact@v3 if: ${{ matrix.gpu_backend == 'cuda' }} with: From 55d71d8e4cdbbd077f61746afbcfe35246f47e75 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Fri, 27 Jan 2023 13:20:16 -0500 Subject: [PATCH 14/26] bug fix --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c1dbbaf..fb1e29a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -151,7 +151,7 @@ jobs: FF_GPU_BACKEND: ${{ matrix.gpu_backend }} CUDA_VERSION: ${{ matrix.cuda_version }} run: | - echo "CUDA_VERSION=$CUDA_VERSION" >> $GITHUB_ENV + export CUDA_VERSION_MAJOR="${CUDA_VERSION:0:4}" echo "CUDA_VERSION_MAJOR=${CUDA_VERSION:0:4}" >> $GITHUB_ENV if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then @@ -172,8 +172,8 @@ jobs: uses: actions/upload-artifact@v3 if: ${{ matrix.gpu_backend == 'cuda' }} with: - name: legion_${{ matrix.os }}_cuda-${CUDA_VERSION_MAJOR} - path: legion_${{ matrix.os }}_cuda-${CUDA_VERSION_MAJOR}.tar.gz + name: legion_${{ matrix.os }}_cuda-${{ env.CUDA_VERSION_MAJOR }} + path: legion_${{ matrix.os }}_cuda-${{ env.CUDA_VERSION_MAJOR }}.tar.gz - name: Archive compiled Legion library (HIP) uses: actions/upload-artifact@v3 From 000b87f5e085d89f99d269d741a821ad1b5e7bee Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 12:38:43 -0500 Subject: [PATCH 15/26] installing conda for specific python version --- .github/workflows/build.yml | 11 ++++++++--- .../workflows/helpers/install_dependencies.sh | 16 ++++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fb1e29a..19196aa 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -70,9 +70,7 @@ jobs: "11.7.0", ] gpu_backend: [cuda, hip_rocm] - # uncomment the line below (and related ones) to build nccl, legion in parallel. Because - # git only supports up to 20 jobs in parallel, building in parallel is currently not needed. - #dependency: ["nccl", "legion"] + python_version: ["3.6", "3.7", "3.8", "3.9", "3.10"] exclude: - os: ubuntu-20.04 cuda_version: "10.1.243" @@ -110,6 +108,12 @@ jobs: - name: Free additional space on runner run: .github/workflows/helpers/free_space_on_runner.sh + - name: Install Miniconda (Python ${{ matrix.python_version }}) + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: ${{ matrix.python_version }} + - name: Install CUDA uses: Jimver/cuda-toolkit@v0.2.8 id: cuda-toolkit @@ -121,6 +125,7 @@ jobs: env: CUDA_VERSION: ${{ matrix.cuda_version }} FF_GPU_BACKEND: ${{ matrix.gpu_backend }} + LEGION_PYTHON_VERSION: ${{ matrix.python_version }} run: .github/workflows/helpers/install_dependencies.sh - name: Build Legion diff --git a/.github/workflows/helpers/install_dependencies.sh b/.github/workflows/helpers/install_dependencies.sh index 561f583..e7d2386 100755 --- a/.github/workflows/helpers/install_dependencies.sh +++ b/.github/workflows/helpers/install_dependencies.sh @@ -15,14 +15,14 @@ CUDA_VERSION=${CUDA_VERSION:-11.1.1} ./install_cudnn.sh "${CUDA_VERSION}" # Install Miniconda -echo "Installing Miniconda..." -wget -c -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ - chmod +x ./Miniconda3-latest-Linux-x86_64.sh && \ - bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \ - rm ./Miniconda3-latest-Linux-x86_64.sh && \ - /opt/conda/bin/conda upgrade --all && \ - /opt/conda/bin/conda install conda-build conda-verify && \ - /opt/conda/bin/conda clean -ya +# echo "Installing Miniconda..." +# wget -c -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ +# chmod +x ./Miniconda3-latest-Linux-x86_64.sh && \ +# bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \ +# rm ./Miniconda3-latest-Linux-x86_64.sh && \ +# /opt/conda/bin/conda upgrade --all && \ +# /opt/conda/bin/conda install conda-build conda-verify && \ +# /opt/conda/bin/conda clean -ya # Install HIP dependencies if needed FF_GPU_BACKEND=${FF_GPU_BACKEND:-"cuda"} From 84dd129af723e9c33bc63b9526db309b0b07737d Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 12:39:33 -0500 Subject: [PATCH 16/26] fix --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 19196aa..5aef3fe 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -110,9 +110,9 @@ jobs: - name: Install Miniconda (Python ${{ matrix.python_version }}) uses: conda-incubator/setup-miniconda@v2 - with: - auto-update-conda: true - python-version: ${{ matrix.python_version }} + with: + auto-update-conda: true + python-version: ${{ matrix.python_version }} - name: Install CUDA uses: Jimver/cuda-toolkit@v0.2.8 From d9ba7c3eb7746cce3ad90e6cc034b4c644a09e2e Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 12:45:39 -0500 Subject: [PATCH 17/26] removed opt/conda from path --- .github/workflows/build.yml | 2 +- .github/workflows/helpers/install_dependencies.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5aef3fe..1e326e0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -134,7 +134,7 @@ jobs: CUDA_VERSION: ${{ matrix.cuda_version }} FF_GPU_BACKEND: ${{ matrix.gpu_backend }} run: | - export PATH=/opt/conda/bin:$PATH + #export PATH=/opt/conda/bin:$PATH export CUDNN_DIR=/usr/local/cuda export CUDA_DIR=/usr/local/cuda diff --git a/.github/workflows/helpers/install_dependencies.sh b/.github/workflows/helpers/install_dependencies.sh index e7d2386..292912b 100755 --- a/.github/workflows/helpers/install_dependencies.sh +++ b/.github/workflows/helpers/install_dependencies.sh @@ -43,6 +43,6 @@ sudo rm -rf /var/lib/apt/lists/* # Install conda packages echo "Installing conda packages..." -export PATH=/opt/conda/bin:$PATH +#export PATH=/opt/conda/bin:$PATH conda install cmake make conda install -c conda-forge numpy keras-preprocessing pybind11 cmake-build-extension pillow=9.0.0 From 1bbcdc502fd4841658fb808309dedcf8a4c77b6f Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 15:23:59 -0500 Subject: [PATCH 18/26] fix --- .github/workflows/build.yml | 13 ++----- .../workflows/helpers/install_dependencies.sh | 37 ++++++++++++++----- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1e326e0..d738366 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -70,7 +70,7 @@ jobs: "11.7.0", ] gpu_backend: [cuda, hip_rocm] - python_version: ["3.6", "3.7", "3.8", "3.9", "3.10"] + python_version: ["3.9"] #["3.7", "3.8", "3.9", "3.10"] exclude: - os: ubuntu-20.04 cuda_version: "10.1.243" @@ -108,12 +108,6 @@ jobs: - name: Free additional space on runner run: .github/workflows/helpers/free_space_on_runner.sh - - name: Install Miniconda (Python ${{ matrix.python_version }}) - uses: conda-incubator/setup-miniconda@v2 - with: - auto-update-conda: true - python-version: ${{ matrix.python_version }} - - name: Install CUDA uses: Jimver/cuda-toolkit@v0.2.8 id: cuda-toolkit @@ -125,16 +119,15 @@ jobs: env: CUDA_VERSION: ${{ matrix.cuda_version }} FF_GPU_BACKEND: ${{ matrix.gpu_backend }} - LEGION_PYTHON_VERSION: ${{ matrix.python_version }} + PY_VERSION: ${{ matrix.python_version }} run: .github/workflows/helpers/install_dependencies.sh - name: Build Legion env: - #DEPENDENCY: ${{ matrix.dependency }} CUDA_VERSION: ${{ matrix.cuda_version }} FF_GPU_BACKEND: ${{ matrix.gpu_backend }} run: | - #export PATH=/opt/conda/bin:$PATH + export PATH=/opt/conda/bin:$PATH export CUDNN_DIR=/usr/local/cuda export CUDA_DIR=/usr/local/cuda diff --git a/.github/workflows/helpers/install_dependencies.sh b/.github/workflows/helpers/install_dependencies.sh index 292912b..530c60c 100755 --- a/.github/workflows/helpers/install_dependencies.sh +++ b/.github/workflows/helpers/install_dependencies.sh @@ -14,15 +14,34 @@ sudo apt-get update && sudo apt-get install -y --no-install-recommends wget binu CUDA_VERSION=${CUDA_VERSION:-11.1.1} ./install_cudnn.sh "${CUDA_VERSION}" -# Install Miniconda -# echo "Installing Miniconda..." -# wget -c -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ -# chmod +x ./Miniconda3-latest-Linux-x86_64.sh && \ -# bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \ -# rm ./Miniconda3-latest-Linux-x86_64.sh && \ -# /opt/conda/bin/conda upgrade --all && \ -# /opt/conda/bin/conda install conda-build conda-verify && \ -# /opt/conda/bin/conda clean -ya +#Install Miniconda +echo "Installing Miniconda..." +PY_VERSION=${PY_VERSION:latest} +MINICONDA_BASE_URL="https://repo.continuum.io/miniconda/" +if [[ "$PY_VERSION" == "latest" ]]; then + echo "Installing latest Python version" + MINICONDA_INSTALLER="Miniconda3-latest-Linux-x86_64.sh" +elif [[ "$PY_VERSION" == "3.9" ]]; then + echo "Installing Python version ${PY_VERSION}" + MINICONDA_INSTALLER="Miniconda3-py39_22.11.1-1-Linux-x86_64.sh" +elif [[ "$PY_VERSION" == "3.8" ]]; then + echo "Installing Python version ${PY_VERSION}" + MINICONDA_INSTALLER="Miniconda3-py38_22.11.1-1-Linux-x86_64.sh" +elif [[ "$PY_VERSION" == "3.7" ]]; then + echo "Installing Python version ${PY_VERSION}" + MINICONDA_INSTALLER="Miniconda3-py37_22.11.1-1-Linux-x86_64.sh" +else + echo "Request Python version (${PY_VERSION}) not supported" + exit 1 +fi +MINICONDA_URL="${MINICONDA_BASE_URL}${MINICONDA_INSTALLER}" +wget -c -q $MINICONDA_URL && \ + chmod +x $MINICONDA_INSTALLER && \ + bash $MINICONDA_INSTALLER -b -p /opt/conda && \ + rm $MINICONDA_INSTALLER && \ + /opt/conda/bin/conda upgrade --all && \ + /opt/conda/bin/conda install conda-build conda-verify && \ + /opt/conda/bin/conda clean -ya # Install HIP dependencies if needed FF_GPU_BACKEND=${FF_GPU_BACKEND:-"cuda"} From 8bde8665baf5aec51deffe4884a98075d511eddd Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 15:36:41 -0500 Subject: [PATCH 19/26] fix --- .github/workflows/helpers/install_dependencies.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/helpers/install_dependencies.sh b/.github/workflows/helpers/install_dependencies.sh index 530c60c..9c0a855 100755 --- a/.github/workflows/helpers/install_dependencies.sh +++ b/.github/workflows/helpers/install_dependencies.sh @@ -16,7 +16,7 @@ CUDA_VERSION=${CUDA_VERSION:-11.1.1} #Install Miniconda echo "Installing Miniconda..." -PY_VERSION=${PY_VERSION:latest} +PY_VERSION=${PY_VERSION:-latest} MINICONDA_BASE_URL="https://repo.continuum.io/miniconda/" if [[ "$PY_VERSION" == "latest" ]]; then echo "Installing latest Python version" From fd795dd0cb363abd7a02f7b0b084e1210c47a900 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 15:38:57 -0500 Subject: [PATCH 20/26] fix --- .github/workflows/helpers/install_dependencies.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/helpers/install_dependencies.sh b/.github/workflows/helpers/install_dependencies.sh index 9c0a855..68765e5 100755 --- a/.github/workflows/helpers/install_dependencies.sh +++ b/.github/workflows/helpers/install_dependencies.sh @@ -62,6 +62,6 @@ sudo rm -rf /var/lib/apt/lists/* # Install conda packages echo "Installing conda packages..." -#export PATH=/opt/conda/bin:$PATH +export PATH=/opt/conda/bin:$PATH conda install cmake make conda install -c conda-forge numpy keras-preprocessing pybind11 cmake-build-extension pillow=9.0.0 From bc7ee40bde10ef75599792b13f611e28ae85cbee Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 15:52:03 -0500 Subject: [PATCH 21/26] build for all versions of python --- .github/workflows/build.yml | 6 +++--- .github/workflows/helpers/install_dependencies.sh | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d738366..0bacd61 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -70,7 +70,7 @@ jobs: "11.7.0", ] gpu_backend: [cuda, hip_rocm] - python_version: ["3.9"] #["3.7", "3.8", "3.9", "3.10"] + python_version: ["3.7", "3.8", "3.9", "3.10"] exclude: - os: ubuntu-20.04 cuda_version: "10.1.243" @@ -153,9 +153,9 @@ jobs: echo "CUDA_VERSION_MAJOR=${CUDA_VERSION:0:4}" >> $GITHUB_ENV if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then - export LEGION_TARBALL="legion_${{ matrix.os }}_cuda-${CUDA_VERSION_MAJOR}.tar.gz" + export LEGION_TARBALL="legion_${{ matrix.os }}_cuda-${CUDA_VERSION_MAJOR}_python${{ matrix.python_version }}.tar.gz" else - export LEGION_TARBALL="legion_${{ matrix.os }}_${{ matrix.gpu_backend }}.tar.gz" + export LEGION_TARBALL="legion_${{ matrix.os }}_${{ matrix.gpu_backend }}_python${{ matrix.python_version }}.tar.gz" fi echo "Creating archive $LEGION_TARBALL" diff --git a/.github/workflows/helpers/install_dependencies.sh b/.github/workflows/helpers/install_dependencies.sh index 68765e5..3894fbf 100755 --- a/.github/workflows/helpers/install_dependencies.sh +++ b/.github/workflows/helpers/install_dependencies.sh @@ -21,6 +21,9 @@ MINICONDA_BASE_URL="https://repo.continuum.io/miniconda/" if [[ "$PY_VERSION" == "latest" ]]; then echo "Installing latest Python version" MINICONDA_INSTALLER="Miniconda3-latest-Linux-x86_64.sh" +elif [[ "$PY_VERSION" == "3.10" ]]; then + echo "Installing Python version ${PY_VERSION}" + MINICONDA_INSTALLER="Miniconda3-py310_22.11.1-1-Linux-x86_64.sh" elif [[ "$PY_VERSION" == "3.9" ]]; then echo "Installing Python version ${PY_VERSION}" MINICONDA_INSTALLER="Miniconda3-py39_22.11.1-1-Linux-x86_64.sh" From 0cf1c39951d0bb0d2dc08cf48412f3b142e21e76 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 16:31:55 -0500 Subject: [PATCH 22/26] fix tarball name and link --- .github/workflows/build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0bacd61..3bab505 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -170,15 +170,15 @@ jobs: uses: actions/upload-artifact@v3 if: ${{ matrix.gpu_backend == 'cuda' }} with: - name: legion_${{ matrix.os }}_cuda-${{ env.CUDA_VERSION_MAJOR }} - path: legion_${{ matrix.os }}_cuda-${{ env.CUDA_VERSION_MAJOR }}.tar.gz + name: legion_${{ matrix.os }}_cuda-${{ env.CUDA_VERSION_MAJOR }}_python${{ matrix.python_version }} + path: legion_${{ matrix.os }}_cuda-${{ env.CUDA_VERSION_MAJOR }}_python${{ matrix.python_version }}.tar.gz - name: Archive compiled Legion library (HIP) uses: actions/upload-artifact@v3 if: ${{ matrix.gpu_backend != 'cuda' }} with: - name: legion_${{ matrix.os }}_${{ matrix.gpu_backend }} - path: legion_${{ matrix.os }}_${{ matrix.gpu_backend }}.tar.gz + name: legion_${{ matrix.os }}_${{ matrix.gpu_backend }}_python${{ matrix.python_version }} + path: legion_${{ matrix.os }}_${{ matrix.gpu_backend }}_python${{ matrix.python_version }}.tar.gz notify-slack: name: Notify Slack in case of failure From e73deee6058728a72aea6ba217a4eedbd05cc032 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 21:19:32 -0500 Subject: [PATCH 23/26] fix download nccl script, add shellcheck --- .github/workflows/helpers/download_nccl.sh | 5 +++++ .github/workflows/shell-check.yml | 10 ++++++++++ 2 files changed, 15 insertions(+) create mode 100644 .github/workflows/shell-check.yml diff --git a/.github/workflows/helpers/download_nccl.sh b/.github/workflows/helpers/download_nccl.sh index 6a6fda6..8c5a864 100755 --- a/.github/workflows/helpers/download_nccl.sh +++ b/.github/workflows/helpers/download_nccl.sh @@ -55,6 +55,11 @@ for debfile in *.deb; do [ -f ${files_to_move[0]} ] && mv ./usr/lib/x86_64-linux-gnu/*.so ./lib/ files_to_move=(./usr/lib/x86_64-linux-gnu/*.so.*) [ -f ${files_to_move[0]} ] && mv ./usr/lib/x86_64-linux-gnu/*.so.* ./lib/ + symlinks_to_move="$(find ./usr/lib/x86_64-linux-gnu/ -type l )" + for s in $symlinks_to_move; do + fname="$(basename $s)" + ln -s "$(readlink $s )" ./lib/$fname + done rm -rf usr cd ../../ done diff --git a/.github/workflows/shell-check.yml b/.github/workflows/shell-check.yml new file mode 100644 index 0000000..a51803e --- /dev/null +++ b/.github/workflows/shell-check.yml @@ -0,0 +1,10 @@ +name: Shell Check +on: [push, pull_request, workflow_dispatch] +jobs: + shellcheck: + name: Shellcheck + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Run ShellCheck + uses: ludeeus/action-shellcheck@master From 437f31c06f775f381bab4e7fd2c66b82a087f47f Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 21:23:33 -0500 Subject: [PATCH 24/26] shellcheck --- .github/workflows/helpers/download_nccl.sh | 14 +++++++------- .github/workflows/helpers/install_cudnn.sh | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/helpers/download_nccl.sh b/.github/workflows/helpers/download_nccl.sh index 8c5a864..5df1223 100755 --- a/.github/workflows/helpers/download_nccl.sh +++ b/.github/workflows/helpers/download_nccl.sh @@ -7,7 +7,7 @@ cd nccl_downloads ubuntu_version=$(lsb_release -rs) ubuntu_version=${ubuntu_version//./} -wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${ubuntu_version}/x86_64/cuda-keyring_1.0-1_all.deb +wget "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${ubuntu_version}/x86_64/cuda-keyring_1.0-1_all.deb" sudo dpkg -i cuda-keyring_1.0-1_all.deb sudo apt-get update -y rm -f cuda-keyring_1.0-1_all.deb @@ -44,17 +44,17 @@ for debfile in *.deb; do temp_str=${debfile#*+} temp_str=${temp_str%_*} cuda_version=${temp_str:4} - mkdir -p cuda-$cuda_version/nccl - dpkg-deb -xv $debfile ./cuda-$cuda_version/nccl - cd cuda-$cuda_version/nccl + mkdir -p "cuda-$cuda_version/nccl" + dpkg-deb -xv "$debfile" "./cuda-$cuda_version/nccl" + cd "cuda-$cuda_version/nccl" [ -d ./usr/include ] && mv ./usr/include ./ mkdir -p lib files_to_move=(./usr/lib/x86_64-linux-gnu/*.a) - [ -f ${files_to_move[0]} ] && mv ./usr/lib/x86_64-linux-gnu/*.a ./lib/ + [ -f "${files_to_move[0]}" ] && mv ./usr/lib/x86_64-linux-gnu/*.a ./lib/ files_to_move=(./usr/lib/x86_64-linux-gnu/*.so) - [ -f ${files_to_move[0]} ] && mv ./usr/lib/x86_64-linux-gnu/*.so ./lib/ + [ -f "${files_to_move[0]}" ] && mv ./usr/lib/x86_64-linux-gnu/*.so ./lib/ files_to_move=(./usr/lib/x86_64-linux-gnu/*.so.*) - [ -f ${files_to_move[0]} ] && mv ./usr/lib/x86_64-linux-gnu/*.so.* ./lib/ + [ -f "${files_to_move[0]}" ] && mv ./usr/lib/x86_64-linux-gnu/*.so.* ./lib/ symlinks_to_move="$(find ./usr/lib/x86_64-linux-gnu/ -type l )" for s in $symlinks_to_move; do fname="$(basename $s)" diff --git a/.github/workflows/helpers/install_cudnn.sh b/.github/workflows/helpers/install_cudnn.sh index 866eaf8..d777454 100755 --- a/.github/workflows/helpers/install_cudnn.sh +++ b/.github/workflows/helpers/install_cudnn.sh @@ -46,9 +46,9 @@ wget -c -q $CUDNN_LINK if [[ "$cuda_version" == "11.6" || "$cuda_version" == "11.7" ]]; then tar -xf $CUDNN_TARBALL_NAME -C ./ CUDNN_EXTRACTED_TARBALL_NAME="${CUDNN_TARBALL_NAME::-7}" - sudo cp -r $CUDNN_EXTRACTED_TARBALL_NAME/include/* /usr/local/include - sudo cp -r $CUDNN_EXTRACTED_TARBALL_NAME/lib/* /usr/local/lib - rm -rf $CUDNN_EXTRACTED_TARBALL_NAME + sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME/include/*" "/usr/local/include" + sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME/lib/*" "/usr/local/lib" + rm -rf "$CUDNN_EXTRACTED_TARBALL_NAME" else sudo tar -xzf $CUDNN_TARBALL_NAME -C /usr/local fi From 8aa9741b76a629334194adda8fc82f7447a91731 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 21:24:54 -0500 Subject: [PATCH 25/26] more shellcheck --- .github/workflows/helpers/download_nccl.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/helpers/download_nccl.sh b/.github/workflows/helpers/download_nccl.sh index 5df1223..f83d63b 100755 --- a/.github/workflows/helpers/download_nccl.sh +++ b/.github/workflows/helpers/download_nccl.sh @@ -57,11 +57,11 @@ for debfile in *.deb; do [ -f "${files_to_move[0]}" ] && mv ./usr/lib/x86_64-linux-gnu/*.so.* ./lib/ symlinks_to_move="$(find ./usr/lib/x86_64-linux-gnu/ -type l )" for s in $symlinks_to_move; do - fname="$(basename $s)" - ln -s "$(readlink $s )" ./lib/$fname + fname="$(basename "$s")" + ln -s "$(readlink "$s" )" "./lib/$fname" done rm -rf usr cd ../../ done -rm -rf *.deb +rm -rf ./*.deb From 794eb155b6f89120d7cf376a07621ae66bed731c Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 22:37:33 -0500 Subject: [PATCH 26/26] fix --- .github/workflows/helpers/install_cudnn.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/helpers/install_cudnn.sh b/.github/workflows/helpers/install_cudnn.sh index d777454..4f1e6fc 100755 --- a/.github/workflows/helpers/install_cudnn.sh +++ b/.github/workflows/helpers/install_cudnn.sh @@ -46,8 +46,8 @@ wget -c -q $CUDNN_LINK if [[ "$cuda_version" == "11.6" || "$cuda_version" == "11.7" ]]; then tar -xf $CUDNN_TARBALL_NAME -C ./ CUDNN_EXTRACTED_TARBALL_NAME="${CUDNN_TARBALL_NAME::-7}" - sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME/include/*" "/usr/local/include" - sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME/lib/*" "/usr/local/lib" + sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME"/include/* "/usr/local/include" + sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME"/lib/* "/usr/local/lib" rm -rf "$CUDNN_EXTRACTED_TARBALL_NAME" else sudo tar -xzf $CUDNN_TARBALL_NAME -C /usr/local