From e4c5d5a2650eb59bf75a81a6c7c10fea42610233 Mon Sep 17 00:00:00 2001 From: Michael Norris Date: Tue, 28 Jan 2025 10:44:02 -0800 Subject: [PATCH] Debug cuVS 11.8.0 failure (#4149) Summary: Pull Request resolved: https://github.com/facebookresearch/faiss/pull/4149 Differential Revision: D68781021 --- .github/actions/build_conda/action.yml | 2 +- .github/workflows/build-pull-request.yml | 139 ++--------------------- conda/faiss-gpu-cuvs/meta.yaml | 1 + 3 files changed, 12 insertions(+), 130 deletions(-) diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml index d3f02827d0..6395f1185b 100644 --- a/.github/actions/build_conda/action.yml +++ b/.github/actions/build_conda/action.yml @@ -90,7 +90,7 @@ runs: working-directory: conda run: | conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ - -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia + -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge - name: Conda build (GPU w/ cuVS) w/ anaconda upload if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs != '' shell: ${{ steps.choose_shell.outputs.shell }} diff --git a/.github/workflows/build-pull-request.yml b/.github/workflows/build-pull-request.yml index bc0d2d625a..f0deb57f26 100644 --- a/.github/workflows/build-pull-request.yml +++ b/.github/workflows/build-pull-request.yml @@ -30,140 +30,21 @@ jobs: git --no-pager diff --color exit 1 fi - linux-x86_64-cmake: - name: Linux x86_64 (cmake) - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Build and Test (cmake) - uses: ./.github/actions/build_cmake - linux-x86_64-AVX2-cmake: - name: Linux x86_64 AVX2 (cmake) - needs: linux-x86_64-cmake - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Build and Test (cmake) - uses: ./.github/actions/build_cmake - with: - opt_level: avx2 - linux-x86_64-AVX512-cmake: - name: Linux x86_64 AVX512 (cmake) - needs: linux-x86_64-cmake - runs-on: faiss-aws-m7i.large - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Build and Test (cmake) - uses: ./.github/actions/build_cmake - with: - opt_level: avx512 - linux-x86_64-AVX512_SPR-cmake: - name: Linux x86_64 AVX512_SPR (cmake) - needs: linux-x86_64-cmake - runs-on: faiss-aws-m7i.large - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Build and Test (cmake) - uses: ./.github/actions/build_cmake - with: - opt_level: avx512_spr - linux-x86_64-GPU-cmake: - name: Linux x86_64 GPU (cmake) - needs: linux-x86_64-cmake + linux-x86_64-GPU-CUVS-CUDA11-8-0-nightly: + name: Linux x86_64 GPU w/ cuVS nightlies (CUDA 11.8.0) runs-on: 4-core-ubuntu-gpu-t4 - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Build and Test (cmake) - uses: ./.github/actions/build_cmake - with: - gpu: ON - linux-x86_64-GPU-w-CUVS-cmake: - name: Linux x86_64 GPU w/ cuVS (cmake) - needs: linux-x86_64-cmake - runs-on: 4-core-ubuntu-gpu-t4 - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Build and Test (cmake) - uses: ./.github/actions/build_cmake - with: - gpu: ON - cuvs: ON - linux-x86_64-GPU-w-ROCm-cmake: - name: Linux x86_64 GPU w/ ROCm (cmake) - needs: linux-x86_64-cmake - runs-on: faiss-amd-MI200 - container: - image: ubuntu:22.04 - options: --device=/dev/kfd --device=/dev/dri --ipc=host --shm-size 16G --group-add video --cap-add=SYS_PTRACE --cap-add=SYS_ADMIN - steps: - - name: Container setup - run: | - if [ -f /.dockerenv ]; then - apt-get update && apt-get install -y sudo && apt-get install -y git - git config --global --add safe.directory '*' - else - echo 'Skipping. Current job is not running inside a container.' - fi - - name: Checkout - uses: actions/checkout@v4 - - name: Build and Test (cmake) - uses: ./.github/actions/build_cmake - with: - gpu: ON - rocm: ON - linux-arm64-SVE-cmake: - name: Linux arm64 SVE (cmake) - needs: linux-x86_64-cmake - runs-on: faiss-aws-r8g.large - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Build and Test (cmake) - uses: ./.github/actions/build_cmake - with: - opt_level: sve - env: - # Context: https://github.com/facebookresearch/faiss/wiki/Troubleshooting#surprising-faiss-openmp-and-openblas-interaction - OPENBLAS_NUM_THREADS: '1' - linux-x86_64-conda: - name: Linux x86_64 (conda) - needs: linux-x86_64-cmake - runs-on: ubuntu-latest + env: + CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" steps: - name: Checkout uses: actions/checkout@v4 with: fetch-depth: 0 fetch-tags: true - - name: Build and Package (conda) - uses: ./.github/actions/build_conda - windows-x86_64-conda: - name: Windows x86_64 (conda) - needs: linux-x86_64-cmake - runs-on: windows-2019 - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - fetch-tags: true - - name: Build and Package (conda) - uses: ./.github/actions/build_conda - linux-arm64-conda: - name: Linux arm64 (conda) - needs: linux-x86_64-cmake - runs-on: 2-core-ubuntu-arm - steps: - - name: Checkout - uses: actions/checkout@v4 + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} with: - fetch-depth: 0 - fetch-tags: true - - name: Build and Package (conda) - uses: ./.github/actions/build_conda + label: nightly + cuvs: "ON" + cuda: "11.8.0" diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml index a51674b609..86151982ba 100644 --- a/conda/faiss-gpu-cuvs/meta.yaml +++ b/conda/faiss-gpu-cuvs/meta.yaml @@ -54,6 +54,7 @@ outputs: - mkl =2023 # [x86_64] - mkl-devel =2023 # [x86_64] - cuda-toolkit {{ cudatoolkit }} + - cuda-cudart {{ cuda_constraints }} host: - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] - mkl =2023 # [x86_64]