Reenable building kineto, add CUPTI dep #305

Merged: 19 commits, Jan 14, 2025
1 change: 1 addition & 0 deletions README.md


1 change: 1 addition & 0 deletions conda-forge.yml
@@ -21,3 +21,4 @@ provider:
linux_64: github_actions
linux_aarch64: azure
test: native_and_emulated

1 change: 1 addition & 0 deletions recipe/README.md
@@ -80,6 +80,7 @@ of 2024-11-28:
| Package | Upstream | Recipe | Conda-forge | Source |
|-----------|----------------|--------|-------------|-------------------------------------|
| cuda | 11.8/12.1/12.4 | 12.6 | 12.6 | `.ci/docker/build.sh` |
| cuda-cupti| 12.4.127 | | 12.6.80 | `.github/scripts/generate_binary_build_matrix.py` |
| cusparselt| 0.6.2.3+others | | 0.6.3.2 | `.ci/docker/common/install_cuda.sh` |
| libcudss | 0.3.0.9 | | 0.4.0.2 | `.ci/docker/common/install_cudss.sh`|
| magma | 2.6.1 | | 2.8.0 | `.ci/docker/common/install_magma.sh` |
8 changes: 2 additions & 6 deletions recipe/build.sh
@@ -36,12 +36,6 @@ fi
# This is not correctly found for linux-aarch64 since pytorch 2.0.0 for some reason
export _GLIBCXX_USE_CXX11_ABI=1

# KINETO seems to require CUPTI and will look quite hard for it.
# CUPTI seems to cause trouble when users install a version of
# cudatoolkit different than the one specified at compile time.
# https://github.com/conda-forge/pytorch-cpu-feedstock/issues/135
export USE_KINETO=OFF

if [[ "$target_platform" == "osx-64" ]]; then
export CXXFLAGS="$CXXFLAGS -DTARGET_OS_OSX=1"
export CFLAGS="$CFLAGS -DTARGET_OS_OSX=1"
@@ -155,6 +149,8 @@ elif [[ ${cuda_compiler_version} != "None" ]]; then
if [[ "${target_platform}" != "${build_platform}" ]]; then
export CUDA_TOOLKIT_ROOT=${PREFIX}
fi
# for CUPTI
export CUDA_TOOLKIT_ROOT_DIR=${PREFIX}
case ${target_platform} in
linux-64)
export CUDAToolkit_TARGET_DIR=${PREFIX}/targets/x86_64-linux
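The build.sh change above points the build at CUPTI by exporting `CUDA_TOOLKIT_ROOT_DIR=${PREFIX}`, so that Kineto's CMake search can find the library. As a rough illustration of that search, here is a minimal sketch that looks for `libcupti` under the two layouts the diff deals with (the flat conda `lib/` directory and the CUDA `targets/<arch>/lib` directory). The helper `find_cupti` and the searched patterns are assumptions for illustration, not pytorch's or Kineto's actual CMake logic.

```python
# Hypothetical sketch: check whether libcupti is present under a prefix,
# mirroring the two directory layouts mentioned in the build.sh diff.
# find_cupti() and its search patterns are assumptions, not Kineto's code.
import os
from pathlib import Path


def find_cupti(prefix: str) -> list[str]:
    """Return sorted paths matching libcupti* under common CUDA layouts."""
    root = Path(prefix)
    patterns = [
        "lib/libcupti*",            # flat conda-style layout ($PREFIX/lib)
        "targets/*/lib/libcupti*",  # CUDA targets layout (e.g. x86_64-linux)
    ]
    hits: list[str] = []
    for pat in patterns:
        hits.extend(str(p) for p in root.glob(pat))
    return sorted(hits)


if __name__ == "__main__":
    # Fall back through the same variables the build script exports.
    prefix = os.environ.get("CUDA_TOOLKIT_ROOT_DIR",
                            os.environ.get("PREFIX", "/opt/conda"))
    found = find_cupti(prefix)
    print(found or f"no libcupti under {prefix}")
```

If the list comes back empty at build time, Kineto would fall back to (or fail without) CUPTI support, which is why the diff both adds the `cuda-cupti-dev` dependency and exports the root directory explicitly.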
12 changes: 9 additions & 3 deletions recipe/meta.yaml
@@ -103,6 +103,7 @@ requirements:
{% if cuda_compiler_version != "None" %}
- cuda-driver-dev
- cuda-cudart-dev
- cuda-cupti-dev
- cuda-nvrtc-dev
- cuda-nvtx-dev
- cuda-nvml-dev
@@ -152,6 +153,10 @@ requirements:
- pytorch-cpu ==99999999 # [cuda_compiler_version != "None"]
- pytorch {{ version }} cuda{{ cuda_compiler_version | replace('.', '') }}_{{ blas_impl }}_*_{{ PKG_BUILDNUM }} # [cuda_compiler_version != "None"]
- pytorch {{ version }} cpu_{{ blas_impl }}_*_{{ PKG_BUILDNUM }} # [cuda_compiler_version == "None"]
# if using OpenBLAS, ensure that a version compatible with OpenMP is used
# otherwise, we get the following warnings:
# OpenBLAS Warning : Detect OpenMP Loop and this application may hang. Please rebuild the library with USE_OPENMP=1 option.
- openblas * openmp_* # [blas_impl != "mkl"]

test:
commands:
@@ -206,6 +211,7 @@ outputs:
{% if cuda_compiler_version != "None" %}
- cuda-driver-dev
- cuda-cudart-dev
- cuda-cupti-dev
- cuda-nvrtc-dev
- cuda-nvtx-dev
- cuda-nvml-dev
@@ -287,7 +293,7 @@ outputs:
- pytest-rerunfailures
- pytest-xdist
imports:
- torch # [not (aarch64 and cuda_compiler_version != "None")]
- torch
source_files:
- test
# tools/ is needed to optimise test run
@@ -297,7 +303,7 @@ outputs:
# Run pip check so as to ensure that all pytorch packages are installed
# https://github.com/conda-forge/pytorch-cpu-feedstock/issues/24
- pip check
- python -c "import torch; print(torch.__version__)" # [not (aarch64 and cuda_compiler_version != "None")]
- python -c "import torch; print(torch.__version__)"
- python -c "import torch; assert torch.backends.mkldnn.m.is_available()" # [x86 and cuda_compiler_version == "None"]
- python -c "import torch; torch.tensor(1).to('cpu').numpy(); print('numpy support enabled!!!')"
# At conda-forge, we target versions of OSX that are too old for MPS support
@@ -342,7 +348,7 @@ outputs:
# the whole test suite takes forever, but we should get a good enough coverage
# for potential packaging problems by running a fixed subset
- export OMP_NUM_THREADS=4 # [unix]
- python -m pytest -n auto {{ tests }} -k "not ({{ skips }})" --durations=50 # [not (aarch64 and cuda_compiler_version != "None")]
- python -m pytest -n 2 {{ tests }} -k "not ({{ skips }})" --durations=50 # [not (aarch64 and cuda_compiler_version != "None")]
With OMP_NUM_THREADS=4 and -n 4, we were likely oversubscribing the 8 vCPUs available. I backed out my commit that also reduced OMP_NUM_THREADS; if we're lucky, we can get by with 2x4=8.
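The arithmetic behind that comment can be sketched as follows: with pytest-xdist running N workers and each worker's OpenMP pool using OMP_NUM_THREADS threads, total thread demand is N * OMP_NUM_THREADS, and keeping that at or below the available vCPUs avoids oversubscription. The 8-vCPU figure is the one cited in the comment; `total_threads` is just an illustrative helper.

```python
# Sketch of the oversubscription arithmetic from the review comment above.
def total_threads(xdist_workers: int, omp_threads: int) -> int:
    """Worst-case concurrent threads: xdist workers x OpenMP threads each."""
    return xdist_workers * omp_threads


VCPUS = 8  # vCPUs on the CI runner cited in the comment

# Old settings: -n 4 with OMP_NUM_THREADS=4 -> 16 threads on 8 vCPUs.
assert total_threads(4, 4) > VCPUS
# New settings: -n 2 with OMP_NUM_THREADS=4 -> 2x4 = 8, exactly fitting.
assert total_threads(2, 4) == VCPUS
```

This is why the diff changes `-n auto` to `-n 2` while leaving OMP_NUM_THREADS=4 in place.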


# 2021/08/01, hmaarrfk
# While this seems like a roundabout way of defining the package name