From 85b95dc102f89effd3d6d29827912e3b1513eb39 Mon Sep 17 00:00:00 2001
From: Christopher Tate
Date: Fri, 31 Jan 2025 07:38:58 -0700
Subject: [PATCH] The NVIDIA GPU Operator requires GPU tolerations

The NVIDIA DaemonSets are unable to run their pods without specifying
the tolerations given to the GPU nodes in the AcceleratorProfiles.

Fixes nerc-project/operations#913
---
Reviewer note (ignored by `git am`): the hunk below adds
spec.daemonsets.tolerations with two NoSchedule tolerations on the
nvidia.com/gpu.product key, one per GPU product value
(NVIDIA-A100-SXM4-40GB and Tesla-V100-PCIE-32GB). Both use
`operator: Equal`, so only those exact product values are tolerated.

 .../clusterpolicy/clusterpolicy_patch.yaml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/nvidia-gpu-operator/overlays/nerc-ocp-test/clusterpolicy/clusterpolicy_patch.yaml b/nvidia-gpu-operator/overlays/nerc-ocp-test/clusterpolicy/clusterpolicy_patch.yaml
index dbfdcc08..3e9ce989 100644
--- a/nvidia-gpu-operator/overlays/nerc-ocp-test/clusterpolicy/clusterpolicy_patch.yaml
+++ b/nvidia-gpu-operator/overlays/nerc-ocp-test/clusterpolicy/clusterpolicy_patch.yaml
@@ -9,3 +9,13 @@ spec:
     config:
       default: all-disabled
       name: test-mig-parted-config
+  daemonsets:
+    tolerations:
+    - effect: NoSchedule
+      key: nvidia.com/gpu.product
+      operator: Equal
+      value: NVIDIA-A100-SXM4-40GB
+    - effect: NoSchedule
+      key: nvidia.com/gpu.product
+      operator: Equal
+      value: Tesla-V100-PCIE-32GB