Perlmutter (NERSC): Finish Affinity Control (#3495)
Slurm on Perlmutter is not yet configured to do proper process/GPU affinity
control itself, so we handle it ourselves in the batch script.

Co-authored-by: Kevin Gott <kngott@lbl.gov>
ax3l and kngott authored Nov 2, 2022
1 parent 9bc04e1 commit 51041aa
Showing 2 changed files with 15 additions and 17 deletions.
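
For context on what "handle it ourselves" means: the batch script below pins each MPI task to its CPU cores with srun --cpu-bind=cores and selects one GPU per task by hand, using the fact noted in the script (cf. nvidia-smi topo -m) that the CUDA device order on a Perlmutter GPU node is the inverse of the local task order, hence CUDA_VISIBLE_DEVICES=$((3-SLURM_LOCALID)). A minimal way to inspect the resulting mapping from an interactive allocation; the salloc line is an assumption modeled on the getNode alias further below, and <proj> must end on _g:

    salloc -N 1 --ntasks-per-node=4 -t 0:10:00 -q interactive -C gpu -G 4 -A <proj>
    srun --cpu-bind=cores bash -c '
        # same inverse mapping as in perlmutter.sbatch below
        export CUDA_VISIBLE_DEVICES=$((3-SLURM_LOCALID))
        echo "local task ${SLURM_LOCALID}: GPU ${CUDA_VISIBLE_DEVICES}, cores $(taskset -cp $$)"'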
29 changes: 15 additions & 14 deletions Tools/machines/perlmutter-nersc/perlmutter.sbatch
@@ -6,35 +6,36 @@
#
# License: BSD-3-Clause-LBNL

#SBATCH -t 01:00:00
#SBATCH -N 4
#SBATCH -t 00:10:00
#SBATCH -N 2
#SBATCH -J WarpX
# note: <proj> must end on _g
#SBATCH -A <proj>
#SBATCH -q regular
#SBATCH -C gpu
#SBATCH --exclusive
#SBATCH --ntasks-per-gpu=1
#SBATCH --gpu-bind=none
#SBATCH --gpus-per-node=4
#SBATCH -o WarpX.o%j
#SBATCH -e WarpX.e%j

# GPU-aware MPI
export MPICH_GPU_SUPPORT_ENABLED=1
# executable & inputs file or python interpreter & PICMI script here
EXE=./warpx
INPUTS=inputs_small

# pin to closest NIC to GPU
export MPICH_OFI_NIC_POLICY=GPU

# threads for OpenMP and threaded compressors per MPI rank
export SRUN_CPUS_PER_TASK=32

EXE=./warpx
#EXE=../WarpX/build/bin/warpx.3d.MPI.CUDA.DP.OPMD.QED
#EXE=./main3d.gnu.TPROF.MPI.CUDA.ex
INPUTS=inputs_small
# depends on https://github.com/ECP-WarpX/WarpX/issues/2009
#GPU_AWARE_MPI="amrex.the_arena_is_managed=1 amrex.use_gpu_aware_mpi=1"
GPU_AWARE_MPI=""

# CUDA visible devices are ordered inverse to local task IDs
srun /bin/bash -l -c " \
export CUDA_VISIBLE_DEVICES=$((3-SLURM_LOCALID));
${EXE} ${INPUTS} \
amrex.the_arena_is_managed=0 \
amrex.use_gpu_aware_mpi=1" \
# Reference: nvidia-smi topo -m
srun --cpu-bind=cores bash -c "
export CUDA_VISIBLE_DEVICES=\$((3-SLURM_LOCALID));
${EXE} ${INPUTS} ${GPU_AWARE_MPI}" \
> output.txt
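
Submission is the usual sbatch workflow; a hypothetical session, assuming a copy of the script above is saved as perlmutter.sbatch in the run directory next to inputs_small:

    sbatch perlmutter.sbatch   # project, QOS and node count come from the #SBATCH headers
    squeue -u $USER            # watch the job state
    tail -f output.txt         # application output; Slurm's own log lands in WarpX.o<jobid> / WarpX.e<jobid>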
3 changes: 0 additions & 3 deletions Tools/machines/perlmutter-nersc/perlmutter_warpx.profile.example
@@ -34,9 +34,6 @@ alias getNode="salloc -N 1 --ntasks-per-node=4 -t 1:00:00 -q interactive -C gpu
# usage: runNode <command>
alias runNode="srun -N 1 --ntasks-per-node=4 -t 0:30:00 -q interactive -C gpu --gpu-bind=single:1 -c 32 -G 4 -A $proj"
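
A hypothetical quick-test workflow with these aliases (executable and inputs names are placeholders taken from the batch script above):

    getNode                        # salloc one interactive GPU node: 4 tasks, 4 GPUs, 1 h
    runNode ./warpx inputs_small   # usage: runNode <command>; 4 ranks, one GPU and 32 cores each, up to 30 min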

# GPU-aware MPI
export MPICH_GPU_SUPPORT_ENABLED=1

# necessary to use CUDA-Aware MPI and run a job
export CRAY_ACCEL_TARGET=nvidia80
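
For reference (general Cray MPICH behavior, not something this diff states): CRAY_ACCEL_TARGET=nvidia80 is read by the Cray compiler wrappers at build time, e.g. to link the GPU transport layer needed for CUDA-aware MPI, whereas MPICH_GPU_SUPPORT_ENABLED=1, removed from this profile, is the run-time switch. A hypothetical sanity check after sourcing the profile (the second variable is only set where a job script exports it):

    echo "CRAY_ACCEL_TARGET=${CRAY_ACCEL_TARGET:-unset}"
    echo "MPICH_GPU_SUPPORT_ENABLED=${MPICH_GPU_SUPPORT_ENABLED:-unset}"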

