diff --git a/03-H_Multi_GPU_Parallelization/.master/Makefile b/03-H_Multi_GPU_Parallelization/.master/Makefile.in similarity index 81% rename from 03-H_Multi_GPU_Parallelization/.master/Makefile rename to 03-H_Multi_GPU_Parallelization/.master/Makefile.in index 3af03d1..e15d85c 100644 --- a/03-H_Multi_GPU_Parallelization/.master/Makefile +++ b/03-H_Multi_GPU_Parallelization/.master/Makefile.in @@ -1,4 +1,6 @@ -# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 03H-@@TASKSOL@@ +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 4 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --gres=gpu:4 --ntasks-per-node 4 @@ -33,10 +35,10 @@ clean: rm -f jacobi jacobi.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/03-H_Multi_GPU_Parallelization/.master/copy.mk b/03-H_Multi_GPU_Parallelization/.master/copy.mk index bf1a86e..895d460 100755 --- a/03-H_Multi_GPU_Parallelization/.master/copy.mk +++ b/03-H_Multi_GPU_Parallelization/.master/copy.mk @@ -1,13 +1,13 @@ #!/usr/bin/make -f -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. TASKDIR = ../tasks/ SOLUTIONDIR = ../solutions/ -OPT_SOLUTIONDIR = ../solutions/advanced +OPT_SOLUTIONDIR = ../solutions/advanced/ IYPNB_TEMPLATE = ../../.template.json PROCESSFILES = jacobi.cu -COPYFILES = Makefile Instructions.ipynb Instructions.md +COPYFILES = Instructions.ipynb Instructions.md TASKPROCCESFILES = $(addprefix $(TASKDIR)/,$(PROCESSFILES)) @@ -16,12 +16,19 @@ SOLUTIONPROCCESFILES = $(addprefix $(SOLUTIONDIR)/,$(PROCESSFILES)) OPT_SOLUTIONPROCCESFILES = $(addprefix $(OPT_SOLUTIONDIR)/,$(PROCESSFILES)) SOLUTIONCOPYFILES = $(addprefix $(SOLUTIONDIR)/,$(COPYFILES)) OPT_SOLUTIONCOPYFILES = $(addprefix $(OPT_SOLUTIONDIR)/,$(COPYFILES)) +MAKEFILES = $(addsuffix /Makefile,$(TASKDIR) $(SOLUTIONDIR) $(OPT_SOLUTIONDIR)) .PHONY: all task all: task -task: ${TASKPROCCESFILES} ${TASKCOPYFILES} ${SOLUTIONPROCCESFILES} ${SOLUTIONCOPYFILES} ${OPT_SOLUTIONPROCCESFILES} ${OPT_SOLUTIONCOPYFILES} - +task: ${TASKPROCCESFILES} ${TASKCOPYFILES} ${SOLUTIONPROCCESFILES} ${SOLUTIONCOPYFILES} ${OPT_SOLUTIONPROCCESFILES} ${OPT_SOLUTIONCOPYFILES} ${MAKEFILES} + +$(TASKDIR)/Makefile: Makefile.in + sed -e 's/@@TASKSOL@@/task/' $< > $@ +$(SOLUTIONDIR)/Makefile: Makefile.in + sed -e 's/@@TASKSOL@@/sol/' $< > $@ +$(OPT_SOLUTIONDIR)/Makefile: Makefile.in + sed -e 's/@@TASKSOL@@/solopt/' $< > $@ ${TASKPROCCESFILES}: $(PROCESSFILES) mkdir -p $(TASKDIR)/ diff --git a/03-H_Multi_GPU_Parallelization/solutions/Makefile b/03-H_Multi_GPU_Parallelization/solutions/Makefile index 3af03d1..92f033c 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/Makefile +++ b/03-H_Multi_GPU_Parallelization/solutions/Makefile @@ -1,4 +1,6 @@ -# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 03H-sol +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 4 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --gres=gpu:4 --ntasks-per-node 4 @@ -33,10 +35,10 @@ clean: rm -f jacobi jacobi.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile index 3af03d1..a6399eb 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile +++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile @@ -1,4 +1,6 @@ -# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 03H-solopt +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 4 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --gres=gpu:4 --ntasks-per-node 4 @@ -33,10 +35,10 @@ clean: rm -f jacobi jacobi.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/03-H_Multi_GPU_Parallelization/tasks/Makefile b/03-H_Multi_GPU_Parallelization/tasks/Makefile index 3af03d1..d293686 100644 --- a/03-H_Multi_GPU_Parallelization/tasks/Makefile +++ b/03-H_Multi_GPU_Parallelization/tasks/Makefile @@ -1,4 +1,6 @@ -# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 03H-task +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 4 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --gres=gpu:4 --ntasks-per-node 4 @@ -33,10 +35,10 @@ clean: rm -f jacobi jacobi.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.ipynb b/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.ipynb index db1ff6d..ecb75fe 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.ipynb +++ b/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.ipynb @@ -44,7 +44,9 @@ " target (`make profile`)\n", "3. Open the recorded profile in the GUI\n", " - Either: Install Nsight Systems locally, and transfer the\n", - " .qdrep/.nsys-rep file\n", + " .nsys-rep file.\n", + " - *Note*: Right-click in file-browser, choose “Download” from\n", + " context menu\n", " - Or: By running Xpra in your browser: In Jupyter, select “File \\>\n", " New Launcher” and “Xpra Desktop”, which will open in a new tab.\n", " Don’t forget to source the environment in your `xterm`.\n", @@ -84,7 +86,7 @@ "- Destroy the additional cuda streams and events before ending the\n", " application" ], - "id": "7563d35d-a670-47af-acef-44cee0450930" + "id": "21f77d33-b675-4746-9241-24837f172b29" } ], "nbformat": 4, diff --git a/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.md b/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.md index f8f3dcb..80cd443 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.md +++ b/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.md @@ -31,7 +31,8 @@ Use the Nsight System profiler to profile the starting point version non-Overlap 1. Start by compiling and running the application with `make run` 1. Record an Nsight Systems profile, using the appropriate Makefile target (`make profile`) 1. Open the recorded profile in the GUI - - Either: Install Nsight Systems locally, and transfer the .qdrep/.nsys-rep file + - Either: Install Nsight Systems locally, and transfer the .nsys-rep file. + - *Note*: Right-click in file-browser, choose "Download" from context menu - Or: By running Xpra in your browser: In Jupyter, select "File > New Launcher" and "Xpra Desktop", which will open in a new tab. Don't forget to source the environment in your `xterm`. 1. Familiarize yourself with the different rows and the traces they represent. - See if you can correlate a CUDA API kernel launch call and the resulting kernel execution on the device diff --git a/06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile b/06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile.in similarity index 81% rename from 06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile rename to 06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile.in index 7b83697..b7ce7a5 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile +++ b/06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile.in @@ -1,4 +1,6 @@ -# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 06H-@@TASKSOL@@ +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 1 NVCC=nvcc MPICXX=mpicxx @@ -34,10 +36,10 @@ clean: rm -f jacobi jacobi_kernels.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/06-H_Overlap_Communication_and_Computation_MPI/.master/copy.mk b/06-H_Overlap_Communication_and_Computation_MPI/.master/copy.mk index b621217..4d8511a 100755 --- a/06-H_Overlap_Communication_and_Computation_MPI/.master/copy.mk +++ b/06-H_Overlap_Communication_and_Computation_MPI/.master/copy.mk @@ -1,23 +1,28 @@ #!/usr/bin/make -f -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. TASKDIR = ../tasks/ SOLUTIONDIR = ../solutions/ IYPNB_TEMPLATE = ../../.template.json PROCESSFILES = jacobi.cpp -COPYFILES = Makefile Instructions.ipynb Instructions.md jacobi_kernels.cu +COPYFILES = Instructions.ipynb Instructions.md jacobi_kernels.cu TASKPROCCESFILES = $(addprefix $(TASKDIR)/,$(PROCESSFILES)) TASKCOPYFILES = $(addprefix $(TASKDIR)/,$(COPYFILES)) SOLUTIONPROCCESFILES = $(addprefix $(SOLUTIONDIR)/,$(PROCESSFILES)) SOLUTIONCOPYFILES = $(addprefix $(SOLUTIONDIR)/,$(COPYFILES)) +MAKEFILES = $(addsuffix /Makefile,$(TASKDIR) $(SOLUTIONDIR)) .PHONY: all task clean all: task -task: ${TASKPROCCESFILES} ${TASKCOPYFILES} ${SOLUTIONPROCCESFILES} ${SOLUTIONCOPYFILES} +task: ${TASKPROCCESFILES} ${TASKCOPYFILES} ${SOLUTIONPROCCESFILES} ${SOLUTIONCOPYFILES} ${MAKEFILES} +$(TASKDIR)/Makefile: Makefile.in + sed -e 's/@@TASKSOL@@/task/' $< > $@ +$(SOLUTIONDIR)/Makefile: Makefile.in + sed -e 's/@@TASKSOL@@/sol/' $< > $@ ${TASKPROCCESFILES}: $(PROCESSFILES) mkdir -p $(TASKDIR)/ diff --git a/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.ipynb b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.ipynb index db1ff6d..ecb75fe 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.ipynb +++ b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.ipynb @@ -44,7 +44,9 @@ " target (`make profile`)\n", "3. Open the recorded profile in the GUI\n", " - Either: Install Nsight Systems locally, and transfer the\n", - " .qdrep/.nsys-rep file\n", + " .nsys-rep file.\n", + " - *Note*: Right-click in file-browser, choose “Download” from\n", + " context menu\n", " - Or: By running Xpra in your browser: In Jupyter, select “File \\>\n", " New Launcher” and “Xpra Desktop”, which will open in a new tab.\n", " Don’t forget to source the environment in your `xterm`.\n", @@ -84,7 +86,7 @@ "- Destroy the additional cuda streams and events before ending the\n", " application" ], - "id": "7563d35d-a670-47af-acef-44cee0450930" + "id": "21f77d33-b675-4746-9241-24837f172b29" } ], "nbformat": 4, diff --git a/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.md b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.md index f8f3dcb..80cd443 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.md +++ b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.md @@ -31,7 +31,8 @@ Use the Nsight System profiler to profile the starting point version non-Overlap 1. Start by compiling and running the application with `make run` 1. Record an Nsight Systems profile, using the appropriate Makefile target (`make profile`) 1. Open the recorded profile in the GUI - - Either: Install Nsight Systems locally, and transfer the .qdrep/.nsys-rep file + - Either: Install Nsight Systems locally, and transfer the .nsys-rep file. + - *Note*: Right-click in file-browser, choose "Download" from context menu - Or: By running Xpra in your browser: In Jupyter, select "File > New Launcher" and "Xpra Desktop", which will open in a new tab. Don't forget to source the environment in your `xterm`. 1. Familiarize yourself with the different rows and the traces they represent. - See if you can correlate a CUDA API kernel launch call and the resulting kernel execution on the device diff --git a/06-H_Overlap_Communication_and_Computation_MPI/solutions/Makefile b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Makefile index 7b83697..a8ee71a 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/solutions/Makefile +++ b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Makefile @@ -1,4 +1,6 @@ -# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 06H-sol +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 1 NVCC=nvcc MPICXX=mpicxx @@ -34,10 +36,10 @@ clean: rm -f jacobi jacobi_kernels.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.ipynb b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.ipynb index db1ff6d..ecb75fe 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.ipynb +++ b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.ipynb @@ -44,7 +44,9 @@ " target (`make profile`)\n", "3. Open the recorded profile in the GUI\n", " - Either: Install Nsight Systems locally, and transfer the\n", - " .qdrep/.nsys-rep file\n", + " .nsys-rep file.\n", + " - *Note*: Right-click in file-browser, choose “Download” from\n", + " context menu\n", " - Or: By running Xpra in your browser: In Jupyter, select “File \\>\n", " New Launcher” and “Xpra Desktop”, which will open in a new tab.\n", " Don’t forget to source the environment in your `xterm`.\n", @@ -84,7 +86,7 @@ "- Destroy the additional cuda streams and events before ending the\n", " application" ], - "id": "7563d35d-a670-47af-acef-44cee0450930" + "id": "21f77d33-b675-4746-9241-24837f172b29" } ], "nbformat": 4, diff --git a/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.md b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.md index f8f3dcb..80cd443 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.md +++ b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.md @@ -31,7 +31,8 @@ Use the Nsight System profiler to profile the starting point version non-Overlap 1. Start by compiling and running the application with `make run` 1. Record an Nsight Systems profile, using the appropriate Makefile target (`make profile`) 1. Open the recorded profile in the GUI - - Either: Install Nsight Systems locally, and transfer the .qdrep/.nsys-rep file + - Either: Install Nsight Systems locally, and transfer the .nsys-rep file. + - *Note*: Right-click in file-browser, choose "Download" from context menu - Or: By running Xpra in your browser: In Jupyter, select "File > New Launcher" and "Xpra Desktop", which will open in a new tab. Don't forget to source the environment in your `xterm`. 1. Familiarize yourself with the different rows and the traces they represent. - See if you can correlate a CUDA API kernel launch call and the resulting kernel execution on the device diff --git a/06-H_Overlap_Communication_and_Computation_MPI/tasks/Makefile b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Makefile index 7b83697..36da1bf 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/tasks/Makefile +++ b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Makefile @@ -1,4 +1,6 @@ -# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 06H-task +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 1 NVCC=nvcc MPICXX=mpicxx @@ -34,10 +36,10 @@ clean: rm -f jacobi jacobi_kernels.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile b/08-H_NCCL_NVSHMEM/.master/NCCL/Makefile.in similarity index 81% rename from 10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile rename to 08-H_NCCL_NVSHMEM/.master/NCCL/Makefile.in index b5b9a9b..d4d3e55 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile +++ b/08-H_NCCL_NVSHMEM/.master/NCCL/Makefile.in @@ -1,4 +1,6 @@ -# Copyright (c) 2021,2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 08H-NCCL-@@TASKSOL@@ +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 1 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4 @@ -35,10 +37,10 @@ clean: rm -f jacobi jacobi_kernels.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx --cuda-graph-trace=node -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/08-H_NCCL_NVSHMEM/.master/NCCL/copy.mk b/08-H_NCCL_NVSHMEM/.master/NCCL/copy.mk index b13d4df..3b55268 100755 --- a/08-H_NCCL_NVSHMEM/.master/NCCL/copy.mk +++ b/08-H_NCCL_NVSHMEM/.master/NCCL/copy.mk @@ -1,23 +1,28 @@ #!/usr/bin/make -f -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. TASKDIR = ../../tasks/NCCL SOLUTIONDIR = ../../solutions/NCCL IYPNB_TEMPLATE = ../../../.template.json PROCESSFILES = jacobi.cpp -COPYFILES = Makefile jacobi_kernels.cu Instructions.ipynb Instructions.md +COPYFILES = jacobi_kernels.cu Instructions.ipynb Instructions.md TASKPROCCESFILES = $(addprefix $(TASKDIR)/,$(PROCESSFILES)) TASKCOPYFILES = $(addprefix $(TASKDIR)/,$(COPYFILES)) SOLUTIONPROCCESFILES = $(addprefix $(SOLUTIONDIR)/,$(PROCESSFILES)) SOLUTIONCOPYFILES = $(addprefix $(SOLUTIONDIR)/,$(COPYFILES)) +MAKEFILES = $(addsuffix /Makefile,$(TASKDIR) $(SOLUTIONDIR)) .PHONY: all task all: task -task: ${TASKPROCCESFILES} ${TASKCOPYFILES} ${SOLUTIONPROCCESFILES} ${SOLUTIONCOPYFILES} +task: ${TASKPROCCESFILES} ${TASKCOPYFILES} ${SOLUTIONPROCCESFILES} ${SOLUTIONCOPYFILES} ${MAKEFILES} +$(TASKDIR)/Makefile: Makefile.in + sed -e 's/@@TASKSOL@@/task/' $< > $@ +$(SOLUTIONDIR)/Makefile: Makefile.in + sed -e 's/@@TASKSOL@@/sol/' $< > $@ ${TASKPROCCESFILES}: $(PROCESSFILES) mkdir -p $(TASKDIR)/ diff --git a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile.in similarity index 85% rename from 08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile rename to 08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile.in index 9af556c..8aad9e0 100644 --- a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile +++ b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile.in @@ -1,4 +1,6 @@ -# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2017-2024, All rights reserved. +THIS_TASK := 08H-NVSHMEM-@@TASKSOL@@ +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 4 NVCC=nvcc N_D_C_VMM=1 #Enabled to hide warning and errors only found in NVSHMEM/2.5.0 to be fixed in next release @@ -38,10 +40,10 @@ clean: rm -f jacobi jacobi.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/copy.mk b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/copy.mk index a4c37c2..4d62bd5 100755 --- a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/copy.mk +++ b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/copy.mk @@ -1,23 +1,28 @@ #!/usr/bin/make -f -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. TASKDIR = ../../tasks/NVSHMEM SOLUTIONDIR = ../../solutions/NVSHMEM IYPNB_TEMPLATE = ../../../.template.json PROCESSFILES = jacobi.cu -COPYFILES = Makefile Instructions.ipynb Instructions.md +COPYFILES = Instructions.ipynb Instructions.md TASKPROCCESFILES = $(addprefix $(TASKDIR)/,$(PROCESSFILES)) TASKCOPYFILES = $(addprefix $(TASKDIR)/,$(COPYFILES)) SOLUTIONPROCCESFILES = $(addprefix $(SOLUTIONDIR)/,$(PROCESSFILES)) SOLUTIONCOPYFILES = $(addprefix $(SOLUTIONDIR)/,$(COPYFILES)) +MAKEFILES = $(addsuffix /Makefile,$(TASKDIR) $(SOLUTIONDIR)) .PHONY: all task all: task -task: ${TASKPROCCESFILES} ${TASKCOPYFILES} ${SOLUTIONPROCCESFILES} ${SOLUTIONCOPYFILES} +task: ${TASKPROCCESFILES} ${TASKCOPYFILES} ${SOLUTIONPROCCESFILES} ${SOLUTIONCOPYFILES} ${MAKEFILES} +$(TASKDIR)/Makefile: Makefile.in + sed -e 's/@@TASKSOL@@/task/' $< > $@ +$(SOLUTIONDIR)/Makefile: Makefile.in + sed -e 's/@@TASKSOL@@/sol/' $< > $@ ${TASKPROCCESFILES}: $(PROCESSFILES) mkdir -p $(TASKDIR)/ diff --git a/08-H_NCCL_NVSHMEM/solutions/NCCL/Makefile b/08-H_NCCL_NVSHMEM/solutions/NCCL/Makefile index 390dbca..d38957b 100644 --- a/08-H_NCCL_NVSHMEM/solutions/NCCL/Makefile +++ b/08-H_NCCL_NVSHMEM/solutions/NCCL/Makefile @@ -1,4 +1,6 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 08H-NCCL-sol +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 1 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4 @@ -35,10 +37,10 @@ clean: rm -f jacobi jacobi_kernels.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Makefile b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Makefile index 9af556c..823b736 100644 --- a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Makefile +++ b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Makefile @@ -1,4 +1,6 @@ -# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2017-2024, All rights reserved. +THIS_TASK := 08H-NVSHMEM-sol +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 4 NVCC=nvcc N_D_C_VMM=1 #Enabled to hide warning and errors only found in NVSHMEM/2.5.0 to be fixed in next release @@ -38,10 +40,10 @@ clean: rm -f jacobi jacobi.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/08-H_NCCL_NVSHMEM/tasks/NCCL/Makefile b/08-H_NCCL_NVSHMEM/tasks/NCCL/Makefile index 390dbca..2d0fb22 100644 --- a/08-H_NCCL_NVSHMEM/tasks/NCCL/Makefile +++ b/08-H_NCCL_NVSHMEM/tasks/NCCL/Makefile @@ -1,4 +1,6 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 08H-NCCL-task +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 1 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4 @@ -35,10 +37,10 @@ clean: rm -f jacobi jacobi_kernels.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Makefile b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Makefile index 9af556c..7c57e3e 100644 --- a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Makefile +++ b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Makefile @@ -1,4 +1,6 @@ -# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2017-2024, All rights reserved. +THIS_TASK := 08H-NVSHMEM-task +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 4 NVCC=nvcc N_D_C_VMM=1 #Enabled to hide warning and errors only found in NVSHMEM/2.5.0 to be fixed in next release @@ -38,10 +40,10 @@ clean: rm -f jacobi jacobi.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile.in similarity index 83% rename from 10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile rename to 10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile.in index e5aee1c..1917f62 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile.in @@ -1,4 +1,6 @@ -# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 10H-NVSHMEM-@@TASKSOL@@ +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 4 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4 @@ -37,10 +39,10 @@ clean: rm -f jacobi jacobi.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/copy.mk b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/copy.mk index fdc71c0..02996b9 100755 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/copy.mk +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/copy.mk @@ -1,23 +1,28 @@ #!/usr/bin/make -f -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. TASKDIR = ../../tasks/Device-initiated_Communication_with_NVSHMEM SOLUTIONDIR = ../../solutions/Device-initiated_Communication_with_NVSHMEM IYPNB_TEMPLATE = ../../../.template.json PROCESSFILES = jacobi.cu -COPYFILES = Makefile Instructions.ipynb Instructions.md +COPYFILES = Instructions.ipynb Instructions.md TASKPROCCESFILES = $(addprefix $(TASKDIR)/,$(PROCESSFILES)) TASKCOPYFILES = $(addprefix $(TASKDIR)/,$(COPYFILES)) SOLUTIONPROCCESFILES = $(addprefix $(SOLUTIONDIR)/,$(PROCESSFILES)) SOLUTIONCOPYFILES = $(addprefix $(SOLUTIONDIR)/,$(COPYFILES)) +MAKEFILES = $(addsuffix /Makefile,$(TASKDIR) $(SOLUTIONDIR)) .PHONY: all task all: task -task: ${TASKPROCCESFILES} ${TASKCOPYFILES} ${SOLUTIONPROCCESFILES} ${SOLUTIONCOPYFILES} +task: ${TASKPROCCESFILES} ${TASKCOPYFILES} ${SOLUTIONPROCCESFILES} ${SOLUTIONCOPYFILES} ${MAKEFILES} +$(TASKDIR)/Makefile: Makefile.in + sed -e 's/@@TASKSOL@@/task/' $< > $@ +$(SOLUTIONDIR)/Makefile: Makefile.in + sed -e 's/@@TASKSOL@@/sol/' $< > $@ ${TASKPROCCESFILES}: $(PROCESSFILES) mkdir -p $(TASKDIR)/ diff --git a/08-H_NCCL_NVSHMEM/.master/NCCL/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile.in similarity index 80% rename from 08-H_NCCL_NVSHMEM/.master/NCCL/Makefile rename to 10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile.in index 390dbca..7aa2d9c 100644 --- a/08-H_NCCL_NVSHMEM/.master/NCCL/Makefile +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile.in @@ -1,4 +1,6 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 10H-Graphs-@@TASKSOL@@ +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 1 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4 @@ -35,10 +37,10 @@ clean: rm -f jacobi jacobi_kernels.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx --cuda-graph-trace=node -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/copy.mk b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/copy.mk index 88bd204..a8ac791 100755 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/copy.mk +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/copy.mk @@ -1,23 +1,28 @@ #!/usr/bin/make -f -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. TASKDIR = ../../tasks/Using_CUDA_Graphs SOLUTIONDIR = ../../solutions/Using_CUDA_Graphs IYPNB_TEMPLATE = ../../../.template.json PROCESSFILES = jacobi.cpp -COPYFILES = Makefile jacobi_kernels.cu Instructions.ipynb Instructions.md +COPYFILES = jacobi_kernels.cu Instructions.ipynb Instructions.md TASKPROCCESFILES = $(addprefix $(TASKDIR)/,$(PROCESSFILES)) TASKCOPYFILES = $(addprefix $(TASKDIR)/,$(COPYFILES)) SOLUTIONPROCCESFILES = $(addprefix $(SOLUTIONDIR)/,$(PROCESSFILES)) SOLUTIONCOPYFILES = $(addprefix $(SOLUTIONDIR)/,$(COPYFILES)) +MAKEFILES = $(addsuffix /Makefile,$(TASKDIR) $(SOLUTIONDIR)) .PHONY: all task all: task -task: ${TASKPROCCESFILES} ${TASKCOPYFILES} ${SOLUTIONPROCCESFILES} ${SOLUTIONCOPYFILES} +task: ${TASKPROCCESFILES} ${TASKCOPYFILES} ${SOLUTIONPROCCESFILES} ${SOLUTIONCOPYFILES} ${MAKEFILES} +$(TASKDIR)/Makefile: Makefile.in + sed -e 's/@@TASKSOL@@/task/' $< > $@ +$(SOLUTIONDIR)/Makefile: Makefile.in + sed -e 's/@@TASKSOL@@/sol/' $< > $@ ${TASKPROCCESFILES}: $(PROCESSFILES) mkdir -p $(TASKDIR)/ diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Makefile index e5aee1c..374e98f 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Makefile +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Makefile @@ -1,4 +1,6 @@ -# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 10H-NVSHMEM-sol +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 4 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4 @@ -37,10 +39,10 @@ clean: rm -f jacobi jacobi.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Makefile index b5b9a9b..06003e2 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Makefile +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Makefile @@ -1,4 +1,6 @@ -# Copyright (c) 2021,2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 10H-Graphs-sol +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 1 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4 @@ -35,10 +37,10 @@ clean: rm -f jacobi jacobi_kernels.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx --cuda-graph-trace=node -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx --cuda-graph-trace=node -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Makefile index e5aee1c..687a121 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Makefile +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Makefile @@ -1,4 +1,6 @@ -# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 10H-NVSHMEM-task +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 4 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4 @@ -37,10 +39,10 @@ clean: rm -f jacobi jacobi.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + CUDA_VISIBLE_DEVICES=$(C_V_D) $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10 diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Makefile index b5b9a9b..ca0ae2f 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Makefile +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Makefile @@ -1,4 +1,6 @@ -# Copyright (c) 2021,2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. +THIS_TASK := 10H-Graphs-task +OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') NP ?= 1 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4 @@ -35,10 +37,10 @@ clean: rm -f jacobi jacobi_kernels.o *.nsys-rep jacobi.*.compute-sanitizer.log sanitize: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file jacobi.%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) compute-sanitizer --log-file $(OUTPUT_NAME).%q{SLURM_PROCID}.compute-sanitizer.log ./jacobi -niter 10 run: jacobi $(JSC_SUBMIT_CMD) -n $(NP) ./jacobi profile: jacobi - $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx --cuda-graph-trace=node -o jacobi.%q{SLURM_PROCID} ./jacobi -niter 10 + $(JSC_SUBMIT_CMD) -n $(NP) nsys profile --trace=mpi,cuda,nvtx --cuda-graph-trace=node -o $(OUTPUT_NAME).%q{SLURM_PROCID} ./jacobi -niter 10