# Patch for .gitlab-ci.yml (Perlmutter CI jobs). Text before the "diff --git"
# header is ignored by patch tools.
#
# What this patch does:
#   * Adds an id_tokens entry (SITE_ID_TOKEN, aud https://software.nersc.gov/)
#     to every job touched below.
#   * Moves `when: manual` of the two build jobs under a `rules:` entry gated on
#     '$METRICS == null'.
#   * Adds perlmutter-metrics-build, which runs when $METRICS == "true", builds
#     with -DPDC_SERVER_CACHE=ON in ${PDC_BUILD_PATH}/perlmutter/metrics and
#     publishes the build and install trees as artifacts.
#   * Makes perlmutter-metrics depend on perlmutter-metrics-build and use the
#     binaries from .../perlmutter/metrics/bin.
#
# Review notes:
#   * The cmake command of perlmutter-metrics-build passes
#     -DCMAKE_INSTALL_PREFIX exactly once (the metrics install path); an earlier
#     duplicate pointing at $PDC_DIR was overridden by the later flag and has
#     been dropped.
#   * perlmutter-metrics no longer defines PDC_N_NODES / PDC_N_CLIENTS and now
#     references ${PDC_QUEUE}; these must be provided from outside this job
#     (presumably pipeline or schedule variables -- TODO confirm).
#   * Indentation and blank context lines were restored from a
#     whitespace-collapsed copy; verify against the target file before applying.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index fd4c5e39..e0f03883 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -9,7 +9,12 @@ stages:
 
 perlmutter-no-cache-build:
   stage: build
-  when: manual
+  rules:
+    - if: '$METRICS == null'
+      when: manual
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   allow_failure: false
   tags:
     - perlmutter
@@ -32,7 +37,12 @@ perlmutter-no-cache-build:
 
 perlmutter-cache-build:
   stage: build
-  when: manual
+  rules:
+    - if: '$METRICS == null'
+      when: manual
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   allow_failure: false
   tags:
     - perlmutter
@@ -60,6 +70,9 @@ perlmutter-cache-build:
 
 perlmutter-no-cache-parallel-pdc:
   stage: test
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   rules:
     - if: '$METRICS == null'
   needs:
@@ -80,6 +93,9 @@ perlmutter-no-cache-parallel-pdc:
 
 perlmutter-no-cache-parallel-obj:
   stage: test
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   rules:
     - if: '$METRICS == null'
   needs:
@@ -101,6 +117,9 @@ perlmutter-no-cache-parallel-obj:
 
 perlmutter-no-cache-parallel-cont:
   stage: test
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   rules:
     - if: '$METRICS == null'
   needs:
@@ -122,6 +141,9 @@ perlmutter-no-cache-parallel-cont:
 
 perlmutter-no-cache-parallel-prop:
   stage: test
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   rules:
     - if: '$METRICS == null'
   needs:
@@ -143,6 +165,9 @@ perlmutter-no-cache-parallel-prop:
 
 perlmutter-no-cache-parallel-region:
   stage: test
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   rules:
     - if: '$METRICS == null'
   needs:
@@ -164,6 +189,9 @@ perlmutter-no-cache-parallel-region:
 
 perlmutter-no-cache-parallel-region-all:
   stage: test
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   rules:
     - if: '$METRICS == null'
   needs:
@@ -190,6 +218,9 @@ perlmutter-no-cache-parallel-region-all:
 
 perlmutter-cache-parallel-pdc:
   stage: test
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   rules:
     - if: '$METRICS == null'
   needs:
@@ -210,6 +241,9 @@ perlmutter-cache-parallel-pdc:
 
 perlmutter-cache-parallel-obj:
   stage: test
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   rules:
     - if: '$METRICS == null'
   needs:
@@ -231,6 +265,9 @@ perlmutter-cache-parallel-obj:
 
 perlmutter-cache-parallel-cont:
   stage: test
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   rules:
     - if: '$METRICS == null'
   needs:
@@ -252,6 +289,9 @@ perlmutter-cache-parallel-cont:
 
 perlmutter-cache-parallel-prop:
   stage: test
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   rules:
     - if: '$METRICS == null'
   needs:
@@ -273,6 +313,9 @@ perlmutter-cache-parallel-prop:
 
 perlmutter-cache-parallel-region:
   stage: test
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   rules:
     - if: '$METRICS == null'
   needs:
@@ -294,6 +337,9 @@ perlmutter-cache-parallel-region:
 
 perlmutter-cache-parallel-region-all:
   stage: test
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   rules:
     - if: '$METRICS == null'
   needs:
@@ -313,26 +359,54 @@ perlmutter-cache-parallel-region-all:
     - ctest -L parallel_region_transfer_all
     - rm -rf ${PDC_TMPDIR} ${PDC_DATA_LOC}
 
+perlmutter-metrics-build:
+  stage: build
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
+  rules:
+    - if: '$METRICS == "true"'
+  allow_failure: false
+  tags:
+    - perlmutter
+  variables:
+    SCHEDULER_PARAMETERS: "-A m2621 --qos=debug --constraint=cpu --tasks-per-node=64 -N 1 -t 00:30:00"
+    SUPERCOMPUTER: "perlmutter"
+    MERCURY_DIR: "/global/cfs/cdirs/m2621/pdc-perlmutter/mercury/install"
+  script:
+    - module load libfabric/1.15.2.0
+    - module list
+    - mkdir -p ${PDC_BUILD_PATH}/perlmutter/metrics
+    - cd ${PDC_BUILD_PATH}/perlmutter/metrics
+    - cmake ../../.. -DBUILD_MPI_TESTING=ON -DBUILD_SHARED_LIBS=ON -DPDC_SERVER_CACHE=ON -DBUILD_TESTING=ON -DPDC_ENABLE_MPI=ON -DMERCURY_DIR=$MERCURY_DIR -DCMAKE_C_COMPILER=cc -DMPI_RUN_CMD="srun -A m2621 --qos=debug --constraint=cpu --tasks-per-node=64" -DCMAKE_INSTALL_PREFIX=${PDC_INSTALL_PATH}/perlmutter/metrics
+    - make -j
+    - make install
+  artifacts:
+    paths:
+      - ${PDC_BUILD_PATH}/perlmutter/metrics
+      - ${PDC_INSTALL_PATH}/perlmutter/metrics
+
 perlmutter-metrics:
   stage: metrics
+  id_tokens:
+    SITE_ID_TOKEN:
+      aud: https://software.nersc.gov/
   rules:
     - if: '$METRICS == "true"'
   needs:
-    - perlmutter-cache-build
+    - perlmutter-metrics-build
   tags:
     - perlmutter
   variables:
-    PDC_N_NODES: 64
-    PDC_N_CLIENTS: 127
-    SCHEDULER_PARAMETERS: "-A m2621 --qos=regular --constraint=cpu --tasks-per-node=${PDC_N_CLIENTS} -N ${PDC_N_NODES} -t 00:30:00"
+    SCHEDULER_PARAMETERS: "-A m2621 --qos=${PDC_QUEUE} --constraint=cpu --tasks-per-node=${PDC_N_CLIENTS} -N ${PDC_N_NODES} -t 00:30:00"
     SUPERCOMPUTER: "perlmutter"
     MERCURY_DIR: "/global/cfs/cdirs/m2621/pdc-perlmutter/mercury/install"
     PDC_TMPDIR: "${PDC_BUILD_PATH}/pdc-tmp-metrics"
     PDC_DATA_LOC: "${PDC_BUILD_PATH}/pdc-data-metrics"
     PDC_CLIENT_LOOKUP: "NONE"
-    PDC_SERVER: "${PDC_BUILD_PATH}/perlmutter/cache/bin/pdc_server.exe"
-    PDC_SERVER_CLOSE: "${PDC_BUILD_PATH}/perlmutter/cache/bin/close_server"
-    PDC_CLIENT: "${PDC_BUILD_PATH}/perlmutter/cache/bin/vpicio_mts"
+    PDC_SERVER: "${PDC_BUILD_PATH}/perlmutter/metrics/bin/pdc_server.exe"
+    PDC_SERVER_CLOSE: "${PDC_BUILD_PATH}/perlmutter/metrics/bin/close_server"
+    PDC_CLIENT: "${PDC_BUILD_PATH}/perlmutter/metrics/bin/vpicio_mts"
     PDC_JOB_OUTPUT: "pdc-metrics.log"
   script:
     - hostname