From 871815d04f447e9f6ec2baf6734cb5cfad4a477c Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Tue, 7 Jan 2025 15:49:41 +0100 Subject: [PATCH 01/37] ci: enable CI on Lumi --- .github/workflows/gitlab-ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/gitlab-ci.yml b/.github/workflows/gitlab-ci.yml index 4b08fe737..f079e1333 100644 --- a/.github/workflows/gitlab-ci.yml +++ b/.github/workflows/gitlab-ci.yml @@ -103,7 +103,6 @@ jobs: # lumi-init: if: | - false && github.repository_owner == 'earth-system-radiation' && ( github.event_name != 'pull_request' || ( github.event.pull_request.head.repo.owner.login == github.repository_owner && From 0711d4e5eecaa7e8a76041ef593340547569ea04 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Tue, 7 Jan 2025 16:21:29 +0100 Subject: [PATCH 02/37] ci: drop commented code --- .github/workflows/gitlab-ci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/gitlab-ci.yml b/.github/workflows/gitlab-ci.yml index f079e1333..d5075a82d 100644 --- a/.github/workflows/gitlab-ci.yml +++ b/.github/workflows/gitlab-ci.yml @@ -69,10 +69,8 @@ jobs: config-name: - nvhpc-gpu-openacc-DP - nvhpc-gpu-openacc-SP - #- nag-cpu-default-DP - nag-cpu-default-SP - nag-cpu-accel-DP - #- nag-cpu-accel-SP steps: # # Build, run and check (fetch the log) From 103e70773582072136e9567e8b73f4e7af4cbfba Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Tue, 7 Jan 2025 16:23:28 +0100 Subject: [PATCH 03/37] ci: levante: hardcode non-sensitive values --- .github/workflows/gitlab-ci.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/gitlab-ci.yml b/.github/workflows/gitlab-ci.yml index d5075a82d..b6c4feb51 100644 --- a/.github/workflows/gitlab-ci.yml +++ b/.github/workflows/gitlab-ci.yml @@ -43,7 +43,7 @@ jobs: id: g-push-rev uses: "skosukhin/git-ci-hub-lab/g-push-rev@v1" with: - remote-url: ${{ vars.DKRZ_GITLAB_SERVER }}/${{ vars.DKRZ_GITLAB_PROJECT }}.git + remote-url: https://gitlab.dkrz.de/icon-libraries/librte-rrtmgp.git password: ${{ secrets.DKRZ_GITLAB_TOKEN }} ref-type: tag ref-message: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} @@ -55,8 +55,8 @@ jobs: id: gl-trigger-pipeline uses: "skosukhin/git-ci-hub-lab/gl-trigger-pipeline@v1" with: - server-url: ${{ vars.DKRZ_GITLAB_SERVER }} - project-name: ${{ vars.DKRZ_GITLAB_PROJECT }} + server-url: https://gitlab.dkrz.de + project-name: 953 token: ${{ secrets.DKRZ_GITLAB_TRIGGER_TOKEN }} ref-name: ${{ steps.g-push-rev.outputs.ref-name }} expected-sha: ${{ github.sha }} @@ -78,8 +78,8 @@ jobs: - name: Build, run and check (fetch the log) uses: "skosukhin/git-ci-hub-lab/gl-attach-job@v1" with: - server-url: ${{ vars.DKRZ_GITLAB_SERVER }} - project-name: ${{ vars.DKRZ_GITLAB_PROJECT }} + server-url: https://gitlab.dkrz.de + project-name: 953 token: ${{ secrets.DKRZ_GITLAB_TOKEN }} pipeline-id: ${{ needs.levante-init.outputs.pipeline-id }} job-name: ${{ matrix.config-name }} @@ -91,7 +91,7 @@ jobs: steps: - uses: "skosukhin/git-ci-hub-lab/g-delete-ref@v1" with: - remote-url: ${{ vars.DKRZ_GITLAB_SERVER }}/${{ vars.DKRZ_GITLAB_PROJECT }}.git + remote-url: https://gitlab.dkrz.de/icon-libraries/librte-rrtmgp.git password: ${{ secrets.DKRZ_GITLAB_TOKEN }} ref-type: ${{ needs.levante-init.outputs.ref-type }} ref-name: ${{ needs.levante-init.outputs.ref-name }} From 3f9420dbf5bb7efc85de377b3573b4c27395f0be Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Tue, 7 Jan 2025 16:23:59 +0100 Subject: [PATCH 04/37] ci: lumi: update and hardcode non-sensitive values --- .github/workflows/gitlab-ci.yml | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/gitlab-ci.yml b/.github/workflows/gitlab-ci.yml index b6c4feb51..48c01d8f4 100644 --- a/.github/workflows/gitlab-ci.yml +++ b/.github/workflows/gitlab-ci.yml @@ -125,8 +125,8 @@ jobs: id: g-push-rev uses: "skosukhin/git-ci-hub-lab/g-push-rev@v1" with: - remote-url: ${{ vars.GITLAB_SERVER }}/${{ vars.GITLAB_PROJECT }}.git - password: ${{ secrets.GITLAB_TOKEN }} + remote-url: https://earth.bsc.es/gitlab/utigerstedt/rte-rrtmgp-lumi-cicd.git + password: ${{ secrets.BSC_GITLAB_TOKEN }} rev-id: ${{ github.sha }} rev-signing-format: ssh rev-signing-key: ${{ secrets.GITLAB_SIGNING_KEY }} @@ -139,9 +139,9 @@ jobs: id: gl-create-pipeline uses: "skosukhin/git-ci-hub-lab/gl-create-pipeline@v1" with: - server-url: ${{ vars.GITLAB_SERVER }} - project-name: ${{ vars.GITLAB_PROJECT }} - token: ${{ secrets.GITLAB_TOKEN }} + server-url: https://earth.bsc.es/gitlab + project-name: 1817 + token: ${{ secrets.BSC_GITLAB_TOKEN }} ref-name: ${{ steps.g-push-rev.outputs.ref-name }} expected-sha: ${{ steps.g-push-rev.outputs.ref-commit }} # @@ -150,9 +150,9 @@ jobs: - name: Set up Python virtual environment (fetch the log) uses: "skosukhin/git-ci-hub-lab/gl-attach-job@v1" with: - server-url: ${{ vars.GITLAB_SERVER }} - project-name: ${{ vars.GITLAB_PROJECT }} - token: ${{ secrets.GITLAB_TOKEN }} + server-url: https://earth.bsc.es/gitlab + project-name: 1817 + token: ${{ secrets.BSC_GITLAB_TOKEN }} pipeline-id: ${{ steps.gl-create-pipeline.outputs.pipeline-id }} job-name: setup-python lumi: @@ -171,9 +171,9 @@ jobs: - name: Build, run and check (fetch the log) uses: "skosukhin/git-ci-hub-lab/gl-attach-job@v1" with: - server-url: ${{ vars.GITLAB_SERVER }} - project-name: ${{ vars.GITLAB_PROJECT }} - token: ${{ secrets.GITLAB_TOKEN }} + server-url: https://earth.bsc.es/gitlab + project-name: 1817 + token: ${{ secrets.BSC_GITLAB_TOKEN }} pipeline-id: ${{ needs.lumi-init.outputs.pipeline-id }} job-name: ${{ matrix.config-name }} lumi-cleanup: @@ -184,16 +184,16 @@ jobs: steps: - uses: "skosukhin/git-ci-hub-lab/gl-cancel-pipeline@v1" with: - server-url: ${{ vars.GITLAB_SERVER }} - project-name: ${{ vars.GITLAB_PROJECT }} - token: ${{ secrets.GITLAB_TOKEN }} + server-url: https://earth.bsc.es/gitlab + project-name: 1817 + token: ${{ secrets.BSC_GITLAB_TOKEN }} pipeline-id: ${{ needs.lumi-init.outputs.pipeline-id }} force: true - uses: "skosukhin/git-ci-hub-lab/gl-delete-ref@v1" with: - server-url: ${{ vars.GITLAB_SERVER }} - project-name: ${{ vars.GITLAB_PROJECT }} - token: ${{ secrets.GITLAB_TOKEN }} + server-url: https://earth.bsc.es/gitlab + project-name: 1817 + token: ${{ secrets.BSC_GITLAB_TOKEN }} ref-type: ${{ needs.lumi-init.outputs.ref-type }} ref-name: ${{ needs.lumi-init.outputs.ref-name }} force: true From e75c9d8b2d978f23776a2d90d38d1e309fb25c87 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Tue, 7 Jan 2025 17:26:17 +0100 Subject: [PATCH 05/37] ci: lumi: drop lines of unknown origin --- .gitlab/lumi.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitlab/lumi.yml b/.gitlab/lumi.yml index 12f5603e3..b5b7ed50a 100644 --- a/.gitlab/lumi.yml +++ b/.gitlab/lumi.yml @@ -8,9 +8,6 @@ include: .default: tags: - lumi - id_tokens: - CI_JOB_JWT: - aud: https://gitlab.com variables: TIME_LIMIT: "05:00" From 68f76bd660dd45e4ae36685bfaa3806ca7c0377e Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Tue, 7 Jan 2025 18:02:22 +0100 Subject: [PATCH 06/37] ci: avoid string conversion --- .gitlab/common.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab/common.yml b/.gitlab/common.yml index 81aac69e4..f3e330cda 100644 --- a/.gitlab/common.yml +++ b/.gitlab/common.yml @@ -1,10 +1,10 @@ .dp: variables: - RTE_ENABLE_SP: OFF + RTE_ENABLE_SP: "OFF" .sp: variables: - RTE_ENABLE_SP: ON + RTE_ENABLE_SP: "ON" .common: variables: From e9d3052079a693c6a5a3b8b7d5965efc15a5cb9d Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 14:39:53 +0100 Subject: [PATCH 07/37] ci: gitlab: enable running via web UI at DKRZ --- .gitlab/levante.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 483e632a5..72f740ed6 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -1,6 +1,7 @@ workflow: rules: - if: $CI_PIPELINE_SOURCE == "trigger" + - if: $CI_PIPELINE_SOURCE == "web" include: - project: 'anw_dienste/ci-templates' From d8c040880c8c249069d5714c2d3d3e209e6cc4dc Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 14:43:41 +0100 Subject: [PATCH 08/37] ci: gitlab: prepare Levante config to be merged with the Lumi one --- .gitlab/levante.yml | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 72f740ed6..682244b0a 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -8,33 +8,35 @@ include: file: '.slurm-ci.yml' - local: '.gitlab/common.yml' -variables: - TIME_LIMIT: "05:00" - SCHEDULER_PARAMETERS: >- - --account=mh0287 - --time=${TIME_LIMIT} - ${EXTRA_SCHEDULER_PARAMETERS} - EXTRA_SCHEDULER_PARAMETERS: - -.gpu: +.default-levante: extends: .default + variables: + TIME_LIMIT: "05:00" + SCHEDULER_PARAMETERS: >- + --account=mh0287 + --time=${TIME_LIMIT} + ${EXTRA_SCHEDULER_PARAMETERS} + EXTRA_SCHEDULER_PARAMETERS: + +.gpu-levante: + extends: .default-levante variables: EXTRA_SCHEDULER_PARAMETERS: >- --partition=gpu --gpus=1 -.cpu: - extends: .default +.cpu-levante: + extends: .default-levante variables: EXTRA_SCHEDULER_PARAMETERS: >- --partition=shared -.nvhpc: +.nvhpc-levante: variables: FC: /sw/spack-levante/nvhpc-24.9-p7iohv/Linux_x86_64/24.9/compilers/bin/nvfortran NetCDF_Fortran_ROOT: /sw/spack-levante/netcdf-fortran-4.6.1-4wu5wt -.nag: +.nag-levante: variables: FC: /sw/spack-levante/nag-7.1-lqjbej/bin/nagfor NetCDF_Fortran_ROOT: /sw/spack-levante/netcdf-fortran-4.5.3-5di6qe @@ -55,8 +57,8 @@ variables: .nvhpc-gpu-openacc: extends: - - .gpu - - .nvhpc + - .gpu-levante + - .nvhpc-levante - .common-levante variables: # Compiler flags used for ICON model: @@ -65,8 +67,8 @@ variables: .nag-cpu: extends: - - .cpu - - .nag + - .cpu-levante + - .nag-levante - .common-levante variables: # Test executables produced in this configuration are too slow to run within From 3110508ae32fbbe9907b0fb1a940c24fae59ba35 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 14:54:16 +0100 Subject: [PATCH 09/37] ci: gitlab: prepare Lumi config to be merged with the Levante one --- .gitlab/lumi.yml | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/.gitlab/lumi.yml b/.gitlab/lumi.yml index b5b7ed50a..ad68c9686 100644 --- a/.gitlab/lumi.yml +++ b/.gitlab/lumi.yml @@ -5,36 +5,35 @@ workflow: include: - local: '.gitlab/common.yml' -.default: +.default-lumi: + variables: + TIME_LIMIT: "05:00" + SCHEDULER_PARAMETERS: >- + --account=project_465000454 + --nodes=1 + --ntasks=1 + --cpus-per-task=4 + --mem-per-cpu=1G + --time=${TIME_LIMIT} + ${EXTRA_SCHEDULER_PARAMETERS} + EXTRA_SCHEDULER_PARAMETERS: tags: - lumi -variables: - TIME_LIMIT: "05:00" - SCHEDULER_PARAMETERS: >- - --account=project_465000454 - --nodes=1 - --ntasks=1 - --cpus-per-task=4 - --mem-per-cpu=1G - --time=${TIME_LIMIT} - ${EXTRA_SCHEDULER_PARAMETERS} - EXTRA_SCHEDULER_PARAMETERS: - -.gpu: - extends: .default +.gpu-lumi: + extends: .default-lumi variables: EXTRA_SCHEDULER_PARAMETERS: >- --partition=dev-g --gpus=1 -.cpu: - extends: .default +.cpu-lumi: + extends: .default-lumi variables: EXTRA_SCHEDULER_PARAMETERS: >- --partition=debug -.cce: +.cce-lumi: variables: FC: ftn COMPILER_MODULES: PrgEnv-cray cce/16.0.1 craype-x86-milan @@ -49,7 +48,7 @@ variables: setup-python: extends: - - .cpu + - .cpu-lumi - .python-common-lumi script: - test ! -d "${PYHOME}" || exit 0 @@ -81,8 +80,8 @@ setup-python: .cce-gpu-openacc: extends: - - .gpu - - .cce + - .gpu-lumi + - .cce-lumi - .common-lumi variables: # Compiler flags used for ICON model: From d9e4e1bbcb821caadf7b3fb2a51d8f233f5be870 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 15:00:10 +0100 Subject: [PATCH 10/37] ci: gitlab: merge Levante and Lumi configs --- .gitlab/common.yml | 29 ---------- .gitlab/levante.yml | 126 +++++++++++++++++++++++++++++++++++++++++++- .gitlab/lumi.yml | 101 ----------------------------------- 3 files changed, 125 insertions(+), 131 deletions(-) delete mode 100644 .gitlab/common.yml delete mode 100644 .gitlab/lumi.yml diff --git a/.gitlab/common.yml b/.gitlab/common.yml deleted file mode 100644 index f3e330cda..000000000 --- a/.gitlab/common.yml +++ /dev/null @@ -1,29 +0,0 @@ -.dp: - variables: - RTE_ENABLE_SP: "OFF" - -.sp: - variables: - RTE_ENABLE_SP: "ON" - -.common: - variables: - CMAKE_BUILD_PARALLEL_LEVEL: 8 - VERBOSE: - CTEST_PARALLEL_LEVEL: 8 - CTEST_OUTPUT_ON_FAILURE: 1 - script: - # - # Build libraries, examples and tests - # - - | - cmake -S . -B build \ - -DCMAKE_BUILD_TYPE=None \ - -DRTE_ENABLE_SP=$RTE_ENABLE_SP \ - -DKERNEL_MODE=$KERNEL_MODE \ - -DBUILD_TESTING=ON - - cmake --build build - # - # Run examples, tests and checks - # - - ctest --test-dir build diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 682244b0a..f39e1c4d8 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -3,10 +3,39 @@ workflow: - if: $CI_PIPELINE_SOURCE == "trigger" - if: $CI_PIPELINE_SOURCE == "web" +.dp: + variables: + RTE_ENABLE_SP: "OFF" + +.sp: + variables: + RTE_ENABLE_SP: "ON" + +.common: + variables: + CMAKE_BUILD_PARALLEL_LEVEL: 8 + VERBOSE: + CTEST_PARALLEL_LEVEL: 8 + CTEST_OUTPUT_ON_FAILURE: 1 + script: + # + # Build libraries, examples and tests + # + - | + cmake -S . -B build \ + -DCMAKE_BUILD_TYPE=None \ + -DRTE_ENABLE_SP=$RTE_ENABLE_SP \ + -DKERNEL_MODE=$KERNEL_MODE \ + -DBUILD_TESTING=ON + - cmake --build build + # + # Run examples, tests and checks + # + - ctest --test-dir build + include: - project: 'anw_dienste/ci-templates' file: '.slurm-ci.yml' - - local: '.gitlab/common.yml' .default-levante: extends: .default @@ -106,3 +135,98 @@ nag-cpu-accel-DP: extends: - .dp - .nag-cpu-accel + +.default-lumi: + variables: + TIME_LIMIT: "05:00" + SCHEDULER_PARAMETERS: >- + --account=project_465000454 + --nodes=1 + --ntasks=1 + --cpus-per-task=4 + --mem-per-cpu=1G + --time=${TIME_LIMIT} + ${EXTRA_SCHEDULER_PARAMETERS} + EXTRA_SCHEDULER_PARAMETERS: + tags: + - lumi + +.gpu-lumi: + extends: .default-lumi + variables: + EXTRA_SCHEDULER_PARAMETERS: >- + --partition=dev-g + --gpus=1 + +.cpu-lumi: + extends: .default-lumi + variables: + EXTRA_SCHEDULER_PARAMETERS: >- + --partition=debug + +.cce-lumi: + variables: + FC: ftn + COMPILER_MODULES: PrgEnv-cray cce/16.0.1 craype-x86-milan + +# +# Set up Python virtual environment +# +.python-common-lumi: + variables: + PYHOME: ${CI_PROJECT_DIR}/python-venv + FF_USE_FASTZIP: 1 + +setup-python: + extends: + - .cpu-lumi + - .python-common-lumi + script: + - test ! -d "${PYHOME}" || exit 0 + - module load cray-python + - python -m venv ${PYHOME} + - ${PYHOME}/bin/python -m pip install --upgrade pip + - ${PYHOME}/bin/python -m pip install dask[array] netCDF4 numpy xarray + cache: + # Update the key to regenerate the virtual environment: + key: python-venv-version-1 + paths: + - ${PYHOME} + artifacts: + paths: + - ${PYHOME} + expire_in: 60 minutes + +.common-lumi: + extends: + - .python-common-lumi + - .common + needs: + - setup-python + before_script: + - module --force purge + - module load ${COMPILER_MODULES} ${EXTRA_COMPILER_MODULES} cray-hdf5 cray-netcdf + # Extend the existing environment variables: + - export PATH="${PYHOME}/bin:${PATH}" + +.cce-gpu-openacc: + extends: + - .gpu-lumi + - .cce-lumi + - .common-lumi + variables: + # Compiler flags used for ICON model: + FFLAGS: -hacc -hadd_paren -Ktrap=divz,ovf,inv -hflex_mp=intolerant -hfp1 -g + KERNEL_MODE: accel + # Convenience variables: + EXTRA_COMPILER_MODULES: craype-accel-amd-gfx90a rocm + +cce-gpu-openacc-DP: + extends: + - .dp + - .cce-gpu-openacc + +cce-gpu-openacc-SP: + extends: + - .sp + - .cce-gpu-openacc diff --git a/.gitlab/lumi.yml b/.gitlab/lumi.yml deleted file mode 100644 index ad68c9686..000000000 --- a/.gitlab/lumi.yml +++ /dev/null @@ -1,101 +0,0 @@ -workflow: - rules: - - if: $CI_PIPELINE_SOURCE == "api" - -include: - - local: '.gitlab/common.yml' - -.default-lumi: - variables: - TIME_LIMIT: "05:00" - SCHEDULER_PARAMETERS: >- - --account=project_465000454 - --nodes=1 - --ntasks=1 - --cpus-per-task=4 - --mem-per-cpu=1G - --time=${TIME_LIMIT} - ${EXTRA_SCHEDULER_PARAMETERS} - EXTRA_SCHEDULER_PARAMETERS: - tags: - - lumi - -.gpu-lumi: - extends: .default-lumi - variables: - EXTRA_SCHEDULER_PARAMETERS: >- - --partition=dev-g - --gpus=1 - -.cpu-lumi: - extends: .default-lumi - variables: - EXTRA_SCHEDULER_PARAMETERS: >- - --partition=debug - -.cce-lumi: - variables: - FC: ftn - COMPILER_MODULES: PrgEnv-cray cce/16.0.1 craype-x86-milan - -# -# Set up Python virtual environment -# -.python-common-lumi: - variables: - PYHOME: ${CI_PROJECT_DIR}/python-venv - FF_USE_FASTZIP: 1 - -setup-python: - extends: - - .cpu-lumi - - .python-common-lumi - script: - - test ! -d "${PYHOME}" || exit 0 - - module load cray-python - - python -m venv ${PYHOME} - - ${PYHOME}/bin/python -m pip install --upgrade pip - - ${PYHOME}/bin/python -m pip install dask[array] netCDF4 numpy xarray - cache: - # Update the key to regenerate the virtual environment: - key: python-venv-version-1 - paths: - - ${PYHOME} - artifacts: - paths: - - ${PYHOME} - expire_in: 60 minutes - -.common-lumi: - extends: - - .python-common-lumi - - .common - needs: - - setup-python - before_script: - - module --force purge - - module load ${COMPILER_MODULES} ${EXTRA_COMPILER_MODULES} cray-hdf5 cray-netcdf - # Extend the existing environment variables: - - export PATH="${PYHOME}/bin:${PATH}" - -.cce-gpu-openacc: - extends: - - .gpu-lumi - - .cce-lumi - - .common-lumi - variables: - # Compiler flags used for ICON model: - FFLAGS: -hacc -hadd_paren -Ktrap=divz,ovf,inv -hflex_mp=intolerant -hfp1 -g - KERNEL_MODE: accel - # Convenience variables: - EXTRA_COMPILER_MODULES: craype-accel-amd-gfx90a rocm - -cce-gpu-openacc-DP: - extends: - - .dp - - .cce-gpu-openacc - -cce-gpu-openacc-SP: - extends: - - .sp - - .cce-gpu-openacc From a65a88d27b25f1527c7c4fb0d52a052433a91e4d Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 15:05:48 +0100 Subject: [PATCH 11/37] ci: gitlab: set id_tokens for Lumi jobs --- .gitlab/levante.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index f39e1c4d8..5dd79d3aa 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -148,6 +148,9 @@ nag-cpu-accel-DP: --time=${TIME_LIMIT} ${EXTRA_SCHEDULER_PARAMETERS} EXTRA_SCHEDULER_PARAMETERS: + id_tokens: + CI_JOB_JWT: + aud: https://gitlab.dkrz.de tags: - lumi From fa6f6f713646e1c7084a33344be923adeec22141 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 15:31:10 +0100 Subject: [PATCH 12/37] ci: gitlab: formatting --- .gitlab/levante.yml | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 5dd79d3aa..ea0df73ee 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -18,19 +18,13 @@ workflow: CTEST_PARALLEL_LEVEL: 8 CTEST_OUTPUT_ON_FAILURE: 1 script: - # - # Build libraries, examples and tests - # - - | - cmake -S . -B build \ - -DCMAKE_BUILD_TYPE=None \ - -DRTE_ENABLE_SP=$RTE_ENABLE_SP \ - -DKERNEL_MODE=$KERNEL_MODE \ - -DBUILD_TESTING=ON + - > + cmake -S . -B build + -DCMAKE_BUILD_TYPE=None + -DRTE_ENABLE_SP=${RTE_ENABLE_SP} + -DKERNEL_MODE=${KERNEL_MODE} + -DBUILD_TESTING=ON - cmake --build build - # - # Run examples, tests and checks - # - ctest --test-dir build include: From de9bd98758e9046962b7fa6283f9a1bb780cc4de Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 15:34:02 +0100 Subject: [PATCH 13/37] ci: gitlab: rename hidden jobs --- .gitlab/levante.yml | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index ea0df73ee..6a5f74ed8 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -11,7 +11,7 @@ workflow: variables: RTE_ENABLE_SP: "ON" -.common: +.build-and-test: variables: CMAKE_BUILD_PARALLEL_LEVEL: 8 VERBOSE: @@ -64,8 +64,8 @@ include: FC: /sw/spack-levante/nag-7.1-lqjbej/bin/nagfor NetCDF_Fortran_ROOT: /sw/spack-levante/netcdf-fortran-4.5.3-5di6qe -.common-levante: - extends: .common +.build-and-test-levante: + extends: .build-and-test variables: PYHOME: /sw/spack-levante/mambaforge-22.9.0-2-Linux-x86_64-kptncg # Suppress an irrelevant but annoying error message: @@ -82,7 +82,7 @@ include: extends: - .gpu-levante - .nvhpc-levante - - .common-levante + - .build-and-test-levante variables: # Compiler flags used for ICON model: FFLAGS: -g -O2 -Mrecursive -Mallocatable=03 -Mstack_arrays -Minfo=accel,inline -acc=gpu,verystrict -gpu=cc80,cuda12.6 @@ -92,7 +92,7 @@ include: extends: - .cpu-levante - .nag-levante - - .common-levante + - .build-and-test-levante variables: # Test executables produced in this configuration are too slow to run within # the default time limit: @@ -166,10 +166,7 @@ nag-cpu-accel-DP: FC: ftn COMPILER_MODULES: PrgEnv-cray cce/16.0.1 craype-x86-milan -# -# Set up Python virtual environment -# -.python-common-lumi: +.python-lumi: variables: PYHOME: ${CI_PROJECT_DIR}/python-venv FF_USE_FASTZIP: 1 @@ -177,7 +174,7 @@ nag-cpu-accel-DP: setup-python: extends: - .cpu-lumi - - .python-common-lumi + - .python-lumi script: - test ! -d "${PYHOME}" || exit 0 - module load cray-python @@ -194,10 +191,10 @@ setup-python: - ${PYHOME} expire_in: 60 minutes -.common-lumi: +.build-and-test-lumi: extends: - - .python-common-lumi - - .common + - .python-lumi + - .build-and-test needs: - setup-python before_script: @@ -210,7 +207,7 @@ setup-python: extends: - .gpu-lumi - .cce-lumi - - .common-lumi + - .build-and-test-lumi variables: # Compiler flags used for ICON model: FFLAGS: -hacc -hadd_paren -Ktrap=divz,ovf,inv -hflex_mp=intolerant -hfp1 -g From 6e5fe89b01aa7e347f237cdbc37b29a557193282 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 15:37:01 +0100 Subject: [PATCH 14/37] ci: gitlab: drop trivial hidden jobs --- .gitlab/levante.yml | 43 +++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 6a5f74ed8..85179f742 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -3,16 +3,9 @@ workflow: - if: $CI_PIPELINE_SOURCE == "trigger" - if: $CI_PIPELINE_SOURCE == "web" -.dp: - variables: - RTE_ENABLE_SP: "OFF" - -.sp: - variables: - RTE_ENABLE_SP: "ON" - .build-and-test: variables: + RTE_ENABLE_SP: "OFF" CMAKE_BUILD_PARALLEL_LEVEL: 8 VERBOSE: CTEST_PARALLEL_LEVEL: 8 @@ -111,24 +104,20 @@ include: KERNEL_MODE: accel nvhpc-gpu-openacc-DP: - extends: - - .dp - - .nvhpc-gpu-openacc + extends: .nvhpc-gpu-openacc nvhpc-gpu-openacc-SP: - extends: - - .sp - - .nvhpc-gpu-openacc - -nag-cpu-default-SP: - extends: - - .sp - - .nag-cpu-default + extends: .nvhpc-gpu-openacc + variables: + RTE_ENABLE_SP: "ON" nag-cpu-accel-DP: - extends: - - .dp - - .nag-cpu-accel + extends: .nag-cpu-accel + +nag-cpu-default-SP: + extends: .nag-cpu-default + variables: + RTE_ENABLE_SP: "ON" .default-lumi: variables: @@ -216,11 +205,9 @@ setup-python: EXTRA_COMPILER_MODULES: craype-accel-amd-gfx90a rocm cce-gpu-openacc-DP: - extends: - - .dp - - .cce-gpu-openacc + extends: .cce-gpu-openacc cce-gpu-openacc-SP: - extends: - - .sp - - .cce-gpu-openacc + extends: .cce-gpu-openacc + variables: + RTE_ENABLE_SP: "ON" From 057880970885528da701488e6256e71a520657da Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 15:41:19 +0100 Subject: [PATCH 15/37] ci: gitlab: rename more hidden jobs --- .gitlab/levante.yml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 85179f742..ebea3994e 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -24,7 +24,7 @@ include: - project: 'anw_dienste/ci-templates' file: '.slurm-ci.yml' -.default-levante: +.scheduler-levante: extends: .default variables: TIME_LIMIT: "05:00" @@ -34,15 +34,15 @@ include: ${EXTRA_SCHEDULER_PARAMETERS} EXTRA_SCHEDULER_PARAMETERS: -.gpu-levante: - extends: .default-levante +.scheduler-levante-gpu: + extends: .scheduler-levante variables: EXTRA_SCHEDULER_PARAMETERS: >- --partition=gpu --gpus=1 -.cpu-levante: - extends: .default-levante +.scheduler-levante-cpu: + extends: .scheduler-levante variables: EXTRA_SCHEDULER_PARAMETERS: >- --partition=shared @@ -73,7 +73,7 @@ include: .nvhpc-gpu-openacc: extends: - - .gpu-levante + - .scheduler-levante-gpu - .nvhpc-levante - .build-and-test-levante variables: @@ -83,7 +83,7 @@ include: .nag-cpu: extends: - - .cpu-levante + - .scheduler-levante-cpu - .nag-levante - .build-and-test-levante variables: @@ -119,7 +119,7 @@ nag-cpu-default-SP: variables: RTE_ENABLE_SP: "ON" -.default-lumi: +.scheduler-lumi: variables: TIME_LIMIT: "05:00" SCHEDULER_PARAMETERS: >- @@ -137,15 +137,15 @@ nag-cpu-default-SP: tags: - lumi -.gpu-lumi: - extends: .default-lumi +.scheduler-lumi-gpu: + extends: .scheduler-lumi variables: EXTRA_SCHEDULER_PARAMETERS: >- --partition=dev-g --gpus=1 -.cpu-lumi: - extends: .default-lumi +.scheduler-lumi-cpu: + extends: .scheduler-lumi variables: EXTRA_SCHEDULER_PARAMETERS: >- --partition=debug @@ -162,7 +162,7 @@ nag-cpu-default-SP: setup-python: extends: - - .cpu-lumi + - .scheduler-lumi-cpu - .python-lumi script: - test ! -d "${PYHOME}" || exit 0 @@ -194,7 +194,7 @@ setup-python: .cce-gpu-openacc: extends: - - .gpu-lumi + - .scheduler-lumi-gpu - .cce-lumi - .build-and-test-lumi variables: From b6bc982d3633b73d6b9f9dc70d79ef6740e4b0e1 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 15:59:23 +0100 Subject: [PATCH 16/37] ci: gitlab: drop and combine hidden jobs --- .gitlab/levante.yml | 71 ++++++++++++++------------------------------- 1 file changed, 21 insertions(+), 50 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index ebea3994e..5e8aafaec 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -5,7 +5,6 @@ workflow: .build-and-test: variables: - RTE_ENABLE_SP: "OFF" CMAKE_BUILD_PARALLEL_LEVEL: 8 VERBOSE: CTEST_PARALLEL_LEVEL: 8 @@ -47,16 +46,6 @@ include: EXTRA_SCHEDULER_PARAMETERS: >- --partition=shared -.nvhpc-levante: - variables: - FC: /sw/spack-levante/nvhpc-24.9-p7iohv/Linux_x86_64/24.9/compilers/bin/nvfortran - NetCDF_Fortran_ROOT: /sw/spack-levante/netcdf-fortran-4.6.1-4wu5wt - -.nag-levante: - variables: - FC: /sw/spack-levante/nag-7.1-lqjbej/bin/nagfor - NetCDF_Fortran_ROOT: /sw/spack-levante/netcdf-fortran-4.5.3-5di6qe - .build-and-test-levante: extends: .build-and-test variables: @@ -71,52 +60,42 @@ include: # Some tests require a large stack: - ulimit -s unlimited -.nvhpc-gpu-openacc: +nvhpc-gpu-openacc-DP: extends: - .scheduler-levante-gpu - - .nvhpc-levante - .build-and-test-levante variables: + FC: /sw/spack-levante/nvhpc-24.9-p7iohv/Linux_x86_64/24.9/compilers/bin/nvfortran + NetCDF_Fortran_ROOT: /sw/spack-levante/netcdf-fortran-4.6.1-4wu5wt # Compiler flags used for ICON model: FFLAGS: -g -O2 -Mrecursive -Mallocatable=03 -Mstack_arrays -Minfo=accel,inline -acc=gpu,verystrict -gpu=cc80,cuda12.6 KERNEL_MODE: accel + RTE_ENABLE_SP: "OFF" + +nvhpc-gpu-openacc-SP: + extends: nvhpc-gpu-openacc-DP + variables: + RTE_ENABLE_SP: "ON" -.nag-cpu: +nag-cpu-accel-DP: extends: - .scheduler-levante-cpu - - .nag-levante - .build-and-test-levante variables: # Test executables produced in this configuration are too slow to run within # the default time limit: TIME_LIMIT: "10:00" + FC: /sw/spack-levante/nag-7.1-lqjbej/bin/nagfor + NetCDF_Fortran_ROOT: /sw/spack-levante/netcdf-fortran-4.5.3-5di6qe # Compiler flags used for ICON model: FFLAGS: -Wc=/sw/spack-levante/gcc-11.2.0-bcn7mb/bin/gcc -f2008 -colour -w=uep -g -gline -O0 -float-store -nan -Wc,-g -Wc,-pipe -Wc,--param,max-vartrack-size=200000000 -Wc,-mno-fma -C=all - -.nag-cpu-default: - extends: .nag-cpu - variables: - KERNEL_MODE: default - -.nag-cpu-accel: - extends: .nag-cpu - variables: KERNEL_MODE: accel - -nvhpc-gpu-openacc-DP: - extends: .nvhpc-gpu-openacc - -nvhpc-gpu-openacc-SP: - extends: .nvhpc-gpu-openacc - variables: - RTE_ENABLE_SP: "ON" - -nag-cpu-accel-DP: - extends: .nag-cpu-accel + RTE_ENABLE_SP: "OFF" nag-cpu-default-SP: - extends: .nag-cpu-default + extends: nag-cpu-accel-DP variables: + KERNEL_MODE: default RTE_ENABLE_SP: "ON" .scheduler-lumi: @@ -150,11 +129,6 @@ nag-cpu-default-SP: EXTRA_SCHEDULER_PARAMETERS: >- --partition=debug -.cce-lumi: - variables: - FC: ftn - COMPILER_MODULES: PrgEnv-cray cce/16.0.1 craype-x86-milan - .python-lumi: variables: PYHOME: ${CI_PROJECT_DIR}/python-venv @@ -188,26 +162,23 @@ setup-python: - setup-python before_script: - module --force purge - - module load ${COMPILER_MODULES} ${EXTRA_COMPILER_MODULES} cray-hdf5 cray-netcdf + - module load ${COMPILER_MODULES} cray-hdf5 cray-netcdf # Extend the existing environment variables: - export PATH="${PYHOME}/bin:${PATH}" -.cce-gpu-openacc: +cce-gpu-openacc-DP: extends: - .scheduler-lumi-gpu - - .cce-lumi - .build-and-test-lumi variables: + FC: ftn + COMPILER_MODULES: PrgEnv-cray cce/16.0.1 craype-x86-milan craype-accel-amd-gfx90a rocm # Compiler flags used for ICON model: FFLAGS: -hacc -hadd_paren -Ktrap=divz,ovf,inv -hflex_mp=intolerant -hfp1 -g KERNEL_MODE: accel - # Convenience variables: - EXTRA_COMPILER_MODULES: craype-accel-amd-gfx90a rocm - -cce-gpu-openacc-DP: - extends: .cce-gpu-openacc + RTE_ENABLE_SP: "OFF" cce-gpu-openacc-SP: - extends: .cce-gpu-openacc + extends: cce-gpu-openacc-DP variables: RTE_ENABLE_SP: "ON" From c45822da22047f1f4738b65d76f38aa803b5986e Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 16:36:46 +0100 Subject: [PATCH 17/37] ci: gitlab: combine scheduler hidden jobs --- .gitlab/levante.yml | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 5e8aafaec..be18649f9 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -23,28 +23,23 @@ include: - project: 'anw_dienste/ci-templates' file: '.slurm-ci.yml' -.scheduler-levante: +.scheduler-levante-cpu: extends: .default variables: TIME_LIMIT: "05:00" + PARTITION: shared SCHEDULER_PARAMETERS: >- --account=mh0287 --time=${TIME_LIMIT} + --partition=${PARTITION} ${EXTRA_SCHEDULER_PARAMETERS} EXTRA_SCHEDULER_PARAMETERS: .scheduler-levante-gpu: - extends: .scheduler-levante + extends: .scheduler-levante-cpu variables: - EXTRA_SCHEDULER_PARAMETERS: >- - --partition=gpu - --gpus=1 - -.scheduler-levante-cpu: - extends: .scheduler-levante - variables: - EXTRA_SCHEDULER_PARAMETERS: >- - --partition=shared + PARTITION: gpu + EXTRA_SCHEDULER_PARAMETERS: --gpus=1 .build-and-test-levante: extends: .build-and-test @@ -98,7 +93,7 @@ nag-cpu-default-SP: KERNEL_MODE: default RTE_ENABLE_SP: "ON" -.scheduler-lumi: +.scheduler-lumi-cpu: variables: TIME_LIMIT: "05:00" SCHEDULER_PARAMETERS: >- @@ -108,6 +103,7 @@ nag-cpu-default-SP: --cpus-per-task=4 --mem-per-cpu=1G --time=${TIME_LIMIT} + --partition=debug ${EXTRA_SCHEDULER_PARAMETERS} EXTRA_SCHEDULER_PARAMETERS: id_tokens: @@ -117,17 +113,10 @@ nag-cpu-default-SP: - lumi .scheduler-lumi-gpu: - extends: .scheduler-lumi + extends: .scheduler-lumi-cpu variables: - EXTRA_SCHEDULER_PARAMETERS: >- - --partition=dev-g - --gpus=1 - -.scheduler-lumi-cpu: - extends: .scheduler-lumi - variables: - EXTRA_SCHEDULER_PARAMETERS: >- - --partition=debug + PARTITION: dev-g + EXTRA_SCHEDULER_PARAMETERS: --gpus=1 .python-lumi: variables: From 2b347346713455c3b72f314c399ea65421024349 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 16:38:54 +0100 Subject: [PATCH 18/37] ci: gitlab: reorder jobs --- .gitlab/levante.yml | 116 ++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index be18649f9..fc4b46c71 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -3,22 +3,6 @@ workflow: - if: $CI_PIPELINE_SOURCE == "trigger" - if: $CI_PIPELINE_SOURCE == "web" -.build-and-test: - variables: - CMAKE_BUILD_PARALLEL_LEVEL: 8 - VERBOSE: - CTEST_PARALLEL_LEVEL: 8 - CTEST_OUTPUT_ON_FAILURE: 1 - script: - - > - cmake -S . -B build - -DCMAKE_BUILD_TYPE=None - -DRTE_ENABLE_SP=${RTE_ENABLE_SP} - -DKERNEL_MODE=${KERNEL_MODE} - -DBUILD_TESTING=ON - - cmake --build build - - ctest --test-dir build - include: - project: 'anw_dienste/ci-templates' file: '.slurm-ci.yml' @@ -41,6 +25,47 @@ include: PARTITION: gpu EXTRA_SCHEDULER_PARAMETERS: --gpus=1 +.scheduler-lumi-cpu: + variables: + TIME_LIMIT: "05:00" + SCHEDULER_PARAMETERS: >- + --account=project_465000454 + --nodes=1 + --ntasks=1 + --cpus-per-task=4 + --mem-per-cpu=1G + --time=${TIME_LIMIT} + --partition=debug + ${EXTRA_SCHEDULER_PARAMETERS} + EXTRA_SCHEDULER_PARAMETERS: + id_tokens: + CI_JOB_JWT: + aud: https://gitlab.dkrz.de + tags: + - lumi + +.scheduler-lumi-gpu: + extends: .scheduler-lumi-cpu + variables: + PARTITION: dev-g + EXTRA_SCHEDULER_PARAMETERS: --gpus=1 + +.build-and-test: + variables: + CMAKE_BUILD_PARALLEL_LEVEL: 8 + VERBOSE: + CTEST_PARALLEL_LEVEL: 8 + CTEST_OUTPUT_ON_FAILURE: 1 + script: + - > + cmake -S . -B build + -DCMAKE_BUILD_TYPE=None + -DRTE_ENABLE_SP=${RTE_ENABLE_SP} + -DKERNEL_MODE=${KERNEL_MODE} + -DBUILD_TESTING=ON + - cmake --build build + - ctest --test-dir build + .build-and-test-levante: extends: .build-and-test variables: @@ -55,6 +80,23 @@ include: # Some tests require a large stack: - ulimit -s unlimited +.python-lumi: + variables: + PYHOME: ${CI_PROJECT_DIR}/python-venv + FF_USE_FASTZIP: 1 + +.build-and-test-lumi: + extends: + - .python-lumi + - .build-and-test + needs: + - setup-python + before_script: + - module --force purge + - module load ${COMPILER_MODULES} cray-hdf5 cray-netcdf + # Extend the existing environment variables: + - export PATH="${PYHOME}/bin:${PATH}" + nvhpc-gpu-openacc-DP: extends: - .scheduler-levante-gpu @@ -93,36 +135,6 @@ nag-cpu-default-SP: KERNEL_MODE: default RTE_ENABLE_SP: "ON" -.scheduler-lumi-cpu: - variables: - TIME_LIMIT: "05:00" - SCHEDULER_PARAMETERS: >- - --account=project_465000454 - --nodes=1 - --ntasks=1 - --cpus-per-task=4 - --mem-per-cpu=1G - --time=${TIME_LIMIT} - --partition=debug - ${EXTRA_SCHEDULER_PARAMETERS} - EXTRA_SCHEDULER_PARAMETERS: - id_tokens: - CI_JOB_JWT: - aud: https://gitlab.dkrz.de - tags: - - lumi - -.scheduler-lumi-gpu: - extends: .scheduler-lumi-cpu - variables: - PARTITION: dev-g - EXTRA_SCHEDULER_PARAMETERS: --gpus=1 - -.python-lumi: - variables: - PYHOME: ${CI_PROJECT_DIR}/python-venv - FF_USE_FASTZIP: 1 - setup-python: extends: - .scheduler-lumi-cpu @@ -143,18 +155,6 @@ setup-python: - ${PYHOME} expire_in: 60 minutes -.build-and-test-lumi: - extends: - - .python-lumi - - .build-and-test - needs: - - setup-python - before_script: - - module --force purge - - module load ${COMPILER_MODULES} cray-hdf5 cray-netcdf - # Extend the existing environment variables: - - export PATH="${PYHOME}/bin:${PATH}" - cce-gpu-openacc-DP: extends: - .scheduler-lumi-gpu From 968789231850ec206fefde4dde6e3ae1aaa506de Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 17:06:38 +0100 Subject: [PATCH 19/37] ci: gitlab: drop the python setup job --- .gitlab/levante.yml | 62 ++++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 40 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index fc4b46c71..c278ca23f 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -25,9 +25,10 @@ include: PARTITION: gpu EXTRA_SCHEDULER_PARAMETERS: --gpus=1 -.scheduler-lumi-cpu: +.scheduler-lumi-gpu: variables: TIME_LIMIT: "05:00" + PARTITION: dev-g SCHEDULER_PARAMETERS: >- --account=project_465000454 --nodes=1 @@ -35,21 +36,15 @@ include: --cpus-per-task=4 --mem-per-cpu=1G --time=${TIME_LIMIT} - --partition=debug + --partition=${PARTITION} ${EXTRA_SCHEDULER_PARAMETERS} - EXTRA_SCHEDULER_PARAMETERS: + EXTRA_SCHEDULER_PARAMETERS: --gpus=1 id_tokens: CI_JOB_JWT: aud: https://gitlab.dkrz.de tags: - lumi -.scheduler-lumi-gpu: - extends: .scheduler-lumi-cpu - variables: - PARTITION: dev-g - EXTRA_SCHEDULER_PARAMETERS: --gpus=1 - .build-and-test: variables: CMAKE_BUILD_PARALLEL_LEVEL: 8 @@ -80,22 +75,29 @@ include: # Some tests require a large stack: - ulimit -s unlimited -.python-lumi: +.build-and-test-lumi: + extends: .build-and-test variables: PYHOME: ${CI_PROJECT_DIR}/python-venv - FF_USE_FASTZIP: 1 - -.build-and-test-lumi: - extends: - - .python-lumi - - .build-and-test - needs: - - setup-python before_script: - module --force purge + - | + # Set up Python environment + test -d "${PYHOME}" || { + set -x + module load cray-python + python -m venv ${PYHOME} + ${PYHOME}/bin/python -m pip install \ + --verbose --no-cache-dir --upgrade \ + pip dask[array] netCDF4 numpy xarray + } + export PATH="${PYHOME}/bin:${PATH}" - module load ${COMPILER_MODULES} cray-hdf5 cray-netcdf - # Extend the existing environment variables: - - export PATH="${PYHOME}/bin:${PATH}" + cache: + # Update the key to regenerate the virtual environment: + key: python-venv-version-1 + paths: + - ${PYHOME} nvhpc-gpu-openacc-DP: extends: @@ -135,26 +137,6 @@ nag-cpu-default-SP: KERNEL_MODE: default RTE_ENABLE_SP: "ON" -setup-python: - extends: - - .scheduler-lumi-cpu - - .python-lumi - script: - - test ! -d "${PYHOME}" || exit 0 - - module load cray-python - - python -m venv ${PYHOME} - - ${PYHOME}/bin/python -m pip install --upgrade pip - - ${PYHOME}/bin/python -m pip install dask[array] netCDF4 numpy xarray - cache: - # Update the key to regenerate the virtual environment: - key: python-venv-version-1 - paths: - - ${PYHOME} - artifacts: - paths: - - ${PYHOME} - expire_in: 60 minutes - cce-gpu-openacc-DP: extends: - .scheduler-lumi-gpu From cff90671e1a34b2021adfde92debecb0fe82a5f5 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 17:15:43 +0100 Subject: [PATCH 20/37] ci: gitlab: refactore scheduling parameters --- .gitlab/levante.yml | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index c278ca23f..09352170a 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -7,17 +7,25 @@ include: - project: 'anw_dienste/ci-templates' file: '.slurm-ci.yml' -.scheduler-levante-cpu: - extends: .default +.scheduler: variables: + ACCOUNT: + PARTITION: TIME_LIMIT: "05:00" - PARTITION: shared + EXTRA_SCHEDULER_PARAMETERS: SCHEDULER_PARAMETERS: >- - --account=mh0287 - --time=${TIME_LIMIT} + --account=${ACCOUNT} --partition=${PARTITION} + --time=${TIME_LIMIT} ${EXTRA_SCHEDULER_PARAMETERS} - EXTRA_SCHEDULER_PARAMETERS: + +.scheduler-levante-cpu: + extends: + - .default # from anw_dienste/ci-templates/.slurm-ci.yml + - .scheduler + variables: + ACCOUNT: mh0287 + PARTITION: shared .scheduler-levante-gpu: extends: .scheduler-levante-cpu @@ -26,24 +34,21 @@ include: EXTRA_SCHEDULER_PARAMETERS: --gpus=1 .scheduler-lumi-gpu: + extends: .scheduler variables: - TIME_LIMIT: "05:00" + ACCOUNT: project_465000454 PARTITION: dev-g - SCHEDULER_PARAMETERS: >- - --account=project_465000454 + EXTRA_SCHEDULER_PARAMETERS: >- --nodes=1 --ntasks=1 --cpus-per-task=4 --mem-per-cpu=1G - --time=${TIME_LIMIT} - --partition=${PARTITION} - ${EXTRA_SCHEDULER_PARAMETERS} - EXTRA_SCHEDULER_PARAMETERS: --gpus=1 + --gpus=1 id_tokens: CI_JOB_JWT: aud: https://gitlab.dkrz.de tags: - - lumi + - lumi, hpc, csc .build-and-test: variables: From 493550238e7bd638495f28eacfe9b6952c86d02e Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 17:17:13 +0100 Subject: [PATCH 21/37] ci: gitlab: drop excessive variable --- .gitlab/levante.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 09352170a..e60e09511 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -97,7 +97,7 @@ include: pip dask[array] netCDF4 numpy xarray } export PATH="${PYHOME}/bin:${PATH}" - - module load ${COMPILER_MODULES} cray-hdf5 cray-netcdf + - module load PrgEnv-cray cce/16.0.1 craype-x86-milan craype-accel-amd-gfx90a rocm cray-hdf5 cray-netcdf cache: # Update the key to regenerate the virtual environment: key: python-venv-version-1 @@ -148,7 +148,6 @@ cce-gpu-openacc-DP: - .build-and-test-lumi variables: FC: ftn - COMPILER_MODULES: PrgEnv-cray cce/16.0.1 craype-x86-milan craype-accel-amd-gfx90a rocm # Compiler flags used for ICON model: FFLAGS: -hacc -hadd_paren -Ktrap=divz,ovf,inv -hflex_mp=intolerant -hfp1 -g KERNEL_MODE: accel From cea22808dd358faae457d3585243f9004b997870 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 17:18:09 +0100 Subject: [PATCH 22/37] ci: gitlab: increase time limit for all jobs --- .gitlab/levante.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index e60e09511..acbb6c927 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -11,7 +11,7 @@ include: variables: ACCOUNT: PARTITION: - TIME_LIMIT: "05:00" + TIME_LIMIT: "10:00" EXTRA_SCHEDULER_PARAMETERS: SCHEDULER_PARAMETERS: >- --account=${ACCOUNT} @@ -126,9 +126,6 @@ nag-cpu-accel-DP: - .scheduler-levante-cpu - .build-and-test-levante variables: - # Test executables produced in this configuration are too slow to run within - # the default time limit: - TIME_LIMIT: "10:00" FC: /sw/spack-levante/nag-7.1-lqjbej/bin/nagfor NetCDF_Fortran_ROOT: /sw/spack-levante/netcdf-fortran-4.5.3-5di6qe # Compiler flags used for ICON model: From 914f5efd99d06d08f8d65092fa495e2783b34fc0 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 17:33:17 +0100 Subject: [PATCH 23/37] ci: gitlab: adjust git depth on Levante --- .gitlab/levante.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index acbb6c927..8264b1ac8 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -26,6 +26,8 @@ include: variables: ACCOUNT: mh0287 PARTITION: shared + # Levante runners fetch from scratch each time: + GIT_DEPTH: 1 .scheduler-levante-gpu: extends: .scheduler-levante-cpu From 86e0f1c518a89cca738bfea31a0be03405fbe957 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 17:43:22 +0100 Subject: [PATCH 24/37] Allow for longer arguments in the test programs --- examples/all-sky/rrtmgp_allsky.F90 | 2 +- examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 | 4 ++-- examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 | 4 ++-- tests/check_equivalence.F90 | 2 +- tests/check_variants.F90 | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/all-sky/rrtmgp_allsky.F90 b/examples/all-sky/rrtmgp_allsky.F90 index 53bb547f4..8cb6841cd 100644 --- a/examples/all-sky/rrtmgp_allsky.F90 +++ b/examples/all-sky/rrtmgp_allsky.F90 @@ -102,7 +102,7 @@ program rte_rrtmgp_allsky logical :: do_aerosols = .false. integer, parameter :: ngas = 8 - character(len=256) :: output_file, k_dist_file, cloud_optics_file, aerosol_optics_file + character(len=512) :: output_file, k_dist_file, cloud_optics_file, aerosol_optics_file ! ! Timing variables ! diff --git a/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 b/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 index 07fac9c26..fc0e54bb3 100644 --- a/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 +++ b/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 @@ -86,8 +86,8 @@ program rrtmgp_rfmip_lw ! ! Local variables ! - character(len=256) :: rfmip_file = 'multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc' - character(len=132) :: kdist_file = 'coefficients_lw.nc' + character(len=512) :: rfmip_file = 'multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc' + character(len=512) :: kdist_file = 'coefficients_lw.nc' character(len=132) :: flxdn_file, flxup_file integer :: nargs, ncol, nlay, nbnd, nexp, nblocks, block_size, forcing_index, physics_index, n_quad_angles = 1 integer :: b, icol, ibnd diff --git a/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 b/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 index e914f3f93..6e2c9ea02 100644 --- a/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 +++ b/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 @@ -86,8 +86,8 @@ program rrtmgp_rfmip_sw ! ! Local variables ! - character(len=256) :: rfmip_file = 'multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc' - character(len=132) :: kdist_file = 'coefficients_sw.nc' + character(len=512) :: rfmip_file = 'multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc' + character(len=512) :: kdist_file = 'coefficients_sw.nc' character(len=132) :: flxdn_file, flxup_file integer :: nargs, ncol, nlay, nbnd, ngpt, nexp, nblocks, block_size, forcing_index integer :: b, icol, ibnd, igpt diff --git a/tests/check_equivalence.F90 b/tests/check_equivalence.F90 index aae500d8a..e097ebfa8 100644 --- a/tests/check_equivalence.F90 +++ b/tests/check_equivalence.F90 @@ -110,7 +110,7 @@ program rte_check_equivalence character(len=32 ), & dimension(:), allocatable :: kdist_gas_names, rfmip_gas_games - character(len=256) :: input_file = "", gas_optics_file = "" + character(len=512) :: input_file = "", gas_optics_file = "" ! ---------------------------------------------------------------------------------- ! Code ! ---------------------------------------------------------------------------------- diff --git a/tests/check_variants.F90 b/tests/check_variants.F90 index 68f62ebdc..425d450de 100644 --- a/tests/check_variants.F90 +++ b/tests/check_variants.F90 @@ -111,7 +111,7 @@ program rte_clear_sky_regression character(len=32 ), & dimension(:), allocatable :: kdist_gas_names, rfmip_gas_games - character(len=256) :: input_file = "", output_file = "", gas_optics_file = "", gas_optics_file_2 = "" + character(len=512) :: input_file = "", output_file = "", gas_optics_file = "", gas_optics_file_2 = "" integer :: ncid, dimid ! ---------------------------------------------------------------------------------- ! Code From 5bcf3bd56d9a3ea38bcf1ee76e69494bc1a5e284 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 17:46:59 +0100 Subject: [PATCH 25/37] ci: gitlab: suppress output from lmod on Lumi --- .gitlab/levante.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 8264b1ac8..78dd8951d 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -91,12 +91,13 @@ include: - | # Set up Python environment test -d "${PYHOME}" || { - set -x module load cray-python + set -x python -m venv ${PYHOME} ${PYHOME}/bin/python -m pip install \ --verbose --no-cache-dir --upgrade \ pip dask[array] netCDF4 numpy xarray + set +x } export PATH="${PYHOME}/bin:${PATH}" - module load PrgEnv-cray cce/16.0.1 craype-x86-milan craype-accel-amd-gfx90a rocm cray-hdf5 cray-netcdf From ef04d131419bc8b02662f717aa5a15a9c483e99b Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 18:11:19 +0100 Subject: [PATCH 26/37] ci: gitlab: switch to variable recognized by CMake --- .gitlab/levante.yml | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 78dd8951d..97c1fcbc5 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -71,41 +71,38 @@ include: .build-and-test-levante: extends: .build-and-test variables: - PYHOME: /sw/spack-levante/mambaforge-22.9.0-2-Linux-x86_64-kptncg + Python3_ROOT_DIR: /sw/spack-levante/mambaforge-22.9.0-2-Linux-x86_64-kptncg # Suppress an irrelevant but annoying error message: - PROJ_LIB: ${PYHOME}/share/proj + PROJ_LIB: ${Python3_ROOT_DIR}/share/proj before_script: - module purge - module load git - # Extend the existing environment variables: - - export PATH="${PYHOME}/bin:${PATH}" # Some tests require a large stack: - ulimit -s unlimited .build-and-test-lumi: extends: .build-and-test variables: - PYHOME: ${CI_PROJECT_DIR}/python-venv + Python3_ROOT_DIR: ${CI_PROJECT_DIR}/python-venv before_script: - module --force purge - | # Set up Python environment - test -d "${PYHOME}" || { + test -d "${Python3_ROOT_DIR}" || { module load cray-python set -x - python -m venv ${PYHOME} - ${PYHOME}/bin/python -m pip install \ + python -m venv ${Python3_ROOT_DIR} + ${Python3_ROOT_DIR}/bin/python -m pip install \ --verbose --no-cache-dir --upgrade \ pip dask[array] netCDF4 numpy xarray set +x } - export PATH="${PYHOME}/bin:${PATH}" - module load PrgEnv-cray cce/16.0.1 craype-x86-milan craype-accel-amd-gfx90a rocm cray-hdf5 cray-netcdf cache: # Update the key to regenerate the virtual environment: key: python-venv-version-1 paths: - - ${PYHOME} + - ${Python3_ROOT_DIR} nvhpc-gpu-openacc-DP: extends: From 0373ce7fcf8a85f37d5ba2b967fe0c4e04febe16 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 18:25:59 +0100 Subject: [PATCH 27/37] ci: gitlab: polish the Python environment caching --- .gitlab/levante.yml | 14 +++++++++----- .gitlab/lumi-python-requirement.txt | 4 ++++ 2 files changed, 13 insertions(+), 5 deletions(-) create mode 100644 .gitlab/lumi-python-requirement.txt diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 97c1fcbc5..7e82c47a6 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -84,6 +84,7 @@ include: extends: .build-and-test variables: Python3_ROOT_DIR: ${CI_PROJECT_DIR}/python-venv + REQUIREMENT_FILE: ${CI_PROJECT_DIR}/.gitlab/lumi-python-requirement.txt before_script: - module --force purge - | @@ -92,15 +93,16 @@ include: module load cray-python set -x python -m venv ${Python3_ROOT_DIR} - ${Python3_ROOT_DIR}/bin/python -m pip install \ - --verbose --no-cache-dir --upgrade \ - pip dask[array] netCDF4 numpy xarray + ${Python3_ROOT_DIR}/bin/python -m pip install --upgrade pip + ${Python3_ROOT_DIR}/bin/python -m pip install --no-cache-dir --requirement ${REQUIREMENT_FILE} set +x } - module load PrgEnv-cray cce/16.0.1 craype-x86-milan craype-accel-amd-gfx90a rocm cray-hdf5 cray-netcdf cache: # Update the key to regenerate the virtual environment: - key: python-venv-version-1 + key: + files: + - ${REQUIREMENT_FILE} paths: - ${Python3_ROOT_DIR} @@ -148,9 +150,11 @@ cce-gpu-openacc-DP: # Compiler flags used for ICON model: FFLAGS: -hacc -hadd_paren -Ktrap=divz,ovf,inv -hflex_mp=intolerant -hfp1 -g KERNEL_MODE: accel - RTE_ENABLE_SP: "OFF" cce-gpu-openacc-SP: extends: cce-gpu-openacc-DP variables: RTE_ENABLE_SP: "ON" + cache: + # Let the other job update the cache: + policy: pull diff --git a/.gitlab/lumi-python-requirement.txt b/.gitlab/lumi-python-requirement.txt new file mode 100644 index 000000000..88bc3e0e0 --- /dev/null +++ b/.gitlab/lumi-python-requirement.txt @@ -0,0 +1,4 @@ +dask[array] +netCDF4 +numpy +xarray From 6d2c4a7191a73634436351a467a4028e3c7afbce Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 18:30:16 +0100 Subject: [PATCH 28/37] ci: gitlab: set DP by default --- .gitlab/levante.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 7e82c47a6..b3acc21e3 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -54,6 +54,7 @@ include: .build-and-test: variables: + RTE_ENABLE_SP: "OFF" CMAKE_BUILD_PARALLEL_LEVEL: 8 VERBOSE: CTEST_PARALLEL_LEVEL: 8 @@ -116,7 +117,6 @@ nvhpc-gpu-openacc-DP: # Compiler flags used for ICON model: FFLAGS: -g -O2 -Mrecursive -Mallocatable=03 -Mstack_arrays -Minfo=accel,inline -acc=gpu,verystrict -gpu=cc80,cuda12.6 KERNEL_MODE: accel - RTE_ENABLE_SP: "OFF" nvhpc-gpu-openacc-SP: extends: nvhpc-gpu-openacc-DP @@ -133,7 +133,6 @@ nag-cpu-accel-DP: # Compiler flags used for ICON model: FFLAGS: -Wc=/sw/spack-levante/gcc-11.2.0-bcn7mb/bin/gcc -f2008 -colour -w=uep -g -gline -O0 -float-store -nan -Wc,-g -Wc,-pipe -Wc,--param,max-vartrack-size=200000000 -Wc,-mno-fma -C=all KERNEL_MODE: accel - RTE_ENABLE_SP: "OFF" nag-cpu-default-SP: extends: nag-cpu-accel-DP From 6de8dbc19341284470fceb42c093a456fd20dad9 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 18:32:07 +0100 Subject: [PATCH 29/37] ci: gitlab: set accel by default --- .gitlab/levante.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index b3acc21e3..d9b76a5f7 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -54,6 +54,7 @@ include: .build-and-test: variables: + KERNEL_MODE: accel RTE_ENABLE_SP: "OFF" CMAKE_BUILD_PARALLEL_LEVEL: 8 VERBOSE: @@ -63,8 +64,8 @@ include: - > cmake -S . -B build -DCMAKE_BUILD_TYPE=None - -DRTE_ENABLE_SP=${RTE_ENABLE_SP} -DKERNEL_MODE=${KERNEL_MODE} + -DRTE_ENABLE_SP=${RTE_ENABLE_SP} -DBUILD_TESTING=ON - cmake --build build - ctest --test-dir build @@ -116,7 +117,6 @@ nvhpc-gpu-openacc-DP: NetCDF_Fortran_ROOT: /sw/spack-levante/netcdf-fortran-4.6.1-4wu5wt # Compiler flags used for ICON model: FFLAGS: -g -O2 -Mrecursive -Mallocatable=03 -Mstack_arrays -Minfo=accel,inline -acc=gpu,verystrict -gpu=cc80,cuda12.6 - KERNEL_MODE: accel nvhpc-gpu-openacc-SP: extends: nvhpc-gpu-openacc-DP @@ -132,7 +132,6 @@ nag-cpu-accel-DP: NetCDF_Fortran_ROOT: /sw/spack-levante/netcdf-fortran-4.5.3-5di6qe # Compiler flags used for ICON model: FFLAGS: -Wc=/sw/spack-levante/gcc-11.2.0-bcn7mb/bin/gcc -f2008 -colour -w=uep -g -gline -O0 -float-store -nan -Wc,-g -Wc,-pipe -Wc,--param,max-vartrack-size=200000000 -Wc,-mno-fma -C=all - KERNEL_MODE: accel nag-cpu-default-SP: extends: nag-cpu-accel-DP @@ -148,7 +147,6 @@ cce-gpu-openacc-DP: FC: ftn # Compiler flags used for ICON model: FFLAGS: -hacc -hadd_paren -Ktrap=divz,ovf,inv -hflex_mp=intolerant -hfp1 -g - KERNEL_MODE: accel cce-gpu-openacc-SP: extends: cce-gpu-openacc-DP From 500f04a42a3236c04893d58f993758efd253eb77 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 18:38:07 +0100 Subject: [PATCH 30/37] ci: gitlab: request and use resources consistently --- .gitlab/levante.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index d9b76a5f7..65c87dd4c 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -12,11 +12,14 @@ include: ACCOUNT: PARTITION: TIME_LIMIT: "10:00" + NTASKS: 8 EXTRA_SCHEDULER_PARAMETERS: SCHEDULER_PARAMETERS: >- --account=${ACCOUNT} --partition=${PARTITION} --time=${TIME_LIMIT} + --ntasks=${NTASKS} + --nodes=1 ${EXTRA_SCHEDULER_PARAMETERS} .scheduler-levante-cpu: @@ -41,9 +44,7 @@ include: ACCOUNT: project_465000454 PARTITION: dev-g EXTRA_SCHEDULER_PARAMETERS: >- - --nodes=1 - --ntasks=1 - --cpus-per-task=4 + --cpus-per-task=1 --mem-per-cpu=1G --gpus=1 id_tokens: @@ -56,9 +57,9 @@ include: variables: KERNEL_MODE: accel RTE_ENABLE_SP: "OFF" - CMAKE_BUILD_PARALLEL_LEVEL: 8 + CMAKE_BUILD_PARALLEL_LEVEL: ${NTASKS} VERBOSE: - CTEST_PARALLEL_LEVEL: 8 + CTEST_PARALLEL_LEVEL: ${NTASKS} CTEST_OUTPUT_ON_FAILURE: 1 script: - > From 9e1f07039e4411ac512aa56d604cf35d963d2cf0 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 18:38:51 +0100 Subject: [PATCH 31/37] ci: gitlab: comment on why the variable is empty --- .gitlab/levante.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 65c87dd4c..65d3c1a47 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -58,7 +58,7 @@ include: KERNEL_MODE: accel RTE_ENABLE_SP: "OFF" CMAKE_BUILD_PARALLEL_LEVEL: ${NTASKS} - VERBOSE: + VERBOSE: # any given value is ignored, it's just checked for existence CTEST_PARALLEL_LEVEL: ${NTASKS} CTEST_OUTPUT_ON_FAILURE: 1 script: From 161a64e28ed2430227c008212151a404268802f5 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 18:45:46 +0100 Subject: [PATCH 32/37] ci: gitlab: drop seemingly redundant parameters --- .gitlab/levante.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 65d3c1a47..887556496 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -43,10 +43,7 @@ include: variables: ACCOUNT: project_465000454 PARTITION: dev-g - EXTRA_SCHEDULER_PARAMETERS: >- - --cpus-per-task=1 - --mem-per-cpu=1G - --gpus=1 + EXTRA_SCHEDULER_PARAMETERS: --gpus=1 id_tokens: CI_JOB_JWT: aud: https://gitlab.dkrz.de From 9c2e724b7bd32d5a53d4d8b23bef3f860e9a9692 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 19:26:44 +0100 Subject: [PATCH 33/37] ci: update gitlab-ci configuration --- .github/workflows/gitlab-ci.yml | 189 +++++++------------------------- 1 file changed, 39 insertions(+), 150 deletions(-) diff --git a/.github/workflows/gitlab-ci.yml b/.github/workflows/gitlab-ci.yml index 48c01d8f4..0367b107c 100644 --- a/.github/workflows/gitlab-ci.yml +++ b/.github/workflows/gitlab-ci.yml @@ -15,9 +15,9 @@ defaults: jobs: # - # Deferred GitLab pipelines on Levante at DKRZ (see .gitlab/levante.yml): + # Deferred GitLab pipelines at gitlab.dkrz.de (see .gitlab/levante.yml): # - levante-init: + trigger: if: | github.repository_owner == 'earth-system-radiation' && ( github.event_name != 'pull_request' || @@ -29,171 +29,60 @@ jobs: ref-name: ${{ steps.g-push-rev.outputs.ref-name }} pipeline-id: ${{ steps.gl-trigger-pipeline.outputs.pipeline-id }} steps: - # - # Check out GitHub repository - # - - name: Check out GitHub repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - # - # Push to GitLab repository - # - - name: Push to GitLab repository - id: g-push-rev - uses: "skosukhin/git-ci-hub-lab/g-push-rev@v1" - with: - remote-url: https://gitlab.dkrz.de/icon-libraries/librte-rrtmgp.git - password: ${{ secrets.DKRZ_GITLAB_TOKEN }} - ref-type: tag - ref-message: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - force-push: true - # - # Trigger GitLab CI/CD Pipeline - # - - name: Trigger GitLab CI/CD Pipeline - id: gl-trigger-pipeline - uses: "skosukhin/git-ci-hub-lab/gl-trigger-pipeline@v1" - with: - server-url: https://gitlab.dkrz.de - project-name: 953 - token: ${{ secrets.DKRZ_GITLAB_TRIGGER_TOKEN }} - ref-name: ${{ steps.g-push-rev.outputs.ref-name }} - expected-sha: ${{ github.sha }} - levante: + - name: Check out GitHub repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Push to GitLab repository + id: g-push-rev + uses: "skosukhin/git-ci-hub-lab/g-push-rev@v1" + with: + remote-url: ${{ vars.DKRZ_GITLAB_SERVER }}/${{ vars.DKRZ_GITLAB_PROJECT }}.git + password: ${{ secrets.DKRZ_GITLAB_TOKEN }} + ref-type: tag + ref-message: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + force-push: true + - name: Trigger GitLab CI/CD Pipeline + id: gl-trigger-pipeline + uses: "skosukhin/git-ci-hub-lab/gl-trigger-pipeline@v1" + with: + server-url: ${{ vars.DKRZ_GITLAB_SERVER }} + project-name: ${{ vars.DKRZ_GITLAB_PROJECT }} + token: ${{ secrets.DKRZ_GITLAB_TRIGGER_TOKEN }} + ref-name: ${{ steps.g-push-rev.outputs.ref-name }} + expected-sha: ${{ github.sha }} + attach: runs-on: ubuntu-latest - needs: levante-init + needs: trigger strategy: fail-fast: false matrix: config-name: + - cce-gpu-openacc-DP + - cce-gpu-openacc-SP + - nag-cpu-accel-DP + - nag-cpu-default-SP - nvhpc-gpu-openacc-DP - nvhpc-gpu-openacc-SP - - nag-cpu-default-SP - - nag-cpu-accel-DP steps: - # - # Build, run and check (fetch the log) - # - name: Build, run and check (fetch the log) uses: "skosukhin/git-ci-hub-lab/gl-attach-job@v1" with: - server-url: https://gitlab.dkrz.de - project-name: 953 + server-url: ${{ vars.DKRZ_GITLAB_SERVER }} + project-name: ${{ vars.DKRZ_GITLAB_PROJECT }} token: ${{ secrets.DKRZ_GITLAB_TOKEN }} - pipeline-id: ${{ needs.levante-init.outputs.pipeline-id }} + pipeline-id: ${{ needs.trigger.outputs.pipeline-id }} job-name: ${{ matrix.config-name }} - levante-cleanup: + cleanup: runs-on: ubuntu-latest - needs: [levante-init, levante] - if: always() && needs.levante-init.result != 'skipped' + needs: [trigger, attach] + if: always() && needs.trigger.result != 'skipped' continue-on-error: true steps: - uses: "skosukhin/git-ci-hub-lab/g-delete-ref@v1" with: - remote-url: https://gitlab.dkrz.de/icon-libraries/librte-rrtmgp.git + remote-url: ${{ vars.DKRZ_GITLAB_SERVER }}/${{ vars.DKRZ_GITLAB_PROJECT }}.git password: ${{ secrets.DKRZ_GITLAB_TOKEN }} - ref-type: ${{ needs.levante-init.outputs.ref-type }} - ref-name: ${{ needs.levante-init.outputs.ref-name }} - force: true - # - # Deferred GitLab pipelines on Lumi at CSC (see .gitlab/lumi.yml): - # - lumi-init: - if: | - github.repository_owner == 'earth-system-radiation' && - ( github.event_name != 'pull_request' || - ( github.event.pull_request.head.repo.owner.login == github.repository_owner && - github.event.pull_request.user.login != 'dependabot[bot]' )) - runs-on: ubuntu-latest - outputs: - ref-type: ${{ steps.g-push-rev.outputs.ref-type }} - ref-name: ${{ steps.g-push-rev.outputs.ref-name }} - pipeline-id: ${{ steps.gl-create-pipeline.outputs.pipeline-id }} - steps: - # - # Check out GitHub repository - # - - name: Check out GitHub repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - # - # Push to GitLab repository - # - - name: Push to GitLab repository - id: g-push-rev - uses: "skosukhin/git-ci-hub-lab/g-push-rev@v1" - with: - remote-url: https://earth.bsc.es/gitlab/utigerstedt/rte-rrtmgp-lumi-cicd.git - password: ${{ secrets.BSC_GITLAB_TOKEN }} - rev-id: ${{ github.sha }} - rev-signing-format: ssh - rev-signing-key: ${{ secrets.GITLAB_SIGNING_KEY }} - ref-type: branch - force-push: true - # - # Create GitLab CI/CD Pipeline - # - - name: Create GitLab CI/CD Pipeline - id: gl-create-pipeline - uses: "skosukhin/git-ci-hub-lab/gl-create-pipeline@v1" - with: - server-url: https://earth.bsc.es/gitlab - project-name: 1817 - token: ${{ secrets.BSC_GITLAB_TOKEN }} - ref-name: ${{ steps.g-push-rev.outputs.ref-name }} - expected-sha: ${{ steps.g-push-rev.outputs.ref-commit }} - # - # Set up Python virtual environment (fetch the log) - # - - name: Set up Python virtual environment (fetch the log) - uses: "skosukhin/git-ci-hub-lab/gl-attach-job@v1" - with: - server-url: https://earth.bsc.es/gitlab - project-name: 1817 - token: ${{ secrets.BSC_GITLAB_TOKEN }} - pipeline-id: ${{ steps.gl-create-pipeline.outputs.pipeline-id }} - job-name: setup-python - lumi: - runs-on: ubuntu-latest - needs: lumi-init - strategy: - fail-fast: false - matrix: - config-name: - - cce-gpu-openacc-DP - - cce-gpu-openacc-SP - steps: - # - # Build, run and check (fetch the log) - # - - name: Build, run and check (fetch the log) - uses: "skosukhin/git-ci-hub-lab/gl-attach-job@v1" - with: - server-url: https://earth.bsc.es/gitlab - project-name: 1817 - token: ${{ secrets.BSC_GITLAB_TOKEN }} - pipeline-id: ${{ needs.lumi-init.outputs.pipeline-id }} - job-name: ${{ matrix.config-name }} - lumi-cleanup: - runs-on: ubuntu-latest - needs: [lumi-init, lumi] - if: always() && needs.lumi-init.result != 'skipped' - continue-on-error: true - steps: - - uses: "skosukhin/git-ci-hub-lab/gl-cancel-pipeline@v1" - with: - server-url: https://earth.bsc.es/gitlab - project-name: 1817 - token: ${{ secrets.BSC_GITLAB_TOKEN }} - pipeline-id: ${{ needs.lumi-init.outputs.pipeline-id }} - force: true - - uses: "skosukhin/git-ci-hub-lab/gl-delete-ref@v1" - with: - server-url: https://earth.bsc.es/gitlab - project-name: 1817 - token: ${{ secrets.BSC_GITLAB_TOKEN }} - ref-type: ${{ needs.lumi-init.outputs.ref-type }} - ref-name: ${{ needs.lumi-init.outputs.ref-name }} + ref-type: ${{ needs.trigger.outputs.ref-type }} + ref-name: ${{ needs.trigger.outputs.ref-name }} force: true From 81f3b4dc912f2f724ec3b54ce3f0d78a05f262b1 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 19:36:04 +0100 Subject: [PATCH 34/37] ci: gitlab: nagfor needs gcc --- .gitlab/levante.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml index 887556496..8c0c8a427 100644 --- a/.gitlab/levante.yml +++ b/.gitlab/levante.yml @@ -127,9 +127,10 @@ nag-cpu-accel-DP: - .build-and-test-levante variables: FC: /sw/spack-levante/nag-7.1-lqjbej/bin/nagfor + CC: /sw/spack-levante/gcc-11.2.0-bcn7mb/bin/gcc NetCDF_Fortran_ROOT: /sw/spack-levante/netcdf-fortran-4.5.3-5di6qe # Compiler flags used for ICON model: - FFLAGS: -Wc=/sw/spack-levante/gcc-11.2.0-bcn7mb/bin/gcc -f2008 -colour -w=uep -g -gline -O0 -float-store -nan -Wc,-g -Wc,-pipe -Wc,--param,max-vartrack-size=200000000 -Wc,-mno-fma -C=all + FFLAGS: -Wc=${CC} -f2008 -colour -w=uep -g -gline -O0 -float-store -nan -Wc,-g -Wc,-pipe -Wc,--param,max-vartrack-size=200000000 -Wc,-mno-fma -C=all nag-cpu-default-SP: extends: nag-cpu-accel-DP From f9694829f2afd2dc93463036a33a2ea26ac7293e Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Wed, 8 Jan 2025 20:22:50 +0100 Subject: [PATCH 35/37] ci: gitlab: better name for the configuration file --- .github/workflows/gitlab-ci.yml | 2 +- .gitlab/{levante.yml => dkrz.yml} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename .gitlab/{levante.yml => dkrz.yml} (100%) diff --git a/.github/workflows/gitlab-ci.yml b/.github/workflows/gitlab-ci.yml index 0367b107c..d8f1821c1 100644 --- a/.github/workflows/gitlab-ci.yml +++ b/.github/workflows/gitlab-ci.yml @@ -15,7 +15,7 @@ defaults: jobs: # - # Deferred GitLab pipelines at gitlab.dkrz.de (see .gitlab/levante.yml): + # Deferred GitLab pipelines at gitlab.dkrz.de (see .gitlab/dkrz.yml): # trigger: if: | diff --git a/.gitlab/levante.yml b/.gitlab/dkrz.yml similarity index 100% rename from .gitlab/levante.yml rename to .gitlab/dkrz.yml From ee72e2052c109a613d3bc35da1160c0a08cfb77a Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Tue, 14 Jan 2025 08:55:18 +0100 Subject: [PATCH 36/37] Temporarily revert "ci: gitlab: better name for the configuration file" --- .github/workflows/gitlab-ci.yml | 2 +- .gitlab/{dkrz.yml => levante.yml} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename .gitlab/{dkrz.yml => levante.yml} (100%) diff --git a/.github/workflows/gitlab-ci.yml b/.github/workflows/gitlab-ci.yml index d8f1821c1..0367b107c 100644 --- a/.github/workflows/gitlab-ci.yml +++ b/.github/workflows/gitlab-ci.yml @@ -15,7 +15,7 @@ defaults: jobs: # - # Deferred GitLab pipelines at gitlab.dkrz.de (see .gitlab/dkrz.yml): + # Deferred GitLab pipelines at gitlab.dkrz.de (see .gitlab/levante.yml): # trigger: if: | diff --git a/.gitlab/dkrz.yml b/.gitlab/levante.yml similarity index 100% rename from .gitlab/dkrz.yml rename to .gitlab/levante.yml From 9b84fe505b282f35cbf912476f1386ecbf7c2814 Mon Sep 17 00:00:00 2001 From: Sergey Kosukhin Date: Tue, 14 Jan 2025 08:59:15 +0100 Subject: [PATCH 37/37] ci: gitlab: sign the commit --- .github/workflows/gitlab-ci.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/gitlab-ci.yml b/.github/workflows/gitlab-ci.yml index 0367b107c..1b65aa9de 100644 --- a/.github/workflows/gitlab-ci.yml +++ b/.github/workflows/gitlab-ci.yml @@ -39,8 +39,10 @@ jobs: with: remote-url: ${{ vars.DKRZ_GITLAB_SERVER }}/${{ vars.DKRZ_GITLAB_PROJECT }}.git password: ${{ secrets.DKRZ_GITLAB_TOKEN }} - ref-type: tag - ref-message: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + rev-id: ${{ github.sha }} + rev-signing-format: ssh + rev-signing-key: ${{ secrets.GITLAB_SIGNING_KEY }} + ref-type: branch force-push: true - name: Trigger GitLab CI/CD Pipeline id: gl-trigger-pipeline @@ -50,7 +52,7 @@ jobs: project-name: ${{ vars.DKRZ_GITLAB_PROJECT }} token: ${{ secrets.DKRZ_GITLAB_TRIGGER_TOKEN }} ref-name: ${{ steps.g-push-rev.outputs.ref-name }} - expected-sha: ${{ github.sha }} + expected-sha: ${{ steps.g-push-rev.outputs.ref-commit }} attach: runs-on: ubuntu-latest needs: trigger