From b84349cccce4b9409f597cd2366ab0421407a137 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 12 Apr 2022 17:51:19 +0200 Subject: [PATCH 01/10] Overhaul CI structure and add some HoreKa tests --- .gitlab-ci.yml | 560 +++++++++++++----------------------------- .gitlab/image.yml | 19 +- .gitlab/scripts.yml | 204 +++++++++++++++ .gitlab/variables.yml | 25 ++ 4 files changed, 404 insertions(+), 404 deletions(-) create mode 100644 .gitlab/scripts.yml create mode 100644 .gitlab/variables.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 320bf827eb2..3fcc72034c4 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,173 +3,26 @@ stages: - sync - trigger_pipeline - build + - test - code_quality - deploy - QoS_tools - - benchmark-build + - on-failure + - finalize-status - benchmark-cuda - benchmark-omp - benchmark-reference - - on-failure - - finalize-status include: - local: '.gitlab/condition.yml' - local: '.gitlab/image.yml' - -# Templates with reasonable defaults for builds and tests -.variables_template: &default_variables - BENCHMARK_SERVER: "FINECI" - C_COMPILER: "gcc" - CXX_COMPILER: "g++" - CUDA_COMPILER: "nvcc" - BUILD_TYPE: "Debug" - BUILD_SHARED_LIBS: "ON" - BUILD_REFERENCE: "ON" - BUILD_OMP: "OFF" - BUILD_CUDA: "OFF" - BUILD_HIP: "OFF" - BUILD_HWLOC: "ON" - BUILD_MPI: "OFF" - MPI_AS_ROOT: "OFF" - FAST_TESTS: "OFF" - DPCPP_SINGLE_MODE: "OFF" - MIXED_PRECISION: "ON" - RUN_EXAMPLES: "OFF" - CONFIG_LOG: "ON" - CXX_FLAGS: "" - EXTRA_CMAKE_FLAGS: "" - EXPORT_BUILD_DIR: "OFF" - CI_PROJECT_DIR_SUFFIX: "" - -.before_script_template: &default_before_script - - export NUM_CORES=${CI_PARALLELISM} - - export OMP_NUM_THREADS=${NUM_CORES} - - export CCACHE_DIR=${CCACHE_DIR} - - export CCACHE_MAXSIZE=${CCACHE_MAXSIZE} - -.before_script_git_template: &git_before_script - - eval $(ssh-agent -s) - - echo "${BOT_KEY}" | tr -d '\r' | ssh-add - >/dev/null - - mkdir -p ~/.ssh - - chmod 700 ~/.ssh - - ssh-keyscan -t rsa gitlab.com github.com >>~/.ssh/known_hosts - - git 
config --global user.name "${BOT_USER}" - - git config --global user.email "${BOT_EMAIL}" - -.build_template: &default_build - stage: build - variables: *default_variables - before_script: *default_before_script - script: - - mkdir -p ${CI_JOB_NAME} && cd ${CI_JOB_NAME} - - if [ -n "${CUDA_ARCH}" ]; then - CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH}; - CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER}); - fi - - if [[ "${MPI_AS_ROOT}" == "ON" ]];then - export OMPI_ALLOW_RUN_AS_ROOT=1; - export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1; - fi - - cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} - -GNinja - -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} - -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} - -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} - ${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR} ${CUDA_HOST_STR} - -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} - -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} - -DGINKGO_BUILD_HIP=${BUILD_HIP} - -DGINKGO_BUILD_MPI=${BUILD_MPI} -DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX} - -DMPI_RUN_AS_ROOT=${MPI_AS_ROOT} - -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} - -DGINKGO_BUILD_TESTS=ON -DGINKGO_BUILD_EXAMPLES=ON - -DGINKGO_FAST_TESTS=${FAST_TESTS} - -DGINKGO_MIXED_PRECISION=${MIXED_PRECISION} - -DGINKGO_RUN_EXAMPLES=${RUN_EXAMPLES} - -DGINKGO_CONFIG_LOG_DETAILED=${CONFIG_LOG} - -DGINKGO_DPCPP_SINGLE_MODE=${DPCPP_SINGLE_MODE} - -DGINKGO_EXPORT_BUILD_DIR=${EXPORT_BUILD_DIR} - - ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT} install - - if [ "${EXPORT_BUILD_DIR}" == "ON" ]; then ninja test_exportbuild; fi - dependencies: [] - except: - - schedules - -.build_template: &default_build_with_test - stage: build - variables: *default_variables - before_script: *default_before_script - script: - - mkdir -p ${CI_JOB_NAME} && cd ${CI_JOB_NAME} - - if [ -n "${CUDA_ARCH}" ]; then - CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH}; - 
CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER}); - fi - - if [ -n "${SYCL_DEVICE_TYPE}" ]; then export SYCL_DEVICE_TYPE; fi - - if [ -n "${SYCL_DEVICE_FILTER}" ]; then export SYCL_DEVICE_FILTER; fi - - if [[ "${MPI_AS_ROOT}" == "ON" ]];then - export OMPI_ALLOW_RUN_AS_ROOT=1; - export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1; - fi - - cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} - -GNinja - -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} - -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} - -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} - ${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR} ${CUDA_HOST_STR} - -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} - -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} - -DGINKGO_BUILD_HIP=${BUILD_HIP} - -DGINKGO_BUILD_MPI=${BUILD_MPI} -DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX} - -DMPI_RUN_AS_ROOT=${MPI_AS_ROOT} - -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} - -DGINKGO_BUILD_TESTS=ON -DGINKGO_BUILD_EXAMPLES=ON - -DGINKGO_FAST_TESTS=${FAST_TESTS} - -DGINKGO_MIXED_PRECISION=${MIXED_PRECISION} - -DGINKGO_CONFIG_LOG_DETAILED=${CONFIG_LOG} - -DGINKGO_DPCPP_SINGLE_MODE=${DPCPP_SINGLE_MODE} - -DGINKGO_RUN_EXAMPLES=${RUN_EXAMPLES} - -DGINKGO_EXPORT_BUILD_DIR=${EXPORT_BUILD_DIR} - - ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT} install - - | - (( $(ctest -N | tail -1 | sed 's/Total Tests: //') != 0 )) || exit 1 - - ctest -V --timeout 3000 - - ninja test_install - - pushd test/test_install - - ninja install - - popd - - | - if [ "${RUN_EXAMPLES}" == "ON" ]; then - export EX_ARG="reference" - ninja run_all_examples - ninja validate_all_examples - if [ "{BUILD_OMP}" == "ON" ]; then - export EX_ARG="omp" - ninja run_all_examples - ninja validate_all_examples - fi - if [ "{BUILD_CUDA}" == "ON" ]; then - export EX_ARG="cuda" - ninja run_all_examples - ninja validate_all_examples - fi - if [ "{BUILD_HIP}" == "ON" ]; then - export EX_ARG="hip" - ninja 
run_all_examples - ninja validate_all_examples - fi - fi - - if [ -n "${SYCL_DEVICE_TYPE}" ]; then unset SYCL_DEVICE_TYPE; fi - - if [ -n "${SYCL_DEVICE_FILTER}" ]; then unset SYCL_DEVICE_FILTER; fi - - if [ "${EXPORT_BUILD_DIR}" == "ON" ]; then ninja test_exportbuild; fi - dependencies: [] - except: - - schedules + - local: '.gitlab/scripts.yml' + - local: '.gitlab/variables.yml' status_pending: stage: init-status extends: + - .default_variables - .pr_condition - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: @@ -182,6 +35,7 @@ status_pending: status_success: stage: finalize-status extends: + - .default_variables - .pr_condition - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: @@ -198,6 +52,7 @@ status_success: status_failure: stage: finalize-status extends: + - .default_variables - .pr_condition - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: @@ -216,12 +71,13 @@ status_failure: sync: stage: sync extends: + - .default_variables + - .before_script_git_template - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: GIT_STRATEGY: none PRIVATE_REPO: git@gitlab.com:ginkgo-project/ginkgo.git PUBLIC_REPO: git@github.com:ginkgo-project/ginkgo.git - before_script: *git_before_script script: - git clone ${PRIVATE_REPO} -b ${CI_COMMIT_REF_NAME} repo_sync - cd repo_sync @@ -237,6 +93,7 @@ sync: trigger_pipeline: stage: trigger_pipeline extends: + - .default_variables - .pr_condition - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: @@ -284,12 +141,12 @@ trigger_pipeline: # Job with example runs. # cuda 9.2 and friends build/cuda92/nompi/gcc/all/release/shared: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-cuda92-mvapich2-gnu7-llvm50-intel2017 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" @@ -301,12 +158,12 @@ build/cuda92/nompi/gcc/all/release/shared: # Make sure that our jobs run when using self-installed # third-party HWLOC. 
build/cuda100/mvapich2/gcc/all/debug/shared: - <<: *default_build extends: + - .build_template + - .default_variables - .quick_test_condition - .use_gko-cuda100-mvapich2-gnu7-llvm60-intel2018 variables: - <<: *default_variables BUILD_MPI: "ON" BUILD_OMP: "ON" BUILD_CUDA: "ON" @@ -318,12 +175,12 @@ build/cuda100/mvapich2/gcc/all/debug/shared: # Make sure that our jobs run when HWLOC is # forcibly switched off build/cuda100/nompi/clang/all/release/static: - <<: *default_build extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-cuda100-mvapich2-gnu7-llvm60-intel2018 variables: - <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_OMP: "ON" @@ -335,12 +192,12 @@ build/cuda100/nompi/clang/all/release/static: CUDA_ARCH: 35 build/cuda100/nompi/intel/cuda/release/shared: - <<: *default_build extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-cuda100-mvapich2-gnu7-llvm60-intel2018 variables: - <<: *default_variables C_COMPILER: "icc" CXX_COMPILER: "icpc" BUILD_OMP: "ON" @@ -350,12 +207,12 @@ build/cuda100/nompi/intel/cuda/release/shared: # Build CUDA NVIDIA without omp build/cuda100/nompi/intel/cuda_wo_omp/release/shared: - <<: *default_build extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-cuda100-mvapich2-gnu7-llvm60-intel2018 variables: - <<: *default_variables C_COMPILER: "icc" CXX_COMPILER: "icpc" BUILD_CUDA: "ON" @@ -366,12 +223,12 @@ build/cuda100/nompi/intel/cuda_wo_omp/release/shared: # cuda 10.1 and friends build/cuda101/nompi/gcc/all/debug/shared: - <<: *default_build extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" @@ -380,12 +237,12 @@ build/cuda101/nompi/gcc/all/debug/shared: CUDA_ARCH: 35 build/cuda101/nompi/clang/all/release/static: - <<: *default_build extends: + - 
.build_template + - .default_variables - .full_test_condition - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019 variables: - <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_OMP: "ON" @@ -397,12 +254,12 @@ build/cuda101/nompi/clang/all/release/static: # clang-cuda with cuda 10.1 and friends build/clang-cuda101/openmpi/gcc/cuda/release/shared: - <<: *default_build extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-cuda101-openmpi-gnu8-llvm11-intel2019 variables: - <<: *default_variables CUDA_COMPILER: "clang++" BUILD_OMP: "ON" BUILD_CUDA: "ON" @@ -410,15 +267,16 @@ build/clang-cuda101/openmpi/gcc/cuda/release/shared: MPI_AS_ROOT: "ON" BUILD_HIP: "OFF" BUILD_TYPE: "Release" - CUDA_ARCH: 35 + CUDA_ARCH: 61 + build/clang-cuda101/nompi/clang/cuda/debug/static: - <<: *default_build extends: + - .build_and_test_template + - .default_variables - .full_test_condition - .use_gko-cuda101-openmpi-gnu8-llvm11-intel2019 variables: - <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" CUDA_COMPILER: "clang++" @@ -427,18 +285,19 @@ build/clang-cuda101/nompi/clang/cuda/debug/static: BUILD_TYPE: "Debug" FAST_TESTS: "ON" BUILD_SHARED_LIBS: "OFF" - CUDA_ARCH: 35 + CUDA_ARCH: 61 + # cuda 10.2 and friends # works when there is no hwloc and tpl hwloc is also switched off. 
build/cuda102/nompi/gcc/all/debug/shared: - <<: *default_build extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-cuda102-nompi-gnu8-llvm8-intel2019 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" @@ -449,12 +308,12 @@ build/cuda102/nompi/gcc/all/debug/shared: # Use TPL hwloc when no system hwloc is available build/cuda102/nompi/clang/all/release/static: - <<: *default_build extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-cuda102-nompi-gnu8-llvm8-intel2019 variables: - <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_OMP: "ON" @@ -465,12 +324,12 @@ build/cuda102/nompi/clang/all/release/static: CUDA_ARCH: 35 build/cuda102/nompi/intel/cuda/debug/static: - <<: *default_build extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-cuda102-nompi-gnu8-llvm8-intel2019 variables: - <<: *default_variables C_COMPILER: "icc" CXX_COMPILER: "icpc" BUILD_OMP: "ON" @@ -480,43 +339,77 @@ build/cuda102/nompi/intel/cuda/debug/static: BUILD_SHARED_LIBS: "OFF" CUDA_ARCH: 35 -# cuda 11.0 and friends +# cuda 11.0 and friends on HoreKa with tests build/cuda110/mvapich2/gcc/cuda/debug/shared: - <<: *default_build_with_test extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-cuda110-mvapich2-gnu9-llvm9-intel2020 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_MPI: "ON" BUILD_TYPE: "Debug" FAST_TESTS: "ON" - CUDA_ARCH: 61 + CUDA_ARCH: 80 + USE_NAME: "cuda110-mvapich2-gcc-${CI_PIPELINE_ID}" + KEEP_CONTAINER: "ON" + USE_SLURM: 0 + +test/cuda110/mvapich2/gcc/cuda/debug/shared: + extends: + - .horeka_test_template + - .default_variables + - .full_test_condition + - .use_gko-cuda110-mvapich2-gnu9-llvm9-intel2020 + variables: + USE_NAME: "cuda110-mvapich2-gcc-${CI_PIPELINE_ID}" + SLURM_PARTITION: "accelerated" + SLURM_GRES: "gpu:1" + SLURM_TIME: "00:45:00" + dependencies: [ 
"build/cuda110/mvapich2/gcc/cuda/debug/shared" ] + build/cuda110/nompi/clang/cuda/release/static: - <<: *default_build_with_test extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-cuda110-mvapich2-gnu9-llvm9-intel2020 variables: - <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OFF" - CUDA_ARCH: 61 + CUDA_ARCH: 80 + USE_NAME: "cuda110-nompi-clang-${CI_PIPELINE_ID}" + KEEP_CONTAINER: "ON" + USE_SLURM: 0 + +test/cuda110/nompi/clang/cuda/release/static: + extends: + - .horeka_test_template + - .default_variables + - .full_test_condition + - .use_gko-cuda110-mvapich2-gnu9-llvm9-intel2020 + variables: + USE_NAME: "cuda110-nompi-clang-${CI_PIPELINE_ID}" + SLURM_PARTITION: "accelerated" + SLURM_GRES: "gpu:1" + SLURM_TIME: "00:45:00" + dependencies: [ "build/cuda110/nompi/clang/cuda/release/static" ] + build/cuda110/nompi/intel/cuda/debug/static: - <<: *default_build_with_test extends: + - .build_template + - .default_variables - .quick_test_condition - .use_gko-cuda110-mvapich2-gnu9-llvm9-intel2020 variables: - <<: *default_variables C_COMPILER: "icc" CXX_COMPILER: "icpc" BUILD_OMP: "ON" @@ -524,16 +417,33 @@ build/cuda110/nompi/intel/cuda/debug/static: BUILD_TYPE: "Debug" FAST_TESTS: "ON" BUILD_SHARED_LIBS: "OFF" - CUDA_ARCH: 61 + CUDA_ARCH: 80 + USE_NAME: "cuda110-nompi-intel-${CI_PIPELINE_ID}" + KEEP_CONTAINER: "ON" + USE_SLURM: 0 + +test/cuda110/nompi/intel/cuda/debug/static: + extends: + - .horeka_test_template + - .default_variables + - .quick_test_condition + - .use_gko-cuda110-mvapich2-gnu9-llvm9-intel2020 + variables: + USE_NAME: "cuda110-nompi-intel-${CI_PIPELINE_ID}" + SLURM_PARTITION: "accelerated" + SLURM_GRES: "gpu:1" + SLURM_TIME: "00:45:00" + dependencies: [ "build/cuda110/nompi/intel/cuda/debug/static" ] + # cuda 11.4 and friends build/cuda114/nompi/gcc/cuda/debug/shared: - <<: *default_build_with_test extends: + - 
.build_and_test_template + - .default_variables - .quick_test_condition - .use_gko_cuda114-openmpi-gnu11-llvm12 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_TYPE: "Debug" @@ -546,12 +456,12 @@ build/cuda114/nompi/gcc/cuda/debug/shared: # ROCm 4.0 and friends build/amd/nompi/gcc/rocm40/debug/shared: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-rocm40-openmpi-gnu5-llvm50 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_HIP: "ON" RUN_EXAMPLES: "ON" @@ -559,12 +469,12 @@ build/amd/nompi/gcc/rocm40/debug/shared: FAST_TESTS: "ON" build/amd/openmpi/clang/rocm40/release/static: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .full_test_condition - .use_gko-rocm40-openmpi-gnu5-llvm50 variables: - <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_OMP: "ON" @@ -576,24 +486,24 @@ build/amd/openmpi/clang/rocm40/release/static: # ROCm 4.5 and friends build/amd/mvapich2/gcc/rocm45/release/shared: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-rocm45-mvapich2-gnu8-llvm8 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_HIP: "ON" RUN_EXAMPLES: "ON" BUILD_TYPE: "Release" build/amd/mvapich2/clang/rocm45/debug/shared: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-rocm45-mvapich2-gnu8-llvm8 variables: - <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_OMP: "ON" @@ -603,12 +513,12 @@ build/amd/mvapich2/clang/rocm45/debug/shared: # ROCm 5.0.2 and friends build/amd/openmpi/gcc/rocm502/debug/static: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .full_test_condition - .use_gko-rocm502-openmpi-gnu11-llvm11 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_HIP: 
"ON" RUN_EXAMPLES: "ON" @@ -616,12 +526,12 @@ build/amd/openmpi/gcc/rocm502/debug/static: BUILD_SHARED_LIBS: "OFF" build/amd/openmpi/clang/rocm502/release/shared: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-rocm502-openmpi-gnu11-llvm11 variables: - <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_OMP: "ON" @@ -631,12 +541,12 @@ build/amd/openmpi/clang/rocm502/release/shared: # without omp build/amd/nompi/gcc/rocm502_wo_omp/release/shared: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .full_test_condition - .use_gko-rocm502-openmpi-gnu11-llvm11 variables: - <<: *default_variables BUILD_OMP: "OFF" BUILD_MPI: "OFF" BUILD_HIP: "ON" @@ -645,45 +555,45 @@ build/amd/nompi/gcc/rocm502_wo_omp/release/shared: # no cuda but latest gcc and clang build/nocuda/nompi/gcc/core/debug/static: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: - <<: *default_variables BUILD_TYPE: "Debug" FAST_TESTS: "ON" BUILD_SHARED_LIBS: "OFF" BUILD_HWLOC: "OFF" build/nocuda/nompi/clang/core/release/shared: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: - <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_TYPE: "Release" build/nocuda/nompi/gcc/omp/release/shared: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_TYPE: "Release" build/nocuda/openmpi/clang/omp/debug/static: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .full_test_condition - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: - <<: 
*default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_OMP: "ON" @@ -695,12 +605,12 @@ build/nocuda/openmpi/clang/omp/debug/static: # nocuda with the oldest supported compiler build/nocuda/nompi/intel/core/debug/shared: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-nocuda-mvapich2-gnu5-llvm39-intel2018 variables: - <<: *default_variables # intel with old gcc without include path leads error: identifier "____m128d" is undefined CXX_FLAGS: "-I /opt/intel/include/icc" C_COMPILER: "icc" @@ -709,23 +619,23 @@ build/nocuda/nompi/intel/core/debug/shared: FAST_TESTS: "ON" build/nocuda/nompi/gcc/omp/release/static: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-nocuda-mvapich2-gnu5-llvm39-intel2018 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OFF" build/nocuda-nomixed/nompi/clang/omp/release/static: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .full_test_condition - .use_gko-nocuda-mvapich2-gnu5-llvm39-intel2018 variables: - <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_OMP: "ON" @@ -734,12 +644,12 @@ build/nocuda-nomixed/nompi/clang/omp/release/static: MIXED_PRECISION: "OFF" build/nocuda-nomixed/openmpi/gcc/omp/release/shared: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: - <<: *default_variables BUILD_MPI: "ON" MPI_AS_ROOT: "ON" BUILD_OMP: "ON" @@ -747,12 +657,12 @@ build/nocuda-nomixed/openmpi/gcc/omp/release/shared: MIXED_PRECISION: "OFF" build/nocuda-nomixed/nompi/clang/omp/debug/static: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .full_test_condition - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: - <<: 
*default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_OMP: "ON" @@ -761,12 +671,12 @@ build/nocuda-nomixed/nompi/clang/omp/debug/static: MIXED_PRECISION: "OFF" build/dpcpp/cpu/release/static: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-oneapi-cpu variables: - <<: *default_variables C_COMPILER: "gcc" CXX_COMPILER: "dpcpp" BUILD_DPCPP: "ON" @@ -776,12 +686,12 @@ build/dpcpp/cpu/release/static: # It gives two available backends of GPU on tests build/dpcpp/igpu/release/shared: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-oneapi-igpu variables: - <<: *default_variables C_COMPILER: "gcc" CXX_COMPILER: "dpcpp" BUILD_DPCPP: "ON" @@ -792,12 +702,12 @@ build/dpcpp/igpu/release/shared: # TODO: Enable when debug shared library size issues are fixed # build/dpcpp/level_zero_igpu/debug/shared: -# <<: *default_build_with_test # extends: +# - .build_and_test_template +# - .default_variables # - .full_test_condition # - .use_gko-oneapi-igpu # variables: -# <<: *default_variables # C_COMPILER: "gcc" # CXX_COMPILER: "dpcpp" # BUILD_DPCPP: "ON" @@ -808,12 +718,12 @@ build/dpcpp/igpu/release/shared: # It gives two available backends of GPU on tests build/dpcpp/dgpu/release/static: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .full_test_condition - .use_gko-oneapi-igpu variables: - <<: *default_variables C_COMPILER: "gcc" CXX_COMPILER: "dpcpp" BUILD_DPCPP: "ON" @@ -823,12 +733,12 @@ build/dpcpp/dgpu/release/static: SYCL_DEVICE_TYPE: "GPU" build/dpcpp/level_zero_dgpu/release/shared: - <<: *default_build_with_test extends: + - .build_and_test_template + - .default_variables - .quick_test_condition - .use_gko-oneapi-dgpu variables: - <<: *default_variables C_COMPILER: "gcc" CXX_COMPILER: "dpcpp" BUILD_DPCPP: "ON" @@ -838,13 +748,13 @@ 
build/dpcpp/level_zero_dgpu/release/shared: # Job with important warnings as error warnings: - <<: *default_build stage: code_quality extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" @@ -853,13 +763,13 @@ warnings: # Ensure kernel modules do not depend on core no-circular-deps: - <<: *default_build stage: code_quality extends: + - .build_template + - .default_variables - .quick_test_condition - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" @@ -868,39 +778,39 @@ no-circular-deps: # Ensure Ginkgo builds from a subdirectory subdir-build: - <<: *default_build stage: code_quality extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: - <<: *default_variables BUILD_OMP: "ON" - CI_PROJECT_PATH_SUFFIX: "/test_subdir" + CI_PROJECT_DIR_SUFFIX: "/test_subdir" allow_failure: no # Ensure Ginkgo can be used when exporting the build directory export-build: - <<: *default_build stage: code_quality extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: - <<: *default_variables BUILD_OMP: "ON" EXPORT_BUILD_DIR: "ON" allow_failure: no # Run clang-tidy and iwyu clang-tidy: - <<: *default_build stage: code_quality extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" @@ -908,13 +818,13 @@ clang-tidy: allow_failure: yes iwyu: - <<: *default_build stage: code_quality extends: + - .build_template + - .default_variables - .full_test_condition - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019 variables: - <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" - BUILD_CUDA: "HIP" + BUILD_HIP: "ON" @@ -926,9 +836,10 @@ iwyu:
sonarqube_cov_: stage: code_quality extends: + - .default_variables - .quick_test_condition + - .before_script_template - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019 - before_script: *default_before_script script: - PR_ID=$(curl -s "https://api.github.com/search/issues?q=sha:${CI_COMMIT_SHA}" | jq '.items[0].number') @@ -961,9 +872,10 @@ sonarqube_cov_: sonarqube_cov: stage: code_quality extends: + - .default_variables - .deploy_condition + - .before_script_template - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019 - before_script: *default_before_script script: - ctest -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=COVERAGE -DGINKGO_SONARQUBE_TEST=ON @@ -978,12 +890,12 @@ sonarqube_cov: gh-pages: stage: deploy extends: + - .default_variables - .deploy_condition + - .before_script_git_template - .use_gko-nocuda-openmpi-gnu9-llvm8 variables: - <<: *default_variables PUBLIC_REPO: git@github.com:ginkgo-project/ginkgo.git - before_script: *git_before_script script: # build docs - mkdir -p ${CI_JOB_NAME} && pushd ${CI_JOB_NAME} @@ -1014,9 +926,10 @@ gh-pages: threadsanitizer: stage: QoS_tools extends: + - .default_variables - .deploy_condition + - .before_script_template - .use_gko-cuda101-openmpi-gnu8-llvm11-intel2019 - before_script: *default_before_script script: - LD_PRELOAD=/usr/local/lib/libomp.so CC=clang CXX=clang++ @@ -1028,9 +941,10 @@ threadsanitizer: leaksanitizer: stage: QoS_tools extends: + - .default_variables - .deploy_condition + - .before_script_template - .use_gko-cuda101-openmpi-gnu8-llvm11-intel2019 - before_script: *default_before_script script: - ctest -V -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=LSAN -DCTEST_MEMORYCHECK_TYPE=LeakSanitizer @@ -1038,9 +952,10 @@ leaksanitizer: addresssanitizer: stage: QoS_tools extends: + - .default_variables - .deploy_condition + - .before_script_template - .use_gko-cuda101-openmpi-gnu8-llvm11-intel2019 - before_script: *default_before_script script: - ctest -V -S cmake/CTestScript.cmake 
-DCTEST_BUILD_CONFIGURATION=ASAN -DCTEST_MEMORYCHECK_TYPE=AddressSanitizer @@ -1048,9 +963,10 @@ addresssanitizer: undefinedsanitizer: stage: QoS_tools extends: + - .default_variables - .deploy_condition + - .before_script_template - .use_gko-cuda101-openmpi-gnu8-llvm11-intel2019 - before_script: *default_before_script script: # the Gold linker is required because of a linker flag issues given by UBsan # in the Ubuntu setup we are using. @@ -1060,151 +976,21 @@ undefinedsanitizer: cudamemcheck: stage: QoS_tools extends: + - .before_script_template + - .default_variables - .deploy_condition image: ginkgohub/cuda:101-openmpi-gnu8-llvm11-intel2019 tags: - private_ci - nvidia-gpu - before_script: *default_before_script script: - ctest -V -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=RelWithDebInfo -DCTEST_MEMORYCHECK_TYPE=CudaMemcheck -# Benchmark build -.benchmark_before_script_template: &default_benchmark_before_script - # set up identities - - eval $(ssh-agent -s) - - tmp_key=${BENCHMARK_SERVER}_SSH_KEY - - echo "${!tmp_key}" | tr -d '\r' | ssh-add - >/dev/null - - echo "${BOT_KEY}" | tr -d '\r' | ssh-add - >/dev/null - - mkdir -p ~/.ssh - - chmod 700 ~/.ssh - - tmp_name=${BENCHMARK_SERVER}_SSH_NAME - - ssh-keyscan -t rsa "${!tmp_name}" >>~/.ssh/known_hosts - - ssh-keyscan -t rsa gitlab.com github.com >>~/.ssh/known_hosts - - chmod 644 ~/.ssh/known_hosts - - tmp_user=${BENCHMARK_SERVER}_SSH_USER - - echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config - - SSH_COMMAND="ssh ${!tmp_user}@${!tmp_name}" - - git config --global user.name "${BOT_USER}" - - git config --global user.email "${BOT_EMAIL}" - -fineci-benchmark-build: - stage: benchmark-build - extends: - - .use_gko-nocuda-mvapich2-gnu5-llvm39-intel2018 - variables: - <<: *default_variables - BENCHMARK_SERVER: FINECI - BUILD_TYPE: "Release" - BUILD_OMP: "ON" - BUILD_CUDA: "ON" - PUBLIC_REPO: https://github.com/ginkgo-project/ginkgo.git - BENCHMARK_REPO: 
https://github.com/ginkgo-project/ginkgo-data.git - before_script: *default_benchmark_before_script - script: - - | - ${SSH_COMMAND} 'tee /dev/stderr | scl enable devtoolset-7 bash' << EOT - set -xe - rm -rf ginkgo - git clone ${PUBLIC_REPO} ginkgo - cd ginkgo - git checkout ${CI_COMMIT_SHA} - mkdir build - cd build - cmake3 .. -DCMAKE_C_COMPILER=${C_COMPILER} \\ - -DCMAKE_CXX_COMPILER=${CXX_COMPILER} \\ - -DCMAKE_CUDA_COMPILER=/usr/local/cuda-9.2/bin/nvcc \\ - -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \\ - -DBUILD_SHARED_LIBS=ON ${EXTRA_CMAKE_FLAGS} \\ - -DGINKGO_DEVEL_TOOLS=OFF \\ - -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} \\ - -DGINKGO_BUILD_OMP=${BUILD_OMP} \\ - -DGINKGO_BUILD_CUDA=${BUILD_CUDA} \\ - -DGINKGO_BUILD_HIP=${BUILD_HIP} \\ - -DGINKGO_BUILD_TESTS=OFF -DGINKGO_BUILD_EXAMPLES=OFF \\ - -DGINKGO_BUILD_BENCHMARKS=ON - make -j${CI_PARALLELISM} - EOT - dependencies: [] - only: - - schedules -# - develop -# - master - - -# Benchmark runs -.benchmark_template: &default_benchmark - before_script: *default_benchmark_before_script - script: - # run benchmarks - - | - ${SSH_COMMAND} 'tee /dev/stderr | scl enable devtoolset-7 bash' \ - >results.json << EOT - module load cuda/cuda-10.0 - set -xe - cd ginkgo/build/benchmark - make benchmark SYSTEM_NAME=${SYSTEM_NAME} EXECUTOR=${EXECUTOR} - tar -czf data.tar.gz results - EOT - # publish them - - export CURRENT_SHA="$(git rev-parse --short HEAD)" - - git clone ${BENCHMARK_REPO} data-repo - - cd data-repo/data/ - - scp ${!tmp_user}@${!tmp_name}:ginkgo/build/benchmark/data.tar.gz . - - tar -xzf data.tar.gz --strip-components=1 - - rm data.tar.gz - - ./build-list . 
>list.json - - ./agregate agregate.json - - git add -A - - git diff --quiet HEAD || - (git commit -m - "Benchmark ${EXECUTOR} of ginkgo-project/ginkgo@${CURRENT_SHA}" - && git push) - dependencies: [] - only: - - schedules -# - develop -# - master - -fineci-benchmark-cuda: - stage: benchmark-cuda - extends: - - .use_gko-nocuda-mvapich2-gnu5-llvm39-intel2018 - variables: - <<: *default_variables - BENCHMARK_SERVER: FINECI - EXECUTOR: cuda - PUBLIC_REPO: https://github.com/ginkgo-project/ginkgo.git - BENCHMARK_REPO: git@github.com:ginkgo-project/ginkgo-data.git - SYSTEM_NAME: K20Xm - <<: *default_benchmark - -# fineci-benchmark-omp: -# stage: benchmark-omp -# variables: -# <<: *default_variables -# BENCHMARK_SERVER: FINECI -# EXECUTOR: omp -# PUBLIC_REPO: https://github.com/ginkgo-project/ginkgo.git -# BENCHMARK_REPO: git@github.com:ginkgo-project/ginkgo-data.git -# SYSTEM_NAME: E5-2670 -# <<: *default_benchmark - -# fineci-benchmark-reference: -# stage: benchmark-reference -# variables: -# <<: *default_variables -# BENCHMARK_SERVER: FINECI -# EXECUTOR: reference -# PUBLIC_REPO: https://github.com/ginkgo-project/ginkgo.git -# BENCHMARK_REPO: git@github.com:ginkgo-project/ginkgo-data.git -# SYSTEM_NAME: E5-2670 -# <<: *default_benchmark - new-issue-on-failure: stage: on-failure extends: + - .default_variables - .use_gko-nocuda-openmpi-gnu9-llvm8 script: curl --request POST "https://gitlab.com/api/v4/projects/${PROJECT_ID}/issues?private_token=${BOT_ACCESS_TOKEN}&title=Error%20in%20${CI_PROJECT_NAME}%20with%20pipeline%20${CI_PIPELINE_ID}%20for%20commit%20${CI_COMMIT_SHA}&labels&description=${CI_PIPELINE_URL}" when: on_failure diff --git a/.gitlab/image.yml b/.gitlab/image.yml index a8a60d062ad..770b4ac2001 100644 --- a/.gitlab/image.yml +++ b/.gitlab/image.yml @@ -12,20 +12,6 @@ - cpu - controller -.use_gko-cuda90-openmpi-gnu5-llvm39: - image: ginkgohub/cuda:openmpi-90-gnu5-llvm39 - tags: - - private_ci - - controller - - cpu - -.use_gko-cuda91-mvapich2-gnu6-llvm40: - 
image: ginkgohub/cuda:91-mvapich2-gnu6-llvm40 - tags: - - private_ci - - controller - - cpu - .use_gko-cuda92-mvapich2-gnu7-llvm50-intel2017: image: ginkgohub/cuda:92-mvapich2-gnu7-llvm50-intel2017 tags: @@ -50,8 +36,7 @@ image: ginkgohub/cuda:101-openmpi-gnu8-llvm11-intel2019 tags: - private_ci - - controller - - cpu + - nvidia-gpu .use_gko-cuda102-nompi-gnu8-llvm8-intel2019: image: ginkgohub/cuda:102-nompi-gnu8-llvm8-intel2019 @@ -64,7 +49,7 @@ image: ginkgohub/cuda:110-mvapich2-gnu9-llvm9-intel2020 tags: - private_ci - - nvidia-gpu + - horeka .use_gko_cuda114-openmpi-gnu11-llvm12: image: ginkgohub/cuda:114-openmpi-gnu11-llvm12 diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml new file mode 100644 index 00000000000..bd54a011bae --- /dev/null +++ b/.gitlab/scripts.yml @@ -0,0 +1,204 @@ +.before_script_template: &default_before_script + before_script: + - export NUM_CORES=${CI_PARALLELISM} + - export OMP_NUM_THREADS=${NUM_CORES} + - export CCACHE_DIR=${CCACHE_DIR} + - export CCACHE_MAXSIZE=${CCACHE_MAXSIZE} + +.before_script_git_template: &git_before_script + before_script: + - eval $(ssh-agent -s) + - echo "${BOT_KEY}" | tr -d '\r' | ssh-add - >/dev/null + - mkdir -p ~/.ssh + - chmod 700 ~/.ssh + - ssh-keyscan -t rsa gitlab.com github.com >>~/.ssh/known_hosts + - git config --global user.name "${BOT_USER}" + - git config --global user.email "${BOT_EMAIL}" + +.build_template: &default_build + stage: build + extends: + - .before_script_template + script: + - mkdir -p ${CI_JOB_NAME} && cd ${CI_JOB_NAME} + - if [ -n "${CUDA_ARCH}" ]; then + CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH}; + CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER}); + fi + - if [[ "${MPI_AS_ROOT}" == "ON" ]];then + export OMPI_ALLOW_RUN_AS_ROOT=1; + export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1; + fi + - cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} + -GNinja + -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} + -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} 
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} + ${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR} ${CUDA_HOST_STR} + -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} + -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} + -DGINKGO_BUILD_HIP=${BUILD_HIP} + -DGINKGO_BUILD_MPI=${BUILD_MPI} -DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX} + -DMPI_RUN_AS_ROOT=${MPI_AS_ROOT} + -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} + -DGINKGO_BUILD_TESTS=ON -DGINKGO_BUILD_EXAMPLES=ON + -DGINKGO_FAST_TESTS=${FAST_TESTS} + -DGINKGO_MIXED_PRECISION=${MIXED_PRECISION} + -DGINKGO_RUN_EXAMPLES=${RUN_EXAMPLES} + -DGINKGO_CONFIG_LOG_DETAILED=${CONFIG_LOG} + -DGINKGO_DPCPP_SINGLE_MODE=${DPCPP_SINGLE_MODE} + -DGINKGO_EXPORT_BUILD_DIR=${EXPORT_BUILD_DIR} + - ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT} install + - if [ "${EXPORT_BUILD_DIR}" == "ON" ]; then ninja test_exportbuild; fi + dependencies: [] + except: + - schedules + +.build_and_test_template: &default_build_with_test + stage: build + extends: + - .before_script_template + script: + - mkdir -p ${CI_JOB_NAME} && cd ${CI_JOB_NAME} + - if [ -n "${CUDA_ARCH}" ]; then + CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH}; + CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER}); + fi + - if [ -n "${SYCL_DEVICE_TYPE}" ]; then export SYCL_DEVICE_TYPE; fi + - if [ -n "${SYCL_DEVICE_FILTER}" ]; then export SYCL_DEVICE_FILTER; fi + - if [[ "${MPI_AS_ROOT}" == "ON" ]];then + export OMPI_ALLOW_RUN_AS_ROOT=1; + export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1; + fi + - cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} + -GNinja + -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} + -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} + ${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR} ${CUDA_HOST_STR} + -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} + 
-DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} + -DGINKGO_BUILD_HIP=${BUILD_HIP} + -DGINKGO_BUILD_MPI=${BUILD_MPI} -DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX} + -DMPI_RUN_AS_ROOT=${MPI_AS_ROOT} + -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} + -DGINKGO_BUILD_TESTS=ON -DGINKGO_BUILD_EXAMPLES=ON + -DGINKGO_FAST_TESTS=${FAST_TESTS} + -DGINKGO_MIXED_PRECISION=${MIXED_PRECISION} + -DGINKGO_CONFIG_LOG_DETAILED=${CONFIG_LOG} + -DGINKGO_DPCPP_SINGLE_MODE=${DPCPP_SINGLE_MODE} + -DGINKGO_RUN_EXAMPLES=${RUN_EXAMPLES} + -DGINKGO_EXPORT_BUILD_DIR=${EXPORT_BUILD_DIR} + - ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT} install + - | + (( $(ctest -N | tail -1 | sed 's/Total Tests: //') != 0 )) || exit 1 + - ctest -V --timeout 3000 + - ninja test_install + - pushd test/test_install + - ninja install + - popd + - | + if [ "${RUN_EXAMPLES}" == "ON" ]; then + export EX_ARG="reference" + ninja run_all_examples + ninja validate_all_examples + if [ "${BUILD_OMP}" == "ON" ]; then + export EX_ARG="omp" + ninja run_all_examples + ninja validate_all_examples + fi + if [ "${BUILD_CUDA}" == "ON" ]; then + export EX_ARG="cuda" + ninja run_all_examples + ninja validate_all_examples + fi + if [ "${BUILD_HIP}" == "ON" ]; then + export EX_ARG="hip" + ninja run_all_examples + ninja validate_all_examples + fi + fi + - if [ -n "${SYCL_DEVICE_TYPE}" ]; then unset SYCL_DEVICE_TYPE; fi + - if [ -n "${SYCL_DEVICE_FILTER}" ]; then unset SYCL_DEVICE_FILTER; fi + - if [ "${EXPORT_BUILD_DIR}" == "ON" ]; then ninja test_exportbuild; fi + dependencies: [] + except: + - schedules + + +.horeka_test_template: &horeka_test + stage: test + before_script: + # Sanity checks + - if [ -z "${USE_NAME}" ]; then exit 111; fi + - if [ -z "${SLURM_PARTITION}" ]; then exit 222; fi + - if [[ ! "${SLURM_GRES}" =~ "^gpu*" ]]; then export NVIDIA_VISIBLE_DEVICES=void; fi + variables: + GIT_CLEAN_FLAGS: none + script: + # We should be reusing the previous image and build directory.
+ # Speedup the tests by limiting the number of OMP threads to CI parallelism. + - export OMP_NUM_THREADS=${CI_PARALLELISM} + # The test job should have the name name as the image job, only changing + # build to test + - cd ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} + - cd ${CI_JOB_NAME/test/build} + - ninja test + - ninja test_install + cache: [] + except: + - schedules + + +.horeka_benchmark_before_script_template: &horeka_benchmark_before_script + # Sanity checks + - if [ -z "${USE_NAME}" ]; then exit 111; fi + - if [ -z "${SLURM_PARTITION}" ]; then exit 222; fi + - if [[ ! "${SLURM_GRES}" =~ "^gpu*" ]]; then export NVIDIA_VISIBLE_DEVICES=void; fi + - if [[ -z "${EXECUTOR}" ]]; then exit 233; fi + - if [[ -z "${SYSTEM_NAME}" ]]; then exit 233; fi + - if [[ -z "${BENCHMARK}" ]]; then exit 233; fi + - if [[ -z "${COMMIT_SHA}" ]]; then export COMMIT_SHA=$(git ls-remote ${PUBLIC_REPO} | head -1 | sed 's/[ \t].*//'); fi + # Set environment variables + - eval $(ssh-agent -s) + - echo "${BOT_KEY}" | tr -d '\r' | ssh-add - >/dev/null + - mkdir -p ~/.ssh + - chmod 700 ~/.ssh + - ssh-keyscan -t rsa github.com >>~/.ssh/known_hosts + - chmod 644 ~/.ssh/known_hosts + - echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config + - unset OMP_NUM_THREADS + - git config --global user.name "${BOT_USER}" + - git config --global user.email "${BOT_EMAIL}" + + +.benchmark_template: &horeka_benchmark + before_script: *horeka_benchmark_before_script + variables: + GIT_CLEAN_FLAGS: none + script: + # Launch the benchmark + - git clone https://github.com/ginkgo-project/ssget + - sed -i 's:ARCHIVE_LOCATION="${HOME}/.config/ssget":ARCHIVE_LOCATION="/ssget":' ssget/ssget + - export PATH=$PWD/ssget:$PATH + - cd /ginkgo/build + - chmod +x benchmark/run_all_benchmarks.sh + - cd benchmark + - git clone ${BENCHMARK_REPO} data-repo + # Use the representative matrix list + - export MATRIX_LIST_FILE=$PWD/data-repo/data/represent.list + - ./run_all_benchmarks.sh + # Publish the results + - 
rsync -av results/ data-repo/data/ + - cd data-repo/data/ + - ./build-list . > list.json + - ./agregate < list.json > agregate.json + - ./represent $PWD > represent.json + - git add -A + - git diff --quiet HEAD || + (git commit -m + "Benchmark ${BENCHMARK} on ${EXECUTOR} with ${SYSTEM_NAME} of ginkgo-project/ginkgo@${COMMIT_SHA}" + && git push) + cache: [] + except: + - schedules diff --git a/.gitlab/variables.yml b/.gitlab/variables.yml new file mode 100644 index 00000000000..11379f029f9 --- /dev/null +++ b/.gitlab/variables.yml @@ -0,0 +1,25 @@ +# Templates with reasonable defaults for builds and tests +.default_variables: + variables: + BENCHMARK_SERVER: "FINECI" + C_COMPILER: "gcc" + CXX_COMPILER: "g++" + CUDA_COMPILER: "nvcc" + BUILD_TYPE: "Debug" + BUILD_SHARED_LIBS: "ON" + BUILD_REFERENCE: "ON" + BUILD_OMP: "OFF" + BUILD_CUDA: "OFF" + BUILD_HIP: "OFF" + BUILD_HWLOC: "ON" + BUILD_MPI: "OFF" + MPI_AS_ROOT: "OFF" + FAST_TESTS: "OFF" + DPCPP_SINGLE_MODE: "OFF" + MIXED_PRECISION: "ON" + RUN_EXAMPLES: "OFF" + CONFIG_LOG: "ON" + CXX_FLAGS: "" + EXTRA_CMAKE_FLAGS: "" + EXPORT_BUILD_DIR: "OFF" + CI_PROJECT_DIR_SUFFIX: "" From a8e5adb55789b1329a1d7f427c15c763a900087f Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Fri, 29 Apr 2022 15:43:14 +0200 Subject: [PATCH 02/10] Use a special queue for status jobs. 
--- .gitlab-ci.yml | 14 +++++++------- .gitlab/image.yml | 6 ++++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3fcc72034c4..01eb9b900fc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -24,7 +24,7 @@ status_pending: extends: - .default_variables - .pr_condition - - .use_gko-nocuda-openmpi-gnu9-llvm8 + - .use_status-job-settings variables: STATUS_CONTEXT: "quick" script: | @@ -37,7 +37,7 @@ status_success: extends: - .default_variables - .pr_condition - - .use_gko-nocuda-openmpi-gnu9-llvm8 + - .use_status-job-settings variables: STATUS_CONTEXT: "quick" # we always exit with the code 3 such that it will process when retrying @@ -54,7 +54,7 @@ status_failure: extends: - .default_variables - .pr_condition - - .use_gko-nocuda-openmpi-gnu9-llvm8 + - .use_status-job-settings variables: STATUS_CONTEXT: "quick" # we always exit with the code 3 such that it will process when retrying @@ -73,7 +73,7 @@ sync: extends: - .default_variables - .before_script_git_template - - .use_gko-nocuda-openmpi-gnu9-llvm8 + - .use_status-job-settings variables: GIT_STRATEGY: none PRIVATE_REPO: git@gitlab.com:ginkgo-project/ginkgo.git @@ -95,7 +95,7 @@ trigger_pipeline: extends: - .default_variables - .pr_condition - - .use_gko-nocuda-openmpi-gnu9-llvm8 + - .use_status-job-settings variables: STATUS_CONTEXT: "quick" script: @@ -893,7 +893,7 @@ gh-pages: - .default_variables - .deploy_condition - .before_script_git_template - - .use_gko-nocuda-openmpi-gnu9-llvm8 + - .use_status-job-settings variables: PUBLIC_REPO: git@github.com:ginkgo-project/ginkgo.git script: @@ -991,7 +991,7 @@ new-issue-on-failure: stage: on-failure extends: - .default_variables - - .use_gko-nocuda-openmpi-gnu9-llvm8 + - .use_status-job-settings script: curl --request POST 
"https://gitlab.com/api/v4/projects/${PROJECT_ID}/issues?private_token=${BOT_ACCESS_TOKEN}&title=Error%20in%20${CI_PROJECT_NAME}%20with%20pipeline%20${CI_PIPELINE_ID}%20for%20commit%20${CI_COMMIT_SHA}&labels&description=${CI_PIPELINE_URL}" when: on_failure only: diff --git a/.gitlab/image.yml b/.gitlab/image.yml index 770b4ac2001..3c6886d7fc7 100644 --- a/.gitlab/image.yml +++ b/.gitlab/image.yml @@ -1,3 +1,9 @@ +.use_status-job-settings: + image: ginkgohub/cpu:openmpi-gnu9-llvm8 + tags: + - private_ci + - status-jobs + .use_gko-nocuda-openmpi-gnu9-llvm8: image: ginkgohub/cpu:openmpi-gnu9-llvm8 tags: From 6349317dea12f3635130fe21d313261ef01b75d5 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 3 May 2022 17:12:32 +0200 Subject: [PATCH 03/10] Add benchmarking --- .gitlab-ci.yml | 62 +++++++++++++++++++++++++++++++++++-------- .gitlab/condition.yml | 36 ------------------------- .gitlab/rules.yml | 34 ++++++++++++++++++++++++ .gitlab/scripts.yml | 31 +++++++++++----------- 4 files changed, 100 insertions(+), 63 deletions(-) delete mode 100644 .gitlab/condition.yml create mode 100644 .gitlab/rules.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 01eb9b900fc..a396ccd3dbc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -9,13 +9,14 @@ stages: - QoS_tools - on-failure - finalize-status + - benchmark-build - benchmark-cuda - benchmark-omp - benchmark-reference include: - - local: '.gitlab/condition.yml' - local: '.gitlab/image.yml' + - local: '.gitlab/rules.yml' - local: '.gitlab/scripts.yml' - local: '.gitlab/variables.yml' @@ -87,8 +88,6 @@ sync: only: - master - develop - except: - - schedules trigger_pipeline: stage: trigger_pipeline @@ -133,9 +132,8 @@ trigger_pipeline: echo "Can not find the corresponding Pull Request" fi # Override variables condition from .pr_condition - only: - variables: - - $RUN_CI_TAG && $STATUS_CONTEXT == "quick" + rules: + - if: $RUN_CI_TAG && $STATUS_CONTEXT == "quick" # Build jobs # Job with example runs. 
@@ -861,11 +859,9 @@ sonarqube_cov_: -Dsonar.cfamily.gcov.reportsPath=build/Testing/CoverageInfo ${sonar_branching} - bash <(curl -s https://codecov.io/bash) -f "\!*examples*" -f "\!*third_party*" -f "\!*c\\+\\+*" -f "\!*benchmark*" - except: - refs: - - develop - - master - - tags + rules: + - if: $CI_COMMIT_BRANCH == "develop" || $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_TAG + when: never # For long living branches, do not detect the PR. A PR would always be detected # (the one that was merged). @@ -999,3 +995,47 @@ new-issue-on-failure: - develop - master dependencies: [] + + +## Benchmark SpMV +benchmark-cuda-spmv-build: + extends: + - .build_template + - .default_variables + - .quick_test_condition + - .use_gko-cuda110-mvapich2-gnu9-llvm9-intel2020 + - .benchmark-spmv-cuda-rules + stage: benchmark-build + variables: + BUILD_OMP: "ON" + BUILD_CUDA: "ON" + BUILD_MPI: "ON" + MPI_AS_ROOT: "ON" + BUILD_HIP: "OFF" + BUILD_TYPE: "Release" + CUDA_ARCH: 80 + USE_NAME: "benchmark-cuda-spmv-${CI_PIPELINE_ID}" + KEEP_CONTAINER: "ON" + USE_SLURM: 0 + + +# The name must be of the same as above without `-build` +benchmark-cuda-spmv: + extends: + - .benchmark_template + - .default_variables + - .quick_test_condition + - .use_gko-cuda110-mvapich2-gnu9-llvm9-intel2020 + - .benchmark-spmv-cuda-rules + stage: benchmark-cuda + variables: + BENCHMARK_REPO: git@github.com:ginkgo-project/ginkgo-data.git + USE_NAME: "benchmark-cuda-spmv-${CI_PIPELINE_ID}" + SLURM_PARTITION: "accelerated" + SLURM_GRES: "gpu:1" + SLURM_EXCLUSIVE: "ON" + SLURM_TIME: "4:00:00" + BENCHMARK: "spmv" + EXECUTOR: "cuda" + SYSTEM_NAME: "A100" + dependencies: [ "benchmark-cuda-spmv-build" ] diff --git a/.gitlab/condition.yml b/.gitlab/condition.yml deleted file mode 100644 index fffb88738a5..00000000000 --- a/.gitlab/condition.yml +++ /dev/null @@ -1,36 +0,0 @@ -.pr_condition: - only: - variables: - - $RUN_CI_TAG - except: - refs: - - develop - - master - - tags - dependencies: [] - -.full_test_condition: - 
only: - variables: - - $RUN_CI_TAG && $STATUS_CONTEXT == "full" - - $RUN_CI_TAG && ($CI_COMMIT_BRANCH == "master" || $CI_COMMIT_BRANCH == "develop") - - $RUN_CI_TAG && $CI_COMMIT_TAG - dependencies: [] - -.quick_test_condition: - only: - variables: - - $RUN_CI_TAG && $STATUS_CONTEXT == null - dependencies: [] - -.deploy_condition: - only: - refs: - - develop - - master - - tags - variables: - - $RUN_CI_TAG - except: - - schedules - dependencies: [] diff --git a/.gitlab/rules.yml b/.gitlab/rules.yml new file mode 100644 index 00000000000..3021d6a29cc --- /dev/null +++ b/.gitlab/rules.yml @@ -0,0 +1,34 @@ +.pr_condition: + rules: + - if: $CI_COMMIT_BRANCH == "develop" || $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_TAG + when: never + - if: $RUN_CI_TAG + dependencies: [] + + +.full_test_condition: + rules: + - if: $RUN_CI_TAG && ($STATUS_CONTEXT == "full" || $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_BRANCH == "develop" || $CI_COMMIT_TAG) + dependencies: [] + + +.quick_test_condition: + rules: + - if: $RUN_CI_TAG && $STATUS_CONTEXT == null + dependencies: [] + + +.deploy_condition: + rules: + - if: $RUN_CI_TAG && ($CI_COMMIT_BRANCH == "master" || $CI_COMMIT_BRANCH == "develop" || $CI_COMMIT_TAG) && $CI_PIPELINE_SOURCE != "schedule" + dependencies: [] + + +.benchmark-spmv-cuda-rules: + rules: + - changes: + - cuda/matrix/* + - include/ginkgo/core/matrix/* + - common/cuda_hip/matrix/* + # - common/unified/matrix/* # for now no SpMV there? 
+ when: manual diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml index bd54a011bae..9133b519d32 100644 --- a/.gitlab/scripts.yml +++ b/.gitlab/scripts.yml @@ -51,8 +51,6 @@ - ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT} install - if [ "${EXPORT_BUILD_DIR}" == "ON" ]; then ninja test_exportbuild; fi dependencies: [] - except: - - schedules .build_and_test_template: &default_build_with_test stage: build @@ -122,8 +120,6 @@ - if [ -n "${SYCL_DEVICE_FILTER}" ]; then unset SYCL_DEVICE_FILTER; fi - if [ "${EXPORT_BUILD_DIR}" == "ON" ]; then ninja test_exportbuild; fi dependencies: [] - except: - - schedules .horeka_test_template: &horeka_test @@ -146,8 +142,6 @@ - ninja test - ninja test_install cache: [] - except: - - schedules .horeka_benchmark_before_script_template: &horeka_benchmark_before_script @@ -158,7 +152,6 @@ - if [[ -z "${EXECUTOR}" ]]; then exit 233; fi - if [[ -z "${SYSTEM_NAME}" ]]; then exit 233; fi - if [[ -z "${BENCHMARK}" ]]; then exit 233; fi - - if [[ -z "${COMMIT_SHA}" ]]; then export COMMIT_SHA=$(git ls-remote ${PUBLIC_REPO} | head -1 | sed 's/[ \t].*//'); fi # Set environment variables - eval $(ssh-agent -s) - echo "${BOT_KEY}" | tr -d '\r' | ssh-add - >/dev/null @@ -177,15 +170,25 @@ variables: GIT_CLEAN_FLAGS: none script: - # Launch the benchmark + # Install rsync + - pushd / + - git clone https://github.com/WayneD/rsync -b v3.2.4 + - pushd rsync + - ./configure --disable-lz4 --disable-openssl --disable-doc --disable-xxhash --disable-md2man + - make -j10 install + - popd + - popd + # Setup ssget - git clone https://github.com/ginkgo-project/ssget - sed -i 's:ARCHIVE_LOCATION="${HOME}/.config/ssget":ARCHIVE_LOCATION="/ssget":' ssget/ssget - export PATH=$PWD/ssget:$PATH - - cd /ginkgo/build + # Setup the benchmark + - cd ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} + - cd ${CI_JOB_NAME}-build - chmod +x benchmark/run_all_benchmarks.sh - cd benchmark - git clone ${BENCHMARK_REPO} data-repo - # Use the representative matrix list + # Use the 
representative matrix list and launch benchmark - export MATRIX_LIST_FILE=$PWD/data-repo/data/represent.list - ./run_all_benchmarks.sh # Publish the results @@ -193,12 +196,8 @@ - cd data-repo/data/ - ./build-list . > list.json - ./agregate < list.json > agregate.json + - chmod +x represent - ./represent $PWD > represent.json - git add -A - - git diff --quiet HEAD || - (git commit -m - "Benchmark ${BENCHMARK} on ${EXECUTOR} with ${SYSTEM_NAME} of ginkgo-project/ginkgo@${COMMIT_SHA}" - && git push) + - git diff --quiet HEAD || (git commit -m "Benchmark ${BENCHMARK} on ${EXECUTOR} with ${SYSTEM_NAME} of ginkgo-project/ginkgo@${CI_COMMIT_SHORT_SHA}" && git push) cache: [] - except: - - schedules From e81fa9ba058f6e32648782a76cd2e620cab4b99e Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Thu, 12 May 2022 21:37:47 +0200 Subject: [PATCH 04/10] Use silent curl and try to return status 0 --- .gitlab-ci.yml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a396ccd3dbc..53bba9b1d6e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -29,7 +29,7 @@ status_pending: variables: STATUS_CONTEXT: "quick" script: | - curl -X POST -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${BOT_STATUS_TOKEN}" \ + curl -s -X POST -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${BOT_STATUS_TOKEN}" \ https://api.github.com/repos/ginkgo-project/ginkgo/statuses/${CI_COMMIT_SHA} \ -d "{\"state\":\"pending\",\"context\":\"ci/gitlab/${STATUS_CONTEXT}\",\"target_url\":\"${CI_PIPELINE_URL}\"}" @@ -43,12 +43,9 @@ status_success: STATUS_CONTEXT: "quick" # we always exit with the code 3 such that it will process when retrying script: | - curl -X POST -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${BOT_STATUS_TOKEN}" \ + curl -s -X POST -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${BOT_STATUS_TOKEN}" \ 
https://api.github.com/repos/ginkgo-project/ginkgo/statuses/${CI_COMMIT_SHA} \ -d "{\"state\":\"success\",\"context\":\"ci/gitlab/${STATUS_CONTEXT}\",\"target_url\":\"${CI_PIPELINE_URL}\"}" - exit 3 - allow_failure: - exit_codes: 3 status_failure: stage: finalize-status @@ -60,13 +57,11 @@ status_failure: STATUS_CONTEXT: "quick" # we always exit with the code 3 such that it will process when retrying script: | - curl -X POST -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${BOT_STATUS_TOKEN}" \ + curl -s -X POST -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${BOT_STATUS_TOKEN}" \ https://api.github.com/repos/ginkgo-project/ginkgo/statuses/${CI_COMMIT_SHA} \ -d "{\"state\":\"failure\",\"context\":\"ci/gitlab/${STATUS_CONTEXT}\",\"target_url\":\"${CI_PIPELINE_URL}\"}" exit 3 when: on_failure - allow_failure: - exit_codes: 3 sync: From 99ba348996538562d4f6590d72e770d28f3f3dc6 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 17 May 2022 16:10:59 +0200 Subject: [PATCH 05/10] Try to move DPCPP/CPU job to HoreKa --- .gitlab/image.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitlab/image.yml b/.gitlab/image.yml index 3c6886d7fc7..58086a848fe 100644 --- a/.gitlab/image.yml +++ b/.gitlab/image.yml @@ -88,8 +88,7 @@ image: ginkgohub/oneapi:latest tags: - private_ci - - intelci-igpu - - cpu + - horeka .use_gko-oneapi-igpu: image: ginkgohub/oneapi:latest From d9ed30c9184765e36de7acc5f6be438d6a8128d5 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 14 Jun 2022 19:41:48 +0200 Subject: [PATCH 06/10] Move the DPCPP/CPU job to fairrs --- .gitlab-ci.yml | 3 +++ .gitlab/image.yml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 53bba9b1d6e..ff04274de2b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -676,6 +676,9 @@ build/dpcpp/cpu/release/static: BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "ON" SYCL_DEVICE_TYPE: "CPU" + SLURM_PARTITION: "cpu" + 
SLURM_TIME: "2:00:00" + # This job is not in exclusive mode # It gives two available backends of GPU on tests build/dpcpp/igpu/release/shared: diff --git a/.gitlab/image.yml b/.gitlab/image.yml index 58086a848fe..9dc1266844e 100644 --- a/.gitlab/image.yml +++ b/.gitlab/image.yml @@ -88,7 +88,7 @@ image: ginkgohub/oneapi:latest tags: - private_ci - - horeka + - fairrs .use_gko-oneapi-igpu: image: ginkgohub/oneapi:latest From 37033022ac56d4ea3dd3a61c297c3ecd05d7f5fa Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Thu, 23 Jun 2022 11:13:32 +0200 Subject: [PATCH 07/10] Fix pipeline, add documentation, always use extend --- .gitlab-ci.yml | 15 ++++++---- .gitlab/rules.yml | 17 ++++++++++++ .gitlab/scripts.yml | 68 ++++++++++++++++++++++++--------------------- 3 files changed, 63 insertions(+), 37 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ff04274de2b..cc3f9917c16 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -32,6 +32,9 @@ status_pending: curl -s -X POST -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${BOT_STATUS_TOKEN}" \ https://api.github.com/repos/ginkgo-project/ginkgo/statuses/${CI_COMMIT_SHA} \ -d "{\"state\":\"pending\",\"context\":\"ci/gitlab/${STATUS_CONTEXT}\",\"target_url\":\"${CI_PIPELINE_URL}\"}" + exit 3 + allow_failure: + exit_codes: 3 status_success: stage: finalize-status @@ -46,6 +49,9 @@ status_success: curl -s -X POST -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${BOT_STATUS_TOKEN}" \ https://api.github.com/repos/ginkgo-project/ginkgo/statuses/${CI_COMMIT_SHA} \ -d "{\"state\":\"success\",\"context\":\"ci/gitlab/${STATUS_CONTEXT}\",\"target_url\":\"${CI_PIPELINE_URL}\"}" + exit 3 + allow_failure: + exit_codes: 3 status_failure: stage: finalize-status @@ -62,6 +68,8 @@ status_failure: -d "{\"state\":\"failure\",\"context\":\"ci/gitlab/${STATUS_CONTEXT}\",\"target_url\":\"${CI_PIPELINE_URL}\"}" exit 3 when: on_failure + allow_failure: + exit_codes: 3 sync: @@ -833,7 +841,7 @@ 
sonarqube_cov_: stage: code_quality extends: - .default_variables - - .quick_test_condition + - .quick_test_short_lived_condition - .before_script_template - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019 script: @@ -857,9 +865,6 @@ sonarqube_cov_: -Dsonar.cfamily.gcov.reportsPath=build/Testing/CoverageInfo ${sonar_branching} - bash <(curl -s https://codecov.io/bash) -f "\!*examples*" -f "\!*third_party*" -f "\!*c\\+\\+*" -f "\!*benchmark*" - rules: - - if: $CI_COMMIT_BRANCH == "develop" || $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_TAG - when: never # For long living branches, do not detect the PR. A PR would always be detected # (the one that was merged). @@ -1000,7 +1005,6 @@ benchmark-cuda-spmv-build: extends: - .build_template - .default_variables - - .quick_test_condition - .use_gko-cuda110-mvapich2-gnu9-llvm9-intel2020 - .benchmark-spmv-cuda-rules stage: benchmark-build @@ -1022,7 +1026,6 @@ benchmark-cuda-spmv: extends: - .benchmark_template - .default_variables - - .quick_test_condition - .use_gko-cuda110-mvapich2-gnu9-llvm9-intel2020 - .benchmark-spmv-cuda-rules stage: benchmark-cuda diff --git a/.gitlab/rules.yml b/.gitlab/rules.yml index 3021d6a29cc..50d663425ab 100644 --- a/.gitlab/rules.yml +++ b/.gitlab/rules.yml @@ -1,13 +1,21 @@ +# The way rules work are explained in https://docs.gitlab.com/ee/ci/yaml/#rules +# As a summary: Rules are evaluated when the pipeline is created, and evaluated +# in order until the first match. When a match is found, the job is either +# included or excluded from the pipeline, depending on the configuration. + .pr_condition: rules: + # Exclude `develop`, `master`, and tags with `when: never` - if: $CI_COMMIT_BRANCH == "develop" || $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_TAG when: never + # Run only when the `RUN_CI_TAG` variable is set - if: $RUN_CI_TAG dependencies: [] .full_test_condition: rules: + # Run only when the `RUN_CI_TAG` variable is set and this is a full pipeline, or for `master`, `develop` or tags. 
- if: $RUN_CI_TAG && ($STATUS_CONTEXT == "full" || $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_BRANCH == "develop" || $CI_COMMIT_TAG) dependencies: [] @@ -17,6 +25,12 @@ - if: $RUN_CI_TAG && $STATUS_CONTEXT == null dependencies: [] +.quick_test_short_lived_condition: + rules: + - if: $CI_COMMIT_BRANCH == "develop" || $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_TAG + when: never + - if: $RUN_CI_TAG && $STATUS_CONTEXT == null + dependencies: [] .deploy_condition: rules: @@ -26,9 +40,12 @@ .benchmark-spmv-cuda-rules: rules: + # First, check that we have any of the following file changes - changes: - cuda/matrix/* - include/ginkgo/core/matrix/* - common/cuda_hip/matrix/* # - common/unified/matrix/* # for now no SpMV there? + # Then, check that we are in the full pipeline or master/develop/tags + - if: $RUN_CI_TAG && ($STATUS_CONTEXT == "full" || $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_BRANCH == "develop" || $CI_COMMIT_TAG) when: manual diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml index 9133b519d32..f6f83a62121 100644 --- a/.gitlab/scripts.yml +++ b/.gitlab/scripts.yml @@ -1,11 +1,11 @@ -.before_script_template: &default_before_script +.before_script_template: before_script: - export NUM_CORES=${CI_PARALLELISM} - export OMP_NUM_THREADS=${NUM_CORES} - export CCACHE_DIR=${CCACHE_DIR} - export CCACHE_MAXSIZE=${CCACHE_MAXSIZE} -.before_script_git_template: &git_before_script +.before_script_git_template: before_script: - eval $(ssh-agent -s) - echo "${BOT_KEY}" | tr -d '\r' | ssh-add - >/dev/null @@ -15,7 +15,7 @@ - git config --global user.name "${BOT_USER}" - git config --global user.email "${BOT_EMAIL}" -.build_template: &default_build +.build_template: stage: build extends: - .before_script_template @@ -29,6 +29,9 @@ export OMPI_ALLOW_RUN_AS_ROOT=1; export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1; fi + - if [[ "${BUILD_MPI}" == "ON" ]]; then + MPI_STR="-DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX} -DMPI_RUN_AS_ROOT=${MPI_AS_ROOT}"; + fi - cmake
${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} -GNinja -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} @@ -38,8 +41,7 @@ -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} -DGINKGO_BUILD_HIP=${BUILD_HIP} - -DGINKGO_BUILD_MPI=${BUILD_MPI} -DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX} - -DMPI_RUN_AS_ROOT=${MPI_AS_ROOT} + -DGINKGO_BUILD_MPI=${BUILD_MPI} ${MPI_STR} -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} -DGINKGO_BUILD_TESTS=ON -DGINKGO_BUILD_EXAMPLES=ON -DGINKGO_FAST_TESTS=${FAST_TESTS} @@ -52,7 +54,7 @@ - if [ "${EXPORT_BUILD_DIR}" == "ON" ]; then ninja test_exportbuild; fi dependencies: [] -.build_and_test_template: &default_build_with_test +.build_and_test_template: stage: build extends: - .before_script_template @@ -68,6 +70,9 @@ export OMPI_ALLOW_RUN_AS_ROOT=1; export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1; fi + - if [[ "${BUILD_MPI}" == "ON" ]]; then + MPI_STR="-DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX} -DMPI_RUN_AS_ROOT=${MPI_AS_ROOT}"; + fi - cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} -GNinja -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} @@ -77,8 +82,7 @@ -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} -DGINKGO_BUILD_HIP=${BUILD_HIP} - -DGINKGO_BUILD_MPI=${BUILD_MPI} -DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX} - -DMPI_RUN_AS_ROOT=${MPI_AS_ROOT} + -DGINKGO_BUILD_MPI=${BUILD_MPI} ${MPI_STR} -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} -DGINKGO_BUILD_TESTS=ON -DGINKGO_BUILD_EXAMPLES=ON -DGINKGO_FAST_TESTS=${FAST_TESTS} @@ -122,7 +126,7 @@ dependencies: [] -.horeka_test_template: &horeka_test +.horeka_test_template: stage: test before_script: # Sanity checks @@ -135,7 +139,7 @@ # We should be reusing the previous image and build directory. # Speedup the tests by limiting the number of OMP threads to CI parallelism.
- export OMP_NUM_THREADS=${CI_PARALLELISM} - # The test job should have the name name as the image job, only changing + # The test job should have the name as the image job, only changing # build to test - cd ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} - cd ${CI_JOB_NAME/test/build} @@ -144,29 +148,31 @@ cache: [] -.horeka_benchmark_before_script_template: &horeka_benchmark_before_script - # Sanity checks - - if [ -z "${USE_NAME}" ]; then exit 111; fi - - if [ -z "${SLURM_PARTITION}" ]; then exit 222; fi - - if [[ ! "${SLURM_GRES}" =~ "^gpu*" ]]; then export NVIDIA_VISIBLE_DEVICES=void; fi - - if [[ -z "${EXECUTOR}" ]]; then exit 233; fi - - if [[ -z "${SYSTEM_NAME}" ]]; then exit 233; fi - - if [[ -z "${BENCHMARK}" ]]; then exit 233; fi - # Set environment variables - - eval $(ssh-agent -s) - - echo "${BOT_KEY}" | tr -d '\r' | ssh-add - >/dev/null - - mkdir -p ~/.ssh - - chmod 700 ~/.ssh - - ssh-keyscan -t rsa github.com >>~/.ssh/known_hosts - - chmod 644 ~/.ssh/known_hosts - - echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config - - unset OMP_NUM_THREADS - - git config --global user.name "${BOT_USER}" - - git config --global user.email "${BOT_EMAIL}" +.horeka_benchmark_before_script_template: + before_script: + # Sanity checks + - if [ -z "${USE_NAME}" ]; then exit 111; fi + - if [ -z "${SLURM_PARTITION}" ]; then exit 222; fi + - if [[ !
"${SLURM_GRES}" =~ ^gpu ]]; then export NVIDIA_VISIBLE_DEVICES=void; fi # regex left unquoted on purpose, bash matches a quoted =~ pattern as a literal string + - if [[ -z "${EXECUTOR}" ]]; then exit 233; fi + - if [[ -z "${SYSTEM_NAME}" ]]; then exit 233; fi + - if [[ -z "${BENCHMARK}" ]]; then exit 233; fi + # Set environment variables + - eval $(ssh-agent -s) + - echo "${BOT_KEY}" | tr -d '\r' | ssh-add - >/dev/null + - mkdir -p ~/.ssh + - chmod 700 ~/.ssh + - ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts + - chmod 644 ~/.ssh/known_hosts + - echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config + - unset OMP_NUM_THREADS + - git config --global user.name "${BOT_USER}" + - git config --global user.email "${BOT_EMAIL}" -.benchmark_template: &horeka_benchmark - before_script: *horeka_benchmark_before_script +.benchmark_template: + extends: + - .horeka_benchmark_before_script_template variables: GIT_CLEAN_FLAGS: none script: From a4984a25c58bbb4d93b7fa8a345ab8cd4ef14fbe Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Thu, 23 Jun 2022 18:59:36 +0200 Subject: [PATCH 08/10] Do not print summary to stderr. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c1b3a66f19..8ce85c847ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -428,4 +428,4 @@ if(GINKGO_CONFIG_LOG_DETAILED) else() FILE(READ ${PROJECT_BINARY_DIR}/minimal.log GINKGO_LOG_SUMMARY) endif() -MESSAGE("${GINKGO_LOG_SUMMARY}") +MESSAGE(STATUS "${GINKGO_LOG_SUMMARY}") From 2fc63fb753a2e08a5fa9fb966a6b926f01091b8a Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 28 Jun 2022 14:41:48 +0200 Subject: [PATCH 09/10] Fix `trigger_pipeline` with a new rule. The problem with rules is that they cannot be extended in the job, they are instead overridden.
https://docs.gitlab.com/ee/ci/yaml/yaml_optimization.html#merge-details --- .gitlab-ci.yml | 6 ++---- .gitlab/rules.yml | 10 ++++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cc3f9917c16..fe90804058c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -96,7 +96,7 @@ trigger_pipeline: stage: trigger_pipeline extends: - .default_variables - - .pr_condition + - .pr_trigger_condition - .use_status-job-settings variables: STATUS_CONTEXT: "quick" @@ -134,9 +134,7 @@ trigger_pipeline: else echo "Can not find the corresponding Pull Request" fi - # Override variables condition from .pr_condition - rules: - - if: $RUN_CI_TAG && $STATUS_CONTEXT == "quick" + # Build jobs # Job with example runs. diff --git a/.gitlab/rules.yml b/.gitlab/rules.yml index 50d663425ab..24b5814362f 100644 --- a/.gitlab/rules.yml +++ b/.gitlab/rules.yml @@ -13,6 +13,16 @@ dependencies: [] +.pr_trigger_condition: + rules: + # Exclude `develop`, `master`, and tags with `when: never` + - if: $CI_COMMIT_BRANCH == "develop" || $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_TAG + when: never + # Run only for quick pipelines and when the `RUN_CI_TAG` variable is set + - if: $RUN_CI_TAG && $STATUS_CONTEXT == "quick" + dependencies: [] + + .full_test_condition: rules: # Run only when the `RUN_CI_TAG` variable is set and this is a full pipeline, or for `master`, `develop` or tags.
From 739d134ac947b0d43ce94f6bb55dd0d4c8e8671e Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 28 Jun 2022 15:21:47 +0200 Subject: [PATCH 10/10] Simplify spmv benchmark rules --- .gitlab/rules.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.gitlab/rules.yml b/.gitlab/rules.yml index 24b5814362f..0280017c08b 100644 --- a/.gitlab/rules.yml +++ b/.gitlab/rules.yml @@ -51,11 +51,11 @@ .benchmark-spmv-cuda-rules: rules: # First, check that we have any of the following file changes - - changes: - - cuda/matrix/* - - include/ginkgo/core/matrix/* - - common/cuda_hip/matrix/* - # - common/unified/matrix/* # for now no SpMV there? - # Then, check that we are in the full pipeline or master/develop/tags - - if: $RUN_CI_TAG && ($STATUS_CONTEXT == "full" || $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_BRANCH == "develop" || $CI_COMMIT_TAG) - when: manual + - if: $RUN_CI_TAG && $STATUS_CONTEXT == "full" && ($CI_PIPELINE_SOURCE == "merge_request_event" || $CI_PIPELINE_SOURCE == "external_pull_request_event") # parentheses required, && binds tighter than || + changes: + - cuda/matrix/* + - include/ginkgo/core/matrix/* + - common/cuda_hip/matrix/* + # - common/unified/matrix/* # for now no SpMV there? + when: manual + allow_failure: true