diff --git a/cime/config/e3sm/tests.py b/cime/config/e3sm/tests.py index 3efa11f2c443..7a2def01c985 100644 --- a/cime/config/e3sm/tests.py +++ b/cime/config/e3sm/tests.py @@ -88,6 +88,7 @@ "tests" : ( "PGN_P1x1.ne4_ne4.FC5AV1C-L", "TSC.ne4_ne4.FC5AV1C-L", + "MVK_PL.ne4_ne4.FC5AV1C-L", ) }, diff --git a/cime/scripts/climate_reproducibility/README.md b/cime/scripts/climate_reproducibility/README.md index d5bc5375d8e7..5d2ec104e58b 100644 --- a/cime/scripts/climate_reproducibility/README.md +++ b/cime/scripts/climate_reproducibility/README.md @@ -17,36 +17,64 @@ a comprehensive analysis of both a baseline climate and the currently produced c Through the CMDV Software project, we've provided a set of climate reproducibility tests to determine whether or not non-bit-for-bit (nb4b) model changes are climate changing. The current tests provided are: -* **MVK** -- This tests the null hypothesis that the baseline (n) and modified (m) model Short Independent - Simulation Ensembles (SISE) represent the same climate state, based on the equality of distribution - of each variable's annual global average in the standard monthly model output between the two - simulations. The (per variable) null hypothesis uses the non-parametric, two-sample (n and m) - Kolmogorov-Smirnov test as the univariate test of of equality of distribution of global means. + * **MVK** -- This tests the null hypothesis that the baseline (n) and modified (m) model Short Independent + Simulation Ensembles (SISE) represent the same climate state, based on the equality of distribution + of each variable's annual global average in the standard monthly model output between the two + simulations. The (per variable) null hypothesis uses the non-parametric, two-sample (n and m) + Kolmogorov-Smirnov test as the univariate test of equality of distribution of global means. 
+ + * **PGN** -- This tests the null hypothesis that the reference (n) and modified (m) model + ensembles represent the same atmospheric state after each physics parameterization + is applied within a single time-step using the two-sample (n and m) T-test for equal + averages at a 95% confidence level. Ensembles are generated by repeating the + simulation for many initial conditions, with each initial condition subject to + multiple perturbations. + + * **TSC** -- This tests the null hypothesis that the convergence of the time stepping error + for a set of key atmospheric variables is the same for a reference ensemble and + a test ensemble. Both the reference and test ensemble are generated with a + two-second time step, and for each variable the RMSD between each ensemble and + a truth ensemble, generated with a one-second time step, is calculated. At each + 10 second interval during the 10 minute long simulations, the difference + in the reference and test RMSDs for each variable, each ensemble member, and each + domain are calculated and these ΔRMSDs should be zero for identical climates. A + one sided (due to self convergence) Student's T Test is used to test the null + hypothesis that the ensemble mean ΔRMSD is statistically zero. + Running the tests ----------------- These tests are built into E3SM-CIME as system tests and will be launched using the `create_test` scripts. -*However*, since these tests use high level statistics, they have additional python dependencies which +*However*, because these tests use high level statistics, they have additional python dependencies which need to be installed on your system and accessible via the compute nodes (if you're on a batch machine). 
Primarily, the statistical analysis of the climates is done through [EVV](https://github.com/LIVVkit/evv4esm) -(confluence page: [EVV](https://acme-climate.atlassian.net/wiki/spaces/EIDMG/pages/774177270/EVV)) which will -generate a portable test website to describe the results (pass or fail) in detail (see the extended output -section below). The full set of additional requirements are listed in the `requirements.txt` file in this directory. To install these -dependencies, make sure a python version `> 2.7` is loaded/installed, and then use `pip` like: +which will generate a portable test website to describe the results (pass or fail) in detail (see the extended output +section below). + +For E3SM supported machines, the `e3sm_simple` conda environment is provided for these tests and includes the `EVV` +conda package. You can activate the `e3sm_simple` environment in the same way as `e3sm_unified` environment: ``` -pip install --user -r requirements.txt -``` +source /load_latest_e3sm_simple.sh +``` + +where `` is the machine-specific location of the activation script as described on this confluence page: + +https://acme-climate.atlassian.net/wiki/spaces/EIDMG/pages/780271950/Diagnostics+and+Analysis+Quickstart#DiagnosticsandAnalysisQuickstart-Accessingmetapackagesoftwarebyactivatingacondaenvironment + +If you don't have access to confluence or are unable to activate this environment for whatever reason, you can install +your own `e3sm_simple` conda environment with this command (once you have anaconda/miniconda installed): + +``` +conda create -n e3sm-simple -c conda-forge -c e3sm e3sm-simple +``` -*NOTE: Work is underway to include all of these dependencies in the* `e3sm_unified` *conda environment, -and allow CIME to optionally load and use this environment for these tests. Once this is done, you will -not have to manage these dependencies yourself. 
If you run into problems with getting these dependencies -working on your machine, please open an issue on E3SM's Github and tag @jhkennedy, or send -Joseph H. Kennedy an email.* +*NOTE: If you run into problems with getting this environment working on your machine, please open an issue on E3SM's +Github and tag @jhkennedy, or send Joseph H. Kennedy an email.* -After the dependencies are installed, change to the `$E3SM/cime/scripts` directory (where `$E3SM` is the +After you've activated the `e3sm_simple` environment, change to the `$E3SM/cime/scripts` directory (where `$E3SM` is the directory containing E3SM). Then to run one of the tests, you will use the `create_test` script like normal. To run the `MVK` test and generate a baseline, you would run a command like: @@ -56,58 +84,33 @@ To run the `MVK` test and generate a baseline, you would run a command like: And to compare to the baseline, you would run a command like: - ``` ./create_test MVK_PL.ne4_oQU240.FC5AV1C-04P2 -c --baseline-root "/PATH/TO/BASELINE" ``` -Importantly, these tests run a 20 member ensemble for at least 13 months (using the last 12 for the +*NOTE: The MVK runs a 20 member ensemble for at least 13 months (using the last 12 for the statistical tests) and, depending on the machine, may take some fiddling to execute within a particular queue's wallclock time limit. You may want to over-ride the requested walltime using `--walltime HH:MM:SS` -option to `create_test`. Additionally, if messing with the wallclock time isn't enough, you can adjust the -`STOP_N` and `RESUBMIT` setting for the tests in `$E3SM/cime/config/config_tests.xml`. 
For example, the -MVK test is setup like: - -```xml - - climate reproducibility test using the multivariate K-S test - 1 - FALSE - FALSE - nmonths - 2 - $STOP_OPTION - $STOP_N - $STOP_OPTION - $STOP_N - 6 - -``` - -which will submit a job to run for two months, stop, and then resubmit itself 6 more times (continuing from -where it left off), leading to 14 months of simulation time. These settings, combined with the -`--walltime 02:00:00` option, allowed this test to run successfully on OLCF's Titan machine. The full set of -commands to run the MVK test used on titan are: - -*Install dependencies (only need to do once):* -``` -module load python -cd $E3SM/cime/scripts/climate_reproducibility -python pip --user -r requirements.txt -``` +option to `create_test`.* + +The full set of commands to run the MVK test used on Cori are: *Generate a baseline:* ``` -module load python # need python 2.7; default is 2.6 cd $E3SM/cime/scripts -./create_test MVK_PL.ne4_oQU240.FC5AV1C-04P2 --baseline-root "${PROJWORK}/cli115/${USER}/baselines" -g --walltime 02:00:00 + +source /global/project/projectdirs/acme/software/anaconda_envs/load_latest_e3sm_simple.sh + +./create_test MVK_PL.ne4_ne4.FC5AV1C-04P2 --baseline-root "${CSCRATCH}/baselines" --project acme -g -o --walltime 01:00:00 ``` *Compare to a baseline:* ``` -module load python # need python 2.7; default is 2.6 cd $E3SM/cime/scripts -./create_test MVK_PL.ne4_oQU240.FC5AV1C-04P2 --baseline-root "${PROJWORK}/cli115/${USER}/baselines" -c --walltime 02:00:00 + +source /global/project/projectdirs/acme/software/anaconda_envs/load_latest_e3sm_simple.sh + +./create_test MVK_PL.ne4_ne4.FC5AV1C-04P2 --baseline-root "${CSCRATCH}/baselines" --project acme -c --walltime 01:00:00 ``` Test pass/fail and extended output @@ -117,9 +120,9 @@ When you launch these tests, CIME will ouput the location of the case directory, something like this: ``` -# On titan: -./create_test MVK_PL.ne4_oQU240.FC5AV1C-04P2 --baseline-root 
"${PROJWORK}/cli115/${USER}/baselines" -c --walltime 02:00:00 - Creating test directory /ccs/home/$USER/acme_scratch/cli115/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID +# On cori-knl: +./create_test MVK_PL.ne4_ne4.FC5AV1C-04P2 --baseline-root "${CSCRATCH}/baselines" --project acme -c --walltime 01:00:00 + Creating test directory /global/cscratch1/sd/${USER}/acme_scratch/cori-knl/MVK_PL.ne4_ne4.FC5AV1C-04P2.cori-knl_intel.G.20190807_140111_rhfxn9 ``` Let's call that directory `$CASE_DIR`. Once all the jobs are finished, navigate to that directory and @@ -127,9 +130,9 @@ you can `cat TestStatus` to determine if the test passed or failed by looking at ``` cd $CASE_DIR -view TestStatus +cat TestStatus ... - PASS MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi BASELINE + PASS MVK_PL.ne4_ne4.FC5AV1C-04P2.cori-knl_intel BASELINE ... ``` @@ -138,8 +141,9 @@ To get some basic summary statistics about the test that was run, look in the ou submission for EVV's analysis: ``` -view MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.test.oJOBID +view MVK_PL.ne4_ne4.FC5AV1C-04P2.cori-knl_intel.G.20190807_140111_rhfxn9.C.YYYYMMDD_HHMMSS_RANDOMID.test.oJOBID ... 
+ 2019-08-14 22:09:02 CASE.RUN HAS FINISHED -------------------------------------------------------------------- ______ __ __ __ __ | ____| \ \ / / \ \ / / @@ -151,17 +155,18 @@ view MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.test.oJ Extended Verification and Validation for Earth System Models -------------------------------------------------------------------- - Current run: 2018-08-02 23:24:26 - User: kennedy - OS Type: Linux 3.0.101-0.46.1_1.0502.8871-cray_gem_s - Machine: titan-batch7 + Current run: 2019-08-14 21:51:32 + User: ${USER} + OS Type: Linux 4.12.14-25.22_5.0.70-cray_ari_c + Machine: nid06598 ------------------------------------------------------------------- ----------------------------------------------------------------- Beginning extensions test suite - ----------------------------------------------------------------- + ----------------------------------------------------------------- + - Kolmogorov-Smirnov Test: YYYYMMDD_HHMMSS_RANDOMID + Kolmogorov-Smirnov test: YYYYMMDD_HHMMSS_RANDOMID Variables analyzed: 378 Rejecting: 9 Critical value: 13.0 @@ -173,7 +178,7 @@ view MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.test.oJ ------------------------------------------------------------------- Done! Results can be seen in a web browser at: - /lustre/atlas/proj-shared/cli115/$USER/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID/run/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.eve/index.html + /global/cscratch1/sd/${USER}/acme_scratch/cori-knl/MVK_PL.ne4_ne4.FC5AV1C-04P2.cori-knl_intel.G.20190807_140111_rhfxn9/run/MVK_PL.ne4_ne4.FC5AV1C-04P2.cori-knl_intel.G.20190807_140111_rhfxn9.evv/index.html ------------------------------------------------------------------- ... ``` @@ -191,11 +196,10 @@ protocol). You can view the website by either copying it to a hosted location (` local http server (included in python!) 
and viewing it through an address like `http://0.0.0.0:8000/index.html`. **For the easiest viewing** we recommend copying the website to your local machine, and using EVV to -view it. you can install EVV locally by running this command (will work with both python and anaconda -environments): +view it. you can install EVV locally by running this command: ``` -pip install evv4esm +conda install evv4esm ``` Then, copy the website to your local machine, and view it: @@ -204,7 +208,7 @@ Then, copy the website to your local machine, and view it: ``` # on your local machine scp -r /lustre/atlas/proj-shared/cli115/$USER/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID/run/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.eve . -evv -o MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.eve -s +evv -o MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.evv -s -------------------------------------------------------------------- ______ __ __ __ __ | ____| \ \ / / \ \ / / @@ -217,7 +221,7 @@ evv -o MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.eve - -------------------------------------------------------------------- Current run: 2018-08-06 15:15:03 - User: fjk + User: ${USER} OS Type: Linux 4.15.0-29-generic Machine: pc0101123 @@ -226,7 +230,7 @@ evv -o MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.eve - View the generated website by navigating to: - http://0.0.0.0:8000/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.eve/index.html + http://0.0.0.0:8000/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.evv/index.html Exit by pressing `ctrl+c` to send a keyboard interrupt. 
diff --git a/cime/scripts/climate_reproducibility/requirements.txt b/cime/scripts/climate_reproducibility/requirements.txt deleted file mode 100644 index 5234819a3e2c..000000000000 --- a/cime/scripts/climate_reproducibility/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -six -numpy -scipy -pandas -netCDF4 -matplotlib -evv4esm>=0.1.1 -livvkit>=2.1.6 -json_tricks==3.11.0 diff --git a/cime/scripts/lib/CIME/SystemTests/mvk.py b/cime/scripts/lib/CIME/SystemTests/mvk.py index a215a61bce52..c7d26309a060 100644 --- a/cime/scripts/lib/CIME/SystemTests/mvk.py +++ b/cime/scripts/lib/CIME/SystemTests/mvk.py @@ -7,28 +7,22 @@ """ import os -import re -import stat import json -import shutil import logging +import CIME.test_status from CIME.SystemTests.system_tests_common import SystemTestsCommon from CIME.case.case_setup import case_setup -from CIME.hist_utils import _get_all_hist_files, BLESS_LOG_NAME -from CIME.utils import append_testlog, get_current_commit, get_timestamp, get_model -from CIME.utils import expect - -import CIME.test_status +from CIME.hist_utils import _get_all_hist_files +from CIME.utils import safe_copy, SharedArea import evv4esm # pylint: disable=import-error -from evv4esm.__main__ import main as evv # pylint: disable=import-error +from evv4esm.__main__ import main as evv # pylint: disable=import-error evv_lib_dir = os.path.abspath(os.path.dirname(evv4esm.__file__)) logger = logging.getLogger(__name__) -# Build executable with multiple instances -ninst = 20 +NINST = 20 class MVK(SystemTestsCommon): @@ -45,7 +39,6 @@ def __init__(self, case): else: self._case.set_value("COMPARE_BASELINE", False) - def build_phase(self, sharedlib_only=False, model_only=False): # Only want this to happen once. It will impact the sharedlib build # so it has to happen there. 
@@ -56,9 +49,9 @@ def build_phase(self, sharedlib_only=False, model_only=False): ntasks = self._case.get_value("NTASKS_{}".format(comp)) - self._case.set_value('NTASKS_{}'.format(comp), ntasks*ninst) + self._case.set_value('NTASKS_{}'.format(comp), ntasks * NINST) if comp != 'CPL': - self._case.set_value('NINST_{}'.format(comp), ninst) + self._case.set_value('NINST_{}'.format(comp), NINST) self._case.set_value('ATM_NCPL', 18) @@ -68,99 +61,35 @@ def build_phase(self, sharedlib_only=False, model_only=False): self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only) - # ================================================================= - # Run-time settings. - # Do this already in build_phase so that we can check the xml and - # namelist files before job starts running. - # ================================================================= - - # namelist specifications for each instance - for iinst in range(1, ninst+1): + for iinst in range(1, NINST + 1): with open('user_nl_cam_{:04d}'.format(iinst), 'w') as nl_atm_file: nl_atm_file.write('new_random = .true.\n') nl_atm_file.write('pertlim = 1.0e-10\n') nl_atm_file.write('seed_custom = {}\n'.format(iinst)) - def _generate_baseline(self): """ generate a new baseline case based on the current test """ - with self._test_status: - # generate baseline + super(MVK, self)._generate_baseline() - # BEGIN: modified CIME.hist_utils.generate_baseline - rundir = self._case.get_value("RUNDIR") + with SharedArea(): basegen_dir = os.path.join(self._case.get_value("BASELINE_ROOT"), self._case.get_value("BASEGEN_CASE")) - testcase = self._case.get_value("CASE") - - if not os.path.isdir(basegen_dir): - os.makedirs(basegen_dir) - - if os.path.isdir(os.path.join(basegen_dir, testcase)): - expect(False, " Cowardly refusing to overwrite existing baseline directory") - comments = "Generating baselines into '{}'\n".format(basegen_dir) - num_gen = 0 + rundir = self._case.get_value("RUNDIR") + ref_case = 
self._case.get_value("RUN_REFCASE") model = 'cam' - comments += " generating for model '{}'\n".format(model) - hists = _get_all_hist_files(testcase, model, rundir) - logger.debug("mvk_hist_files: {}".format(hists)) - - num_gen += len(hists) + hists = _get_all_hist_files(model, rundir, [r'h\d*.*\.nc'], ref_case=ref_case) + logger.debug("MVK additional baseline files: {}".format(hists)) for hist in hists: basename = hist[hist.rfind(model):] baseline = os.path.join(basegen_dir, basename) if os.path.exists(baseline): os.remove(baseline) - shutil.copy(hist, baseline) - comments += " generating baseline '{}' from file {}\n".format(baseline, hist) - - newestcpllogfile = self._case.get_latest_cpl_log(coupler_log_path=self._case.get_value("LOGDIR")) - if newestcpllogfile is None: - logger.warning("No cpl.log file found in log directory {}".format(self._case.get_value("LOGDIR"))) - else: - shutil.copyfile(newestcpllogfile, - os.path.join(basegen_dir, "cpl.log.gz")) - - expect(num_gen > 0, "Could not generate any hist files for case '{}', something is seriously wrong".format( - os.path.join(rundir, testcase))) - # make sure permissions are open in baseline directory - for root, _, files in os.walk(basegen_dir): - for name in files: - try: - os.chmod(os.path.join(root, name), - stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH) - except OSError: - # We tried. Not worth hard failure here. 
- pass - - if get_model() == "e3sm": - bless_log = os.path.join(basegen_dir, BLESS_LOG_NAME) - with open(bless_log, "a") as fd: - fd.write("sha:{} date:{}\n".format(get_current_commit(repo=self._case.get_value("CIMEROOT")), - get_timestamp(timestamp_format="%Y-%m-%d_%H:%M:%S"))) - # END: modified CIME.hist_utils.generate_baseline - - append_testlog(comments) - status = CIME.test_status.TEST_PASS_STATUS - baseline_name = self._case.get_value("BASEGEN_CASE") - self._test_status.set_status("{}".format(CIME.test_status.GENERATE_PHASE), status, - comments=os.path.dirname(baseline_name)) - basegen_dir = os.path.join(self._case.get_value("BASELINE_ROOT"), self._case.get_value("BASEGEN_CASE")) - # copy latest cpl log to baseline - # drop the date so that the name is generic - newestcpllogfiles = self._get_latest_cpl_logs() - for cpllog in newestcpllogfiles: - m = re.search(r"/(cpl.*.log).*.gz", cpllog) - if m is not None: - baselog = os.path.join(basegen_dir, m.group(1)) + ".gz" - shutil.copyfile(cpllog, - os.path.join(basegen_dir, baselog)) - + safe_copy(hist, baseline, preserve_meta=False) def _compare_baseline(self): with self._test_status: @@ -168,25 +97,28 @@ def _compare_baseline(self): # This is here because the comparison is run for each submission # and we only want to compare once the whole run is finished. We # need to return a pass here to continue the submission process. 
- self._test_status.set_status(CIME.test_status.BASELINE_PHASE, CIME.test_status.TEST_PASS_STATUS) + self._test_status.set_status(CIME.test_status.BASELINE_PHASE, + CIME.test_status.TEST_PASS_STATUS) return - self._test_status.set_status(CIME.test_status.BASELINE_PHASE, CIME.test_status.TEST_FAIL_STATUS) + self._test_status.set_status(CIME.test_status.BASELINE_PHASE, + CIME.test_status.TEST_FAIL_STATUS) run_dir = self._case.get_value("RUNDIR") case_name = self._case.get_value("CASE") - basecmp_case = self._case.get_value("BASECMP_CASE") - base_dir = os.path.join(self._case.get_value("BASELINE_ROOT"), basecmp_case) + base_dir = os.path.join(self._case.get_value("BASELINE_ROOT"), + self._case.get_value("BASECMP_CASE")) test_name = "{}".format(case_name.split('.')[-1]) evv_config = { test_name: { "module": os.path.join(evv_lib_dir, "extensions", "ks.py"), - "case1": "Test", - "dir1": run_dir, - "case2": "Baseline", - "dir2": base_dir, - "ninst": ninst, + "test-case": "Test", + "test-dir": run_dir, + "ref-case": "Baseline", + "ref-dir": base_dir, + "var-set": "default", + "ninst": NINST, "critical": 13 } } @@ -195,16 +127,16 @@ def _compare_baseline(self): with open(json_file, 'w') as config_file: json.dump(evv_config, config_file, indent=4) - evv_out_dir = os.path.join(run_dir, '.'.join([case_name, 'eve'])) + evv_out_dir = os.path.join(run_dir, '.'.join([case_name, 'evv'])) evv(['-e', json_file, '-o', evv_out_dir]) - with open(os.path.join(evv_out_dir, 'index.json'), 'r') as evv_f: + with open(os.path.join(evv_out_dir, 'index.json')) as evv_f: evv_status = json.load(evv_f) for evv_elem in evv_status['Data']['Elements']: - if evv_elem['Type'] == 'ValSummary': - if evv_elem['TableTitle'] == 'Kolmogorov-Smirnov': - if evv_elem['Data'][test_name]['']['Ensembles'] == 'identical': - self._test_status.set_status(CIME.test_status.BASELINE_PHASE, - CIME.test_status.TEST_PASS_STATUS) - break + if evv_elem['Type'] == 'ValSummary' \ + and evv_elem['TableTitle'] == 
'Kolmogorov-Smirnov test': + if evv_elem['Data'][test_name]['']['Test status'].lower() == 'pass': + self._test_status.set_status(CIME.test_status.BASELINE_PHASE, + CIME.test_status.TEST_PASS_STATUS) + break diff --git a/cime/scripts/lib/CIME/SystemTests/pgn.py b/cime/scripts/lib/CIME/SystemTests/pgn.py index 9bd074548726..026c7a6e599b 100644 --- a/cime/scripts/lib/CIME/SystemTests/pgn.py +++ b/cime/scripts/lib/CIME/SystemTests/pgn.py @@ -7,76 +7,42 @@ """ +from __future__ import division + import os -import glob +import re +import json import shutil -import numpy as np -import scipy.stats as stats +import logging +import pandas as pd +import numpy as np +from collections import OrderedDict -from netCDF4 import Dataset -from math import sqrt -from sklearn.metrics import mean_squared_error - -from CIME.test_status import * -from CIME.XML.standard_module_setup import * +import CIME.test_status from CIME.SystemTests.system_tests_common import SystemTestsCommon from CIME.case.case_setup import case_setup -from CIME.build import post_build -from CIME.hist_utils import rename_all_hist_files from CIME.utils import expect -#Logic for PGN ensemble runs: -# We need two inputs: -# A. Number of inic cond files -# B. perturbations (e.g. currently we have: without prt, pos prt and neg prt) +import evv4esm # pylint: disable=import-error +from evv4esm.extensions import pg # pylint: disable=import-error +from evv4esm.__main__ import main as evv # pylint: disable=import-error -# Based off the above, we compute number of instances to have (A * B) -# Build phase : Change the user_nl* files to add perturbations and other flags -# Run pahse : Rename history files -# Baselines generation phase: Compute cloud, store in netcdf file, copy netcdf file to baseline folder -# Baseline Comparison phase : Compare against baseline cloud to know pass/fail, and plot (optional) - - -#TODO: -#1. 
The loop to change user_nl* files is run twice as we call build again after changing ninst, which changes ntasks -#2. Do we want to remove user_nl_cam, user_nl_clm etc. files as they are not used in the simulation? -#3. change code so that pergro_ptend_names.txt is not generated if it is already there or only one instance writes this file... -#4. Plot generation is very basic at this point(no lables, legends etc.), improve it! -#5. Decision making about PASS/FAIL should have multiple criteria +evv_lib_dir = os.path.abspath(os.path.dirname(evv4esm.__file__)) logger = logging.getLogger(__name__) -#-------------------------------------------------------- -#Variables which needs global scope for various functions -#-------------------------------------------------------- - -#number of inital conditions -NINIT_COND = 6 #12 - -#perturbations for runs -PRT = [0.0, 1.0e-14, -1.0e-14] -PRTSTR = ['woprt','posprt','negprt'] +NUMBER_INITIAL_CONDITIONS = 6 +PERTURBATIONS = OrderedDict([('woprt', 0.0), + ('posprt', 1.0e-14), + ('negprt', -1.0e-14), + ]) +FCLD_NC = 'cam.h0.cloud.nc' +INIT_COND_FILE_TEMPLATE = \ + "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.{}.{}.0002-{:02d}-01-00000.nc" +# FIXME: should 'cam' be 'atm' now? +INSTANCE_FILE_TEMPLATE = '{}cam_{:04d}.h0.0001-01-01-00000{}.nc' -#file name for file containing PGE cloud -FCLD_NC = 'cam.h0.cloud.nc' - -#For preparing paths for namelist files for initial condition files -FILE_PREF_ATM = "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.cam.i.0002-" -FILE_PREF_LND = "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.clm2.r.0002-" - -FILE_SUF_ATM = "-01-00000.nc" -FILE_SUF_LND = "-01-00000.nc" - -#------------------------------------------------------------ -# Some flags for debugging or invoking extra features -#------------------------------------------------------------ - -#DEBUGGING: prints out max rmse diffs if set to True -INDEX = False - -# generate a plot -DOPLOT = False #It impacts performance! 
class PGN(SystemTestsCommon): @@ -84,123 +50,52 @@ def __init__(self, case): """ initialize an object interface to the PGN test """ + super(PGN, self).__init__(case) - #perturbation values and number of perturbation strings should be same - expect(len(PRT)== len(PRTSTR),"Number of perturbation values ("+str(len(PRT))+") are NOT " - "equal to number of perturbation strings("+ str(len(PRTSTR))+")") - SystemTestsCommon.__init__(self, case) - - #================================================================= - # Compile model with multiple instances - #================================================================= def build_phase(self, sharedlib_only=False, model_only=False): - - #------------------------------------------------------ - #Compute number of instances: - #------------------------------------------------------ - - #------------------------------------------------------ - #Number of instances: - #~~~~~~~~~~~~~~~~~~~ - #Comput it from number of initial conditions to use and - #number of perturbation ensemble members to use - #------------------------------------------------------ - nprt = len(PRT) - ninst = NINIT_COND * nprt - + ninst = NUMBER_INITIAL_CONDITIONS * len(PERTURBATIONS) logger.debug('PGN_INFO: number of instance: '+str(ninst)) - - #------------------------------------------------------ - #Fake Build: - #~~~~~~~~~~ - #(for debugging only) Building the model can take - #significant amount of time. 
Setting fake_bld to True - #can savethat time - #------------------------------------------------------ - fake_bld = False - - #Find number of instance in the default setup + default_ninst = self._case.get_value("NINST_ATM") - - #Sanity check: see if NINST is same for all model components, otherwise exit with error - for comp in ['OCN','WAV','GLC','ICE','ROF','LND']: - iinst = self._case.get_value("NINST_%s"%comp) - expect(default_ninst == iinst, "ERROR: component "+str(comp)+" NINST("+str(iinst)+")" - " is different from component ATM NINST("+str(default_ninst)+")") - - #------------------------------------------------------ - #Setup multi-instances for model components: - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - #Set the model for multi-instance ONLY if NINST == 1 - #for all model components. This is because, for - #NINST > 1 (e.g. rebuilding an old case) the following - #loop will increase the ntasks to a multiple of ninst - #(requiring a clean build again). We hit this issue if - #we launch ./case.build in the case directory of PGN - #test - #------------------------------------------------------ - - if(default_ninst == 1): #if multi-instance is not already set + + if default_ninst == 1: # if multi-instance is not already set # Only want this to happen once. It will impact the sharedlib build # so it has to happen here. 
if not model_only: # Lay all of the components out concurrently - logger.debug("PGN_INFO: Updating NINST for multi-instance in env_mach_pes.xml") - for comp in ['ATM','OCN','WAV','GLC','ICE','ROF','LND']: - ntasks = self._case.get_value("NTASKS_%s"%comp) - self._case.set_value("ROOTPE_%s"%comp, 0) - self._case.set_value("NINST_%s"%comp, ninst) - self._case.set_value("NTASKS_%s"%comp, ntasks*ninst) - - self._case.set_value("ROOTPE_CPL",0) - self._case.set_value("NTASKS_CPL",ntasks*ninst) + logger.debug("PGN_INFO: Updating NINST for multi-instance in " + "env_mach_pes.xml") + for comp in ['ATM', 'OCN', 'WAV', 'GLC', 'ICE', 'ROF', 'LND']: + ntasks = self._case.get_value("NTASKS_{}".format(comp)) + self._case.set_value("ROOTPE_{}".format(comp), 0) + self._case.set_value("NINST_{}".format(comp), ninst) + self._case.set_value("NTASKS_{}".format(comp), ntasks*ninst) + + self._case.set_value("ROOTPE_CPL", 0) + self._case.set_value("NTASKS_CPL", ntasks*ninst) self._case.flush() case_setup(self._case, test_mode=False, reset=True) - #Faking a bld can save the time code spend in building the model components - if fake_bld: - logger.debug("PGN_INFO: FAKE Build") - if (not sharedlib_only): - post_build(self._case, []) - else: - # Build exectuable with multiple instances - self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only) - - - #---------------------------------------------------------------- - # Namelist settings: - #~~~~~~~~~~~~~~~~~~ - # Do this already in build_phase so that we can check the xml and - # namelist files before job starts running. 
- #---------------------------------------------------------------- + self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only) + logger.debug("PGN_INFO: Updating user_nl_* files") csmdata_root = self._case.get_value("DIN_LOC_ROOT") - csmdata_atm = csmdata_root+"/atm/cam/inic/homme/ne4_v1_init/" - csmdata_lnd = csmdata_root+"/lnd/clm2/initdata/ne4_v1_init/b58d55680/" + csmdata_atm = os.path.join(csmdata_root, "atm/cam/inic/homme/ne4_v1_init") + csmdata_lnd = os.path.join(csmdata_root, "lnd/clm2/initdata/ne4_v1_init/b58d55680") iinst = 1 - for icond in range(NINIT_COND): - icond_label_2digits = str(icond+1).zfill(2) - fatm_in = FILE_PREF_ATM + icond_label_2digits + FILE_SUF_ATM - flnd_in = FILE_PREF_LND + icond_label_2digits + FILE_SUF_LND - for iprt in PRT: - with open('user_nl_cam_'+str(iinst).zfill(4), 'w') as atmnlfile, \ - open('user_nl_clm_'+str(iinst).zfill(4), 'w') as lndnlfile: - - #atm/lnd intitial conditions - - #initial condition files to use for atm and land - #atmnlfile.write("ncdata = '"+ "/pic/projects/uq_climate/wanh895/acme_input/ne4_v1_init/" + fatm_in+"' \n") - #lndnlfile.write("finidat = '"+ "/pic/projects/uq_climate/wanh895/acme_input/ne4_v1_init/" + flnd_in+"' \n") - - #uncomment the following when there files are on SVN server - atmnlfile.write("ncdata = '"+ csmdata_atm + "/" + fatm_in+"' \n") - lndnlfile.write("finidat = '"+ csmdata_lnd + "/" + flnd_in+"' \n") - - - #atm model output + for icond in range(1, NUMBER_INITIAL_CONDITIONS + 1): + fatm_in = os.path.join(csmdata_atm, INIT_COND_FILE_TEMPLATE.format('cam', 'i', icond)) + flnd_in = os.path.join(csmdata_lnd, INIT_COND_FILE_TEMPLATE.format('clm2', 'r', icond)) + for iprt in PERTURBATIONS.values(): + with open('user_nl_cam_{:04d}'.format(iinst), 'w') as atmnlfile, \ + open('user_nl_clm_{:04d}'.format(iinst), 'w') as lndnlfile: + + atmnlfile.write("ncdata = '{}' \n".format(fatm_in)) + lndnlfile.write("finidat = '{}' \n".format(flnd_in)) + atmnlfile.write("avgflag_pertape = 'I' 
\n") atmnlfile.write("nhtfrq = 1 \n") atmnlfile.write("mfilt = 2 \n") @@ -208,416 +103,146 @@ def build_phase(self, sharedlib_only=False, model_only=False): atmnlfile.write("pergro_mods = .true. \n") atmnlfile.write("pergro_test_active = .true. \n") - #atmnlfile.write("empty_htapes = .true. \n") - #atmnlfile.write("fincl1 = 'PS','U','V','T','Q','CLDLIQ','CLDICE','NUMLIQ','NUMICE','num_a1','num_a2','num_a3','LANDFRAC' \n") - #atmnlfile.write("phys_debug_lat = 41.3495891345") - #atmnlfile.write("phys_debug_lon = 45.0" ) - - if(iprt != 0.0): - atmnlfile.write("pertlim = "+str(iprt)+" \n") - - iinst += 1 - - #-------------------------------- - #Settings common to all instances - #-------------------------------- + if iprt != 0.0: + atmnlfile.write("pertlim = {} \n".format(iprt)) - #Coupler settings which are applicable to ALL the instances (as there is only one coupler for all instances) - self._case.set_value("STOP_N", "1") - self._case.set_value("STOP_OPTION","nsteps") - - - #=========================================================== - # Some user defined functions to avoid repeated calculations - #=========================================================== + iinst += 1 - def get_fname_wo_ext(self,rundir,casename,iinst): - """ - construct file name given the input - """ - #form file name withOUT extension - tstmp = '00000' - date_str = '.h0.0001-01-01-' - cam_str = 'cam_' - if(casename != "" ): - cam_str = '.'+cam_str - return os.path.join(rundir,casename+cam_str+str(iinst).zfill(4)+date_str+tstmp) + self._case.set_value("STOP_N", "1") + self._case.set_value("STOP_OPTION", "nsteps") def get_var_list(self): """ Get variable list for pergro specific output vars """ - rundir = self._case.get_value("RUNDIR") - casename = self._case.get_value("CASE") + rundir = self._case.get_value("RUNDIR") prg_fname = 'pergro_ptend_names.txt' - var_file = os.path.join(rundir,prg_fname) - expect(os.path.isfile(var_file),"File "+prg_fname+" does not exist in: "+rundir) + var_file = 
os.path.join(rundir, prg_fname) + expect(os.path.isfile(var_file), + "File {} does not exist in: {}".format(prg_fname, rundir)) with open(var_file, 'r') as fvar: var_list = fvar.readlines() - - return map(str.strip,var_list) - - def nc_write_handle(self,fname_nc,rmse_nc_var): - """ - Opens and write netcdf file for PGE curves - This function is here purely to avoid duplicate - codes so that it is easy to maintain code longterm - """ - - fhndl = Dataset(fname_nc,'w', format='NETCDF4') - - var_list = self.get_var_list() - len_var_list = len(var_list) - nprt = len(PRT) - - #create dims - fhndl.createDimension('ninit', NINIT_COND) - fhndl.createDimension('nprt', nprt) - fhndl.createDimension('nprt_m1', nprt-1) - fhndl.createDimension('nvars', len_var_list) - - - #create variables in the file - init_cond_nc = fhndl.createVariable('init_cond_fnames', 'S100', 'ninit') - prt_nc = fhndl.createVariable('perturb_strings', 'S10', 'nprt') - variables_nc = fhndl.createVariable('perturb_vnames', 'S20', 'nvars') - rmse_nc = fhndl.createVariable(rmse_nc_var, 'f8', ('ninit', 'nprt_m1', 'nvars')) - - - #assign variables for writing to the netcdf file - for iprt in range(nprt): - prt_nc[iprt] = PRTSTR[iprt] - - for ivar in range(len_var_list): - variables_nc[ivar] = var_list[ivar] - for icond in range(NINIT_COND): - icond_label_2digits = str(icond+1).zfill(2) - init_cond_nc[icond] = FILE_PREF_ATM + icond_label_2digits + FILE_SUF_ATM - - return fhndl, rmse_nc - - def rmse_var(self,ifile_test, ifile_cntl, var_list, var_suffix ): - """ - Compute RMSE difference between ifile_test and ifile_cntl for - variables listed in var_list - """ - - #------------------ARGS------------------------- - #ifile_test: path of test file - #ifile_cntl: path of control file - #var_list : List of all variables - #var_suffix: Suffix for var_list (e.g. t_, t_ qv_ etc.) - #----------------------------------------------- - - # See if the files exists or not.... 
- expect(os.path.isfile(ifile_test), "ERROR: Test file "+ifile_test+" does not exist") - expect(os.path.isfile(ifile_cntl), "ERROR: CNTL file "+ifile_cntl+" does not exist") - - ftest = Dataset(ifile_test, mode='r') - fcntl = Dataset(ifile_cntl, mode='r') - - #if max RMSE/DIFF is to be printed, extract lat lons from a file - if(INDEX): - lat = ftest.variables['lat'] - lon = ftest.variables['lon'] - - rmse = np.zeros(shape=(len(var_list))) - icntvar = 0 - - is_se = (len(ftest.variables[var_suffix+var_list[icntvar]].dimensions)==3) # see if it is SE grid - - for ivar in var_list: - var = var_suffix+ivar - if var in ftest.variables: - vtest = ftest.variables[var.strip()][0,...] # first dimention is time (=0) - vcntl = fcntl.variables[var.strip()][0,...] # first dimention is time (=0) - - #reshape for RMSE - if(is_se ): - nx, ny = vtest.shape #shape will be same for both arrays - nz = 1 - else: - nx, ny, nz = vtest.shape #shape will be same for both arrays - - rmse[icntvar] = sqrt(mean_squared_error(vtest.reshape((nx,ny*nz)), vcntl.reshape((nx,ny*nz)))) - - if(INDEX): - #NEED to work on formatting this.... - diff_arr = abs(vtest[...] - vcntl[...]) - max_diff = np.amax(diff_arr) - ind_max = np.unravel_index(diff_arr.argmax(),diff_arr.shape) - print(ind_max) - print_str = var+' '+str(max_diff)+' '+str(vtest[ind_max])+' ' +str(vcntl[ind_max])+' '+str(lat[ind_max[1]])+' '+ str(lon[ind_max[1]])+' '+str(ind_max[0]) - print("{}".format(print_str)) - #print('{0:15}{1:15}{2:15}\n'.format(name, a, b))#TRY THIS!! - - - #normalize by mean values of the field in the control case - rmse[icntvar] = rmse[icntvar]/np.mean(vcntl) - icntvar += 1 - vtest = None - vcntl = None - var = None - ftest.close() - fcntl.close() - return rmse + return list(map(str.strip, var_list)) def _compare_baseline(self): - - #import time - #t0 = time.time() """ Compare baselines in the pergro test sense. 
That is, compare PGE from the test simulation with the baseline cloud """ - logger.debug("PGN_INFO:BASELINE COMPARISON STARTS") - - rundir = self._case.get_value("RUNDIR") - casename = self._case.get_value("CASE") + with self._test_status: + self._test_status.set_status(CIME.test_status.BASELINE_PHASE, + CIME.test_status.TEST_FAIL_STATUS) + + logger.debug("PGN_INFO:BASELINE COMPARISON STARTS") + + run_dir = self._case.get_value("RUNDIR") + case_name = self._case.get_value("CASE") + base_dir = os.path.join(self._case.get_value("BASELINE_ROOT"), + self._case.get_value("BASECMP_CASE")) + + var_list = self.get_var_list() + + test_name = "{}".format(case_name.split('.')[-1]) + evv_config = { + test_name: { + "module": os.path.join(evv_lib_dir, "extensions", "pg.py"), + "test-case": case_name, + "test-name": "Test", + "test-dir": run_dir, + "ref-name": "Baseline", + "ref-dir": base_dir, + "variables": var_list, + "perturbations": PERTURBATIONS, + "pge-cld": FCLD_NC, + "ninit": NUMBER_INITIAL_CONDITIONS, + "init-file-template": INIT_COND_FILE_TEMPLATE, + "instance-file-template": INSTANCE_FILE_TEMPLATE, + } + } + + json_file = os.path.join(run_dir, '.'.join([case_name, 'json'])) + with open(json_file, 'w') as config_file: + json.dump(evv_config, config_file, indent=4) + + evv_out_dir = os.path.join(run_dir, '.'.join([case_name, 'evv'])) + evv(['-e', json_file, '-o', evv_out_dir]) + + with open(os.path.join(evv_out_dir, 'index.json'), 'r') as evv_f: + evv_status = json.load(evv_f) + + for evv_elem in evv_status['Data']['Elements']: + if evv_elem['Type'] == 'ValSummary' \ + and evv_elem['TableTitle'] == 'Perturbation growth test': + if evv_elem['Data'][test_name]['']['Test status'].lower() == 'pass': + self._test_status.set_status(CIME.test_status.BASELINE_PHASE, + CIME.test_status.TEST_PASS_STATUS) + break - var_list = self.get_var_list() - len_var_list = len(var_list) - nprt = len(PRT) - - #baseline directory names - base_root = self._case.get_value("BASELINE_ROOT") - 
base_comp = self._case.get_value("BASECMP_CASE") - - #baseline directory is:base_root/base_comp - base_dir = os.path.join(base_root,base_comp) - - #for test cases (new environment etc.) - logger.debug("PGN_INFO: Test case comparison...") - - #--------------------------------------------- - # Write netcdf file for comparison - #--------------------------------------------- - fcomp_nc = 'comp_cld.nc' - fcomp_cld, comp_rmse_nc = self.nc_write_handle(fcomp_nc,'comp_rmse') - - iinst = 0 - for icond in range(NINIT_COND): - iinst += 1; iprt = 0 - ifile_cntl = os.path.join(base_dir,self.get_fname_wo_ext('','',iinst)+'_woprt.nc') - expect(os.path.isfile(ifile_cntl), "ERROR: File "+ifile_cntl+" does not exist") - logger.debug("PGN_INFO:CNTL_TST:"+ifile_cntl) - - for aprt in PRTSTR[1:]: - iinst += 1; - ifile_test = os.path.join(rundir,self.get_fname_wo_ext(rundir,casename,iinst)+'_'+aprt+'.nc') - expect(os.path.isfile(ifile_test), "ERROR: File "+ifile_test+" does not exist") - comp_rmse_nc[icond,iprt,0:len_var_list] = self.rmse_var(ifile_test, ifile_cntl, var_list, 't_' ) - logger.debug("PGN_INFO:Compared to TEST_TST:"+ifile_test) - - iprt += 1 - - #-------------------------------------------- - #Student's t-test - #------------------------------------------- - - #Extract last element of each PGE curve of the cloud - path_cld_nc = os.path.join(base_dir,FCLD_NC) - expect(os.path.isfile(path_cld_nc), "ERROR: "+FCLD_NC+" does not exist at:"+path_cld_nc) - - fcld = Dataset(path_cld_nc,'r', format='NETCDF4') - - #Get dimentions of cloud PGE curves - ninic_cld = fcld.variables['cld_rmse'].shape[0] - nprt_cld = fcld.variables['cld_rmse'].shape[1] - nvar_cld = fcld.variables['cld_rmse'].shape[2] - - expect(ninic_cld == NINIT_COND, "BASELINE COMPARISON: Number of initial conditions should be same:" \ - "inic cld:"+str(ninic_cld)+" inic comp:"+str(NINIT_COND)) - - expect(nprt_cld == nprt-1, "BASELINE COMPARISON: Number of perturbations should be same:" \ - "prt cld:"+str(nprt_cld)+" 
prt comp:"+str(nprt - 1)) - - expect(nvar_cld == len_var_list, "BASELINE COMPARISON: Number of phys update variables should be same:" \ - "nvar cld:"+str(nvar_cld)+" nvar comp:"+str(len_var_list)) - - - pgecld = fcld.variables['cld_rmse'] - - if (DOPLOT): - import pylab as pl - #generate plot - ax = pl.gca() - for icond in range(NINIT_COND): - for iprt in range(nprt-1): - ax.semilogy(pgecld[icond,iprt,:],color='b') - ax.semilogy(comp_rmse_nc[icond,iprt,:],color='r') - pl.savefig('plot_comp.png') - - pge_ends_cld = fcld.variables['cld_rmse'][:,:,len_var_list-1] - pge_ends_comp = comp_rmse_nc[:,0:nprt-1,len_var_list-1] - - #run the t-test - pge_ends_cld = pge_ends_cld.flatten() - pge_ends_comp = pge_ends_comp.flatten() - - t_stat, p_val = stats.ttest_ind(pge_ends_cld, pge_ends_comp) - - t_stat = abs(t_stat) #only value of t_stat matters, not the sign - - print("PGN_INFO: T value:"+str(t_stat)) - print("PGN_INFO: P value:"+str(p_val)) - - logger.warn(" T value:"+str(t_stat)) - logger.warn(" P value:"+str(p_val)) - - - #There should be multiple criteria to determin pass/fail - #This part of decision making should be more polished - - if (t_stat > 3): - with self._test_status: - self._test_status.set_status(BASELINE_PHASE, TEST_FAIL_STATUS) - print('PGN_INFO:TEST FAIL') - else: - with self._test_status: - self._test_status.set_status(BASELINE_PHASE, TEST_PASS_STATUS) - print('PGN_INFO:TEST PASS') - - #Close this file after you access "comp_rmse_nc" variable - fcomp_cld.close() - logger.debug("PGN_INFO: POST PROCESSING PHASE ENDS") - - #t1 = time.time() - #print('time: '+str(t1-t0)) - #================================================================= - # run_phase - #================================================================= def run_phase(self): logger.debug("PGN_INFO: RUN PHASE") - + self.run_indv() - #cwd = os.getcwd() + # Here we are in case directory, we need to go to the run directory + # and rename files + rundir = self._case.get_value("RUNDIR") + casename
= self._case.get_value("CASE") + logger.debug("PGN_INFO: Case name is:{}".format(casename)) - #Here were are in case directory, we need to go to the run directory and rename files - rundir = self._case.get_value("RUNDIR") - casename = self._case.get_value("CASE") - logger.debug("PGN_INFO: Case name is:"+casename ) + for icond in range(NUMBER_INITIAL_CONDITIONS): + for iprt, (prt_name, prt_value) in enumerate(PERTURBATIONS.items()): + iinst = pg._sub2instance(icond, iprt, len(PERTURBATIONS)) + fname = os.path.join(rundir, INSTANCE_FILE_TEMPLATE.format(casename + '.', iinst, '')) + renamed_fname = re.sub(r'\.nc$', '_{}.nc'.format(prt_name), fname) - iinst = 1 - for icond in range(NINIT_COND): - for iprt in range(len(PRT)): - - #------------------------------------------------------ - #SANITY CHECK - To confirm that history file extension - #~~~~~~~~~~~~ - # corresponds to the right perturbation - #------------------------------------------------------ - #find corresponding atm_in_* - fatm_in = os.path.join(rundir,'atm_in_'+str(iinst).zfill(4)) - #see if atm_in_* file has pertlim same as PRT[iprt] (sanity check) - found = False - prtval = 0.0 - with open(fatm_in) as atmfile: - for line in atmfile: - if(line.find('pertlim') > 0): - found = True - prtval = float(line.split('=')[1]) - expect(prtval == PRT[iprt], "ERROR: prtval doesn't match, prtval:"+str(prtval)+"; prt["+str(iprt)+"]:"+str(PRT[iprt])) - logger.debug("PGN_INFO:prtval:"+str(prtval)+"; prt["+str(iprt)+"]:"+str(PRT[iprt])) - - if(not found): - expect(prtval == PRT[iprt], "ERROR: default prtval doesn't match, prtval:"+str(prtval)+"; prt["+str(iprt)+"]:"+str(PRT[iprt])) - logger.debug("PGN_INFO:def prtval:"+str(prtval)+"; prt["+str(iprt)+"]:"+str(PRT[iprt])) - - #--------------------------------------------------------- - #Rename file - #--------------------------------------------------------- - - #form file name - fname = self.get_fname_wo_ext(rundir,casename,iinst) #NOTE:without extension - 
logger.debug("PGN_INFO: fname to rename:"+fname ) - - renamed_fname = fname +'_'+PRTSTR[iprt]+'.nc' - fname_w_ext = fname + '.nc' #add extention - - #see if file exists - if (os.path.isfile(fname_w_ext)): - #rename file - shutil.move(fname_w_ext,renamed_fname) - logger.debug("PGN_INFO: Renamed file:"+renamed_fname) - else: - expect(os.path.isfile(renamed_fname), "ERROR: File "+renamed_fname+" does not exist") - logger.debug("PGN_INFO: Renamed file already exists:"+renamed_fname) - - iinst += 1 - - #cloud generation - - logger.debug("PGN_INFO: cloud generation-gen base") - - var_list = self.get_var_list() - len_var_list = len(var_list) - nprt = len(PRT) - - #for trusted cloud sims - logger.debug("PGN_INFO: Computing cloud") - - cld_res = np.empty([len_var_list]) - - #--------------------------------------------- - # Write netcdf file for cloud in rundir - #--------------------------------------------- - os.chdir(rundir) - fcld, cld_rmse_nc = self.nc_write_handle(FCLD_NC,'cld_rmse') - - iinst = 0 - for icond in range(NINIT_COND): - iinst += 1; iprt = 0 - ifile_cntl = os.path.join(rundir,self.get_fname_wo_ext('',casename,iinst)+'_'+PRTSTR[iprt]+'.nc') - expect(os.path.isfile(ifile_cntl), "ERROR: File "+ifile_cntl+" does not exist") - logger.debug("PGN_INFO:CNTL_CLD:"+ifile_cntl) - - for aprt in PRTSTR[1:]: - iinst += 1; - iprt = PRTSTR.index(aprt) - 1 # subtracted 1 as we will only get two curves for each inic (wo-pos and wo-neg) - ifile_test = os.path.join(rundir,self.get_fname_wo_ext('',casename,iinst)+'_'+aprt+'.nc') - expect(os.path.isfile(ifile_test), "ERROR: File "+ifile_test+" does not exist") - cld_rmse_nc[icond,iprt,0:len_var_list] = self.rmse_var(ifile_test, ifile_cntl, var_list, 't_' ) - logger.debug("PGN_INFO:Compared to CLD:"+ifile_test) - - fcld.close() - - if(self._case.get_value("GENERATE_BASELINE")): - - #baseline directory names - base_root = self._case.get_value("BASELINE_ROOT") - base_gen = self._case.get_value("BASEGEN_CASE") - - #baseline 
directory is:base_root/base_gen - base_dir = os.path.join(base_root,base_gen) - - #first copy files to the baseline directory - self._generate_baseline()#BALLI-CHECK IF THIS IS OKAY - - #copy cloud.nc file to baseline directory - logger.debug("PGN_INFO:copy:"+FCLD_NC+" to "+base_dir) - shutil.copy(FCLD_NC,base_dir) - - logger.debug("PGN_INFO: RUN PHASE ENDS" ) - - -#===================================================== -#Debugging: -#===================================================== - -#----------------------------------------------------- -#DEBUG type 1 (DB1): Ensure that model produces BFB instances -#----------------------------------------------------- -##Replace the following lines in the script with the following updated values -#PRT = [0.0, 0.0] #DB1 -#PRTSTR = ['woprt','posprt'] - -## Comment out all namelist changes so that all instances have same namelist values -## For testing effect of namelist changes, uncomment namlist changes such that -## namelists are same for all instances - -## Comment out sanity checks as well if needed.... 
+ logger.debug("PGN_INFO: fname to rename:{}".format(fname)) + logger.debug("PGN_INFO: Renamed file:{}".format(renamed_fname)) + try: + shutil.move(fname, renamed_fname) + except IOError: + expect(os.path.isfile(renamed_fname), + "ERROR: File {} does not exist".format(renamed_fname)) + logger.debug("PGN_INFO: Renamed file already exists:" + "{}".format(renamed_fname)) + + logger.debug("PGN_INFO: RUN PHASE ENDS") + + def _generate_baseline(self): + super(PGN, self)._generate_baseline() + + basegen_dir = os.path.join(self._case.get_value("BASELINE_ROOT"), + self._case.get_value("BASEGEN_CASE")) + + rundir = self._case.get_value("RUNDIR") + casename = self._case.get_value("CASE") + + var_list = self.get_var_list() + nvar = len(var_list) + nprt = len(PERTURBATIONS) + rmse_prototype = {} + for icond in range(NUMBER_INITIAL_CONDITIONS): + prt_rmse = {} + for iprt, prt_name in enumerate(PERTURBATIONS): + if prt_name == 'woprt': + continue + iinst_ctrl = pg._sub2instance(icond, 0, nprt) + ifile_ctrl = os.path.join(rundir, + INSTANCE_FILE_TEMPLATE.format(casename + '.', iinst_ctrl, '_woprt')) + + iinst_test = pg._sub2instance(icond, iprt, nprt) + ifile_test = os.path.join(rundir, + INSTANCE_FILE_TEMPLATE.format(casename + '.', iinst_test, '_' + prt_name)) + + prt_rmse[prt_name] = pg.variables_rmse(ifile_test, ifile_ctrl, var_list, 't_') + rmse_prototype[icond] = pd.concat(prt_rmse) + rmse = pd.concat(rmse_prototype) + cld_rmse = np.reshape(rmse.RMSE.values, (NUMBER_INITIAL_CONDITIONS, nprt - 1, nvar)) + + pg.rmse_writer(os.path.join(rundir, FCLD_NC), + cld_rmse, list(PERTURBATIONS.keys()), var_list, INIT_COND_FILE_TEMPLATE) + + logger.debug("PGN_INFO:copy:{} to {}".format(FCLD_NC, basegen_dir)) + shutil.copy(os.path.join(rundir, FCLD_NC), basegen_dir) diff --git a/cime/scripts/lib/CIME/SystemTests/tsc.py b/cime/scripts/lib/CIME/SystemTests/tsc.py index 90be18488204..86e50f7eb26c 100644 --- a/cime/scripts/lib/CIME/SystemTests/tsc.py +++ 
b/cime/scripts/lib/CIME/SystemTests/tsc.py @@ -7,155 +7,175 @@ This class inherits from SystemTestsCommon. """ -from CIME.XML.standard_module_setup import * +import os +import json +import logging + +import CIME.test_status from CIME.SystemTests.system_tests_common import SystemTestsCommon from CIME.case.case_setup import case_setup -from CIME.build import post_build -from CIME.hist_utils import rename_all_hist_files -from CIME.test_status import * -#import CIME.utils -#from CIME.check_lockedfiles import * +from CIME.hist_utils import rename_all_hist_files, _get_all_hist_files +from CIME.utils import safe_copy, SharedArea + +import evv4esm # pylint: disable=import-error +from evv4esm.__main__ import main as evv # pylint: disable=import-error + +evv_lib_dir = os.path.abspath(os.path.dirname(evv4esm.__file__)) logger = logging.getLogger(__name__) -class TSC(SystemTestsCommon): +NINST = 12 +SIM_LENGTH = 600 # seconds +OUT_FREQ = 10 # seconds +INSPECT_AT = [300, 450, 600] # seconds +INIT_COND_FILE_TEMPLATE = \ + "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.{}.{}.0002-{:02d}-01-00000.nc" +VAR_LIST = ["T", "Q", "V", "CLDLIQ", "CLDICE", "NUMLIQ", "NUMICE", "num_a1", "num_a2", "num_a3"] +P_THRESHOLD = 0.005 + + +class TSC(SystemTestsCommon): def __init__(self, case): """ initialize an object interface to the TSC test """ - SystemTestsCommon.__init__(self, case) + super(TSC, self).__init__(case) - #================================================================= - # Compile model with multiple instances - #================================================================= def build_phase(self, sharedlib_only=False, model_only=False): - - ninst = 12 - - #BSINGH: For debugging only - Building the model can take - #significant amount of time. Setting fake_bld to True can save - #that time - fake_bld = False - - # Build exectuable with multiple instances - # (in the development phase we use 3) - # Lay all of the components out concurrently - # Only want this to happen once. 
It will impact the sharedlib build # so it has to happen there. if not model_only: logging.warning("Starting to build multi-instance exe") - for comp in ['ATM','OCN','WAV','GLC','ICE','ROF','LND']: - ntasks = self._case.get_value("NTASKS_%s"%comp) - self._case.set_value("ROOTPE_%s"%comp, 0) - self._case.set_value("NINST_%s"%comp, ninst) - self._case.set_value("NTASKS_%s"%comp, ntasks*ninst) - - self._case.set_value("ROOTPE_CPL",0) - self._case.set_value("NTASKS_CPL",ntasks*ninst) + for comp in ['ATM', 'OCN', 'WAV', 'GLC', 'ICE', 'ROF', 'LND']: + ntasks = self._case.get_value("NTASKS_{}".format(comp)) + self._case.set_value("ROOTPE_{}".format(comp), 0) + self._case.set_value("NINST_{}".format(comp), NINST) + self._case.set_value("NTASKS_{}".format(comp), ntasks * NINST) + + self._case.set_value("ROOTPE_CPL", 0) + self._case.set_value("NTASKS_CPL", ntasks * NINST) self._case.flush() case_setup(self._case, test_mode=False, reset=True) - #BSINGH: Faking a bld can save the time code spend in building the model components - if fake_bld: - if (not sharedlib_only): - post_build(self._case, []) - else: - self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only) - - #================================================================= - # Conduct one multi-instance run with specified time step size. - # The complete run_phase (defined below) will contain two calls - # of this function using different dtime. - #================================================================= - def _run_with_specified_dtime(self,dtime=2): - - # dtime is model time step size in seconds. Default is set to 2. + self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only) - # Change the coupling frequency accordingly - ncpl = 86400/dtime - self._case.set_value("ATM_NCPL",ncpl) + def _run_with_specified_dtime(self, dtime=2): + """ + Conduct one multi-instance run with a specified time step size. 
- # Simulation length = 10 minutes - nsteps = 600/dtime - self._case.set_value("STOP_N", nsteps) - self._case.set_value("STOP_OPTION","nsteps") + :param dtime (int): Specified time step size in seconds + """ + coupling_frequency = 86400 // dtime + self._case.set_value("ATM_NCPL", str(coupling_frequency)) - # Write output every 10 seconds - nhtfrq = 10/dtime + nsteps = SIM_LENGTH // dtime + self._case.set_value("STOP_N", str(nsteps)) + self._case.set_value("STOP_OPTION", "nsteps") - # generate paths/file names for initial conditons csmdata_root = self._case.get_value("DIN_LOC_ROOT") - csmdata_atm = csmdata_root+"/atm/cam/inic/homme/ne4_v1_init/" - csmdata_lnd = csmdata_root+"/lnd/clm2/initdata/ne4_v1_init/b58d55680/" - file_pref_atm = "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.cam.i.0002-" - file_pref_lnd = "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.clm2.r.0002-" - - file_suf_atm = "-01-00000.nc" - file_suf_lnd = "-01-00000.nc" - - # user_nl_... files are modified for every instance. - # The number instances specified here has to be consistent with the - # value set in build_phase. 
- ninst = 12 - for iinst in range(1, ninst+1): + csmdata_atm = os.path.join(csmdata_root, "atm/cam/inic/homme/ne4_v1_init") + csmdata_lnd = os.path.join(csmdata_root, "lnd/clm2/initdata/ne4_v1_init/b58d55680") + nstep_output = OUT_FREQ // dtime + for iinst in range(1, NINST+1): with open('user_nl_cam_'+str(iinst).zfill(4), 'w') as atmnlfile, \ open('user_nl_clm_'+str(iinst).zfill(4), 'w') as lndnlfile: - # atm/lnd intitial conditions - - inst_label_2digits = str(iinst).zfill(2) - - atmnlfile.write("ncdata = '"+ csmdata_atm + "/" + file_pref_atm + inst_label_2digits + file_suf_atm+"' \n") - lndnlfile.write("finidat = '"+ csmdata_lnd + "/" + file_pref_lnd + inst_label_2digits + file_suf_lnd+"' \n") + fatm_in = os.path.join(csmdata_atm, INIT_COND_FILE_TEMPLATE.format('cam', 'i', iinst)) + flnd_in = os.path.join(csmdata_lnd, INIT_COND_FILE_TEMPLATE.format('clm2', 'r', iinst)) + atmnlfile.write("ncdata = '{}' \n".format(fatm_in)) + lndnlfile.write("finidat = '{}' \n".format(flnd_in)) - # for initial testing on constance@pnnl - #atmnlfile.write("ncdata = '"+ "/pic/projects/uq_climate/wanh895/acme_input/ne4_v1_init/" + file_pref_atm + inst_label_2digits + file_suf_atm+"' \n") - #lndnlfile.write("finidat = '"+ "/pic/projects/uq_climate/wanh895/acme_input/ne4_v1_init/" + file_pref_lnd + inst_label_2digits + file_suf_lnd+"' \n") + lndnlfile.write("dtime = {} \n".format(dtime)) - # time step sizes - - atmnlfile.write("dtime = "+str(dtime)+" \n") + atmnlfile.write("dtime = {} \n".format(dtime)) atmnlfile.write("iradsw = 2 \n") atmnlfile.write("iradlw = 2 \n") - lndnlfile.write("dtime = "+str(dtime)+" \n") - - # atm model output - atmnlfile.write("avgflag_pertape = 'I' \n") - atmnlfile.write("nhtfrq = "+str(nhtfrq)+" \n") - atmnlfile.write("mfilt = 1 \n") - atmnlfile.write("ndens = 1 \n") + atmnlfile.write("nhtfrq = {} \n".format(nstep_output)) + atmnlfile.write("mfilt = 1 \n") + atmnlfile.write("ndens = 1 \n") atmnlfile.write("empty_htapes = .true. 
\n") - atmnlfile.write("fincl1 = 'PS','U','V','T','Q','CLDLIQ','CLDICE','NUMLIQ','NUMICE','num_a1','num_a2','num_a3','LANDFRAC' \n") + atmnlfile.write("fincl1 = 'PS','U','LANDFRAC',{} \n".format( + ''.join(["'{}',".format(s) for s in VAR_LIST])[:-1] + )) # Force rebuild namelists self._skip_pnl = False - # Namelist settings done. Now run the model. self.run_indv() - # Append "DT000x" to the history file names - rename_all_hist_files( self._case, suffix="DT"+str(dtime).zfill(4) ) + rename_all_hist_files(self._case, suffix="DT{:04d}".format(dtime)) - - #================================================================= - # run_phase - #================================================================= def run_phase(self): - - # Run simulations with 2-s time step self._run_with_specified_dtime(dtime=2) - # Run simulations with 1-s time step. This is NOT needed - # for testing new code or new computing environment. if self._case.get_value("GENERATE_BASELINE"): self._run_with_specified_dtime(dtime=1) + def _compare_baseline(self): - #currently faking it as PENDING with self._test_status as ts: - ts.set_status(BASELINE_PHASE, TEST_PEND_STATUS) + ts.set_status(CIME.test_status.BASELINE_PHASE, + CIME.test_status.TEST_FAIL_STATUS) + + run_dir = self._case.get_value("RUNDIR") + case_name = self._case.get_value("CASE") + base_dir = os.path.join(self._case.get_value("BASELINE_ROOT"), + self._case.get_value("BASECMP_CASE")) + + test_name = "{}".format(case_name.split('.')[-1]) + evv_config = { + test_name: { + "module": os.path.join(evv_lib_dir, "extensions", "tsc.py"), + "test-case": case_name, + "test-dir": run_dir, + "ref-case": "Baseline", + "ref-dir": base_dir, + "time-slice": [OUT_FREQ, SIM_LENGTH], + "inspect-times": INSPECT_AT, + "variables": VAR_LIST, + "p-threshold": P_THRESHOLD, + } + } + + json_file = os.path.join(run_dir, '.'.join([case_name, 'json'])) + with open(json_file, 'w') as config_file: + json.dump(evv_config, config_file, indent=4) + + evv_out_dir = 
os.path.join(run_dir, '.'.join([case_name, 'evv'])) + evv(['-e', json_file, '-o', evv_out_dir]) + + with open(os.path.join(evv_out_dir, 'index.json'), 'r') as evv_f: + evv_status = json.load(evv_f) + + for evv_elem in evv_status['Data']['Elements']: + if evv_elem['Type'] == 'ValSummary' \ + and evv_elem['TableTitle'] == 'Time step convergence test': + if evv_elem['Data'][test_name]['']['Test status'].lower() == 'pass': + self._test_status.set_status(CIME.test_status.BASELINE_PHASE, + CIME.test_status.TEST_PASS_STATUS) + break + + def _generate_baseline(self): + super(TSC, self)._generate_baseline() + + with SharedArea(): + basegen_dir = os.path.join(self._case.get_value("BASELINE_ROOT"), + self._case.get_value("BASEGEN_CASE")) + + rundir = self._case.get_value("RUNDIR") + ref_case = self._case.get_value("RUN_REFCASE") + + model = 'cam' + hists = _get_all_hist_files(model, rundir, [r'h\d*.*\.nc\.DT\d*'], ref_case=ref_case) + logger.debug("TSC additional baseline files: {}".format(hists)) + for hist in hists: + basename = hist[hist.rfind(model):] + baseline = os.path.join(basegen_dir, basename) + if os.path.exists(baseline): + os.remove(baseline) + + safe_copy(hist, baseline, preserve_meta=False) diff --git a/cime/scripts/lib/CIME/hist_utils.py b/cime/scripts/lib/CIME/hist_utils.py index 7f1c988497dd..f757477857d7 100644 --- a/cime/scripts/lib/CIME/hist_utils.py +++ b/cime/scripts/lib/CIME/hist_utils.py @@ -576,7 +576,7 @@ def _generate_baseline_impl(case, baseline_dir=None, allow_baseline_overwrite=Fa testname = case.get_value("TESTCASE") testopts = parse_test_name(case.get_value("CASEBASEID"))[1] testopts = [] if testopts is None else testopts - expect(num_gen > 0 or (testname == "PFS" or "B" in testopts), + expect(num_gen > 0 or (testname in ["PFS", "TSC"] or "B" in testopts), "Could not generate any hist files for case '{}', something is seriously wrong".format(os.path.join(rundir, testcase))) if get_model() == "e3sm":