From f92b89cbe071cb1752d36bb8a504f5bcf37a9380 Mon Sep 17 00:00:00 2001
From: Joseph H Kennedy <me@jhkennedy.org>
Date: Fri, 5 Apr 2019 09:41:59 -0400
Subject: [PATCH 1/8] Update MVK for E3SM atmosphere component directory name
 changes

---
 cime/scripts/lib/CIME/SystemTests/mvk.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cime/scripts/lib/CIME/SystemTests/mvk.py b/cime/scripts/lib/CIME/SystemTests/mvk.py
index a215a61bce52..4d5fa27df9e3 100644
--- a/cime/scripts/lib/CIME/SystemTests/mvk.py
+++ b/cime/scripts/lib/CIME/SystemTests/mvk.py
@@ -104,7 +104,7 @@ def _generate_baseline(self):
             comments = "Generating baselines into '{}'\n".format(basegen_dir)
             num_gen = 0
 
-            model = 'cam'
+            model = 'atm'
             comments += "  generating for model '{}'\n".format(model)
             hists = _get_all_hist_files(testcase, model, rundir)
             logger.debug("mvk_hist_files: {}".format(hists))

From 53c4989711352259ae5ab49a83761b9b0f26600d Mon Sep 17 00:00:00 2001
From: Joseph H Kennedy <me@jhkennedy.org>
Date: Fri, 5 Apr 2019 11:13:15 -0400
Subject: [PATCH 2/8] PEP8 and string formatting updates for PNG test

---
 cime/scripts/lib/CIME/SystemTests/pgn.py | 737 ++++++++++++-----------
 1 file changed, 392 insertions(+), 345 deletions(-)

diff --git a/cime/scripts/lib/CIME/SystemTests/pgn.py b/cime/scripts/lib/CIME/SystemTests/pgn.py
index 9bd074548726..6fd0bcfb07f0 100644
--- a/cime/scripts/lib/CIME/SystemTests/pgn.py
+++ b/cime/scripts/lib/CIME/SystemTests/pgn.py
@@ -7,8 +7,7 @@
 
 """
 
-import os
-import glob
+
 import shutil
 import numpy as np
 import scipy.stats as stats
@@ -23,60 +22,68 @@
 from CIME.SystemTests.system_tests_common import SystemTestsCommon
 from CIME.case.case_setup import case_setup
 from CIME.build import post_build
-from CIME.hist_utils import rename_all_hist_files
 from CIME.utils import expect
 
-#Logic for PGN ensemble runs:
+# Logic for PGN ensemble runs:
+# ----------------------------
 # We need two inputs:
 # A. Number of inic cond files
 # B. perturbations (e.g. currently we have: without prt, pos prt and neg prt)
 
 # Based off the above, we compute number of instances to have (A * B)
-# Build phase               : Change the user_nl* files to add perturbations and other flags 
-# Run pahse                 : Rename history files
-# Baselines generation phase: Compute cloud, store in netcdf file, copy netcdf file to baseline folder
-# Baseline Comparison phase : Compare against baseline cloud to know pass/fail, and plot (optional)
-
-
-#TODO: 
-#1. The loop to change user_nl* files is run twice as we call build again after changing ninst, which changes ntasks
-#2. Do we want to remove user_nl_cam, user_nl_clm etc. files as they are not used in the simulation?
-#3. change code so that pergro_ptend_names.txt is not generated if it is already there or only one instance writes this file...
-#4. Plot generation is very basic at this point(no lables, legends etc.), improve it! 
-#5. Decision making about PASS/FAIL should have multiple criteria
+# Build phase               : Change the user_nl* files to add perturbations and
+#                             other flags
+# Run phase                 : Rename history files
+# Baselines generation phase: Compute cloud, store in netcdf file, copy netcdf
+#                             file to baseline folder
+# Baseline Comparison phase : Compare against baseline cloud to know pass/fail,
+#                             and plot (optional)
+
+
+# TODO:
+# 1. The loop to change user_nl* files is run twice as we call build again after
+#    changing ninst, which changes ntasks
+# 2. Do we want to remove user_nl_cam, user_nl_clm etc. files as they are not
+#    used in the simulation?
+# 3. change code so that pergro_ptend_names.txt is not generated if it is
+#    already there or only one instance writes this file...
+# 4. Plot generation is very basic at this point(no labels, legends etc.),
+#    improve it!
+# 5. Decision making about PASS/FAIL should have multiple criteria
 
 logger = logging.getLogger(__name__)
 
-#--------------------------------------------------------
-#Variables which needs global scope for various functions
-#--------------------------------------------------------
+# --------------------------------------------------------
+# Variables which needs global scope for various functions
+# --------------------------------------------------------
 
-#number of inital conditions
-NINIT_COND = 6 #12
-        
-#perturbations for runs
-PRT        = [0.0, 1.0e-14, -1.0e-14]
-PRTSTR     = ['woprt','posprt','negprt']
+# number of initial conditions
+NINIT_COND = 6  # 12
 
-#file name for file containing PGE cloud
-FCLD_NC  = 'cam.h0.cloud.nc'
+# perturbations for runs
+PRT = [0.0, 1.0e-14, -1.0e-14]
+PRTSTR = ['woprt', 'posprt', 'negprt']
 
-#For preparing paths for namelist files for initial condition files
+# file name for file containing PGE cloud
+FCLD_NC = 'cam.h0.cloud.nc'
+
+# For preparing paths for namelist files for initial condition files
 FILE_PREF_ATM = "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.cam.i.0002-"
 FILE_PREF_LND = "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.clm2.r.0002-"
 
 FILE_SUF_ATM = "-01-00000.nc"
 FILE_SUF_LND = "-01-00000.nc"
 
-#------------------------------------------------------------
+# ------------------------------------------------------------
 # Some flags for debugging or invoking extra features
-#------------------------------------------------------------
+# ------------------------------------------------------------
 
-#DEBUGGING: prints out max rmse diffs if set to True
-INDEX = False 
+# DEBUGGING: prints out max rmse diffs if set to True
+INDEX = False
 
 # generate a plot
-DOPLOT = False  #It impacts performance!
+DOPLOT = False  # It impacts performance!
+
 
 class PGN(SystemTestsCommon):
 
@@ -85,100 +92,103 @@ def __init__(self, case):
         initialize an object interface to the PGN test
         """
 
-        #perturbation values and number of perturbation strings should be same
-        expect(len(PRT)== len(PRTSTR),"Number of perturbation values ("+str(len(PRT))+") are NOT " 
-               "equal to number of perturbation strings("+ str(len(PRTSTR))+")")
+        # perturbation values and number of perturbation strings should be same
+        expect(len(PRT) == len(PRTSTR),
+               "Number of perturbation values ({}) are NOT equal to number of "
+               "perturbation strings({})".format(len(PRT), len(PRTSTR)))
         SystemTestsCommon.__init__(self, case)
 
-    #=================================================================
+    # =================================================================
     # Compile model with multiple instances
-    #=================================================================
+    # =================================================================
     def build_phase(self, sharedlib_only=False, model_only=False):
 
-        #------------------------------------------------------
-        #Compute number of instances:
-        #------------------------------------------------------
+        # ------------------------------------------------------
+        # Compute number of instances:
+        # ------------------------------------------------------
 
-        #------------------------------------------------------
-        #Number of instances:
-        #~~~~~~~~~~~~~~~~~~~
-        #Comput it from number of initial conditions to use and 
-        #number of perturbation ensemble members to use
-        #------------------------------------------------------
-        nprt       = len(PRT)
-        ninst      = NINIT_COND * nprt
+        # ------------------------------------------------------
+        # Number of instances:
+        # ~~~~~~~~~~~~~~~~~~~
+        # Compute it from number of initial conditions to use and
+        # number of perturbation ensemble members to use
+        # ------------------------------------------------------
+        nprt = len(PRT)
+        ninst = NINIT_COND * nprt
 
         logger.debug('PGN_INFO: number of instance: '+str(ninst))
-        
-        #------------------------------------------------------
-        #Fake Build:
-        #~~~~~~~~~~
-        #(for debugging only) Building the model can take 
-        #significant amount of time. Setting fake_bld to True 
-        #can savethat time
-        #------------------------------------------------------
+
+        # ------------------------------------------------------
+        # Fake Build:
+        # ~~~~~~~~~~
+        # (for debugging only) Building the model can take
+        # significant amount of time. Setting fake_bld to True
+        # can save that time
+        # ------------------------------------------------------
         fake_bld = False
 
-        #Find number of instance in the default setup
+        # Find number of instance in the default setup
         default_ninst = self._case.get_value("NINST_ATM")
-        
-        #Sanity check: see if NINST is same for all model components, otherwise exit with error
-        for comp in ['OCN','WAV','GLC','ICE','ROF','LND']:
-            iinst = self._case.get_value("NINST_%s"%comp)
-            expect(default_ninst == iinst, "ERROR: component "+str(comp)+" NINST("+str(iinst)+")"
-                   " is different from component ATM NINST("+str(default_ninst)+")")
-
-        #------------------------------------------------------
-        #Setup multi-instances for model components:
-        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-        #Set the model for multi-instance ONLY if NINST == 1 
-        #for all model components. This is because, for 
-        #NINST > 1 (e.g. rebuilding an old case) the following 
-        #loop will increase the ntasks to a multiple of ninst 
-        #(requiring a clean build again). We hit this issue if 
-        #we launch ./case.build in the case directory of PGN 
-        #test
-        #------------------------------------------------------
-
-        if(default_ninst == 1): #if multi-instance is not already set
+
+        # Sanity check: see if NINST is same for all model components, otherwise
+        # exit with error
+        for comp in ['OCN', 'WAV', 'GLC', 'ICE', 'ROF', 'LND']:
+            iinst = self._case.get_value("NINST_{}".format(comp))
+            expect(default_ninst == iinst,
+                   "ERROR: component {}  NINST({}) is different from  component"
+                   " ATM NINST({}).".format(comp, iinst, default_ninst))
+
+        # ------------------------------------------------------
+        # Setup multi-instances for model components:
+        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+        # Set the model for multi-instance ONLY if NINST == 1
+        # for all model components. This is because, for
+        # NINST > 1 (e.g. rebuilding an old case) the following
+        # loop will increase the ntasks to a multiple of ninst
+        # (requiring a clean build again). We hit this issue if
+        # we launch ./case.build in the case directory of PGN
+        # test
+        # ------------------------------------------------------
+
+        if default_ninst == 1:  # if multi-instance is not already set
             # Only want this to happen once. It will impact the sharedlib build
             # so it has to happen here.
             if not model_only:
                 # Lay all of the components out concurrently
-                logger.debug("PGN_INFO: Updating NINST for multi-instance in env_mach_pes.xml")
-                for comp in ['ATM','OCN','WAV','GLC','ICE','ROF','LND']:
-                    ntasks = self._case.get_value("NTASKS_%s"%comp)
-                    self._case.set_value("ROOTPE_%s"%comp, 0)
-                    self._case.set_value("NINST_%s"%comp,  ninst)
-                    self._case.set_value("NTASKS_%s"%comp, ntasks*ninst)
-
-                self._case.set_value("ROOTPE_CPL",0)
-                self._case.set_value("NTASKS_CPL",ntasks*ninst)
+                logger.debug("PGN_INFO: Updating NINST for multi-instance in "
+                             "env_mach_pes.xml")
+                for comp in ['ATM', 'OCN', 'WAV', 'GLC', 'ICE', 'ROF', 'LND']:
+                    ntasks = self._case.get_value("NTASKS_{}".format(comp))
+                    self._case.set_value("ROOTPE_{}".format(comp), 0)
+                    self._case.set_value("NINST_{}".format(comp),  ninst)
+                    self._case.set_value("NTASKS_{}".format(comp), ntasks*ninst)
+
+                self._case.set_value("ROOTPE_CPL", 0)
+                self._case.set_value("NTASKS_CPL", ntasks*ninst)
                 self._case.flush()
 
                 case_setup(self._case, test_mode=False, reset=True)
 
-        #Faking a bld can save the time code spend in building the model components
+        # Faking a bld can save the time code spend in building the model components
         if fake_bld:
             logger.debug("PGN_INFO: FAKE Build")
-            if (not sharedlib_only):
+            if not sharedlib_only:
                 post_build(self._case, [])
         else:
-            # Build exectuable with multiple instances
+            # Build executable with multiple instances
             self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only)
 
-
-        #----------------------------------------------------------------
+        # ----------------------------------------------------------------
         # Namelist settings:
-        #~~~~~~~~~~~~~~~~~~
+        # ~~~~~~~~~~~~~~~~~~
         # Do this already in build_phase so that we can check the xml and
         # namelist files before job starts running.
-        #----------------------------------------------------------------
+        # ----------------------------------------------------------------
         logger.debug("PGN_INFO: Updating user_nl_* files")
 
         csmdata_root = self._case.get_value("DIN_LOC_ROOT")
-        csmdata_atm  = csmdata_root+"/atm/cam/inic/homme/ne4_v1_init/"
-        csmdata_lnd  = csmdata_root+"/lnd/clm2/initdata/ne4_v1_init/b58d55680/"
+        csmdata_atm = csmdata_root+"/atm/cam/inic/homme/ne4_v1_init/"
+        csmdata_lnd = csmdata_root+"/lnd/clm2/initdata/ne4_v1_init/b58d55680/"
 
         iinst = 1
         for icond in range(NINIT_COND):
@@ -188,19 +198,24 @@ def build_phase(self, sharedlib_only=False, model_only=False):
             for iprt in PRT:
                 with open('user_nl_cam_'+str(iinst).zfill(4), 'w') as atmnlfile, \
                         open('user_nl_clm_'+str(iinst).zfill(4), 'w') as lndnlfile:
-                    
-                    #atm/lnd intitial conditions                   
-                    
-                    #initial condition files to use for atm and land
-                    #atmnlfile.write("ncdata  = '"+ "/pic/projects/uq_climate/wanh895/acme_input/ne4_v1_init/" + fatm_in+"' \n")
-                    #lndnlfile.write("finidat = '"+ "/pic/projects/uq_climate/wanh895/acme_input/ne4_v1_init/" + flnd_in+"' \n")
-                    
-                    #uncomment the following when there files are on SVN server
-                    atmnlfile.write("ncdata  = '"+ csmdata_atm + "/" + fatm_in+"' \n")
-                    lndnlfile.write("finidat = '"+ csmdata_lnd + "/" + flnd_in+"' \n")
-                    
-                    
-                    #atm model output
+
+                    # atm/lnd intitial conditions
+
+                    # initial condition files to use for atm and land
+                    # atmnlfile.write("ncdata  = '/pic/projects/uq_climate/"
+                    #                 "wanh895/acme_input/ne4_v1_init/{}'"
+                    #                 " \n".format(fatm_in))
+                    # lndnlfile.write("finidat = '/pic/projects/uq_climate/"
+                    #                 "wanh895/acme_input/ne4_v1_init/{}'"
+                    #                 " \n".format(flnd_in))
+
+                    # uncomment the following when there files are on SVN server
+                    atmnlfile.write("ncdata  = '{}/{}' \n".format(csmdata_atm,
+                                                                  fatm_in))
+                    lndnlfile.write("finidat = '{}/{}' \n".format(csmdata_lnd,
+                                                                  flnd_in))
+
+                    # atm model output
                     atmnlfile.write("avgflag_pertape = 'I' \n")
                     atmnlfile.write("nhtfrq = 1 \n")
                     atmnlfile.write("mfilt  = 2  \n")
@@ -208,271 +223,286 @@ def build_phase(self, sharedlib_only=False, model_only=False):
                     atmnlfile.write("pergro_mods  = .true. \n")
                     atmnlfile.write("pergro_test_active = .true. \n")
 
-                    #atmnlfile.write("empty_htapes = .true. \n")
-                    #atmnlfile.write("fincl1 = 'PS','U','V','T','Q','CLDLIQ','CLDICE','NUMLIQ','NUMICE','num_a1','num_a2','num_a3','LANDFRAC' \n")
-                    #atmnlfile.write("phys_debug_lat = 41.3495891345")
-                    #atmnlfile.write("phys_debug_lon = 45.0" )
-                
-                    if(iprt != 0.0):
-                        atmnlfile.write("pertlim = "+str(iprt)+" \n")
-                        
-                    iinst += 1
+                    # atmnlfile.write("empty_htapes = .true. \n")
+                    # atmnlfile.write("fincl1 = 'PS','U','V','T','Q','CLDLIQ',"
+                    #                 "'CLDICE','NUMLIQ','NUMICE','num_a1',"
+                    #                 "'num_a2','num_a3','LANDFRAC' \n")
+                    # atmnlfile.write("phys_debug_lat = 41.3495891345")
+                    # atmnlfile.write("phys_debug_lon = 45.0" )
+
+                    if iprt != 0.0:
+                        atmnlfile.write("pertlim = {} \n".format(iprt))
 
-        #--------------------------------
-        #Settings common to all instances
-        #--------------------------------
+                    iinst += 1
 
-        #Coupler settings which are applicable to ALL the instances (as there is only one coupler for all instances)
-        self._case.set_value("STOP_N",     "1")
-        self._case.set_value("STOP_OPTION","nsteps")
+        # --------------------------------
+        # Settings common to all instances
+        # --------------------------------
 
+        # Coupler settings which are applicable to ALL the instances (as there is
+        # only one coupler for all instances)
+        self._case.set_value("STOP_N", "1")
+        self._case.set_value("STOP_OPTION", "nsteps")
 
-    #===========================================================
+    # ===========================================================
     # Some user defined functions to avoid repeated calculations
-    #===========================================================
+    # ===========================================================
 
-    def get_fname_wo_ext(self,rundir,casename,iinst):
+    def get_fname_wo_ext(self, rundir, casename, iinst):
         """
         construct file name given the input
         """
-        #form file name withOUT extension
-        tstmp    = '00000'
-        date_str = '.h0.0001-01-01-'
-        cam_str  = 'cam_'
-        if(casename != "" ):
-            cam_str = '.'+cam_str
-        return os.path.join(rundir,casename+cam_str+str(iinst).zfill(4)+date_str+tstmp)
+        # form file name withOUT extension
+        if casename != "":
+            casename += '.'
+        return os.path.join(rundir,
+                            '{}cam_{:04d}.h0.0001-01-01-00000'.format(casename,
+                                                                      iinst))
 
     def get_var_list(self):
         """
         Get variable list for pergro specific output vars
         """
-        rundir    = self._case.get_value("RUNDIR")
-        casename  = self._case.get_value("CASE")    
+        rundir = self._case.get_value("RUNDIR")
         prg_fname = 'pergro_ptend_names.txt'
-        var_file = os.path.join(rundir,prg_fname)
-        expect(os.path.isfile(var_file),"File "+prg_fname+" does not exist in: "+rundir)
+        var_file = os.path.join(rundir, prg_fname)
+        expect(os.path.isfile(var_file),
+               "File {} does not exist in: {}".format(prg_fname, rundir))
 
         with open(var_file, 'r') as fvar:
             var_list = fvar.readlines()
-                    
-        return  map(str.strip,var_list)
 
-    def nc_write_handle(self,fname_nc,rmse_nc_var):
+        return map(str.strip, var_list)
+
+    def nc_write_handle(self, fname_nc, rmse_nc_var):
         """
         Opens and write netcdf file for PGE curves
         This function is here purely to avoid duplicate 
         codes so that it is easy to maintain code longterm        
         """
 
-        fhndl = Dataset(fname_nc,'w', format='NETCDF4')
+        fhndl = Dataset(fname_nc, 'w', format='NETCDF4')
 
-        var_list     = self.get_var_list()
+        var_list = self.get_var_list()
         len_var_list = len(var_list)
-        nprt         = len(PRT)
+        nprt = len(PRT)
 
-        #create dims
+        # create dims
         fhndl.createDimension('ninit', NINIT_COND)
         fhndl.createDimension('nprt', nprt)
         fhndl.createDimension('nprt_m1', nprt-1)
         fhndl.createDimension('nvars', len_var_list)
 
-
-        #create variables in the file
+        # create variables in the file
         init_cond_nc = fhndl.createVariable('init_cond_fnames', 'S100', 'ninit')
-        prt_nc       = fhndl.createVariable('perturb_strings', 'S10', 'nprt')
+        prt_nc = fhndl.createVariable('perturb_strings', 'S10', 'nprt')
         variables_nc = fhndl.createVariable('perturb_vnames', 'S20', 'nvars')
-        rmse_nc      = fhndl.createVariable(rmse_nc_var, 'f8', ('ninit', 'nprt_m1', 'nvars'))
+        rmse_nc = fhndl.createVariable(rmse_nc_var, 'f8', ('ninit', 'nprt_m1', 'nvars'))
 
-
-        #assign variables for writing to the netcdf file
+        # assign variables for writing to the netcdf file
         for iprt in range(nprt):
-            prt_nc[iprt]       = PRTSTR[iprt]
+            prt_nc[iprt] = PRTSTR[iprt]
 
         for ivar in range(len_var_list):
             variables_nc[ivar] = var_list[ivar]
 
         for icond in range(NINIT_COND):
-            icond_label_2digits = str(icond+1).zfill(2)
-            init_cond_nc[icond] =  FILE_PREF_ATM + icond_label_2digits + FILE_SUF_ATM
+            icond_label_2digits = '{:02d}'.format(icond+1)
+            init_cond_nc[icond] = FILE_PREF_ATM + icond_label_2digits + FILE_SUF_ATM
 
         return fhndl, rmse_nc
-        
-    def rmse_var(self,ifile_test, ifile_cntl,  var_list, var_suffix ):
+
+    def rmse_var(self, ifile_test, ifile_cntl,  var_list, var_suffix):
         """
         Compute RMSE difference between ifile_test and ifile_cntl for
         variables listed in var_list
         """
-        
-        #------------------ARGS-------------------------
-        #ifile_test: path of test file
-        #ifile_cntl: path of control file
-        #var_list  : List of all variables
-        #var_suffix: Suffix for var_list (e.g. t_, t_ qv_ etc.)
-        #-----------------------------------------------
+
+        # ------------------ARGS-------------------------
+        # ifile_test: path of test file
+        # ifile_cntl: path of control file
+        # var_list  : List of all variables
+        # var_suffix: Suffix for var_list (e.g. t_, t_ qv_ etc.)
+        # -----------------------------------------------
 
         # See if the files exists or not....
-        expect(os.path.isfile(ifile_test), "ERROR: Test file "+ifile_test+" does not exist")
+        expect(os.path.isfile(ifile_test),
+               "ERROR: Test file {} does not exist".format(ifile_test))
         expect(os.path.isfile(ifile_cntl), "ERROR: CNTL file "+ifile_cntl+" does not exist")
-        
+
         ftest = Dataset(ifile_test,  mode='r')
         fcntl = Dataset(ifile_cntl, mode='r')
 
-        #if max RMSE/DIFF is to be printed, extract lat lons from a file
-        if(INDEX):
-            lat = ftest.variables['lat']
-            lon = ftest.variables['lon']
-        
+        # if max RMSE/DIFF is to be printed, extract lat lons from a file
+        lat = ftest.variables['lat']
+        lon = ftest.variables['lon']
+
         rmse = np.zeros(shape=(len(var_list)))
         icntvar = 0
-        
-        is_se = (len(ftest.variables[var_suffix+var_list[icntvar]].dimensions)==3) # see if it is SE grid
-        
+
+        dims = len(ftest.variables[var_suffix+var_list[icntvar]].dimensions)
         for ivar in var_list:
             var = var_suffix+ivar
             if var in ftest.variables:
-                vtest    = ftest.variables[var.strip()][0,...] # first dimention is time (=0)
-                vcntl    = fcntl.variables[var.strip()][0,...] # first dimention is time (=0)
+                vtest = ftest.variables[var.strip()][0, ...]  # first dimension is time (=0)
+                vcntl = fcntl.variables[var.strip()][0, ...]  # first dimension is time (=0)
 
-                #reshape for RMSE
-                if(is_se ):
-                    nx, ny = vtest.shape #shape will be same for both arrays
+                # reshape for RMSE
+                if dims == 3:  # see if it is SE grid
+                    nx, ny = vtest.shape  # shape will be same for both arrays
                     nz = 1
                 else:
-                    nx, ny, nz = vtest.shape #shape will be same for both arrays
+                    nx, ny, nz = vtest.shape  # shape will be same for both arrays
 
-                rmse[icntvar]  = sqrt(mean_squared_error(vtest.reshape((nx,ny*nz)), vcntl.reshape((nx,ny*nz))))
+                rmse[icntvar] = sqrt(mean_squared_error(
+                        vtest.reshape((nx, ny*nz)), vcntl.reshape((nx, ny*nz))))
 
-                if(INDEX):
-                    #NEED to work on formatting this....
+                if INDEX:
+                    # NEED to work on formatting this....
                     diff_arr = abs(vtest[...] - vcntl[...])
                     max_diff = np.amax(diff_arr)
-                    ind_max = np.unravel_index(diff_arr.argmax(),diff_arr.shape)
+                    ind_max = np.unravel_index(diff_arr.argmax(),
+                                               diff_arr.shape)
                     print(ind_max)
-                    print_str = var+' '+str(max_diff)+' '+str(vtest[ind_max])+' ' +str(vcntl[ind_max])+' '+str(lat[ind_max[1]])+' '+ str(lon[ind_max[1]])+' '+str(ind_max[0])
-                    print("{}".format(print_str))
-                    #print('{0:15}{1:15}{2:15}\n'.format(name, a, b))#TRY THIS!!
-
-
-                #normalize by mean values of the field in the control case
+                    print("{} {} {} {} {} {} {}".format(var,
+                                                        max_diff,
+                                                        vtest[ind_max],
+                                                        vcntl[ind_max],
+                                                        lat[ind_max[1]],
+                                                        lon[ind_max[1]],
+                                                        ind_max[0]))
+                    # print('{0:15}{1:15}{2:15}\n'.format(name, a, b))  # TRY THIS!!
+
+                # normalize by mean values of the field in the control case
                 rmse[icntvar] = rmse[icntvar]/np.mean(vcntl)
                 icntvar += 1
-                vtest = None
-                vcntl = None
-                var   = None
+                # vtest = None
+                # vcntl = None
+                # var   = None
+
         ftest.close()
         fcntl.close()
+
         return rmse
 
     def _compare_baseline(self):
-        
-        #import time
-        #t0 = time.time()
         """
         Compare baselines in the pergro test sense. That is,
         compare PGE from the test simulation with the baseline 
         cloud
         """
+        # import time
+        # t0 = time.time()
+
         logger.debug("PGN_INFO:BASELINE COMPARISON STARTS")
 
-        rundir    = self._case.get_value("RUNDIR")
-        casename  = self._case.get_value("CASE") 
+        rundir = self._case.get_value("RUNDIR")
+        casename = self._case.get_value("CASE")
 
         var_list = self.get_var_list()
         len_var_list = len(var_list)
-        nprt         = len(PRT)
+        nprt = len(PRT)
 
-        #baseline directory names
+        # baseline directory names
         base_root = self._case.get_value("BASELINE_ROOT")
-        base_comp = self._case.get_value("BASECMP_CASE")        
+        base_comp = self._case.get_value("BASECMP_CASE")
 
-        #baseline directory is:base_root/base_comp
-        base_dir = os.path.join(base_root,base_comp)
+        # baseline directory is:base_root/base_comp
+        base_dir = os.path.join(base_root, base_comp)
 
-        #for test cases (new environment etc.)        
+        # for test cases (new environment etc.)
         logger.debug("PGN_INFO: Test case comparison...")
 
-        #---------------------------------------------
+        # ---------------------------------------------
         # Write netcdf file for comparison
-        #---------------------------------------------
+        # ---------------------------------------------
         fcomp_nc = 'comp_cld.nc'
-        fcomp_cld, comp_rmse_nc  = self.nc_write_handle(fcomp_nc,'comp_rmse')
+        fcomp_cld, comp_rmse_nc = self.nc_write_handle(fcomp_nc, 'comp_rmse')
 
         iinst = 0
         for icond in range(NINIT_COND):
-            iinst += 1;  iprt = 0
-            ifile_cntl = os.path.join(base_dir,self.get_fname_wo_ext('','',iinst)+'_woprt.nc')
+            iinst += 1
+            iprt = 0
+            ifile_cntl = os.path.join(base_dir,
+                                      '{}_woprt.nc'.format(
+                                              self.get_fname_wo_ext('', '', iinst))
+                                      )
             expect(os.path.isfile(ifile_cntl), "ERROR: File "+ifile_cntl+" does not exist")
             logger.debug("PGN_INFO:CNTL_TST:"+ifile_cntl)
 
             for aprt in PRTSTR[1:]:
-                iinst += 1;                
-                ifile_test = os.path.join(rundir,self.get_fname_wo_ext(rundir,casename,iinst)+'_'+aprt+'.nc')
-                expect(os.path.isfile(ifile_test), "ERROR: File "+ifile_test+" does not exist")
-                comp_rmse_nc[icond,iprt,0:len_var_list] = self.rmse_var(ifile_test, ifile_cntl,  var_list, 't_' )
-                logger.debug("PGN_INFO:Compared to TEST_TST:"+ifile_test)
+                iinst += 1
+                ifile_test = os.path.join(rundir, '{}_{}.nc'.format(
+                        self.get_fname_wo_ext(rundir, casename, iinst),
+                        aprt))
+                expect(os.path.isfile(ifile_test),
+                       "ERROR: File {} does not exist".format(ifile_test))
+                comp_rmse_nc[icond, iprt, 0:len_var_list] = self.rmse_var(
+                        ifile_test, ifile_cntl,  var_list, 't_')
+                logger.debug("PGN_INFO:Compared to TEST_TST:{}".format(ifile_test))
 
                 iprt += 1
-        
-        #--------------------------------------------
-        #Student's t-test
-        #-------------------------------------------
 
-        #Extract last element of each PGE curve of the cloud
-        path_cld_nc  = os.path.join(base_dir,FCLD_NC)
-        expect(os.path.isfile(path_cld_nc), "ERROR: "+FCLD_NC+"  does not exist at:"+path_cld_nc)
+        # --------------------------------------------
+        # Student's t-test
+        # -------------------------------------------
 
-        fcld         = Dataset(path_cld_nc,'r', format='NETCDF4')
+        # Extract last element of each PGE curve of the cloud
+        path_cld_nc = os.path.join(base_dir, FCLD_NC)
+        expect(os.path.isfile(path_cld_nc),
+               "ERROR: {} does not exist at:{}".format(FCLD_NC, path_cld_nc))
 
-        #Get dimentions of cloud PGE curves
-        ninic_cld = fcld.variables['cld_rmse'].shape[0]
-        nprt_cld  = fcld.variables['cld_rmse'].shape[1]
-        nvar_cld  = fcld.variables['cld_rmse'].shape[2]
+        fcld = Dataset(path_cld_nc, 'r', format='NETCDF4')
 
-        expect(ninic_cld == NINIT_COND, "BASELINE COMPARISON: Number of initial conditions should be same:" \
-               "inic cld:"+str(ninic_cld)+"  inic comp:"+str(NINIT_COND))
+        # Get dimentions of cloud PGE curves
+        ninic_cld = fcld.variables['cld_rmse'].shape[0]
+        nprt_cld = fcld.variables['cld_rmse'].shape[1]
+        nvar_cld = fcld.variables['cld_rmse'].shape[2]
 
-        expect(nprt_cld == nprt-1, "BASELINE COMPARISON: Number of perturbations should be same:" \
-               "prt cld:"+str(nprt_cld)+"  prt comp:"+str(nprt - 1))
+        expect(ninic_cld == NINIT_COND,
+               "BASELINE COMPARISON: Number of initial conditions should be same:" 
+               "inic cld:{}  inic comp:{}".format(ninic_cld, NINIT_COND))
 
-        expect(nvar_cld == len_var_list, "BASELINE COMPARISON: Number of phys update variables should be same:" \
-               "nvar cld:"+str(nvar_cld)+"  nvar comp:"+str(len_var_list))
+        expect(nprt_cld == nprt-1,
+               "BASELINE COMPARISON: Number of perturbations should be same:"
+               "prt cld:{}  prt comp:{}".format(nprt_cld, nprt - 1))
 
+        expect(nvar_cld == len_var_list,
+               "BASELINE COMPARISON: Number of phys update variables should be same:"
+               "nvar cld:{}  nvar comp:{}".format(nvar_cld, len_var_list))
 
-        pgecld =  fcld.variables['cld_rmse']
+        pgecld = fcld.variables['cld_rmse']
 
-        if (DOPLOT):
-            import pylab as pl            
-            #generate plot
+        if DOPLOT:
+            import pylab as pl
+            # generate plot
             ax = pl.gca()
             for icond in range(NINIT_COND):
                 for iprt in range(nprt-1):
-                    ax.semilogy(pgecld[icond,iprt,:],color='b')
-                    ax.semilogy(comp_rmse_nc[icond,iprt,:],color='r')
+                    ax.semilogy(pgecld[icond, iprt, :], color='b')
+                    ax.semilogy(comp_rmse_nc[icond, iprt, :], color='r')
             pl.savefig('plot_comp.png')
 
-        pge_ends_cld  = fcld.variables['cld_rmse'][:,:,len_var_list-1]
-        pge_ends_comp = comp_rmse_nc[:,0:nprt-1,len_var_list-1]
+        pge_ends_cld = fcld.variables['cld_rmse'][:, :, len_var_list-1]
+        pge_ends_comp = comp_rmse_nc[:, 0:nprt-1, len_var_list-1]
 
-        #run the t-test
+        # run the t-test
         pge_ends_cld = pge_ends_cld.flatten()
-        pge_ends_comp = pge_ends_comp.flatten()        
+        pge_ends_comp = pge_ends_comp.flatten()
 
         t_stat, p_val = stats.ttest_ind(pge_ends_cld, pge_ends_comp)
 
-        t_stat = abs(t_stat) #only value of t_stat matters, not the sign
+        t_stat = abs(t_stat)  # only value of t_stat matters, not the sign
 
         print("PGN_INFO: T value:"+str(t_stat))
         print("PGN_INFO: P value:"+str(p_val))
 
         logger.warn(" T value:"+str(t_stat))
         logger.warn(" P value:"+str(p_val))
-        
-        
-        #There should be multiple criteria to determin pass/fail
-        #This part of decision making should be more polished
 
-        if (t_stat > 3):
+        # There should be multiple criteria to determin pass/fail
+        # This part of decision making should be more polished
+        if t_stat > 3:
             with self._test_status:
                 self._test_status.set_status(BASELINE_PHASE, TEST_FAIL_STATUS)
             print('PGN_INFO:TEST FAIL')
@@ -480,144 +510,161 @@ def _compare_baseline(self):
             with self._test_status:
                 self._test_status.set_status(BASELINE_PHASE, TEST_PASS_STATUS)
             print('PGN_INFO:TEST PASS')
-                    
-        #Close this file after you access "comp_rmse_nc" variable
+
+        # Close this file after you access "comp_rmse_nc" variable
         fcomp_cld.close()
         logger.debug("PGN_INFO: POST PROCESSING PHASE ENDS")
-        
-        #t1 = time.time()
-        #print('time: '+str(t1-t0))
-    #=================================================================
+
+        # t1 = time.time()
+        # print('time: '+str(t1-t0))
+    # =================================================================
     # run_phase
-    #=================================================================
+    # =================================================================
     def run_phase(self):
         logger.debug("PGN_INFO: RUN PHASE")
-        
+
         self.run_indv()
 
-        #cwd = os.getcwd()
+        # cwd = os.getcwd()
 
-        #Here were are in case directory, we need to go to the run directory and rename files
-        rundir   = self._case.get_value("RUNDIR")
-        casename = self._case.get_value("CASE") 
-        logger.debug("PGN_INFO: Case name is:"+casename )
+        # Here were are in case directory, we need to go to the run directory
+        # and rename files
+        rundir = self._case.get_value("RUNDIR")
+        casename = self._case.get_value("CASE")
+        logger.debug("PGN_INFO: Case name is:{}".format(casename))
 
         iinst = 1
         for icond in range(NINIT_COND):
             for iprt in range(len(PRT)):
-                
-                #------------------------------------------------------
-                #SANITY CHECK - To confirm that history file extension
-                #~~~~~~~~~~~~
+
+                # ------------------------------------------------------
+                # SANITY CHECK - To confirm that history file extension
+                # ~~~~~~~~~~~~
                 # corresponds to the right perturbation
-                #------------------------------------------------------
-                #find corresponding atm_in_*
-                fatm_in = os.path.join(rundir,'atm_in_'+str(iinst).zfill(4))
-                #see if atm_in_* file has pertlim same as PRT[iprt] (sanity check)
-                found  = False
+                # ------------------------------------------------------
+                # find corresponding atm_in_*
+                fatm_in = os.path.join(rundir, 'atm_in_{:04d}'.format(iinst))
+                # see if atm_in_* file has pertlim same as PRT[iprt] (sanity check)
+                found = False
                 prtval = 0.0
                 with open(fatm_in) as atmfile:
                     for line in atmfile:
-                        if(line.find('pertlim') > 0):
+                        if line.find('pertlim') > 0:
                             found = True
                             prtval = float(line.split('=')[1])
-                            expect(prtval == PRT[iprt], "ERROR: prtval doesn't match, prtval:"+str(prtval)+"; prt["+str(iprt)+"]:"+str(PRT[iprt]))
-                            logger.debug("PGN_INFO:prtval:"+str(prtval)+"; prt["+str(iprt)+"]:"+str(PRT[iprt]))
-                
-                if(not found):
-                    expect(prtval == PRT[iprt], "ERROR: default prtval doesn't match, prtval:"+str(prtval)+"; prt["+str(iprt)+"]:"+str(PRT[iprt]))
-                    logger.debug("PGN_INFO:def prtval:"+str(prtval)+"; prt["+str(iprt)+"]:"+str(PRT[iprt]))
-                  
-                #---------------------------------------------------------
-                #Rename file
-                #---------------------------------------------------------
-
-                #form file name
-                fname = self.get_fname_wo_ext(rundir,casename,iinst) #NOTE:without extension
-                logger.debug("PGN_INFO: fname to rename:"+fname )
-                
-                renamed_fname = fname +'_'+PRTSTR[iprt]+'.nc'
-                fname_w_ext   = fname + '.nc' #add extention
-
-                #see if file exists
-                if (os.path.isfile(fname_w_ext)):
-                    #rename file
-                    shutil.move(fname_w_ext,renamed_fname)
-                    logger.debug("PGN_INFO: Renamed file:"+renamed_fname)
+                            expect(prtval == PRT[iprt],
+                                   "ERROR: prtval doesn't match, "
+                                   "prtval:{}; prt[{}]:{}".format(
+                                           prtval, iprt, PRT[iprt]))
+                            logger.debug("PGN_INFO:prtval:{}; prt[{}]:{}".format(
+                                    prtval, iprt, PRT[iprt]))
+
+                if not found:
+                    expect(prtval == PRT[iprt],
+                           "ERROR: default prtval doesn't match, "
+                           "prtval:{}; prt[{}]:{}".format(prtval, iprt, PRT[iprt]))
+                    logger.debug("PGN_INFO:def prtval:{}; prt[{}]:{}".format(
+                            prtval, iprt, PRT[iprt]))
+
+                # ---------------------------------------------------------
+                # Rename file
+                # ---------------------------------------------------------
+
+                # form file name
+                fname = self.get_fname_wo_ext(rundir, casename, iinst)  # NOTE:without extension
+                logger.debug("PGN_INFO: fname to rename:{}".format(fname))
+
+                renamed_fname = '{}_{}.nc'.format(fname, PRTSTR[iprt])
+                fname_w_ext = '{}.nc'.format(fname)  # add extention
+
+                # see if file exists
+                if os.path.isfile(fname_w_ext):
+                    # rename file
+                    shutil.move(fname_w_ext, renamed_fname)
+                    logger.debug("PGN_INFO: Renamed file:{}".format(renamed_fname))
                 else:
-                    expect(os.path.isfile(renamed_fname), "ERROR: File "+renamed_fname+" does not exist")
-                    logger.debug("PGN_INFO: Renamed file already exists:"+renamed_fname)
-                
+                    expect(os.path.isfile(renamed_fname),
+                           "ERROR: File {} does not exist".format(renamed_fname))
+                    logger.debug("PGN_INFO: Renamed file already exists:"
+                                 "{}".format(renamed_fname))
+
                 iinst += 1
 
-        #cloud generation
+        # cloud generation
 
         logger.debug("PGN_INFO: cloud generation-gen base")
-            
-        var_list     = self.get_var_list()
+
+        var_list = self.get_var_list()
         len_var_list = len(var_list)
-        nprt         = len(PRT)
-        
-        #for trusted cloud sims
+        # nprt = len(PRT)
+
+        # for trusted cloud sims
         logger.debug("PGN_INFO: Computing cloud")
-            
-        cld_res = np.empty([len_var_list])
-            
-        #---------------------------------------------
+
+        # cld_res = np.empty([len_var_list])
+
+        # ---------------------------------------------
         # Write netcdf file for cloud in rundir
-        #---------------------------------------------
+        # ---------------------------------------------
         os.chdir(rundir)
-        fcld, cld_rmse_nc = self.nc_write_handle(FCLD_NC,'cld_rmse')
+        fcld, cld_rmse_nc = self.nc_write_handle(FCLD_NC, 'cld_rmse')
 
         iinst = 0
         for icond in range(NINIT_COND):
-            iinst += 1;  iprt = 0
-            ifile_cntl = os.path.join(rundir,self.get_fname_wo_ext('',casename,iinst)+'_'+PRTSTR[iprt]+'.nc')
-            expect(os.path.isfile(ifile_cntl), "ERROR: File "+ifile_cntl+" does not exist")
-            logger.debug("PGN_INFO:CNTL_CLD:"+ifile_cntl)            
-            
+            iinst += 1
+            iprt = 0
+            ifile_cntl = os.path.join(rundir, '{}_{}.nc'.format(
+                    self.get_fname_wo_ext('', casename, iinst), PRTSTR[iprt]))
+            expect(os.path.isfile(ifile_cntl),
+                   "ERROR: File {} does not exist".format(ifile_cntl))
+            logger.debug("PGN_INFO:CNTL_CLD:{}".format(ifile_cntl))
+
             for aprt in PRTSTR[1:]:
-                iinst += 1;
-                iprt = PRTSTR.index(aprt) - 1 # subtracted 1 as we will only get two curves for each inic (wo-pos and wo-neg)
-                ifile_test = os.path.join(rundir,self.get_fname_wo_ext('',casename,iinst)+'_'+aprt+'.nc')
-                expect(os.path.isfile(ifile_test), "ERROR: File "+ifile_test+" does not exist")
-                cld_rmse_nc[icond,iprt,0:len_var_list] = self.rmse_var(ifile_test, ifile_cntl,  var_list, 't_' )
-                logger.debug("PGN_INFO:Compared to CLD:"+ifile_test)
+                iinst += 1
+                # subtracted 1 as we will only get two curves for each inic
+                # (wo-pos and wo-neg)
+                iprt = PRTSTR.index(aprt) - 1
+                ifile_test = os.path.join(rundir, '{}_{}.nc'.format(
+                        self.get_fname_wo_ext('', casename, iinst), aprt))
+                expect(os.path.isfile(ifile_test),
+                       "ERROR: File {} does not exist".format(ifile_test))
+                cld_rmse_nc[icond, iprt, 0:len_var_list] = self.rmse_var(
+                        ifile_test, ifile_cntl,  var_list, 't_')
+                logger.debug("PGN_INFO:Compared to CLD:{}".format(ifile_test))
 
         fcld.close()
 
-        if(self._case.get_value("GENERATE_BASELINE")):
-            
-            #baseline directory names
+        if self._case.get_value("GENERATE_BASELINE"):
+
+            # baseline directory names
             base_root = self._case.get_value("BASELINE_ROOT")
-            base_gen  = self._case.get_value("BASEGEN_CASE")
-        
-            #baseline directory is:base_root/base_gen
-            base_dir = os.path.join(base_root,base_gen)
+            base_gen = self._case.get_value("BASEGEN_CASE")
 
-            #first copy files to the baseline directory
-            self._generate_baseline()#BALLI-CHECK IF THIS IS OKAY
+            # baseline directory is:base_root/base_gen
+            base_dir = os.path.join(base_root, base_gen)
 
-            #copy cloud.nc file to baseline directory
-            logger.debug("PGN_INFO:copy:"+FCLD_NC+" to "+base_dir)
-            shutil.copy(FCLD_NC,base_dir)
+            # first copy files to the baseline directory
+            self._generate_baseline()  # BALLI-CHECK IF THIS IS OKAY
 
-        logger.debug("PGN_INFO: RUN PHASE ENDS" )
+            # copy cloud.nc file to baseline directory
+            logger.debug("PGN_INFO:copy:{} to {}".format(FCLD_NC, base_dir))
+            shutil.copy(FCLD_NC, base_dir)
 
+        logger.debug("PGN_INFO: RUN PHASE ENDS")
 
-#=====================================================
-#Debugging:
-#=====================================================
+# =====================================================
+# Debugging:
+# =====================================================
 
-#-----------------------------------------------------
-#DEBUG type 1 (DB1): Ensure that model produces BFB instances
-#-----------------------------------------------------
-##Replace the following lines in the script with the following updated values
-#PRT        = [0.0, 0.0] #DB1
-#PRTSTR     = ['woprt','posprt']
+# -----------------------------------------------------
+# DEBUG type 1 (DB1): Ensure that model produces BFB instances
+# -----------------------------------------------------
+# Replace the following lines in the script with the following updated values
+# PRT = [0.0, 0.0]  # DB1
+# PRTSTR = ['woprt', 'posprt']
 
-## Comment out all namelist changes so that all instances have same namelist values
-## For testing effect of namelist changes, uncomment namlist changes such that
-## namelists are same for all instances
+# Comment out all namelist changes so that all instances have same namelist values
+# For testing effect of namelist changes, uncomment namelist changes such that
+# namelists are same for all instances
 
-## Comment out sanity checks as well if needed....
+# Comment out sanity checks as well if needed....

From eea675df1b07896037a14d267d39ce94410ab64f Mon Sep 17 00:00:00 2001
From: Joseph H Kennedy <me@jhkennedy.org>
Date: Tue, 21 May 2019 13:56:54 -0400
Subject: [PATCH 3/8] Minor updates for MVK system tests

---
 cime/scripts/lib/CIME/SystemTests/mvk.py | 26 +++++++++++++-----------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/cime/scripts/lib/CIME/SystemTests/mvk.py b/cime/scripts/lib/CIME/SystemTests/mvk.py
index 4d5fa27df9e3..c4a828ca98a3 100644
--- a/cime/scripts/lib/CIME/SystemTests/mvk.py
+++ b/cime/scripts/lib/CIME/SystemTests/mvk.py
@@ -22,7 +22,7 @@
 import CIME.test_status
 
 import evv4esm  # pylint: disable=import-error
-from evv4esm.__main__ import main as evv # pylint: disable=import-error
+from evv4esm.__main__ import main as evv  # pylint: disable=import-error
 
 evv_lib_dir = os.path.abspath(os.path.dirname(evv4esm.__file__))
 logger = logging.getLogger(__name__)
@@ -168,15 +168,17 @@ def _compare_baseline(self):
                 # This is here because the comparison is run for each submission
                 # and we only want to compare once the whole run is finished. We
                 # need to return a pass here to continue the submission process.
-                self._test_status.set_status(CIME.test_status.BASELINE_PHASE, CIME.test_status.TEST_PASS_STATUS)
+                self._test_status.set_status(CIME.test_status.BASELINE_PHASE,
+                                             CIME.test_status.TEST_PASS_STATUS)
                 return
 
-            self._test_status.set_status(CIME.test_status.BASELINE_PHASE, CIME.test_status.TEST_FAIL_STATUS)
+            self._test_status.set_status(CIME.test_status.BASELINE_PHASE,
+                                         CIME.test_status.TEST_FAIL_STATUS)
 
             run_dir = self._case.get_value("RUNDIR")
             case_name = self._case.get_value("CASE")
-            basecmp_case = self._case.get_value("BASECMP_CASE")
-            base_dir = os.path.join(self._case.get_value("BASELINE_ROOT"), basecmp_case)
+            base_dir = os.path.join(self._case.get_value("BASELINE_ROOT"),
+                                    self._case.get_value("BASECMP_CASE"))
 
             test_name = "{}".format(case_name.split('.')[-1])
             evv_config = {
@@ -195,16 +197,16 @@ def _compare_baseline(self):
             with open(json_file, 'w') as config_file:
                 json.dump(evv_config, config_file, indent=4)
 
-            evv_out_dir = os.path.join(run_dir, '.'.join([case_name, 'eve']))
+            evv_out_dir = os.path.join(run_dir, '.'.join([case_name, 'evv']))
             evv(['-e', json_file, '-o', evv_out_dir])
 
             with open(os.path.join(evv_out_dir, 'index.json'), 'r') as evv_f:
                 evv_status = json.load(evv_f)
 
             for evv_elem in evv_status['Data']['Elements']:
-                if evv_elem['Type'] == 'ValSummary':
-                    if evv_elem['TableTitle'] == 'Kolmogorov-Smirnov':
-                        if evv_elem['Data'][test_name]['']['Ensembles'] == 'identical':
-                            self._test_status.set_status(CIME.test_status.BASELINE_PHASE,
-                                                         CIME.test_status.TEST_PASS_STATUS)
-                            break
+                if evv_elem['Type'] == 'ValSummary' \
+                        and evv_elem['TableTitle'] == 'Kolmogorov-Smirnov':
+                    if evv_elem['Data'][test_name]['']['Ensembles'] == 'identical':
+                        self._test_status.set_status(CIME.test_status.BASELINE_PHASE,
+                                                     CIME.test_status.TEST_PASS_STATUS)
+                        break

From cefa6ac69f251d8cd7db5c122e3360832fe8aa5f Mon Sep 17 00:00:00 2001
From: Joseph H Kennedy <me@jhkennedy.org>
Date: Tue, 21 May 2019 13:58:41 -0400
Subject: [PATCH 4/8] Partial refactor of PGN system test

---
 cime/scripts/lib/CIME/SystemTests/pgn.py | 539 +++++++----------------
 1 file changed, 151 insertions(+), 388 deletions(-)

diff --git a/cime/scripts/lib/CIME/SystemTests/pgn.py b/cime/scripts/lib/CIME/SystemTests/pgn.py
index 6fd0bcfb07f0..1c7b07d82757 100644
--- a/cime/scripts/lib/CIME/SystemTests/pgn.py
+++ b/cime/scripts/lib/CIME/SystemTests/pgn.py
@@ -7,23 +7,27 @@
 
 """
 
+from __future__ import division
 
+import os
+import re
+import json
 import shutil
-import numpy as np
-import scipy.stats as stats
+import logging
 
+from collections import OrderedDict
 
-from netCDF4 import Dataset
-from math import sqrt
-from sklearn.metrics import mean_squared_error
-
-from CIME.test_status import *
-from CIME.XML.standard_module_setup import *
+import CIME.test_status
 from CIME.SystemTests.system_tests_common import SystemTestsCommon
 from CIME.case.case_setup import case_setup
 from CIME.build import post_build
 from CIME.utils import expect
 
+import evv4esm  # pylint: disable=import-error
+from evv4esm.__main__ import main as evv  # pylint: disable=import-error
+
+evv_lib_dir = os.path.abspath(os.path.dirname(evv4esm.__file__))
+
 # Logic for PGN ensemble runs:
 # ----------------------------
 # We need two inputs:
@@ -56,33 +60,29 @@
 # --------------------------------------------------------
 # Variables which needs global scope for various functions
 # --------------------------------------------------------
-
 # number of initial conditions
 NINIT_COND = 6  # 12
-
 # perturbations for runs
-PRT = [0.0, 1.0e-14, -1.0e-14]
-PRTSTR = ['woprt', 'posprt', 'negprt']
-
+PERTURBATIONS = OrderedDict([('woprt', 0.0),
+                             ('posprt', 1.0e-14),
+                             ('negprt', -1.0e-14),
+                             ])
 # file name for file containing PGE cloud
 FCLD_NC = 'cam.h0.cloud.nc'
-
 # For preparing paths for namelist files for initial condition files
-FILE_PREF_ATM = "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.cam.i.0002-"
-FILE_PREF_LND = "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.clm2.r.0002-"
-
-FILE_SUF_ATM = "-01-00000.nc"
-FILE_SUF_LND = "-01-00000.nc"
-
+INIT_COND_FILE_TEMPLATE = \
+    "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.{}.{}.0002-{:02d}-01-00000.nc"
+# For finding the instance files
+# FIXME: should 'cam' be 'atm' now?
+INSTANCE_FILE_TEMPLATE = '{}.cam_{:04d}.h0.0001-01-01-00000.nc'
 # ------------------------------------------------------------
 # Some flags for debugging or invoking extra features
 # ------------------------------------------------------------
-
-# DEBUGGING: prints out max rmse diffs if set to True
+# prints out max rmse diffs if set to True
 INDEX = False
-
-# generate a plot
-DOPLOT = False  # It impacts performance!
+# Building the model can take a significant amount of time. Setting fake_bld to
+# True can save that time
+FAKE_BUILD = False
 
 
 class PGN(SystemTestsCommon):
@@ -91,52 +91,25 @@ def __init__(self, case):
         """
         initialize an object interface to the PGN test
         """
+        super(PGN, self).__init__(self, case)
 
-        # perturbation values and number of perturbation strings should be same
-        expect(len(PRT) == len(PRTSTR),
-               "Number of perturbation values ({}) are NOT equal to number of "
-               "perturbation strings({})".format(len(PRT), len(PRTSTR)))
-        SystemTestsCommon.__init__(self, case)
 
-    # =================================================================
-    # Compile model with multiple instances
-    # =================================================================
     def build_phase(self, sharedlib_only=False, model_only=False):
 
-        # ------------------------------------------------------
-        # Compute number of instances:
-        # ------------------------------------------------------
-
-        # ------------------------------------------------------
-        # Number of instances:
-        # ~~~~~~~~~~~~~~~~~~~
-        # Compute it from number of initial conditions to use and
-        # number of perturbation ensemble members to use
-        # ------------------------------------------------------
-        nprt = len(PRT)
-        ninst = NINIT_COND * nprt
-
+        ninst = NINIT_COND * len(PERTURBATIONS)
         logger.debug('PGN_INFO: number of instance: '+str(ninst))
 
-        # ------------------------------------------------------
-        # Fake Build:
-        # ~~~~~~~~~~
-        # (for debugging only) Building the model can take
-        # significant amount of time. Setting fake_bld to True
-        # can save that time
-        # ------------------------------------------------------
-        fake_bld = False
-
         # Find number of instance in the default setup
         default_ninst = self._case.get_value("NINST_ATM")
 
         # Sanity check: see if NINST is same for all model components, otherwise
         # exit with error
-        for comp in ['OCN', 'WAV', 'GLC', 'ICE', 'ROF', 'LND']:
-            iinst = self._case.get_value("NINST_{}".format(comp))
-            expect(default_ninst == iinst,
-                   "ERROR: component {}  NINST({}) is different from  component"
-                   " ATM NINST({}).".format(comp, iinst, default_ninst))
+        for comp in self._case.get_values("COMP_CLASSES"):
+            if comp != 'CPL':
+                iinst = self._case.get_value("NINST_{}".format(comp))
+                expect(default_ninst == iinst,
+                       "ERROR: component {}  NINST({}) is different from  component"
+                       " ATM NINST({}).".format(comp, iinst, default_ninst))
 
         # ------------------------------------------------------
         # Setup multi-instances for model components:
@@ -169,51 +142,29 @@ def build_phase(self, sharedlib_only=False, model_only=False):
 
                 case_setup(self._case, test_mode=False, reset=True)
 
-        # Faking a bld can save the time code spend in building the model components
-        if fake_bld:
+        if FAKE_BUILD:
             logger.debug("PGN_INFO: FAKE Build")
             if not sharedlib_only:
                 post_build(self._case, [])
         else:
-            # Build executable with multiple instances
             self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only)
 
-        # ----------------------------------------------------------------
-        # Namelist settings:
-        # ~~~~~~~~~~~~~~~~~~
-        # Do this already in build_phase so that we can check the xml and
-        # namelist files before job starts running.
-        # ----------------------------------------------------------------
         logger.debug("PGN_INFO: Updating user_nl_* files")
 
         csmdata_root = self._case.get_value("DIN_LOC_ROOT")
-        csmdata_atm = csmdata_root+"/atm/cam/inic/homme/ne4_v1_init/"
-        csmdata_lnd = csmdata_root+"/lnd/clm2/initdata/ne4_v1_init/b58d55680/"
+        csmdata_atm = os.path.join(csmdata_root, "atm/cam/inic/homme/ne4_v1_init")
+        csmdata_lnd = os.path.join(csmdata_root, "lnd/clm2/initdata/ne4_v1_init/b58d55680")
 
         iinst = 1
-        for icond in range(NINIT_COND):
-            icond_label_2digits = str(icond+1).zfill(2)
-            fatm_in = FILE_PREF_ATM + icond_label_2digits + FILE_SUF_ATM
-            flnd_in = FILE_PREF_LND + icond_label_2digits + FILE_SUF_LND
-            for iprt in PRT:
-                with open('user_nl_cam_'+str(iinst).zfill(4), 'w') as atmnlfile, \
-                        open('user_nl_clm_'+str(iinst).zfill(4), 'w') as lndnlfile:
-
-                    # atm/lnd intitial conditions
-
-                    # initial condition files to use for atm and land
-                    # atmnlfile.write("ncdata  = '/pic/projects/uq_climate/"
-                    #                 "wanh895/acme_input/ne4_v1_init/{}'"
-                    #                 " \n".format(fatm_in))
-                    # lndnlfile.write("finidat = '/pic/projects/uq_climate/"
-                    #                 "wanh895/acme_input/ne4_v1_init/{}'"
-                    #                 " \n".format(flnd_in))
-
-                    # uncomment the following when there files are on SVN server
-                    atmnlfile.write("ncdata  = '{}/{}' \n".format(csmdata_atm,
-                                                                  fatm_in))
-                    lndnlfile.write("finidat = '{}/{}' \n".format(csmdata_lnd,
-                                                                  flnd_in))
+        for icond in range(1, NINIT_COND+1):
+            fatm_in = os.path.join(csmdata_atm, INIT_COND_FILE_TEMPLATE.format('atm', 'i', icond))
+            flnd_in =  os.path.join(csmdata_lnd, INIT_COND_FILE_TEMPLATE.format('clm2', 'r', icond))
+            for iprt in PERTURBATIONS.values():
+                with open('user_nl_cam_{:04d}'.format(iinst), 'w') as atmnlfile, \
+                        open('user_nl_clm_{:04d}'.format(iinst), 'w') as lndnlfile:
+
+                    atmnlfile.write("ncdata  = '{}' \n".format(fatm_in))
+                    lndnlfile.write("finidat = '{}' \n".format(flnd_in))
 
                     # atm model output
                     atmnlfile.write("avgflag_pertape = 'I' \n")
@@ -235,30 +186,11 @@ def build_phase(self, sharedlib_only=False, model_only=False):
 
                     iinst += 1
 
-        # --------------------------------
-        # Settings common to all instances
-        # --------------------------------
-
-        # Coupler settings which are applicable to ALL the instances (as there is
-        # only one coupler for all instances)
         self._case.set_value("STOP_N", "1")
         self._case.set_value("STOP_OPTION", "nsteps")
 
-    # ===========================================================
-    # Some user defined functions to avoid repeated calculations
-    # ===========================================================
-
-    def get_fname_wo_ext(self, rundir, casename, iinst):
-        """
-        construct file name given the input
-        """
-        # form file name withOUT extension
-        if casename != "":
-            casename += '.'
-        return os.path.join(rundir,
-                            '{}cam_{:04d}.h0.0001-01-01-00000'.format(casename,
-                                                                      iinst))
 
+    # FIXME: Can this be done during __init__? Only should be done once right?
     def get_var_list(self):
         """
         Get variable list for pergro specific output vars
@@ -274,116 +206,6 @@ def get_var_list(self):
 
         return map(str.strip, var_list)
 
-    def nc_write_handle(self, fname_nc, rmse_nc_var):
-        """
-        Opens and write netcdf file for PGE curves
-        This function is here purely to avoid duplicate 
-        codes so that it is easy to maintain code longterm        
-        """
-
-        fhndl = Dataset(fname_nc, 'w', format='NETCDF4')
-
-        var_list = self.get_var_list()
-        len_var_list = len(var_list)
-        nprt = len(PRT)
-
-        # create dims
-        fhndl.createDimension('ninit', NINIT_COND)
-        fhndl.createDimension('nprt', nprt)
-        fhndl.createDimension('nprt_m1', nprt-1)
-        fhndl.createDimension('nvars', len_var_list)
-
-        # create variables in the file
-        init_cond_nc = fhndl.createVariable('init_cond_fnames', 'S100', 'ninit')
-        prt_nc = fhndl.createVariable('perturb_strings', 'S10', 'nprt')
-        variables_nc = fhndl.createVariable('perturb_vnames', 'S20', 'nvars')
-        rmse_nc = fhndl.createVariable(rmse_nc_var, 'f8', ('ninit', 'nprt_m1', 'nvars'))
-
-        # assign variables for writing to the netcdf file
-        for iprt in range(nprt):
-            prt_nc[iprt] = PRTSTR[iprt]
-
-        for ivar in range(len_var_list):
-            variables_nc[ivar] = var_list[ivar]
-
-        for icond in range(NINIT_COND):
-            icond_label_2digits = '{:02d}'.format(icond+1)
-            init_cond_nc[icond] = FILE_PREF_ATM + icond_label_2digits + FILE_SUF_ATM
-
-        return fhndl, rmse_nc
-
-    def rmse_var(self, ifile_test, ifile_cntl,  var_list, var_suffix):
-        """
-        Compute RMSE difference between ifile_test and ifile_cntl for
-        variables listed in var_list
-        """
-
-        # ------------------ARGS-------------------------
-        # ifile_test: path of test file
-        # ifile_cntl: path of control file
-        # var_list  : List of all variables
-        # var_suffix: Suffix for var_list (e.g. t_, t_ qv_ etc.)
-        # -----------------------------------------------
-
-        # See if the files exists or not....
-        expect(os.path.isfile(ifile_test),
-               "ERROR: Test file {} does not exist".format(ifile_test))
-        expect(os.path.isfile(ifile_cntl), "ERROR: CNTL file "+ifile_cntl+" does not exist")
-
-        ftest = Dataset(ifile_test,  mode='r')
-        fcntl = Dataset(ifile_cntl, mode='r')
-
-        # if max RMSE/DIFF is to be printed, extract lat lons from a file
-        lat = ftest.variables['lat']
-        lon = ftest.variables['lon']
-
-        rmse = np.zeros(shape=(len(var_list)))
-        icntvar = 0
-
-        dims = len(ftest.variables[var_suffix+var_list[icntvar]].dimensions)
-        for ivar in var_list:
-            var = var_suffix+ivar
-            if var in ftest.variables:
-                vtest = ftest.variables[var.strip()][0, ...]  # first dimension is time (=0)
-                vcntl = fcntl.variables[var.strip()][0, ...]  # first dimension is time (=0)
-
-                # reshape for RMSE
-                if dims == 3:  # see if it is SE grid
-                    nx, ny = vtest.shape  # shape will be same for both arrays
-                    nz = 1
-                else:
-                    nx, ny, nz = vtest.shape  # shape will be same for both arrays
-
-                rmse[icntvar] = sqrt(mean_squared_error(
-                        vtest.reshape((nx, ny*nz)), vcntl.reshape((nx, ny*nz))))
-
-                if INDEX:
-                    # NEED to work on formatting this....
-                    diff_arr = abs(vtest[...] - vcntl[...])
-                    max_diff = np.amax(diff_arr)
-                    ind_max = np.unravel_index(diff_arr.argmax(),
-                                               diff_arr.shape)
-                    print(ind_max)
-                    print("{} {} {} {} {} {} {}".format(var,
-                                                        max_diff,
-                                                        vtest[ind_max],
-                                                        vcntl[ind_max],
-                                                        lat[ind_max[1]],
-                                                        lon[ind_max[1]],
-                                                        ind_max[0]))
-                    # print('{0:15}{1:15}{2:15}\n'.format(name, a, b))  # TRY THIS!!
-
-                # normalize by mean values of the field in the control case
-                rmse[icntvar] = rmse[icntvar]/np.mean(vcntl)
-                icntvar += 1
-                # vtest = None
-                # vcntl = None
-                # var   = None
-
-        ftest.close()
-        fcntl.close()
-
-        return rmse
 
     def _compare_baseline(self):
         """
@@ -391,151 +213,72 @@ def _compare_baseline(self):
         compare PGE from the test simulation with the baseline 
         cloud
         """
-        # import time
-        # t0 = time.time()
-
-        logger.debug("PGN_INFO:BASELINE COMPARISON STARTS")
-
-        rundir = self._case.get_value("RUNDIR")
-        casename = self._case.get_value("CASE")
-
-        var_list = self.get_var_list()
-        len_var_list = len(var_list)
-        nprt = len(PRT)
-
-        # baseline directory names
-        base_root = self._case.get_value("BASELINE_ROOT")
-        base_comp = self._case.get_value("BASECMP_CASE")
-
-        # baseline directory is:base_root/base_comp
-        base_dir = os.path.join(base_root, base_comp)
-
-        # for test cases (new environment etc.)
-        logger.debug("PGN_INFO: Test case comparison...")
-
-        # ---------------------------------------------
-        # Write netcdf file for comparison
-        # ---------------------------------------------
-        fcomp_nc = 'comp_cld.nc'
-        fcomp_cld, comp_rmse_nc = self.nc_write_handle(fcomp_nc, 'comp_rmse')
-
-        iinst = 0
-        for icond in range(NINIT_COND):
-            iinst += 1
-            iprt = 0
-            ifile_cntl = os.path.join(base_dir,
-                                      '{}_woprt.nc'.format(
-                                              self.get_fname_wo_ext('', '', iinst))
-                                      )
-            expect(os.path.isfile(ifile_cntl), "ERROR: File "+ifile_cntl+" does not exist")
-            logger.debug("PGN_INFO:CNTL_TST:"+ifile_cntl)
-
-            for aprt in PRTSTR[1:]:
-                iinst += 1
-                ifile_test = os.path.join(rundir, '{}_{}.nc'.format(
-                        self.get_fname_wo_ext(rundir, casename, iinst),
-                        aprt))
-                expect(os.path.isfile(ifile_test),
-                       "ERROR: File {} does not exist".format(ifile_test))
-                comp_rmse_nc[icond, iprt, 0:len_var_list] = self.rmse_var(
-                        ifile_test, ifile_cntl,  var_list, 't_')
-                logger.debug("PGN_INFO:Compared to TEST_TST:{}".format(ifile_test))
-
-                iprt += 1
-
-        # --------------------------------------------
-        # Student's t-test
-        # -------------------------------------------
-
-        # Extract last element of each PGE curve of the cloud
-        path_cld_nc = os.path.join(base_dir, FCLD_NC)
-        expect(os.path.isfile(path_cld_nc),
-               "ERROR: {} does not exist at:{}".format(FCLD_NC, path_cld_nc))
-
-        fcld = Dataset(path_cld_nc, 'r', format='NETCDF4')
-
-        # Get dimentions of cloud PGE curves
-        ninic_cld = fcld.variables['cld_rmse'].shape[0]
-        nprt_cld = fcld.variables['cld_rmse'].shape[1]
-        nvar_cld = fcld.variables['cld_rmse'].shape[2]
-
-        expect(ninic_cld == NINIT_COND,
-               "BASELINE COMPARISON: Number of initial conditions should be same:" 
-               "inic cld:{}  inic comp:{}".format(ninic_cld, NINIT_COND))
-
-        expect(nprt_cld == nprt-1,
-               "BASELINE COMPARISON: Number of perturbations should be same:"
-               "prt cld:{}  prt comp:{}".format(nprt_cld, nprt - 1))
-
-        expect(nvar_cld == len_var_list,
-               "BASELINE COMPARISON: Number of phys update variables should be same:"
-               "nvar cld:{}  nvar comp:{}".format(nvar_cld, len_var_list))
-
-        pgecld = fcld.variables['cld_rmse']
-
-        if DOPLOT:
-            import pylab as pl
-            # generate plot
-            ax = pl.gca()
-            for icond in range(NINIT_COND):
-                for iprt in range(nprt-1):
-                    ax.semilogy(pgecld[icond, iprt, :], color='b')
-                    ax.semilogy(comp_rmse_nc[icond, iprt, :], color='r')
-            pl.savefig('plot_comp.png')
-
-        pge_ends_cld = fcld.variables['cld_rmse'][:, :, len_var_list-1]
-        pge_ends_comp = comp_rmse_nc[:, 0:nprt-1, len_var_list-1]
-
-        # run the t-test
-        pge_ends_cld = pge_ends_cld.flatten()
-        pge_ends_comp = pge_ends_comp.flatten()
-
-        t_stat, p_val = stats.ttest_ind(pge_ends_cld, pge_ends_comp)
-
-        t_stat = abs(t_stat)  # only value of t_stat matters, not the sign
-
-        print("PGN_INFO: T value:"+str(t_stat))
-        print("PGN_INFO: P value:"+str(p_val))
-
-        logger.warn(" T value:"+str(t_stat))
-        logger.warn(" P value:"+str(p_val))
-
-        # There should be multiple criteria to determin pass/fail
-        # This part of decision making should be more polished
-        if t_stat > 3:
-            with self._test_status:
-                self._test_status.set_status(BASELINE_PHASE, TEST_FAIL_STATUS)
-            print('PGN_INFO:TEST FAIL')
-        else:
-            with self._test_status:
-                self._test_status.set_status(BASELINE_PHASE, TEST_PASS_STATUS)
-            print('PGN_INFO:TEST PASS')
-
-        # Close this file after you access "comp_rmse_nc" variable
-        fcomp_cld.close()
-        logger.debug("PGN_INFO: POST PROCESSING PHASE ENDS")
-
-        # t1 = time.time()
-        # print('time: '+str(t1-t0))
-    # =================================================================
-    # run_phase
-    # =================================================================
+        with self._test_status:
+            self._test_status.set_status(CIME.test_status.BASELINE_PHASE,
+                                         CIME.test_status.TEST_FAIL_STATUS)
+
+            logger.debug("PGN_INFO:BASELINE COMPARISON STARTS")
+
+            run_dir = self._case.get_value("RUNDIR")
+            case_name = self._case.get_value("CASE")
+            base_dir = os.path.join(self._case.get_value("BASELINE_ROOT"),
+                                    self._case.get_value("BASECMP_CASE"))
+
+            var_list = self.get_var_list()
+            path_cld_nc = os.path.join(base_dir, FCLD_NC)
+
+            test_name = "{}".format(case_name.split('.')[-1])
+            evv_config = {
+                test_name: {
+                    "module": os.path.join(evv_lib_dir, "extensions", "pg.py"),
+                    "test-case": case_name,
+                    "test-name": "Test",
+                    "test-dir": run_dir,
+                    "ref-name": "Baseline",
+                    "ref-dir": base_dir,
+                    "variables": var_list,
+                    "perturbations": PERTURBATIONS,
+                    "pge-cld": FCLD_NC,
+                    "ninit": NINIT_COND,
+                    "init-file-template": INIT_COND_FILE_TEMPLATE,
+                    "instance-file-template": INSTANCE_FILE_TEMPLATE,
+
+                }
+            }
+
+            json_file = os.path.join(run_dir, '.'.join([case_name, 'json']))
+            with open(json_file, 'w') as config_file:
+                json.dump(evv_config, config_file, indent=4)
+
+            evv_out_dir = os.path.join(run_dir, '.'.join([case_name, 'evv']))
+            evv(['-e', json_file, '-o', evv_out_dir])
+
+            with open(os.path.join(evv_out_dir, 'index.json'), 'r') as evv_f:
+                evv_status = json.load(evv_f)
+
+            for evv_elem in evv_status['Data']['Elements']:
+                # FIXME: TableTitle!
+                if evv_elem['Type'] == 'ValSummary' \
+                        and evv_elem['TableTitle'] == 'Peturbation-Growth':
+                    if evv_elem['Data'][test_name]['']['Ensembles'] == 'identical':
+                        self._test_status.set_status(CIME.test_status.BASELINE_PHASE,
+                                                     CIME.test_status.TEST_PASS_STATUS)
+                        break
+
+    # FIXME: REFACTOR
     def run_phase(self):
         logger.debug("PGN_INFO: RUN PHASE")
 
         self.run_indv()
 
-        # cwd = os.getcwd()
-
         # Here were are in case directory, we need to go to the run directory
         # and rename files
         rundir = self._case.get_value("RUNDIR")
         casename = self._case.get_value("CASE")
         logger.debug("PGN_INFO: Case name is:{}".format(casename))
 
-        iinst = 1
         for icond in range(NINIT_COND):
-            for iprt in range(len(PRT)):
+            for iprt, (prt, prt_name) in enumerate(PERTURBATIONS.items()):
 
                 # ------------------------------------------------------
                 # SANITY CHECK - To confirm that history file extension
@@ -543,6 +286,7 @@ def run_phase(self):
                 # corresponds to the right perturbation
                 # ------------------------------------------------------
                 # find corresponding atm_in_*
+                iinst = _sub2instance(icond, iprt)
                 fatm_in = os.path.join(rundir, 'atm_in_{:04d}'.format(iinst))
                 # see if atm_in_* file has pertlim same as PRT[iprt] (sanity check)
                 found = False
@@ -552,44 +296,36 @@ def run_phase(self):
                         if line.find('pertlim') > 0:
                             found = True
                             prtval = float(line.split('=')[1])
-                            expect(prtval == PRT[iprt],
+                            expect(prtval == prt,
                                    "ERROR: prtval doesn't match, "
                                    "prtval:{}; prt[{}]:{}".format(
-                                           prtval, iprt, PRT[iprt]))
+                                           prtval, iprt, prt))
                             logger.debug("PGN_INFO:prtval:{}; prt[{}]:{}".format(
-                                    prtval, iprt, PRT[iprt]))
+                                    prtval, iprt, prt))
 
                 if not found:
-                    expect(prtval == PRT[iprt],
+                    expect(prtval == prt,
                            "ERROR: default prtval doesn't match, "
-                           "prtval:{}; prt[{}]:{}".format(prtval, iprt, PRT[iprt]))
+                           "prtval:{}; prt[{}]:{}".format(prtval, iprt, prt))
                     logger.debug("PGN_INFO:def prtval:{}; prt[{}]:{}".format(
-                            prtval, iprt, PRT[iprt]))
+                            prtval, iprt, prt))
 
                 # ---------------------------------------------------------
                 # Rename file
                 # ---------------------------------------------------------
+                fname = os.path.join(rundir, INSTANCE_FILE_TEMPLATE.format(casename, iinst))
+                renamed_fname = re.sub(r'\.nc$', '_{}.nc'.format(prt_name), fname)
 
-                # form file name
-                fname = self.get_fname_wo_ext(rundir, casename, iinst)  # NOTE:without extension
                 logger.debug("PGN_INFO: fname to rename:{}".format(fname))
-
-                renamed_fname = '{}_{}.nc'.format(fname, PRTSTR[iprt])
-                fname_w_ext = '{}.nc'.format(fname)  # add extention
-
-                # see if file exists
-                if os.path.isfile(fname_w_ext):
-                    # rename file
-                    shutil.move(fname_w_ext, renamed_fname)
-                    logger.debug("PGN_INFO: Renamed file:{}".format(renamed_fname))
-                else:
+                logger.debug("PGN_INFO: Renamed file:{}".format(renamed_fname))
+                try:
+                    shutil.move(fname, renamed_fname)
+                except IOError:
                     expect(os.path.isfile(renamed_fname),
                            "ERROR: File {} does not exist".format(renamed_fname))
                     logger.debug("PGN_INFO: Renamed file already exists:"
                                  "{}".format(renamed_fname))
 
-                iinst += 1
-
         # cloud generation
 
         logger.debug("PGN_INFO: cloud generation-gen base")
@@ -612,23 +348,24 @@ def run_phase(self):
         iinst = 0
         for icond in range(NINIT_COND):
             iinst += 1
-            iprt = 0
             ifile_cntl = os.path.join(rundir, '{}_{}.nc'.format(
-                    self.get_fname_wo_ext('', casename, iinst), PRTSTR[iprt]))
+                    self.get_fname_wo_ext('', casename, iinst), PERTURBATIONS))
             expect(os.path.isfile(ifile_cntl),
                    "ERROR: File {} does not exist".format(ifile_cntl))
             logger.debug("PGN_INFO:CNTL_CLD:{}".format(ifile_cntl))
 
-            for aprt in PRTSTR[1:]:
+            for iprt, prt_name in enumerate(PERTURBATIONS):
+                if prt_name == 'woprt':
+                    continue
                 iinst += 1
-                # subtracted 1 as we will only get two curves for each inic
-                # (wo-pos and wo-neg)
-                iprt = PRTSTR.index(aprt) - 1
+
                 ifile_test = os.path.join(rundir, '{}_{}.nc'.format(
-                        self.get_fname_wo_ext('', casename, iinst), aprt))
+                        self.get_fname_wo_ext('', casename, iinst), prt_name))
                 expect(os.path.isfile(ifile_test),
                        "ERROR: File {} does not exist".format(ifile_test))
-                cld_rmse_nc[icond, iprt, 0:len_var_list] = self.rmse_var(
+                # NOTE: iprt-1 as we will only get two curves for each inic
+                # (wo-pos and wo-neg)
+                cld_rmse_nc[icond, iprt-1, 0:len_var_list] = self.rmse_var(
                         ifile_test, ifile_cntl,  var_list, 't_')
                 logger.debug("PGN_INFO:Compared to CLD:{}".format(ifile_test))
 
@@ -659,12 +396,38 @@ def run_phase(self):
 # -----------------------------------------------------
 # DEBUG type 1 (DB1): Ensure that model produces BFB instances
 # -----------------------------------------------------
-# Replace the following lines in the script with the following updated values
-# PRT = [0.0, 0.0]  # DB1
-# PRTSTR = ['woprt', 'posprt']
+# Replace PERTURBATIONS dictionary at the top of the script with the following:
+# PERTURBATIONS = OrderedDict([('woprt', 0.0), ('posprt', 0.0)])
 
 # Comment out all namelist changes so that all instances have same namelist values
 # For testing effect of namelist changes, uncomment namelist changes such that
 # namelists are same for all instances
 
 # Comment out sanity checks as well if needed....
+
+
+def _instance2sub(instance_number):
+    """
+    Converts an instance number (ii) to initial condition index (ci) and
+    perturbation index (pi)  subscripts
+
+    instances use 1-based indexes and vary according to this function:
+        ii = ci * len(PERTURBATIONS) + pi + 1
+    where both pi and ci use 0-based indexes.
+    """
+    perturbation_index = (instance_number - 1) % len(PERTURBATIONS)
+    initial_condition = (instance_number - 1 - perturbation_index) // len(PERTURBATIONS)
+    return initial_condition, perturbation_index
+
+
+def _sub2instance(initial_condition, perturbation_index):
+    """
+    Converts initial condition index (ci) and perturbation index (pi) subscripts
+    to an instance number (ii)
+
+    instances use 1-based indexes and vary according to this function:
+        ii = ci * len(PERTURBATIONS) + pi + 1
+    where both pi and ci use 0-based indexes.
+    """
+    instance = initial_condition * len(PERTURBATIONS) + perturbation_index + 1
+    return instance

From 488d71e9f3adc8b2dfef1187bbfac6481061bb01 Mon Sep 17 00:00:00 2001
From: Joseph H Kennedy <me@jhkennedy.org>
Date: Tue, 6 Aug 2019 20:50:57 -0400
Subject: [PATCH 5/8] Finished PGN refactor

---
 cime/scripts/lib/CIME/SystemTests/pgn.py | 241 +++++------------------
 1 file changed, 44 insertions(+), 197 deletions(-)

diff --git a/cime/scripts/lib/CIME/SystemTests/pgn.py b/cime/scripts/lib/CIME/SystemTests/pgn.py
index 1c7b07d82757..52f274bbee41 100644
--- a/cime/scripts/lib/CIME/SystemTests/pgn.py
+++ b/cime/scripts/lib/CIME/SystemTests/pgn.py
@@ -15,74 +15,33 @@
 import shutil
 import logging
 
+import pandas as pd
+import numpy as np
 from collections import OrderedDict
 
 import CIME.test_status
 from CIME.SystemTests.system_tests_common import SystemTestsCommon
 from CIME.case.case_setup import case_setup
-from CIME.build import post_build
 from CIME.utils import expect
 
 import evv4esm  # pylint: disable=import-error
+from evv4esm.extensions import pg  # pylint: disable=import-error
 from evv4esm.__main__ import main as evv  # pylint: disable=import-error
 
 evv_lib_dir = os.path.abspath(os.path.dirname(evv4esm.__file__))
 
-# Logic for PGN ensemble runs:
-# ----------------------------
-# We need two inputs:
-# A. Number of inic cond files
-# B. perturbations (e.g. currently we have: without prt, pos prt and neg prt)
-
-# Based off the above, we compute number of instances to have (A * B)
-# Build phase               : Change the user_nl* files to add perturbations and
-#                             other flags
-# Run phase                 : Rename history files
-# Baselines generation phase: Compute cloud, store in netcdf file, copy netcdf
-#                             file to baseline folder
-# Baseline Comparison phase : Compare against baseline cloud to know pass/fail,
-#                             and plot (optional)
-
-
-# TODO:
-# 1. The loop to change user_nl* files is run twice as we call build again after
-#    changing ninst, which changes ntasks
-# 2. Do we want to remove user_nl_cam, user_nl_clm etc. files as they are not
-#    used in the simulation?
-# 3. change code so that pergro_ptend_names.txt is not generated if it is
-#    already there or only one instance writes this file...
-# 4. Plot generation is very basic at this point(no labels, legends etc.),
-#    improve it!
-# 5. Decision making about PASS/FAIL should have multiple criteria
-
 logger = logging.getLogger(__name__)
 
-# --------------------------------------------------------
-# Variables which needs global scope for various functions
-# --------------------------------------------------------
-# number of initial conditions
-NINIT_COND = 6  # 12
-# perturbations for runs
+NUMBER_INITIAL_CONDITIONS = 6
 PERTURBATIONS = OrderedDict([('woprt', 0.0),
                              ('posprt', 1.0e-14),
                              ('negprt', -1.0e-14),
                              ])
-# file name for file containing PGE cloud
 FCLD_NC = 'cam.h0.cloud.nc'
-# For preparing paths for namelist files for initial condition files
 INIT_COND_FILE_TEMPLATE = \
     "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.{}.{}.0002-{:02d}-01-00000.nc"
-# For finding the instance files
 # FIXME: should 'cam' be 'atm' now?
-INSTANCE_FILE_TEMPLATE = '{}.cam_{:04d}.h0.0001-01-01-00000.nc'
-# ------------------------------------------------------------
-# Some flags for debugging or invoking extra features
-# ------------------------------------------------------------
-# prints out max rmse diffs if set to True
-INDEX = False
-# Building the model can take a significant amount of time. Setting fake_bld to
-# True can save that time
-FAKE_BUILD = False
+INSTANCE_FILE_TEMPLATE = '{}cam_{:04d}.h0.0001-01-01-00000{}.nc'
 
 
 class PGN(SystemTestsCommon):
@@ -93,10 +52,8 @@ def __init__(self, case):
         """
         super(PGN, self).__init__(self, case)
 
-
     def build_phase(self, sharedlib_only=False, model_only=False):
-
-        ninst = NINIT_COND * len(PERTURBATIONS)
+        ninst = NUMBER_INITIAL_CONDITIONS * len(PERTURBATIONS)
         logger.debug('PGN_INFO: number of instance: '+str(ninst))
 
         # Find number of instance in the default setup
@@ -142,12 +99,7 @@ def build_phase(self, sharedlib_only=False, model_only=False):
 
                 case_setup(self._case, test_mode=False, reset=True)
 
-        if FAKE_BUILD:
-            logger.debug("PGN_INFO: FAKE Build")
-            if not sharedlib_only:
-                post_build(self._case, [])
-        else:
-            self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only)
+        self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only)
 
         logger.debug("PGN_INFO: Updating user_nl_* files")
 
@@ -156,9 +108,9 @@ def build_phase(self, sharedlib_only=False, model_only=False):
         csmdata_lnd = os.path.join(csmdata_root, "lnd/clm2/initdata/ne4_v1_init/b58d55680")
 
         iinst = 1
-        for icond in range(1, NINIT_COND+1):
+        for icond in range(1, NUMBER_INITIAL_CONDITIONS + 1):
             fatm_in = os.path.join(csmdata_atm, INIT_COND_FILE_TEMPLATE.format('atm', 'i', icond))
-            flnd_in =  os.path.join(csmdata_lnd, INIT_COND_FILE_TEMPLATE.format('clm2', 'r', icond))
+            flnd_in = os.path.join(csmdata_lnd, INIT_COND_FILE_TEMPLATE.format('clm2', 'r', icond))
             for iprt in PERTURBATIONS.values():
                 with open('user_nl_cam_{:04d}'.format(iinst), 'w') as atmnlfile, \
                         open('user_nl_clm_{:04d}'.format(iinst), 'w') as lndnlfile:
@@ -189,8 +141,6 @@ def build_phase(self, sharedlib_only=False, model_only=False):
         self._case.set_value("STOP_N", "1")
         self._case.set_value("STOP_OPTION", "nsteps")
 
-
-    # FIXME: Can this be done during __init__? Only should be done once right?
     def get_var_list(self):
         """
         Get variable list for pergro specific output vars
@@ -204,8 +154,7 @@ def get_var_list(self):
         with open(var_file, 'r') as fvar:
             var_list = fvar.readlines()
 
-        return map(str.strip, var_list)
-
+        return list(map(str.strip, var_list))
 
     def _compare_baseline(self):
         """
@@ -225,7 +174,6 @@ def _compare_baseline(self):
                                     self._case.get_value("BASECMP_CASE"))
 
             var_list = self.get_var_list()
-            path_cld_nc = os.path.join(base_dir, FCLD_NC)
 
             test_name = "{}".format(case_name.split('.')[-1])
             evv_config = {
@@ -239,7 +187,7 @@ def _compare_baseline(self):
                     "variables": var_list,
                     "perturbations": PERTURBATIONS,
                     "pge-cld": FCLD_NC,
-                    "ninit": NINIT_COND,
+                    "ninit": NUMBER_INITIAL_CONDITIONS,
                     "init-file-template": INIT_COND_FILE_TEMPLATE,
                     "instance-file-template": INSTANCE_FILE_TEMPLATE,
 
@@ -257,15 +205,13 @@ def _compare_baseline(self):
                 evv_status = json.load(evv_f)
 
             for evv_elem in evv_status['Data']['Elements']:
-                # FIXME: TableTitle!
                 if evv_elem['Type'] == 'ValSummary' \
-                        and evv_elem['TableTitle'] == 'Peturbation-Growth':
-                    if evv_elem['Data'][test_name]['']['Ensembles'] == 'identical':
+                        and evv_elem['TableTitle'] == 'Perturbation growth test':
+                    if evv_elem['Data'][test_name]['']['Test status'].lower() == 'pass':
                         self._test_status.set_status(CIME.test_status.BASELINE_PHASE,
                                                      CIME.test_status.TEST_PASS_STATUS)
                         break
 
-    # FIXME: REFACTOR
     def run_phase(self):
         logger.debug("PGN_INFO: RUN PHASE")
 
@@ -277,43 +223,10 @@ def run_phase(self):
         casename = self._case.get_value("CASE")
         logger.debug("PGN_INFO: Case name is:{}".format(casename))
 
-        for icond in range(NINIT_COND):
+        for icond in range(NUMBER_INITIAL_CONDITIONS):
             for iprt, (prt, prt_name) in enumerate(PERTURBATIONS.items()):
-
-                # ------------------------------------------------------
-                # SANITY CHECK - To confirm that history file extension
-                # ~~~~~~~~~~~~
-                # corresponds to the right perturbation
-                # ------------------------------------------------------
-                # find corresponding atm_in_*
-                iinst = _sub2instance(icond, iprt)
-                fatm_in = os.path.join(rundir, 'atm_in_{:04d}'.format(iinst))
-                # see if atm_in_* file has pertlim same as PRT[iprt] (sanity check)
-                found = False
-                prtval = 0.0
-                with open(fatm_in) as atmfile:
-                    for line in atmfile:
-                        if line.find('pertlim') > 0:
-                            found = True
-                            prtval = float(line.split('=')[1])
-                            expect(prtval == prt,
-                                   "ERROR: prtval doesn't match, "
-                                   "prtval:{}; prt[{}]:{}".format(
-                                           prtval, iprt, prt))
-                            logger.debug("PGN_INFO:prtval:{}; prt[{}]:{}".format(
-                                    prtval, iprt, prt))
-
-                if not found:
-                    expect(prtval == prt,
-                           "ERROR: default prtval doesn't match, "
-                           "prtval:{}; prt[{}]:{}".format(prtval, iprt, prt))
-                    logger.debug("PGN_INFO:def prtval:{}; prt[{}]:{}".format(
-                            prtval, iprt, prt))
-
-                # ---------------------------------------------------------
-                # Rename file
-                # ---------------------------------------------------------
-                fname = os.path.join(rundir, INSTANCE_FILE_TEMPLATE.format(casename, iinst))
+                iinst = pg._sub2instance(icond, iprt, len(PERTURBATIONS))
+                fname = os.path.join(rundir, INSTANCE_FILE_TEMPLATE.format(casename + '.', iinst, ''))
                 renamed_fname = re.sub(r'\.nc$', '_{}.nc'.format(prt_name), fname)
 
                 logger.debug("PGN_INFO: fname to rename:{}".format(fname))
@@ -326,108 +239,42 @@ def run_phase(self):
                     logger.debug("PGN_INFO: Renamed file already exists:"
                                  "{}".format(renamed_fname))
 
-        # cloud generation
-
-        logger.debug("PGN_INFO: cloud generation-gen base")
-
-        var_list = self.get_var_list()
-        len_var_list = len(var_list)
-        # nprt = len(PRT)
-
-        # for trusted cloud sims
-        logger.debug("PGN_INFO: Computing cloud")
+        logger.debug("PGN_INFO: RUN PHASE ENDS")
 
-        # cld_res = np.empty([len_var_list])
+    def _generate_baseline(self):
+        super(PGN, self)._generate_baseline()
 
-        # ---------------------------------------------
-        # Write netcdf file for cloud in rundir
-        # ---------------------------------------------
-        os.chdir(rundir)
-        fcld, cld_rmse_nc = self.nc_write_handle(FCLD_NC, 'cld_rmse')
+        base_root = self._case.get_value("BASELINE_ROOT")
+        base_gen = self._case.get_value("BASEGEN_CASE")
+        base_dir = os.path.join(base_root, base_gen)
 
-        iinst = 0
-        for icond in range(NINIT_COND):
-            iinst += 1
-            ifile_cntl = os.path.join(rundir, '{}_{}.nc'.format(
-                    self.get_fname_wo_ext('', casename, iinst), PERTURBATIONS))
-            expect(os.path.isfile(ifile_cntl),
-                   "ERROR: File {} does not exist".format(ifile_cntl))
-            logger.debug("PGN_INFO:CNTL_CLD:{}".format(ifile_cntl))
+        rundir = self._case.get_value("RUNDIR")
+        casename = self._case.get_value("CASE")
 
+        var_list = self.get_var_list()
+        nvar = len(var_list)
+        nprt = len(PERTURBATIONS)
+        rmse_prototype = {}
+        for icond in range(NUMBER_INITIAL_CONDITIONS):
+            prt_rmse = {}
             for iprt, prt_name in enumerate(PERTURBATIONS):
                 if prt_name == 'woprt':
                     continue
-                iinst += 1
-
-                ifile_test = os.path.join(rundir, '{}_{}.nc'.format(
-                        self.get_fname_wo_ext('', casename, iinst), prt_name))
-                expect(os.path.isfile(ifile_test),
-                       "ERROR: File {} does not exist".format(ifile_test))
-                # NOTE: iprt-1 as we will only get two curves for each inic
-                # (wo-pos and wo-neg)
-                cld_rmse_nc[icond, iprt-1, 0:len_var_list] = self.rmse_var(
-                        ifile_test, ifile_cntl,  var_list, 't_')
-                logger.debug("PGN_INFO:Compared to CLD:{}".format(ifile_test))
-
-        fcld.close()
-
-        if self._case.get_value("GENERATE_BASELINE"):
-
-            # baseline directory names
-            base_root = self._case.get_value("BASELINE_ROOT")
-            base_gen = self._case.get_value("BASEGEN_CASE")
-
-            # baseline directory is:base_root/base_gen
-            base_dir = os.path.join(base_root, base_gen)
-
-            # first copy files to the baseline directory
-            self._generate_baseline()  # BALLI-CHECK IF THIS IS OKAY
-
-            # copy cloud.nc file to baseline directory
-            logger.debug("PGN_INFO:copy:{} to {}".format(FCLD_NC, base_dir))
-            shutil.copy(FCLD_NC, base_dir)
-
-        logger.debug("PGN_INFO: RUN PHASE ENDS")
-
-# =====================================================
-# Debugging:
-# =====================================================
-
-# -----------------------------------------------------
-# DEBUG type 1 (DB1): Ensure that model produces BFB instances
-# -----------------------------------------------------
-# Replace PERTURBATIONS dictionary at the top of the script with the following:
-# PERTURBATIONS = OrderedDict([('woprt', 0.0), ('posprt', 0.0)])
-
-# Comment out all namelist changes so that all instances have same namelist values
-# For testing effect of namelist changes, uncomment namelist changes such that
-# namelists are same for all instances
-
-# Comment out sanity checks as well if needed....
-
-
-def _instance2sub(instance_number):
-    """
-    Converts an instance number (ii) to initial condition index (ci) and
-    perturbation index (pi)  subscripts
+                iinst_ctrl = pg._sub2instance(icond, 0, nprt)
+                ifile_ctrl = os.path.join(rundir,
+                                          INSTANCE_FILE_TEMPLATE.format(casename + '.', iinst_ctrl, '_woprt'))
 
-    instances use 1-based indexes and vary according to this function:
-        ii = ci * len(PERTURBATIONS) + pi + 1
-    where both pi and ci use 0-based indexes.
-    """
-    perturbation_index = (instance_number - 1) % len(PERTURBATIONS)
-    initial_condition = (instance_number - 1 - perturbation_index) // len(PERTURBATIONS)
-    return initial_condition, perturbation_index
+                iinst_test = pg._sub2instance(icond, iprt, nprt)
+                ifile_test = os.path.join(rundir,
+                                          INSTANCE_FILE_TEMPLATE.format(casename + '.', iinst_test, '_' + prt_name))
 
+                prt_rmse[prt_name] = pg.variables_rmse(ifile_test, ifile_ctrl, var_list, 't_')
+            rmse_prototype[icond] = pd.concat(prt_rmse)
+        rmse = pd.concat(rmse_prototype)
+        cld_rmse = np.reshape(rmse.RMSE.values, (NUMBER_INITIAL_CONDITIONS, nprt - 1, nvar))
 
-def _sub2instance(initial_condition, perturbation_index):
-    """
-    Converts initial condition index (ci) and perturbation index (pi) subscripts
-    to an instance number (ii)
+        pg.rmse_writer(os.path.join(rundir, FCLD_NC),
+                       cld_rmse, PERTURBATIONS.keys(), var_list, INIT_COND_FILE_TEMPLATE)
 
-    instances use 1-based indexes and vary according to this function:
-        ii = ci * len(PERTURBATIONS) + pi + 1
-    where both pi and ci use 0-based indexes.
-    """
-    instance = initial_condition * len(PERTURBATIONS) + perturbation_index + 1
-    return instance
+        logger.debug("PGN_INFO:copy:{} to {}".format(FCLD_NC, base_dir))
+        shutil.copy(os.path.join(rundir, FCLD_NC), base_dir)

From de3b0470a609401395cee41339b9427465908a20 Mon Sep 17 00:00:00 2001
From: Joseph H Kennedy <me@jhkennedy.org>
Date: Wed, 7 Aug 2019 09:08:04 -0400
Subject: [PATCH 6/8] Remove unneeded requirements.txt b/c of conda package

---
 cime/scripts/climate_reproducibility/requirements.txt | 9 ---------
 1 file changed, 9 deletions(-)
 delete mode 100644 cime/scripts/climate_reproducibility/requirements.txt

diff --git a/cime/scripts/climate_reproducibility/requirements.txt b/cime/scripts/climate_reproducibility/requirements.txt
deleted file mode 100644
index 5234819a3e2c..000000000000
--- a/cime/scripts/climate_reproducibility/requirements.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-six
-numpy
-scipy
-pandas
-netCDF4
-matplotlib
-evv4esm>=0.1.1
-livvkit>=2.1.6
-json_tricks==3.11.0

From 4c05816d7ebfce2b2974f4ae71179d915c83fa83 Mon Sep 17 00:00:00 2001
From: Joseph H Kennedy <me@jhkennedy.org>
Date: Thu, 15 Aug 2019 14:56:01 -0400
Subject: [PATCH 7/8] Refactor climate reproducibility ests for e3sm_simple env
 and EVV release

In order to operationalize the climate reproducibility tests (a CMDV-SM
project), they have been refactored to use EVV, which is provided in the
e3sm_simple environment. This commit:
* Refactors MVK test to not duplicate CIME.hist_utils.generate_baseline
  and update to work with latest EVV release
* Refactors TSC and PGN tests to use EVV for determining pass-fail and
  generating a detailed test website, uses the proper
  _generate_baseline method to generate baselines, and uses the proper
  _compare_baseline method to launch EVV
* Adds TSC as an expected exception in CIME.hist_utils._generate_baseline_impl
  because TSC renames all history files to run multiple times with
  different specified time steps
* Updates the climate reproducibility testing documentation
---
 .../scripts/climate_reproducibility/README.md | 152 ++++++------
 cime/scripts/lib/CIME/SystemTests/mvk.py      | 118 ++-------
 cime/scripts/lib/CIME/SystemTests/pgn.py      |  48 +---
 cime/scripts/lib/CIME/SystemTests/tsc.py      | 224 ++++++++++--------
 cime/scripts/lib/CIME/hist_utils.py           |   2 +-
 5 files changed, 233 insertions(+), 311 deletions(-)

diff --git a/cime/scripts/climate_reproducibility/README.md b/cime/scripts/climate_reproducibility/README.md
index d5bc5375d8e7..5d2ec104e58b 100644
--- a/cime/scripts/climate_reproducibility/README.md
+++ b/cime/scripts/climate_reproducibility/README.md
@@ -17,36 +17,64 @@ a comprehensive analysis of both a baseline climate and the currently produced c
 Through the CMDV Software project, we've provided a set of climate reproducibility tests to determine
 whether or not non-bit-for-bit (nb4b) model changes are climate changing. The current tests provided are:
 
-* **MVK** --  This tests the null hypothesis that the baseline (n) and modified (m) model Short Independent 
-  Simulation Ensembles (SISE) represent the same climate state, based on the equality of distribution 
-  of each variable's annual global average in the standard monthly model output between the two 
-  simulations. The (per variable) null hypothesis uses the non-parametric, two-sample (n and m) 
-  Kolmogorov-Smirnov test as the univariate test of of equality of distribution of global means.
+ * **MVK** --  This tests the null hypothesis that the baseline (n) and modified (m) model Short Independent 
+   Simulation Ensembles (SISE) represent the same climate state, based on the equality of distribution 
+   of each variable's annual global average in the standard monthly model output between the two 
+   simulations. The (per variable) null hypothesis uses the non-parametric, two-sample (n and m) 
+   Kolmogorov-Smirnov test as the univariate test of of equality of distribution of global means.
+  
+ * **PGN** -- This tests the null hypothesis that the reference (n) and modified (m) model
+   ensembles represent the same atmospheric state after each physics parameterization
+   is applied within a single time-step using the two-sample (n and m) T-test for equal
+   averages at a 95% confidence level. Ensembles are generated by repeating the
+   simulation for many initial conditions, with each initial condition subject to
+   multiple perturbations.
+ 
+ * **TSC** -- This tests the null hypothesis that the convergence of the time stepping error
+   for a set of key atmospheric variables is the same for a reference ensemble and
+   a test ensemble. Both the reference and test ensemble are generated with a
+   two-second time step, and for each variable the RMSD between each ensemble and
+   a truth ensemble, generated with a one-second time step, is calculated. At each 
+   10 second interval during the 10 minute long simulations, the difference
+   in the reference and test RMSDs for each variable, each ensemble member, and each
+   domain are calculated and these ΔRMSDs should be zero for identical climates. A
+   one sided (due to self convergence) Student's T Test is used to test the null
+   hypothesis that the ensemble mean ΔRMSD is statistically zero.
+ 
  
  
 Running the tests
 -----------------
 
 These tests are built into E3SM-CIME as system tests and will be launched using the `create_test` scripts. 
-*However*, since these tests use high level statistics, they have additional python dependencies which
+*However*, because these tests use high level statistics, they have additional python dependencies which
 need to be installed on your system and accessible via the compute nodes (if you're on a batch machine).
 Primarily, the statistical analysis of the climates is done through [EVV](https://github.com/LIVVkit/evv4esm) 
-(confluence page: [EVV](https://acme-climate.atlassian.net/wiki/spaces/EIDMG/pages/774177270/EVV)) which will
-generate a portable test website to describe the results (pass or fail) in detail (see the extended output 
-section below). The full set of additional requirements are listed in the `requirements.txt` file in this directory. To install these
-dependencies, make sure a python version `> 2.7` is loaded/installed, and then use `pip` like: 
+which will generate a portable test website to describe the results (pass or fail) in detail (see the extended output 
+section below). 
+
+For E3SM supported machines, the `e3sm_simple` conda environment is provided for these tests and includes the `EVV` 
+conda package. You can activate the `e3sm_simple` environment in the same way as `e3sm_unified` environment:   
 
 ```
-pip install --user -r requirements.txt
-``` 
+source <activate_path>/load_latest_e3sm_simple.sh
+```
+
+where `<activate_path>` is the machine-specific location of the activation script as described on this confluence page:
+
+https://acme-climate.atlassian.net/wiki/spaces/EIDMG/pages/780271950/Diagnostics+and+Analysis+Quickstart#DiagnosticsandAnalysisQuickstart-Accessingmetapackagesoftwarebyactivatingacondaenvironment
+
+If you don't have access to confluence or are unable to activate this environment for whatever reason, you can install
+your own `e3sm_simple` conda environment with this command (once you have anaconda/miniconda installed):  
+
+```
+conda create -n e3sm-simple -c conda-forge -c e3sm e3sm-simple
+```
 
-*NOTE: Work is underway to include all of these dependencies in the* `e3sm_unified` *conda environment,
-and allow CIME to optionally load and use this environment for these tests. Once this is done, you will
-not have to manage these dependencies yourself. If you run into problems with getting these dependencies
-working on your machine, please open an issue on E3SM's Github and tag @jhkennedy, or send 
-Joseph H. Kennedy <kennedyjh@ornl.gov> an email.*
+*NOTE: If you run into problems with getting this environment working on your machine, please open an issue on E3SM's 
+Github and tag @jhkennedy, or send Joseph H. Kennedy <kennedyjh@ornl.gov> an email.*
 
-After the dependencies are installed, change to the `$E3SM/cime/scripts` directory (where `$E3SM` is the 
+After you've activated the `e3sm_simple` environment, change to the `$E3SM/cime/scripts` directory (where `$E3SM` is the 
 directory containing E3SM). Then to run one of the tests, you will use the `create_test` script like normal. 
 To run the `MVK` test and generate a baseline, you would run a command like: 
 
@@ -56,58 +84,33 @@ To run the `MVK` test and generate a baseline, you would run a command like:
 
 And to compare to the baseline, you would run a command like: 
 
-
 ```
 ./create_test MVK_PL.ne4_oQU240.FC5AV1C-04P2 -c --baseline-root "/PATH/TO/BASELINE" 
 ```
 
-Importantly, these tests run a 20 member ensemble for at least 13 months (using the last 12 for the 
+*NOTE: The MVK run a 20 member ensemble for at least 13 months (using the last 12 for the 
 statistical tests) and, depending on the machine, may take some fiddling to execute within a particular 
 queue's wallclock time limit. You may want to over-ride the requested walltime using `--walltime HH:MM:SS` 
-option to `create_test`. Additionally, if messing with the wallclock time isn't enough, you can adjust the 
-`STOP_N` and `RESUBMIT` setting for the tests in `$E3SM/cime/config/config_tests.xml`. For example, the
-MVK test is setup like:   
-
-```xml
-  <test NAME="MVK">
-    <DESC>climate reproducibility test using the multivariate K-S test</DESC>
-    <INFO_DBUG>1</INFO_DBUG>
-    <DOUT_S>FALSE</DOUT_S>
-    <CONTINUE_RUN>FALSE</CONTINUE_RUN>
-    <STOP_OPTION>nmonths</STOP_OPTION>
-    <STOP_N>2</STOP_N>
-    <REST_OPTION>$STOP_OPTION</REST_OPTION>
-    <REST_N>$STOP_N</REST_N>
-    <HIST_OPTION>$STOP_OPTION</HIST_OPTION>
-    <HIST_N>$STOP_N</HIST_N>
-    <RESUBMIT>6</RESUBMIT>
-  </test>
-```  
-
-which will submit a job to run for two months, stop, and then resubmit itself 6 more times (continuing from 
-where it left off), leading to 14 months of simulation time. These settings, combined with the 
-`--walltime 02:00:00` option, allowed this test to run successfully on OLCF's Titan machine. The full set of
-commands to run the MVK test used on titan are:
-
-*Install dependencies (only need to do once):*
-```
-module load python
-cd $E3SM/cime/scripts/climate_reproducibility
-python pip --user -r requirements.txt
-``` 
+option to `create_test`.* 
+
+The full set of commands to run the MVK test used on Cori are:
 
 *Generate a baseline:*
 ```
-module load python  # need python 2.7; default is 2.6
 cd $E3SM/cime/scripts
-./create_test MVK_PL.ne4_oQU240.FC5AV1C-04P2 --baseline-root "${PROJWORK}/cli115/${USER}/baselines" -g --walltime 02:00:00
+
+source /global/project/projectdirs/acme/software/anaconda_envs/load_latest_e3sm_simple.sh
+
+./create_test MVK_PL.ne4_ne4.FC5AV1C-04P2 --baseline-root "${CSCRATCH}/baselines" --project acme -g -o --walltime 01:00:00
 ```
 
 *Compare to a baseline:*
 ```
-module load python  # need python 2.7; default is 2.6
 cd $E3SM/cime/scripts
-./create_test MVK_PL.ne4_oQU240.FC5AV1C-04P2 --baseline-root "${PROJWORK}/cli115/${USER}/baselines" -c --walltime 02:00:00
+
+source /global/project/projectdirs/acme/software/anaconda_envs/load_latest_e3sm_simple.sh
+
+./create_test MVK_PL.ne4_ne4.FC5AV1C-04P2 --baseline-root "${CSCRATCH}/baselines" --project acme -c --walltime 01:00:00
 ```
 
 Test pass/fail and extended output
@@ -117,9 +120,9 @@ When you launch these tests, CIME will ouput the location of the case directory,
 something like this:
 
 ```
-# On titan:
-./create_test MVK_PL.ne4_oQU240.FC5AV1C-04P2 --baseline-root "${PROJWORK}/cli115/${USER}/baselines" -c --walltime 02:00:00
-    Creating test directory /ccs/home/$USER/acme_scratch/cli115/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID
+# On cori-knl:
+./create_test MVK_PL.ne4_ne4.FC5AV1C-04P2 --baseline-root "${CSCRATCH}/baselines" --project acme -c --walltime 01:00:00
+    Creating test directory /global/cscratch1/sd/${USER}/acme_scratch/cori-knl/MVK_PL.ne4_ne4.FC5AV1C-04P2.cori-knl_intel.G.20190807_140111_rhfxn9
 ```
 
 Let's call that directory `$CASE_DIR`. Once all the jobs are finished, navigate to that directory and 
@@ -127,9 +130,9 @@ you can `cat TestStatus` to determine if the test passed or failed by looking at
 
 ```
 cd $CASE_DIR
-view TestStatus
+cat TestStatus
     ...
-    PASS MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi BASELINE
+    PASS MVK_PL.ne4_ne4.FC5AV1C-04P2.cori-knl_intel BASELINE
     ...
 
 ```
@@ -138,8 +141,9 @@ To get some basic summary statistics about the test that was run, look in the ou
 submission for EVV's analysis:
 
  ```
-view MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.test.oJOBID
+view MVK_PL.ne4_ne4.FC5AV1C-04P2.cori-knl_intel.G.20190807_140111_rhfxn9.C.YYYYMMDD_HHMMSS_RANDOMID.test.oJOBID
     ...
+    2019-08-14 22:09:02 CASE.RUN HAS FINISHED 
     -------------------------------------------------------------------- 
                        ______  __      __ __      __                     
                       |  ____| \ \    / / \ \    / /                     
@@ -151,17 +155,18 @@ view MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.test.oJ
         Extended Verification and Validation for Earth System Models     
     -------------------------------------------------------------------- 
      
-      Current run: 2018-08-02 23:24:26 
-      User: kennedy 
-      OS Type: Linux 3.0.101-0.46.1_1.0502.8871-cray_gem_s 
-      Machine: titan-batch7 
+      Current run: 2019-08-14 21:51:32 
+      User: ${USER} 
+      OS Type: Linux 4.12.14-25.22_5.0.70-cray_ari_c 
+      Machine: nid06598 
        
     ------------------------------------------------------------------- 
      ----------------------------------------------------------------- 
        Beginning extensions test suite  
-     ----------------------------------------------------------------- 
+     -----------------------------------------------------------------
+
      
-        Kolmogorov-Smirnov Test: YYYYMMDD_HHMMSS_RANDOMID 
+        Kolmogorov-Smirnov test: YYYYMMDD_HHMMSS_RANDOMID 
           Variables analyzed: 378 
           Rejecting: 9 
           Critical value: 13.0 
@@ -173,7 +178,7 @@ view MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.test.oJ
      
     ------------------------------------------------------------------- 
      Done!  Results can be seen in a web browser at: 
-       /lustre/atlas/proj-shared/cli115/$USER/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID/run/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.eve/index.html 
+       /global/cscratch1/sd/${USER}/acme_scratch/cori-knl/MVK_PL.ne4_ne4.FC5AV1C-04P2.cori-knl_intel.G.20190807_140111_rhfxn9/run/MVK_PL.ne4_ne4.FC5AV1C-04P2.cori-knl_intel.G.20190807_140111_rhfxn9.evv/index.html 
     -------------------------------------------------------------------
     ...
 ```
@@ -191,11 +196,10 @@ protocol). You can view the website by either copying it to a hosted location (`
 local http server (included in python!) and viewing it through an address like `http://0.0.0.0:8000/index.html`.  
 
 **For the easiest viewing** we recommend copying the website to your local machine, and using EVV to 
-view it. you can install EVV locally by running this command (will work with both python and anaconda 
-environments):
+view it. you can install EVV locally by running this command:
 
 ```
-pip install evv4esm
+conda install evv4esm
 ``` 
 
 Then, copy the website to your local machine, and view it: 
@@ -204,7 +208,7 @@ Then, copy the website to your local machine, and view it:
 ```
 # on your local machine
 scp -r /lustre/atlas/proj-shared/cli115/$USER/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID/run/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.eve . 
-evv -o MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.eve -s
+evv -o MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.evv -s
     --------------------------------------------------------------------
                        ______  __      __ __      __                    
                       |  ____| \ \    / / \ \    / /                    
@@ -217,7 +221,7 @@ evv -o MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.eve -
     --------------------------------------------------------------------
     
       Current run: 2018-08-06 15:15:03
-      User: fjk
+      User: ${USER}
       OS Type: Linux 4.15.0-29-generic
       Machine: pc0101123
       
@@ -226,7 +230,7 @@ evv -o MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.eve -
     
     View the generated website by navigating to:
     
-        http://0.0.0.0:8000/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.eve/index.html
+        http://0.0.0.0:8000/MVK_PL.ne4_oQU240.FC5AV1C-04P2.titan_pgi.C.YYYYMMDD_HHMMSS_RANDOMID.evv/index.html
     
     Exit by pressing `ctrl+c` to send a keyboard interrupt.
     
diff --git a/cime/scripts/lib/CIME/SystemTests/mvk.py b/cime/scripts/lib/CIME/SystemTests/mvk.py
index c4a828ca98a3..c7d26309a060 100644
--- a/cime/scripts/lib/CIME/SystemTests/mvk.py
+++ b/cime/scripts/lib/CIME/SystemTests/mvk.py
@@ -7,19 +7,14 @@
 """
 
 import os
-import re
-import stat
 import json
-import shutil
 import logging
 
+import CIME.test_status
 from CIME.SystemTests.system_tests_common import SystemTestsCommon
 from CIME.case.case_setup import case_setup
-from CIME.hist_utils import _get_all_hist_files, BLESS_LOG_NAME
-from CIME.utils import append_testlog, get_current_commit, get_timestamp, get_model
-from CIME.utils import expect
-
-import CIME.test_status
+from CIME.hist_utils import _get_all_hist_files
+from CIME.utils import safe_copy, SharedArea
 
 import evv4esm  # pylint: disable=import-error
 from evv4esm.__main__ import main as evv  # pylint: disable=import-error
@@ -27,8 +22,7 @@
 evv_lib_dir = os.path.abspath(os.path.dirname(evv4esm.__file__))
 logger = logging.getLogger(__name__)
 
-# Build executable with multiple instances
-ninst = 20
+NINST = 20
 
 
 class MVK(SystemTestsCommon):
@@ -45,7 +39,6 @@ def __init__(self, case):
         else:
             self._case.set_value("COMPARE_BASELINE", False)
 
-
     def build_phase(self, sharedlib_only=False, model_only=False):
         # Only want this to happen once. It will impact the sharedlib build
         # so it has to happen there.
@@ -56,9 +49,9 @@ def build_phase(self, sharedlib_only=False, model_only=False):
 
                 ntasks = self._case.get_value("NTASKS_{}".format(comp))
 
-                self._case.set_value('NTASKS_{}'.format(comp), ntasks*ninst)
+                self._case.set_value('NTASKS_{}'.format(comp), ntasks * NINST)
                 if comp != 'CPL':
-                    self._case.set_value('NINST_{}'.format(comp), ninst)
+                    self._case.set_value('NINST_{}'.format(comp), NINST)
 
             self._case.set_value('ATM_NCPL', 18)
 
@@ -68,99 +61,35 @@ def build_phase(self, sharedlib_only=False, model_only=False):
 
         self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only)
 
-        # =================================================================
-        # Run-time settings.
-        # Do this already in build_phase so that we can check the xml and
-        # namelist files before job starts running.
-        # =================================================================
-
-        # namelist specifications for each instance
-        for iinst in range(1, ninst+1):
+        for iinst in range(1, NINST + 1):
             with open('user_nl_cam_{:04d}'.format(iinst), 'w') as nl_atm_file:
                 nl_atm_file.write('new_random = .true.\n')
                 nl_atm_file.write('pertlim = 1.0e-10\n')
                 nl_atm_file.write('seed_custom = {}\n'.format(iinst))
 
-
     def _generate_baseline(self):
         """
         generate a new baseline case based on the current test
         """
-        with self._test_status:
-            # generate baseline
+        super(MVK, self)._generate_baseline()
 
-            # BEGIN: modified CIME.hist_utils.generate_baseline
-            rundir = self._case.get_value("RUNDIR")
+        with SharedArea():
             basegen_dir = os.path.join(self._case.get_value("BASELINE_ROOT"),
                                        self._case.get_value("BASEGEN_CASE"))
-            testcase = self._case.get_value("CASE")
-
-            if not os.path.isdir(basegen_dir):
-                os.makedirs(basegen_dir)
-
-            if os.path.isdir(os.path.join(basegen_dir, testcase)):
-                expect(False, " Cowardly refusing to overwrite existing baseline directory")
 
-            comments = "Generating baselines into '{}'\n".format(basegen_dir)
-            num_gen = 0
-
-            model = 'atm'
-            comments += "  generating for model '{}'\n".format(model)
-            hists = _get_all_hist_files(testcase, model, rundir)
-            logger.debug("mvk_hist_files: {}".format(hists))
+            rundir = self._case.get_value("RUNDIR")
+            ref_case = self._case.get_value("RUN_REFCASE")
 
-            num_gen += len(hists)
+            model = 'cam'
+            hists = _get_all_hist_files(model, rundir, [r'h\d*.*\.nc'], ref_case=ref_case)
+            logger.debug("MVK additional baseline files: {}".format(hists))
             for hist in hists:
                 basename = hist[hist.rfind(model):]
                 baseline = os.path.join(basegen_dir, basename)
                 if os.path.exists(baseline):
                     os.remove(baseline)
 
-                shutil.copy(hist, baseline)
-                comments += "    generating baseline '{}' from file {}\n".format(baseline, hist)
-
-            newestcpllogfile = self._case.get_latest_cpl_log(coupler_log_path=self._case.get_value("LOGDIR"))
-            if newestcpllogfile is None:
-                logger.warning("No cpl.log file found in log directory {}".format(self._case.get_value("LOGDIR")))
-            else:
-                shutil.copyfile(newestcpllogfile,
-                                os.path.join(basegen_dir, "cpl.log.gz"))
-
-            expect(num_gen > 0, "Could not generate any hist files for case '{}', something is seriously wrong".format(
-                os.path.join(rundir, testcase)))
-            # make sure permissions are open in baseline directory
-            for root, _, files in os.walk(basegen_dir):
-                for name in files:
-                    try:
-                        os.chmod(os.path.join(root, name),
-                                 stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH)
-                    except OSError:
-                        # We tried. Not worth hard failure here.
-                        pass
-
-            if get_model() == "e3sm":
-                bless_log = os.path.join(basegen_dir, BLESS_LOG_NAME)
-                with open(bless_log, "a") as fd:
-                    fd.write("sha:{} date:{}\n".format(get_current_commit(repo=self._case.get_value("CIMEROOT")),
-                                                       get_timestamp(timestamp_format="%Y-%m-%d_%H:%M:%S")))
-            # END: modified CIME.hist_utils.generate_baseline
-
-            append_testlog(comments)
-            status = CIME.test_status.TEST_PASS_STATUS
-            baseline_name = self._case.get_value("BASEGEN_CASE")
-            self._test_status.set_status("{}".format(CIME.test_status.GENERATE_PHASE), status,
-                                         comments=os.path.dirname(baseline_name))
-            basegen_dir = os.path.join(self._case.get_value("BASELINE_ROOT"), self._case.get_value("BASEGEN_CASE"))
-            # copy latest cpl log to baseline
-            # drop the date so that the name is generic
-            newestcpllogfiles = self._get_latest_cpl_logs()
-            for cpllog in newestcpllogfiles:
-                m = re.search(r"/(cpl.*.log).*.gz", cpllog)
-                if m is not None:
-                    baselog = os.path.join(basegen_dir, m.group(1)) + ".gz"
-                    shutil.copyfile(cpllog,
-                                    os.path.join(basegen_dir, baselog))
-
+                safe_copy(hist, baseline, preserve_meta=False)
 
     def _compare_baseline(self):
         with self._test_status:
@@ -184,11 +113,12 @@ def _compare_baseline(self):
             evv_config = {
                 test_name: {
                     "module": os.path.join(evv_lib_dir, "extensions", "ks.py"),
-                    "case1": "Test",
-                    "dir1": run_dir,
-                    "case2": "Baseline",
-                    "dir2": base_dir,
-                    "ninst": ninst,
+                    "test-case": "Test",
+                    "test-dir": run_dir,
+                    "ref-case": "Baseline",
+                    "ref-dir": base_dir,
+                    "var-set": "default",
+                    "ninst": NINST,
                     "critical": 13
                 }
             }
@@ -200,13 +130,13 @@ def _compare_baseline(self):
             evv_out_dir = os.path.join(run_dir, '.'.join([case_name, 'evv']))
             evv(['-e', json_file, '-o', evv_out_dir])
 
-            with open(os.path.join(evv_out_dir, 'index.json'), 'r') as evv_f:
+            with open(os.path.join(evv_out_dir, 'index.json')) as evv_f:
                 evv_status = json.load(evv_f)
 
             for evv_elem in evv_status['Data']['Elements']:
                 if evv_elem['Type'] == 'ValSummary' \
-                        and evv_elem['TableTitle'] == 'Kolmogorov-Smirnov':
-                    if evv_elem['Data'][test_name]['']['Ensembles'] == 'identical':
+                        and evv_elem['TableTitle'] == 'Kolmogorov-Smirnov test':
+                    if evv_elem['Data'][test_name]['']['Test status'].lower() == 'pass':
                         self._test_status.set_status(CIME.test_status.BASELINE_PHASE,
                                                      CIME.test_status.TEST_PASS_STATUS)
                         break
diff --git a/cime/scripts/lib/CIME/SystemTests/pgn.py b/cime/scripts/lib/CIME/SystemTests/pgn.py
index 52f274bbee41..026c7a6e599b 100644
--- a/cime/scripts/lib/CIME/SystemTests/pgn.py
+++ b/cime/scripts/lib/CIME/SystemTests/pgn.py
@@ -50,36 +50,14 @@ def __init__(self, case):
         """
         initialize an object interface to the PGN test
         """
-        super(PGN, self).__init__(self, case)
+        super(PGN, self).__init__(case)
 
     def build_phase(self, sharedlib_only=False, model_only=False):
         ninst = NUMBER_INITIAL_CONDITIONS * len(PERTURBATIONS)
         logger.debug('PGN_INFO: number of instance: '+str(ninst))
 
-        # Find number of instance in the default setup
         default_ninst = self._case.get_value("NINST_ATM")
 
-        # Sanity check: see if NINST is same for all model components, otherwise
-        # exit with error
-        for comp in self._case.get_values("COMP_CLASSES"):
-            if comp != 'CPL':
-                iinst = self._case.get_value("NINST_{}".format(comp))
-                expect(default_ninst == iinst,
-                       "ERROR: component {}  NINST({}) is different from  component"
-                       " ATM NINST({}).".format(comp, iinst, default_ninst))
-
-        # ------------------------------------------------------
-        # Setup multi-instances for model components:
-        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-        # Set the model for multi-instance ONLY if NINST == 1
-        # for all model components. This is because, for
-        # NINST > 1 (e.g. rebuilding an old case) the following
-        # loop will increase the ntasks to a multiple of ninst
-        # (requiring a clean build again). We hit this issue if
-        # we launch ./case.build in the case directory of PGN
-        # test
-        # ------------------------------------------------------
-
         if default_ninst == 1:  # if multi-instance is not already set
             # Only want this to happen once. It will impact the sharedlib build
             # so it has to happen here.
@@ -109,7 +87,7 @@ def build_phase(self, sharedlib_only=False, model_only=False):
 
         iinst = 1
         for icond in range(1, NUMBER_INITIAL_CONDITIONS + 1):
-            fatm_in = os.path.join(csmdata_atm, INIT_COND_FILE_TEMPLATE.format('atm', 'i', icond))
+            fatm_in = os.path.join(csmdata_atm, INIT_COND_FILE_TEMPLATE.format('cam', 'i', icond))
             flnd_in = os.path.join(csmdata_lnd, INIT_COND_FILE_TEMPLATE.format('clm2', 'r', icond))
             for iprt in PERTURBATIONS.values():
                 with open('user_nl_cam_{:04d}'.format(iinst), 'w') as atmnlfile, \
@@ -118,7 +96,6 @@ def build_phase(self, sharedlib_only=False, model_only=False):
                     atmnlfile.write("ncdata  = '{}' \n".format(fatm_in))
                     lndnlfile.write("finidat = '{}' \n".format(flnd_in))
 
-                    # atm model output
                     atmnlfile.write("avgflag_pertape = 'I' \n")
                     atmnlfile.write("nhtfrq = 1 \n")
                     atmnlfile.write("mfilt  = 2  \n")
@@ -126,13 +103,6 @@ def build_phase(self, sharedlib_only=False, model_only=False):
                     atmnlfile.write("pergro_mods  = .true. \n")
                     atmnlfile.write("pergro_test_active = .true. \n")
 
-                    # atmnlfile.write("empty_htapes = .true. \n")
-                    # atmnlfile.write("fincl1 = 'PS','U','V','T','Q','CLDLIQ',"
-                    #                 "'CLDICE','NUMLIQ','NUMICE','num_a1',"
-                    #                 "'num_a2','num_a3','LANDFRAC' \n")
-                    # atmnlfile.write("phys_debug_lat = 41.3495891345")
-                    # atmnlfile.write("phys_debug_lon = 45.0" )
-
                     if iprt != 0.0:
                         atmnlfile.write("pertlim = {} \n".format(iprt))
 
@@ -190,7 +160,6 @@ def _compare_baseline(self):
                     "ninit": NUMBER_INITIAL_CONDITIONS,
                     "init-file-template": INIT_COND_FILE_TEMPLATE,
                     "instance-file-template": INSTANCE_FILE_TEMPLATE,
-
                 }
             }
 
@@ -224,7 +193,7 @@ def run_phase(self):
         logger.debug("PGN_INFO: Case name is:{}".format(casename))
 
         for icond in range(NUMBER_INITIAL_CONDITIONS):
-            for iprt, (prt, prt_name) in enumerate(PERTURBATIONS.items()):
+            for iprt, (prt_name, prt_value) in enumerate(PERTURBATIONS.items()):
                 iinst = pg._sub2instance(icond, iprt, len(PERTURBATIONS))
                 fname = os.path.join(rundir, INSTANCE_FILE_TEMPLATE.format(casename + '.', iinst, ''))
                 renamed_fname = re.sub(r'\.nc$', '_{}.nc'.format(prt_name), fname)
@@ -244,9 +213,8 @@ def run_phase(self):
     def _generate_baseline(self):
         super(PGN, self)._generate_baseline()
 
-        base_root = self._case.get_value("BASELINE_ROOT")
-        base_gen = self._case.get_value("BASEGEN_CASE")
-        base_dir = os.path.join(base_root, base_gen)
+        basegen_dir = os.path.join(self._case.get_value("BASELINE_ROOT"),
+                                   self._case.get_value("BASEGEN_CASE"))
 
         rundir = self._case.get_value("RUNDIR")
         casename = self._case.get_value("CASE")
@@ -274,7 +242,7 @@ def _generate_baseline(self):
         cld_rmse = np.reshape(rmse.RMSE.values, (NUMBER_INITIAL_CONDITIONS, nprt - 1, nvar))
 
         pg.rmse_writer(os.path.join(rundir, FCLD_NC),
-                       cld_rmse, PERTURBATIONS.keys(), var_list, INIT_COND_FILE_TEMPLATE)
+                       cld_rmse, list(PERTURBATIONS.keys()), var_list, INIT_COND_FILE_TEMPLATE)
 
-        logger.debug("PGN_INFO:copy:{} to {}".format(FCLD_NC, base_dir))
-        shutil.copy(os.path.join(rundir, FCLD_NC), base_dir)
+        logger.debug("PGN_INFO:copy:{} to {}".format(FCLD_NC, basegen_dir))
+        shutil.copy(os.path.join(rundir, FCLD_NC), basegen_dir)
diff --git a/cime/scripts/lib/CIME/SystemTests/tsc.py b/cime/scripts/lib/CIME/SystemTests/tsc.py
index 90be18488204..86e50f7eb26c 100644
--- a/cime/scripts/lib/CIME/SystemTests/tsc.py
+++ b/cime/scripts/lib/CIME/SystemTests/tsc.py
@@ -7,155 +7,175 @@
 This class inherits from SystemTestsCommon.
 """
 
-from CIME.XML.standard_module_setup import *
+import os
+import json
+import logging
+
+import CIME.test_status
 from CIME.SystemTests.system_tests_common import SystemTestsCommon
 from CIME.case.case_setup import case_setup
-from CIME.build import post_build
-from CIME.hist_utils import rename_all_hist_files
-from CIME.test_status import *
-#import CIME.utils
-#from CIME.check_lockedfiles import *
+from CIME.hist_utils import rename_all_hist_files, _get_all_hist_files
+from CIME.utils import safe_copy, SharedArea
+
+import evv4esm  # pylint: disable=import-error
+from evv4esm.__main__ import main as evv  # pylint: disable=import-error
+
+evv_lib_dir = os.path.abspath(os.path.dirname(evv4esm.__file__))
 
 logger = logging.getLogger(__name__)
 
-class TSC(SystemTestsCommon):
 
+NINST = 12
+SIM_LENGTH = 600  # seconds
+OUT_FREQ = 10  # seconds
+INSPECT_AT = [300, 450, 600]  # seconds
+INIT_COND_FILE_TEMPLATE = \
+    "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.{}.{}.0002-{:02d}-01-00000.nc"
+VAR_LIST = ["T", "Q", "V", "CLDLIQ", "CLDICE", "NUMLIQ", "NUMICE", "num_a1", "num_a2", "num_a3"]
+P_THRESHOLD = 0.005
+
+
+class TSC(SystemTestsCommon):
     def __init__(self, case):
         """
         initialize an object interface to the TSC test
         """
-        SystemTestsCommon.__init__(self, case)
+        super(TSC, self).__init__(case)
 
-    #=================================================================
-    # Compile model with multiple instances
-    #=================================================================
     def build_phase(self, sharedlib_only=False, model_only=False):
-
-        ninst = 12
-
-        #BSINGH: For debugging only - Building the model can take
-        #significant amount of time. Setting fake_bld to True can save
-        #that time
-        fake_bld = False
-
-        # Build exectuable with multiple instances
-        # (in the development phase we use 3)
-        # Lay all of the components out concurrently
-
         # Only want this to happen once. It will impact the sharedlib build
         # so it has to happen there.
         if not model_only:
             logging.warning("Starting to build multi-instance exe")
-            for comp in ['ATM','OCN','WAV','GLC','ICE','ROF','LND']:
-                ntasks = self._case.get_value("NTASKS_%s"%comp)
-                self._case.set_value("ROOTPE_%s"%comp, 0)
-                self._case.set_value("NINST_%s"%comp,  ninst)
-                self._case.set_value("NTASKS_%s"%comp, ntasks*ninst)
-
-            self._case.set_value("ROOTPE_CPL",0)
-            self._case.set_value("NTASKS_CPL",ntasks*ninst)
+            for comp in ['ATM', 'OCN', 'WAV', 'GLC', 'ICE', 'ROF', 'LND']:
+                ntasks = self._case.get_value("NTASKS_{}".format(comp))
+                self._case.set_value("ROOTPE_{}".format(comp), 0)
+                self._case.set_value("NINST_{}".format(comp),  NINST)
+                self._case.set_value("NTASKS_{}".format(comp), ntasks * NINST)
+
+            self._case.set_value("ROOTPE_CPL", 0)
+            self._case.set_value("NTASKS_CPL", ntasks * NINST)
             self._case.flush()
 
             case_setup(self._case, test_mode=False, reset=True)
 
-        #BSINGH: Faking a bld can save the time code spend in building the model components
-        if fake_bld:
-            if (not sharedlib_only):
-                post_build(self._case, [])
-        else:
-            self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only)
-
-    #=================================================================
-    # Conduct one multi-instance run with specified time step size.
-    # The complete run_phase (defined below) will contain two calls
-    # of this function using different dtime.
-    #=================================================================
-    def _run_with_specified_dtime(self,dtime=2):
-
-        # dtime is model time step size in seconds. Default is set to 2.
+        self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only)
 
-        # Change the coupling frequency accordingly
-        ncpl  = 86400/dtime
-        self._case.set_value("ATM_NCPL",ncpl)
+    def _run_with_specified_dtime(self, dtime=2):
+        """
+        Conduct one multi-instance run with a specified time step size.
 
-        # Simulation length = 10 minutes
-        nsteps = 600/dtime
-        self._case.set_value("STOP_N",     nsteps)
-        self._case.set_value("STOP_OPTION","nsteps")
+        :param dtime (int): Specified time step size in seconds
+        """
+        coupling_frequency = 86400 // dtime
+        self._case.set_value("ATM_NCPL", str(coupling_frequency))
 
-        # Write output every 10 seconds
-        nhtfrq = 10/dtime
+        nsteps = SIM_LENGTH // dtime
+        self._case.set_value("STOP_N", str(nsteps))
+        self._case.set_value("STOP_OPTION", "nsteps")
 
-        # generate paths/file names for initial conditons
         csmdata_root = self._case.get_value("DIN_LOC_ROOT")
-        csmdata_atm  = csmdata_root+"/atm/cam/inic/homme/ne4_v1_init/"
-        csmdata_lnd  = csmdata_root+"/lnd/clm2/initdata/ne4_v1_init/b58d55680/"
-        file_pref_atm = "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.cam.i.0002-"
-        file_pref_lnd = "SMS_Ly5.ne4_ne4.FC5AV1C-04P2.eos_intel.ne45y.clm2.r.0002-"
-
-        file_suf_atm = "-01-00000.nc"
-        file_suf_lnd = "-01-00000.nc"
-
-        # user_nl_... files are modified for every instance.
-        # The number instances specified here has to be consistent with the
-        # value set in build_phase.
-        ninst = 12
-        for iinst in range(1, ninst+1):
+        csmdata_atm = os.path.join(csmdata_root, "atm/cam/inic/homme/ne4_v1_init")
+        csmdata_lnd = os.path.join(csmdata_root, "lnd/clm2/initdata/ne4_v1_init/b58d55680")
 
+        nstep_output = OUT_FREQ // dtime
+        for iinst in range(1, NINST+1):
             with open('user_nl_cam_'+str(iinst).zfill(4), 'w') as atmnlfile, \
                  open('user_nl_clm_'+str(iinst).zfill(4), 'w') as lndnlfile:
 
-                # atm/lnd intitial conditions
-
-                inst_label_2digits = str(iinst).zfill(2)
-
-                atmnlfile.write("ncdata  = '"+ csmdata_atm + "/" + file_pref_atm + inst_label_2digits + file_suf_atm+"' \n")
-                lndnlfile.write("finidat = '"+ csmdata_lnd + "/" + file_pref_lnd + inst_label_2digits + file_suf_lnd+"' \n")
+                fatm_in = os.path.join(csmdata_atm, INIT_COND_FILE_TEMPLATE.format('cam', 'i', iinst))
+                flnd_in = os.path.join(csmdata_lnd, INIT_COND_FILE_TEMPLATE.format('clm2', 'r', iinst))
+                atmnlfile.write("ncdata  = '{}' \n".format(fatm_in))
+                lndnlfile.write("finidat = '{}' \n".format(flnd_in))
 
-                # for initial testing on constance@pnnl
-                #atmnlfile.write("ncdata  = '"+ "/pic/projects/uq_climate/wanh895/acme_input/ne4_v1_init/" + file_pref_atm + inst_label_2digits + file_suf_atm+"' \n")
-                #lndnlfile.write("finidat = '"+ "/pic/projects/uq_climate/wanh895/acme_input/ne4_v1_init/" + file_pref_lnd + inst_label_2digits + file_suf_lnd+"' \n")
+                lndnlfile.write("dtime = {} \n".format(dtime))
 
-                # time step sizes
-
-                atmnlfile.write("dtime = "+str(dtime)+" \n")
+                atmnlfile.write("dtime = {} \n".format(dtime))
                 atmnlfile.write("iradsw = 2 \n")
                 atmnlfile.write("iradlw = 2 \n")
 
-                lndnlfile.write("dtime = "+str(dtime)+" \n")
-
-                # atm model output
-
                 atmnlfile.write("avgflag_pertape = 'I' \n")
-                atmnlfile.write("nhtfrq = "+str(nhtfrq)+" \n")
-                atmnlfile.write("mfilt  = 1  \n")
-                atmnlfile.write("ndens  = 1  \n")
+                atmnlfile.write("nhtfrq = {} \n".format(nstep_output))
+                atmnlfile.write("mfilt = 1 \n")
+                atmnlfile.write("ndens = 1 \n")
                 atmnlfile.write("empty_htapes = .true. \n")
-                atmnlfile.write("fincl1 = 'PS','U','V','T','Q','CLDLIQ','CLDICE','NUMLIQ','NUMICE','num_a1','num_a2','num_a3','LANDFRAC' \n")
+                atmnlfile.write("fincl1 = 'PS','U','LANDFRAC',{} \n".format(
+                        ''.join(["'{}',".format(s) for s in VAR_LIST])[:-1]
+                ))
 
         # Force rebuild namelists
         self._skip_pnl = False
 
-        # Namelist settings done. Now run the model.
         self.run_indv()
 
-        # Append "DT000x" to the history file names
-        rename_all_hist_files( self._case, suffix="DT"+str(dtime).zfill(4) )
+        rename_all_hist_files(self._case, suffix="DT{:04d}".format(dtime))
 
-
-    #=================================================================
-    # run_phase
-    #=================================================================
     def run_phase(self):
-
-        # Run simulations with 2-s time step
         self._run_with_specified_dtime(dtime=2)
 
-        # Run simulations with 1-s time step. This is NOT needed
-        # for testing new code or new computing environment.
         if self._case.get_value("GENERATE_BASELINE"):
             self._run_with_specified_dtime(dtime=1)
+
     def _compare_baseline(self):
-        #currently faking it as PENDING
         with self._test_status as ts:
-            ts.set_status(BASELINE_PHASE, TEST_PEND_STATUS)
+            ts.set_status(CIME.test_status.BASELINE_PHASE,
+                          CIME.test_status.TEST_FAIL_STATUS)
+
+            run_dir = self._case.get_value("RUNDIR")
+            case_name = self._case.get_value("CASE")
+            base_dir = os.path.join(self._case.get_value("BASELINE_ROOT"),
+                                    self._case.get_value("BASECMP_CASE"))
+
+            test_name = "{}".format(case_name.split('.')[-1])
+            evv_config = {
+                test_name: {
+                    "module": os.path.join(evv_lib_dir, "extensions", "tsc.py"),
+                    "test-case": case_name,
+                    "test-dir": run_dir,
+                    "ref-case": "Baseline",
+                    "ref-dir": base_dir,
+                    "time-slice": [OUT_FREQ, SIM_LENGTH],
+                    "inspect-times": INSPECT_AT,
+                    "variables": VAR_LIST,
+                    "p-threshold": P_THRESHOLD,
+                }
+            }
+
+            json_file = os.path.join(run_dir, '.'.join([case_name, 'json']))
+            with open(json_file, 'w') as config_file:
+                json.dump(evv_config, config_file, indent=4)
+
+            evv_out_dir = os.path.join(run_dir, '.'.join([case_name, 'evv']))
+            evv(['-e', json_file, '-o', evv_out_dir])
+
+            with open(os.path.join(evv_out_dir, 'index.json'), 'r') as evv_f:
+                evv_status = json.load(evv_f)
+
+            for evv_elem in evv_status['Data']['Elements']:
+                if evv_elem['Type'] == 'ValSummary' \
+                        and evv_elem['TableTitle'] == 'Time step convergence test':
+                    if evv_elem['Data'][test_name]['']['Test status'].lower() == 'pass':
+                        self._test_status.set_status(CIME.test_status.BASELINE_PHASE,
+                                                     CIME.test_status.TEST_PASS_STATUS)
+                        break
+
+    def _generate_baseline(self):
+        super(TSC, self)._generate_baseline()
+
+        with SharedArea():
+            basegen_dir = os.path.join(self._case.get_value("BASELINE_ROOT"),
+                                       self._case.get_value("BASEGEN_CASE"))
+
+            rundir = self._case.get_value("RUNDIR")
+            ref_case = self._case.get_value("RUN_REFCASE")
+
+            model = 'cam'
+            hists = _get_all_hist_files(model, rundir, [r'h\d*.*\.nc\.DT\d*'], ref_case=ref_case)
+            logger.debug("TSC additional baseline files: {}".format(hists))
+            for hist in hists:
+                basename = hist[hist.rfind(model):]
+                baseline = os.path.join(basegen_dir, basename)
+                if os.path.exists(baseline):
+                    os.remove(baseline)
+
+                safe_copy(hist, baseline, preserve_meta=False)
diff --git a/cime/scripts/lib/CIME/hist_utils.py b/cime/scripts/lib/CIME/hist_utils.py
index ddbc0b17385d..8d795a0b09da 100644
--- a/cime/scripts/lib/CIME/hist_utils.py
+++ b/cime/scripts/lib/CIME/hist_utils.py
@@ -574,7 +574,7 @@ def _generate_baseline_impl(case, baseline_dir=None, allow_baseline_overwrite=Fa
     testname = case.get_value("TESTCASE")
     testopts = parse_test_name(case.get_value("CASEBASEID"))[1]
     testopts = [] if testopts is None else testopts
-    expect(num_gen > 0 or (testname == "PFS" or "B" in testopts),
+    expect(num_gen > 0 or (testname in ["PFS", "TSC"] or "B" in testopts),
            "Could not generate any hist files for case '{}', something is seriously wrong".format(os.path.join(rundir, testcase)))
 
     if get_model() == "e3sm":

From 20a08960907453b40caa9da4c5d1e896f440b3a1 Mon Sep 17 00:00:00 2001
From: Joseph H Kennedy <me@jhkennedy.org>
Date: Thu, 15 Aug 2019 15:48:29 -0400
Subject: [PATCH 8/8] Add MVK test to e3sm_atm_nbfb test suite

---
 cime/config/e3sm/tests.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cime/config/e3sm/tests.py b/cime/config/e3sm/tests.py
index 3efa11f2c443..7a2def01c985 100644
--- a/cime/config/e3sm/tests.py
+++ b/cime/config/e3sm/tests.py
@@ -88,6 +88,7 @@
         "tests" : (
             "PGN_P1x1.ne4_ne4.FC5AV1C-L",
             "TSC.ne4_ne4.FC5AV1C-L",
+            "MVK_PL.ne4_ne4.FC5AV1C-L",
             )
         },