diff --git a/scripts/Tools/code_checker b/scripts/Tools/code_checker index 9bd5c8989c87..d15a4032e1e8 100755 --- a/scripts/Tools/code_checker +++ b/scripts/Tools/code_checker @@ -107,7 +107,7 @@ def _main_func(description): sys.exit(1 if test_results.failed > 0 else 0) pylint = find_executable("pylint") - expect(pylint is not None,"pylint not found") + expect(pylint is not None, "pylint not found") dir_to_check, num_procs, files = parse_command_line(sys.argv, description) diff --git a/scripts/Tools/getTiming b/scripts/Tools/getTiming index 1b1c6bccf453..a2ba123ec3b4 100755 --- a/scripts/Tools/getTiming +++ b/scripts/Tools/getTiming @@ -4,11 +4,9 @@ Get timing information from run """ from standard_script_setup import * -import argparse, sys, os, logging -import datetime, shutil, re +import argparse, sys, os from CIME.case import Case - -logger = logging.getLogger(__name__) +from CIME.get_timing import get_timing def parse_command_line(args, description): parser = argparse.ArgumentParser( @@ -27,599 +25,14 @@ def parse_command_line(args, description): CIME.utils.handle_standard_logging_options(args) return args.caseroot, args.lid - -class getTimingInfo: - def __init__(self, name): - self.name = name - self.tmin = 0 - self.tmax = 0 - self.adays = 0 - -class TimingParser: - def __init__(self, caseroot, lid="999999-999999"): - self.caseroot = caseroot - self.lid = lid - self.fin = None - self.fout = None - self.adays=0 - self.models = {} - - def write(self, text): - self.fout.write(text) - - def prttime(self, label, offset=None, div=None, coff=-999): - if offset is None: - offset=self.models['CPL'].offset - if div is None: - div = self.adays - datalen = 20 - cstr = "<---->" - clen = len(cstr) - - minval, maxval, found = self.gettime(label) - if div >= 1.0: - mind = minval/div - maxd = maxval/div - else: - mind = minval - maxd = maxval - - pstrlen = 25 - if mind >= 0 and maxd >= 0 and found: - if coff >= 0: - zoff = pstrlen + coff + int((datalen-clen)/2) - csp = offset - coff - int((datalen-clen)/2) - self.write(" {label:<{width1}}{cstr:<{width2}} " - "{minv:8.3f}:{maxv:8.3f} \n".format(label=label, - width1=zoff, - cstr=cstr, - width2=csp, - minv=mind, - maxv=maxd)) - else: - zoff = pstrlen + offset - self.write(" {label:<{width1}} {minv:8.3f}:{maxv:8.3f} \n" - .format(label=label, width1=zoff, - minv=mind, maxv=maxd)) - - def gettime2(self, heading_padded): - nprocs = 0 - ncount = 0 - - heading = heading_padded.strip() - for line in self.finlines: - m = re.match(r'\s*%s\s*(\d+)\s*\d+\s*(\S+)'%heading, line) - if m: - nprocs = int(float(m.groups()[0])) - ncount = int(float(m.groups()[1])) - return (nprocs, ncount) - else: - m = re.match(r'\s*%s\s+(\d+)\s'%heading, line) - if m: - nprocs = 1 - ncount = int(float(m.groups()[0])) - return (nprocs, ncount) - return (0, 0) - - def gettime(self, heading_padded): - found = False - heading = heading_padded.strip() - minval = 0 - maxval = 0 - - for line in self.finlines: - m = re.match(r'\s*%s\s*\d+\s*\d+\s*\S+\s*\S+\s*(\d*\.\d+)' - r'\s*\(.*\)\s*(\d*\.\d+)\s*\(.*\)'%heading, line) - if m: - maxval = float(m.groups()[0]) - minval = float(m.groups()[1]) - found = True - return (minval, maxval, found) - return (0, 0, False) - - - - def getTiming(self): - with Case(self.caseroot) as case: - components=case.get_value("COMP_CLASSES").split(',') - components[components.index("DRV")]="CPL" - for s in components: - self.models[s] = getTimingInfo(s) - atm = self.models['ATM'] - lnd = self.models['LND'] - rof = self.models['ROF'] - wav = self.models['WAV'] - ice = self.models['ICE'] - ocn = self.models['OCN'] - glc = self.models['GLC'] - cpl = self.models['CPL'] - cime_model = case.get_value("MODEL") - caseid = case.get_value("CASE") - mach = case.get_value("MACH") - user = case.get_value("USER") - continue_run = case.get_value("CONTINUE_RUN") - rundir = case.get_value("RUNDIR") - run_type = case.get_value("RUN_TYPE") - ncpl_base_period = case.get_value("NCPL_BASE_PERIOD") - atm_ncpl = case.get_value("ATM_NCPL") - ocn_ncpl = case.get_value("OCN_NCPL") - ccsm_compset = case.get_value("CCSM_COMPSET") - if ccsm_compset is None: - ccsm_compset = "" - grid = case.get_value("GRID") - run_type = case.get_value("RUN_TYPE") - stop_option = case.get_value("STOP_OPTION") - stop_n = case.get_value("STOP_N") - cost_pes = case.get_value("COST_PES") - totalpes = case.get_value("TOTALPES") - pes_per_node = case.get_value("PES_PER_NODE") - - if cost_pes > 0: - pecost = cost_pes - else: - pecost = totalpes - - for m in self.models.values(): - for key in ["NTASKS", "ROOTPE", "PSTRID", "NTHRDS", "NINST"]: - if key == "NINST" and m.name == "CPL": - m.ninst = 1 - else: - setattr(m, key.lower(), - int(case.get_value("%s_%s" % (key, m.name)))) - - m.comp = case.get_value("COMP_%s" % (m.name)) - m.pemax = m.rootpe + m.ntasks * m.pstrid - 1 - - now = datetime.datetime.ctime(datetime.datetime.now()) - inittype = "FALSE" - if (run_type == "startup" or run_type == "hybrid") and \ - not continue_run: - inittype = "TRUE" - - binfilename = os.path.join(rundir, "timing", "model_timing_stats") - finfilename = os.path.join(self.caseroot, "timing", - "%s_timing_stats.%s" % (cime_model, self.lid)) - foutfilename = os.path.join(self.caseroot, "timing", - "%s_timing.%s.%s" % (cime_model, caseid, self.lid)) - try: - shutil.copyfile(binfilename, finfilename) - except Exception, e: - if not os.path.isfile(binfilename): - logger.critical("File %s not found" % binfilename) - else: - logger.critical("Unable to cp %s to %s" % - (binfilename, finfilename)) - raise e - - os.chdir(self.caseroot) - try: - fin = open(finfilename, "r") - self.finlines = fin.readlines() - fin.close() - except Exception, e: - logger.critical("Unable to open file %s" % finfilename) - raise e - - tlen = 1.0 - if ncpl_base_period == "decade": - tlen = 3650.0 - elif ncpl_base_period == "year": - tlen = 365.0 - elif ncpl_base_period == "day": - tlen = 1.0 - elif ncpl_base_period == "hour": - tlen = 1.0/24.0 - else: - logger.warning("Unknown NCPL_BASE_PERIOD=%s" % ncpl_base_period) - - nprocs, ncount = self.gettime2('CPL:CLOCK_ADVANCE ') - nsteps = ncount / nprocs - adays = nsteps*tlen/atm_ncpl - odays = adays - if inittype == "TRUE": - odays = adays - (tlen/ocn_ncpl) - peminmax = max([m.rootpe for m in self.models.values()])+1 - if ncpl_base_period in ["decade","year","day"] and int(adays) > 0: - adays = int(adays) - if tlen % ocn_ncpl == 0: - odays = int(odays) - self.adays = adays - maxoffset = 40 - extraoff = 20 - for m in self.models.values(): - m.offset = int((maxoffset*m.rootpe)/peminmax) + extraoff - cpl.offset = 0 - try: - self.fout = open(foutfilename, "w") - except Exception, e: - logger.critical("Could not open file for writing: %s" - % foutfilename) - raise e - - self.write("---------------- TIMING PROFILE ---------------------\n") - - self.write(" Case : %s\n" % caseid) - self.write(" LID : %s\n" % self.lid) - self.write(" Machine : %s\n" % mach) - self.write(" Caseroot : %s\n" % self.caseroot) - self.write(" Timeroot : %s/Tools\n" % self.caseroot) - self.write(" User : %s\n" % user) - self.write(" Curr Date : %s\n" % now) - - self.write(" grid : %s\n" % grid) - self.write(" compset : %s\n" % ccsm_compset) - self.write(" run_type : %s, continue_run = %s " - "(inittype = %s)\n" % - (run_type, str(continue_run).upper(), inittype)) - self.write(" stop_option : %s, stop_n = %s\n" % - (stop_option, stop_n)) - self.write(" run_length : %s days (%s for ocean)\n\n" % - (adays, odays)) - - self.write(" component comp_pes root_pe tasks " - "x threads" - " instances (stride) \n") - self.write(" --------- ------ ------- ------ " - "------ --------- ------ \n") - - for k in ['CPL', 'GLC', 'WAV', 'LND', 'ROF', 'ICE', 'ATM', 'OCN']: - m = self.models[k] - self.write(" %s = %-8s %-6u %-6u %-6u x %-6u " - "%-6u (%-6u) \n" - % (m.name.lower(), m.comp, m.ntasks, m.rootpe, - m.ntasks, m.nthrds, m.ninst, m.pstrid)) - - nmin, nmax, found = self.gettime(' CPL:INIT ') - tmin, tmax, found = self.gettime(' CPL:RUN_LOOP ') - wtmin, wtmax, found = self.gettime(' CPL:TPROF_WRITE ') - fmin, fmax, found = self.gettime(' CPL:FINAL ') - for k in components: - if k != "CPL": - m = self.models[k] - m.tmin, m.tmax, found = self.gettime(' CPL:%s_RUN ' % m.name) - otmin, otmax, found = self.gettime(' CPL:OCNT_RUN ') - - # pick OCNT_RUN for tight coupling - if otmax > ocn.tmax: - ocn.tmin = otmin - ocn.tmax = otmax - - cpl.tmin, cpl.tmax, found = self.gettime(' CPL:RUN ') - xmin, xmax, found = self.gettime(' CPL:COMM ') - ocnwaittime, ignore, found = self.gettime(' CPL:C2O_INITWAIT') - - if odays != 0: - ocnrunitime = ocn.tmax * (adays/odays - 1.0) - else: - ocnrunitime = 0.0 - - - correction = max(0, ocnrunitime - ocnwaittime) - - tmax = tmax + wtmax + correction - ocn.tmax += ocnrunitime - - for m in self.models.values(): - m.tmaxr = 0 - if m.tmax > 0: - m.tmaxr = adays*86400.0/(m.tmax*365.0) - xmaxr = 0 - if xmax > 0: - xmaxr = adays*86400.0/(xmax*365.0) - tmaxr = 0 - if tmax > 0: - tmaxr = adays*86400.0/(tmax*365.0) - - self.write("\n") - self.write(" total pes active : %s \n" % totalpes) - self.write(" pes per node : %s \n" % pes_per_node) - self.write(" pe count for cost estimate : %s \n" % pecost) - self.write("\n") - - self.write(" Overall Metrics: \n") - self.write(" Model Cost: %10.2f pe-hrs/simulated_year \n" - % ((tmax*365.0*pecost)/(3600.0*adays))) - self.write(" Model Throughput: %10.2f simulated_years/day \n" - % ((86400.0*adays)/(tmax*365.0))) - self.write("\n") - - self.write(" Init Time : %10.3f seconds \n" % nmax) - self.write(" Run Time : %10.3f seconds %10.3f seconds/day \n" - % (tmax, tmax/adays)) - self.write(" Final Time : %10.3f seconds \n" % fmax) - - self.write("\n") - - self.write(" Actual Ocn Init Wait Time : %10.3f seconds \n" - % ocnwaittime) - self.write(" Estimated Ocn Init Run Time : %10.3f seconds \n" - % ocnrunitime) - self.write(" Estimated Run Time Correction : %10.3f seconds \n" - % correction) - self.write(" (This correction has been applied to the ocean and" - " total run times) \n") - - self.write("\n") - self.write("Runs Time in total seconds, seconds/model-day, and" - " model-years/wall-day \n") - self.write("CPL Run Time represents time in CPL pes alone, " - "not including time associated with data exchange " - "with other components \n") - self.write("\n") - - self.write(" TOT Run Time: %10.3f seconds %10.3f seconds/mday " - "%10.2f myears/wday \n" % (tmax, tmax/adays, tmaxr)) - for k in ['LND', 'ROF', 'ICE', 'ATM', 'OCN', 'GLC', 'WAV', 'CPL']: - m = self.models[k] - self.write(" %s Run Time: %10.3f seconds " - "%10.3f seconds/mday %10.2f myears/wday \n" - % (k, m.tmax, m.tmax/adays, m.tmaxr)) - self.write(" CPL COMM Time: %10.3f seconds %10.3f seconds/mday " - "%10.2f myears/wday \n" % (xmax, xmax/adays, xmaxr)) - - self.write("\n\n---------------- DRIVER TIMING FLOWCHART " - "--------------------- \n\n") - - pstrlen = 25 - hoffset = 1 - self.write(" NOTE: min:max driver timers (seconds/day): \n") - - for k in ["CPL", "OCN", "LND", "ROF", "ICE", "ATM", "GLC", "WAV"]: - m = self.models[k] - xspace = (pstrlen+hoffset+m.offset) * ' ' - self.write(" %s %s (pes %u to %u) \n" - % (xspace, k, m.rootpe, m.pemax)) - self.write("\n") - - self.prttime(' CPL:CLOCK_ADVANCE ') - self.prttime(' CPL:OCNPRE1_BARRIER ') - self.prttime(' CPL:OCNPRE1 ') - self.prttime(' CPL:ATMOCN1_BARRIER ') - self.prttime(' CPL:ATMOCN1 ') - self.prttime(' CPL:OCNPREP_BARRIER ') - self.prttime(' CPL:OCNPREP ') - self.prttime(' CPL:C2O_BARRIER ', offset=ocn.offset, div=odays, - coff=cpl.offset) - self.prttime(' CPL:C2O ', offset=ocn.offset, div=odays, coff=cpl.offset) - self.prttime(' CPL:LNDPREP_BARRIER ') - self.prttime(' CPL:LNDPREP ') - self.prttime(' CPL:C2L_BARRIER ', offset=lnd.offset, coff=cpl.offset) - self.prttime(' CPL:C2L ', offset=lnd.offset, coff=cpl.offset) - self.prttime(' CPL:ICEPREP_BARRIER ') - self.prttime(' CPL:ICEPREP ') - self.prttime(' CPL:C2I_BARRIER ', offset=ice.offset, coff=cpl.offset) - self.prttime(' CPL:C2I ', offset=ice.offset, coff=cpl.offset) - self.prttime(' CPL:WAVPREP_BARRIER ') - self.prttime(' CPL:WAVPREP ') - self.prttime(' CPL:C2W_BARRIER ', offset=ice.offset, coff=cpl.offset) - self.prttime(' CPL:C2W ', offset=ice.offset, coff=cpl.offset) - self.prttime(' CPL:ROFPREP_BARRIER ') - self.prttime(' CPL:ROFPREP ') - self.prttime(' CPL:C2R_BARRIER ', offset=rof.offset, coff=cpl.offset) - self.prttime(' CPL:C2R ', offset=rof.offset, coff=cpl.offset) - self.prttime(' CPL:ICE_RUN_BARRIER ', offset=ice.offset) - self.prttime(' CPL:ICE_RUN ', offset=ice.offset) - self.prttime(' CPL:LND_RUN_BARRIER ', offset=lnd.offset) - self.prttime(' CPL:LND_RUN ', offset=lnd.offset) - self.prttime(' CPL:ROF_RUN_BARRIER ', offset=rof.offset) - self.prttime(' CPL:ROF_RUN ', offset=rof.offset) - self.prttime(' CPL:WAV_RUN_BARRIER ', offset=rof.offset) - self.prttime(' CPL:WAV_RUN ', offset=rof.offset) - self.prttime(' CPL:OCNT_RUN_BARRIER ', offset=ocn.offset, div=odays) - self.prttime(' CPL:OCNT_RUN ', offset=ocn.offset, div=odays) - self.prttime(' CPL:O2CT_BARRIER ', offset=ocn.offset, div=odays, - coff=cpl.offset) - self.prttime(' CPL:O2CT ', offset=ocn.offset, div=odays, - coff=cpl.offset) - self.prttime(' CPL:OCNPOSTT_BARRIER ') - self.prttime(' CPL:OCNPOSTT ') - self.prttime(' CPL:ATMOCNP_BARRIER ') - self.prttime(' CPL:ATMOCNP ') - self.prttime(' CPL:L2C_BARRIER ', offset=lnd.offset, coff=cpl.offset) - self.prttime(' CPL:L2C ', offset=lnd.offset, div=cpl.offset) - self.prttime(' CPL:LNDPOST_BARRIER ') - self.prttime(' CPL:LNDPOST ') - self.prttime(' CPL:GLCPREP_BARRIER ') - self.prttime(' CPL:GLCPREP ') - self.prttime(' CPL:C2G_BARRIER ', offset=glc.offset, coff=cpl.offset) - self.prttime(' CPL:C2G ', offset=glc.offset, coff=cpl.offset) - self.prttime(' CPL:R2C_BARRIER ', offset=rof.offset, coff=cpl.offset) - self.prttime(' CPL:R2C ', offset=rof.offset, coff=cpl.offset) - self.prttime(' CPL:ROFPOST_BARRIER ') - self.prttime(' CPL:ROFPOST ') - self.prttime(' CPL:BUDGET1_BARRIER ') - self.prttime(' CPL:BUDGET1 ') - self.prttime(' CPL:I2C_BARRIER ', offset=ice.offset, coff=cpl.offset) - self.prttime(' CPL:I2C ', offset=ice.offset, coff=cpl.offset) - self.prttime(' CPL:ICEPOST_BARRIER ') - self.prttime(' CPL:ICEPOST ') - self.prttime(' CPL:FRACSET_BARRIER ') - self.prttime(' CPL:FRACSET ') - self.prttime(' CPL:ATMOCN2_BARRIER ') - self.prttime(' CPL:ATMOCN2 ') - self.prttime(' CPL:OCNPRE2_BARRIER ') - self.prttime(' CPL:OCNPRE2 ') - self.prttime(' CPL:C2O2_BARRIER ', offset=ocn.offset, div=odays, - coff=cpl.offset) - self.prttime(' CPL:C2O2 ', offset=ocn.offset, div=odays, - coff=cpl.offset) - self.prttime(' CPL:ATMOCNQ_BARRIER') - self.prttime(' CPL:ATMOCNQ ') - self.prttime(' CPL:ATMPREP_BARRIER ') - self.prttime(' CPL:ATMPREP ') - self.prttime(' CPL:C2A_BARRIER ', offset=atm.offset, coff=cpl.offset) - self.prttime(' CPL:C2A ', offset=atm.offset, coff=cpl.offset) - self.prttime(' CPL:OCN_RUN_BARRIER ', offset=ocn.offset, div=odays) - self.prttime(' CPL:OCN_RUN ', offset=ocn.offset, div=odays) - self.prttime(' CPL:ATM_RUN_BARRIER ', offset=atm.offset) - self.prttime(' CPL:ATM_RUN ', offset=atm.offset) - self.prttime(' CPL:GLC_RUN_BARRIER ', offset=glc.offset) - self.prttime(' CPL:GLC_RUN ', offset=glc.offset) - self.prttime(' CPL:W2C_BARRIER ', offset=glc.offset, coff=cpl.offset) - self.prttime(' CPL:W2C ', offset=glc.offset, coff=cpl.offset) - self.prttime(' CPL:WAVPOST_BARRIER ') - self.prttime(' CPL:WAVPOST ', cpl.offset) - self.prttime(' CPL:G2C_BARRIER ', offset=glc.offset, coff=cpl.offset) - self.prttime(' CPL:G2C ', offset=glc.offset, coff=cpl.offset) - self.prttime(' CPL:GLCPOST_BARRIER ') - self.prttime(' CPL:GLCPOST ') - self.prttime(' CPL:A2C_BARRIER ', offset=atm.offset, coff=cpl.offset) - self.prttime(' CPL:A2C ', offset=atm.offset, coff=cpl.offset) - self.prttime(' CPL:ATMPOST_BARRIER ') - self.prttime(' CPL:ATMPOST ') - self.prttime(' CPL:BUDGET2_BARRIER ') - self.prttime(' CPL:BUDGET2 ') - self.prttime(' CPL:BUDGET3_BARRIER ') - self.prttime(' CPL:BUDGET3 ') - self.prttime(' CPL:BUDGETF_BARRIER ') - self.prttime(' CPL:BUDGETF ') - self.prttime(' CPL:O2C_BARRIER ', offset=ocn.offset, - div=odays, coff=cpl.offset) - self.prttime(' CPL:O2C ', offset=ocn.offset, div=odays, coff=cpl.offset) - self.prttime(' CPL:OCNPOST_BARRIER ') - self.prttime(' CPL:OCNPOST ') - self.prttime(' CPL:RESTART_BARRIER ') - self.prttime(' CPL:RESTART') - self.prttime(' CPL:HISTORY_BARRIER ') - self.prttime(' CPL:HISTORY ') - self.prttime(' CPL:TSTAMP_WRITE ') - self.prttime(' CPL:TPROF_WRITE ') - self.prttime(' CPL:RUN_LOOP_BSTOP ') - - self.write("\n\n") - self.write("More info on coupler timing:\n") - - self.write("\n") - self.prttime(' CPL:OCNPRE1 ') - self.prttime(' CPL:ocnpre1_atm2ocn ') - - self.write("\n") - self.prttime(' CPL:OCNPREP ') - self.prttime(' CPL:OCNPRE2 ') - self.prttime(' CPL:ocnprep_avg ') - self.prttime(' CPL:ocnprep_diagav ') - - self.write("\n") - self.prttime(' CPL:LNDPREP ') - self.prttime(' CPL:lndprep_atm2lnd ') - self.prttime(' CPL:lndprep_mrgx2l ') - self.prttime(' CPL:lndprep_diagav ') - - self.write("\n") - self.prttime(' CPL:ICEPREP ') - self.prttime(' CPL:iceprep_ocn2ice ') - self.prttime(' CPL:iceprep_atm2ice ') - self.prttime(' CPL:iceprep_mrgx2i ') - self.prttime(' CPL:iceprep_diagav ') - - self.write("\n") - self.prttime(' CPL:WAVPREP ') - self.prttime(' CPL:wavprep_atm2wav ') - self.prttime(' CPL:wavprep_ocn2wav ') - self.prttime(' CPL:wavprep_ice2wav ') - self.prttime(' CPL:wavprep_mrgx2w ') - self.prttime(' CPL:wavprep_diagav ') - - self.write("\n") - self.prttime(' CPL:ROFPREP ') - self.prttime(' CPL:rofprep_l2xavg ') - self.prttime(' CPL:rofprep_lnd2rof ') - self.prttime(' CPL:rofprep_mrgx2r ') - self.prttime(' CPL:rofprep_diagav ') - - self.write("\n") - self.prttime(' CPL:GLCPREP ') - self.prttime(' CPL:glcprep_avg ') - self.prttime(' CPL:glcprep_lnd2glc ') - self.prttime(' CPL:glcprep_mrgx2g ') - self.prttime(' CPL:glcprep_diagav ') - - self.write("\n") - self.prttime(' CPL:ATMPREP ') - self.prttime(' CPL:atmprep_xao2atm ') - self.prttime(' CPL:atmprep_ocn2atm ') - self.prttime(' CPL:atmprep_alb2atm ') - self.prttime(' CPL:atmprep_ice2atm ') - self.prttime(' CPL:atmprep_lnd2atm ') - self.prttime(' CPL:atmprep_mrgx2a ') - self.prttime(' CPL:atmprep_diagav ') - - self.write("\n") - self.prttime(' CPL:ATMOCNP ') - self.prttime(' CPL:ATMOCN1 ') - self.prttime(' CPL:ATMOCN2 ') - self.prttime(' CPL:atmocnp_ice2ocn ') - self.prttime(' CPL:atmocnp_wav2ocn ') - self.prttime(' CPL:atmocnp_fluxo ') - self.prttime(' CPL:atmocnp_fluxe ') - self.prttime(' CPL:atmocnp_mrgx2o ') - self.prttime(' CPL:atmocnp_accum ') - self.prttime(' CPL:atmocnp_ocnalb ') - - self.write("\n") - self.prttime(' CPL:ATMOCNQ ') - self.prttime(' CPL:atmocnq_ocn2atm ') - self.prttime(' CPL:atmocnq_fluxa ') - self.prttime(' CPL:atmocnq_atm2ocnf ') - - self.write("\n") - self.prttime(' CPL:OCNPOSTT ') - self.prttime(' CPL:OCNPOST ') - self.prttime(' CPL:ocnpost_diagav ') - - self.write("\n") - self.prttime(' CPL:LNDPOST ') - self.prttime(' CPL:lndpost_diagav ') - self.prttime(' CPL:lndpost_acc2lr ') - self.prttime(' CPL:lndpost_acc2lg ') - - self.write("\n") - self.prttime(' CPL:ROFOST ') - self.prttime(' CPL:rofpost_diagav ') - self.prttime(' CPL:rofpost_histaux ') - self.prttime(' CPL:rofpost_rof2lnd ') - self.prttime(' CPL:rofpost_rof2ice ') - self.prttime(' CPL:rofpost_rof2ocn ') - - self.write("\n") - self.prttime(' CPL:ICEPOST ') - self.prttime(' CPL:icepost_diagav ') - - self.write("\n") - self.prttime(' CPL:WAVPOST ') - self.prttime(' CPL:wavpost_diagav ') - - self.write("\n") - self.prttime(' CPL:GLCPOST ') - self.prttime(' CPL:glcpost_diagav ') - self.prttime(' CPL:glcpost_glc2lnd ') - self.prttime(' CPL:glcpost_glc2ice ') - self.prttime(' CPL:glcpost_glc2ocn ') - - self.write("\n") - self.prttime(' CPL:ATMPOST ') - self.prttime(' CPL:atmpost_diagav ') - - self.write("\n") - self.prttime(' CPL:BUDGET ') - self.prttime(' CPL:BUDGET1 ') - self.prttime(' CPL:BUDGET2 ') - self.prttime(' CPL:BUDGET3 ') - self.prttime(' CPL:BUDGETF ') - self.write("\n\n") - - self.fout.close() - - def __main_func(description): """ Reads timing information from $CASEROOT/timing/$MODEL_timing_stats.$lid and outputs to $CASEROOT/timing/$MODEL_timing.$CASE.$lid """ caseroot, lid = parse_command_line(sys.argv, description) - parser = TimingParser(caseroot, lid) - parser.getTiming() - return 0 + with Case(caseroot, read_only=True) as case: + get_timing(case, lid) if __name__ == "__main__": __main_func(__doc__) diff --git a/scripts/Tools/save_provenance b/scripts/Tools/save_provenance new file mode 100755 index 000000000000..fff7a57817e4 --- /dev/null +++ b/scripts/Tools/save_provenance @@ -0,0 +1,79 @@ +#!/usr/bin/env python + +""" +This tool provide command-line access to provenance-saving functionality +""" + +from standard_script_setup import * + +from CIME.case import Case +from CIME.provenance import * +from CIME.utils import get_lids +from CIME.get_timing import get_timing + +############################################################################### +def parse_command_line(args, description): +############################################################################### + parser = argparse.ArgumentParser( + usage="""\n%s [] [--verbose] +OR +%s --help +OR +%s --test + +\033[1mEXAMPLES:\033[0m + \033[1;32m# Save run (timing) provenance for current case \033[0m + > %s postrun +""" % ((os.path.basename(args[0]), ) * 4), + description=description, + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + CIME.utils.setup_standard_logging_options(parser) + + parser.add_argument("mode", choices=("build", "prerun", "postrun"), + help="Phase for which to save provenance. " + "prerun is mostly for infrastructure testing; " + "it does not make sense to store this information manually otherwise") + + parser.add_argument("caseroot", nargs="?", default=os.getcwd(), + help="Case directory") + + parser.add_argument("-l", "--lid", + help="Force system to save provenance with this LID") + + args = parser.parse_args(args[1:]) + + CIME.utils.handle_standard_logging_options(args) + + return args.mode, args.caseroot, args.lid + +############################################################################### +def _main_func(description): +############################################################################### + mode, caseroot, lid = parse_command_line(sys.argv, description) + with Case(caseroot, read_only=False) as case: + if mode == "build": + expect(False, "Saving build provenance manually is not currently supported " + "but it should already always be happening automatically") + save_build_provenance(case, lid=lid) + elif mode == "prerun": + expect(lid is not None, "You must provide LID for prerun mode") + save_prerun_provenance(case, lid=lid) + elif mode == "postrun": + expect(lid is None, "Please allow me to autodetect LID") + model = case.get_value("MODEL") + caseid = case.get_value("CASE") + case.set_value("SAVE_TIMING", True) + lids = get_lids(case) + for lid in lids: + # call get_timing if needed + expected_timing_file = os.path.join(caseroot, "timing", "%s_timing.%s.%s.gz" % (model, caseid, lid)) + if (not os.path.exists(expected_timing_file)): + get_timing(case, lid) + save_postrun_provenance(case, lid=lid) + else: + expect(False, "Unhandled mode '%s'" % mode) + +if __name__ == "__main__": + _main_func(__doc__) diff --git a/utils/python/CIME/build.py b/utils/python/CIME/build.py index d13a0cd0cfe4..cba4dc785df8 100644 --- a/utils/python/CIME/build.py +++ b/utils/python/CIME/build.py @@ -5,6 +5,7 @@ from CIME.utils import get_model, append_status from CIME.preview_namelists import preview_namelists from CIME.check_input_data import check_input_data +from CIME.provenance import save_build_provenance import glob, shutil, time, threading, gzip, subprocess logger = logging.getLogger(__name__) @@ -137,6 +138,8 @@ def post_build(case, logs): shutil.copy("env_build.xml", "LockedFiles") + save_build_provenance(case) + ############################################################################### def case_build(caseroot, case, sharedlib_only=False, model_only=False): ############################################################################### @@ -593,5 +596,3 @@ def clean(case, cleanlist=None): # append call of to CaseStatus msg = "cleanbuild %s "%" ".join(cleanlist) append_status(msg, caseroot=caseroot, sfile="CaseStatus") - -############################################################################### diff --git a/utils/python/CIME/case_run.py b/utils/python/CIME/case_run.py index 89d4bfd3ca20..f5cbf55626f5 100644 --- a/utils/python/CIME/case_run.py +++ b/utils/python/CIME/case_run.py @@ -2,12 +2,14 @@ from CIME.XML.standard_module_setup import * from CIME.case_submit import submit from CIME.XML.machines import Machines -from CIME.utils import append_status, touch, gzip_existing_file +from CIME.utils import append_status, gzip_existing_file from CIME.check_lockedfiles import check_lockedfiles from CIME.preview_namelists import preview_namelists from CIME.task_maker import TaskMaker +from CIME.get_timing import get_timing +from CIME.provenance import save_prerun_provenance, save_postrun_provenance -import shutil, time, sys, os, getpass, tarfile, glob, signal +import shutil, time, sys, os, glob logger = logging.getLogger(__name__) @@ -131,192 +133,6 @@ def post_run_check(case, lid): append_status(msg, caseroot=caseroot, sfile="CaseStatus") expect (False, msg) -############################################################################### -def _get_batch_job_id(case): -############################################################################### - mach = case.get_value("MACH") - if mach == 'titan': - return os.environ("PBS_JOBID") - elif mach in ['edison', 'corip1']: - return os.environ("SLURM_JOB_ID") - elif mach == 'mira': - return os.environ("COBALT_JOBID") - else: - return None - -############################################################################### -def save_timing_setup_acme(case, lid): -############################################################################### - if not case.get_value("SAVE_TIMING") or case.get_value("MODEL") != "acme": - return - - timing_dir = case.get_value("SAVE_TIMING_DIR") - if timing_dir is None or timing_dir == 'UNSET': - logger.warning("ACME requires SAVE_TIMING_DIR to be set in order to save timings. Skipping save timings") - return - logger.warn("timing dir is %s"%timing_dir) - rundir = case.get_value("RUNDIR") - caseroot = case.get_value("CASEROOT") - cimeroot = case.get_value("CIMEROOT") - base_case = case.get_value("CASEBASEID") - full_timing_dir = os.path.join(timing_dir, "performance_archive", getpass.getuser(), base_case, lid) - expect(not os.path.exists(full_timing_dir), "%s already exists" % full_timing_dir) - - os.makedirs(full_timing_dir) - mach = case.get_value("MACH") - compiler = case.get_value("COMPILER") - - # For some batch machines save queue info - job_id = _get_batch_job_id(case) - if mach == "mira": - for cmd, filename in [("qstat -lf", "qstatf"), ("qstat -lf %s" % job_id, "qstatf_jobid")]: - run_cmd_no_fail("%s > %s.%s" % (cmd, filename, lid), from_dir=full_timing_dir) - gzip_existing_file(os.path.join(full_timing_dir, filename)) - elif mach == ["corip1", "edison"]: - for cmd, filename in [("sqs -f", "sqsf"), ("sqs -w -a", "sqsw"), ("sqs -f %s" % job_id, "sqsf_jobid"), ("squeue", "squeuef")]: - run_cmd_no_fail("%s > %s.%s" % (cmd, filename, lid), from_dir=full_timing_dir) - gzip_existing_file(os.path.join(full_timing_dir, filename)) - elif mach == "titan": - for cmd, filename in [("xtdb2proc -f xtdb2proc", "xtdb2procf"), - ("qstat -f > qstat", "qstatf"), - ("qstat -f %s > qstatf_jobid" % job_id, "qstatf_jobid"), - ("xtnodestat > xtnodestat", "xtnodestatf"), - ("showq > showqf", "showqf")]: - run_cmd_no_fail(cmd + "." + lid, from_dir=full_timing_dir) - gzip_existing_file(os.path.join(full_timing_dir, filename + "." + lid)) - - mdiag_reduce = os.path.join(full_timing_dir, "mdiag_reduce." + lid) - run_cmd_no_fail("./mdiag_reduce.csh > %s" % mdiag_reduce, from_dir=os.path.join(caseroot, "Tools")) - gzip_existing_file(mdiag_reduce) - - # copy/tar SourceModes - source_mods_dir = os.path.join(caseroot, "SourceMods") - if os.path.isdir(source_mods_dir): - with tarfile.open(os.path.join(full_timing_dir, "SourceMods.%s.tar.gz" % lid), "w:gz") as tfd: - tfd.add(source_mods_dir) - - # Save various case configuration items - case_docs = os.path.join(full_timing_dir, "CaseDocs") - os.mkdir(case_docs) - globs_to_copy = [ - "CaseDocs/*", - "*.run", - "*.xml", - "user_nl_*", - "*env_mach_specific*", - "Macros", - "README.case", - "Depends.%s" % mach, - "Depends.%s" % compiler, - "Depends.%s.%s" % (mach, compiler), - "software_environment.txt" - ] - for glob_to_copy in globs_to_copy: - for item in glob.glob(os.path.join(caseroot, glob_to_copy)): - shutil.copy(item, os.path.join(case_docs, os.path.basename(item) + "." + lid)) - - if job_id is not None: - sample_interval = case.get_value("SYSLOG_N") - if sample_interval > 0: - archive_checkpoints = os.path.join(full_timing_dir, "checkpoints") - os.mkdir(archive_checkpoints) - touch("%s/acme.log.%s" % (rundir, lid)) - syslog_jobid = run_cmd_no_fail("./mach_syslog %d %s %s %s %s/timing/checkpoints %s/checkpoints >& /dev/null & echo $!" % - (sample_interval, job_id, lid, rundir, rundir, archive_checkpoints), - from_dir=os.path.join(caseroot, "Tools")) - with open(os.path.join(rundir, "syslog_jobid", ".%s" % job_id), "w") as fd: - fd.write("%s\n" % syslog_jobid) - - # Save state of repo - run_cmd_no_fail("git describe > %s" % os.path.join(full_timing_dir, "GIT_DESCRIBE"), from_dir=cimeroot) - -############################################################################### -def save_timing_cesm(case, lid): -############################################################################### - rundir = case.get_value("RUNDIR") - timing_dir = case.get_value("SAVE_TIMING_DIR") - timing_dir = os.path.join(timing_dir, case.get_value("CASE")) - shutil.move(os.path.join(rundir,"timing"), - os.path.join(timing_dir,"timing."+lid)) - -############################################################################### -def save_timing_acme(case, lid): -############################################################################### - rundir = case.get_value("RUNDIR") - timing_dir = case.get_value("SAVE_TIMING_DIR") - caseroot = case.get_value("CASEROOT") - mach = case.get_value("MACH") - base_case = case.get_value("CASEBASEID") - full_timing_dir = os.path.join(timing_dir, "performance_archive", getpass.getuser(), base_case, lid) - - # Kill mach_syslog - job_id = _get_batch_job_id(case) - if job_id is not None: - syslog_jobid_path = os.path.join(rundir, "syslog_jobid", ".%s" % job_id) - if os.path.exists(syslog_jobid_path): - try: - with open(syslog_jobid_path, "r") as fd: - syslog_jobid = int(fd.read().strip()) - os.kill(syslog_jobid, signal.SIGTERM) - except (ValueError, OSError) as e: - logger.warning("Failed to kill syslog: %s" % e) - finally: - os.remove(syslog_jobid_path) - - # copy/tar timings - with tarfile.open(os.path.join(full_timing_dir, "timing.%s.tar.gz" % lid), "w:gz") as tfd: - tfd.add(os.path.join(rundir, "timing")) - - # - # save output files and logs - # - globs_to_copy = [] - if mach == "titan": - globs_to_copy.append("%s*OU" % job_id) - elif mach == "mira": - globs_to_copy.append("%s*output" % job_id) - globs_to_copy.append("%s*cobaltlog" % job_id) - elif mach in ["edison", "corip1"]: - globs_to_copy.append("%s" % case.get_value("CASE")) - - globs_to_copy.append("logs/acme.log.%s.gz" % lid) - globs_to_copy.append("logs/cpl.log.%s.gz" % lid) - globs_to_copy.append("timing/*.%s" % lid) - globs_to_copy.append("CaseStatus") - - for glob_to_copy in globs_to_copy: - for item in glob.glob(os.path.join(caseroot, glob_to_copy)): - shutil.copy(item, full_timing_dir) - -############################################################################### -def get_timings(case, lid): -############################################################################### - check_timing = case.get_value("CHECK_TIMING") - if check_timing: - caseroot = case.get_value("CASEROOT") - timingDir = os.path.join(caseroot, "timing") - if not os.path.isdir(timingDir): - os.makedirs(timingDir) - - logger.info("Running timing script %s " %(os.path.join(caseroot, "Tools", "getTiming"))) - run_cmd_no_fail("%s -lid %s " % (os.path.join(caseroot, "Tools", "getTiming"), lid)) - - # save the timing files if desired. Some of the details here are - # model dependent. - model = case.get_value("MODEL") - save_timing = case.get_value("SAVE_TIMING") - if save_timing: - if model == "acme": - save_timing_acme(case, lid) - else: - save_timing_cesm(case, lid) - - # compress relevant timing files - logger.info( "gzipping timing stats.." ) - timingfile = os.path.join(timingDir, model + "_timing_stats." + lid) - gzip_existing_file(timingfile) - logger.info("Done with timings") - ############################################################################### def save_logs(case, lid): ############################################################################### @@ -393,17 +209,21 @@ def case_run(case): lid = time.strftime("%y%m%d-%H%M%S") os.environ["LID"] = lid - save_timing_setup_acme(case, lid) + save_prerun_provenance(case) for _ in range(data_assimilation_cycles): pre_run_check(case) run_model(case) post_run_check(case, lid) save_logs(case, lid) # Copy log files back to caseroot - get_timings(case, lid) # Run the getTiming script + if case.get_value("CHECK_TIMING") or case.get_value("SAVE_TIMING"): + get_timing(case, lid) # Run the getTiming script + if data_assimilation: do_data_assimilation(data_assimilation_script, lid) + save_postrun_provenance(case) + logger.warn("check for resubmit") resubmit_check(case) diff --git a/utils/python/CIME/get_timing.py b/utils/python/CIME/get_timing.py new file mode 100644 index 000000000000..db4fbc37a53c --- /dev/null +++ b/utils/python/CIME/get_timing.py @@ -0,0 +1,600 @@ +#!/usr/bin/env python + +""" +Library for implementing getTiming tool which gets timing +information from a run. +""" + +from CIME.XML.standard_module_setup import * + +import datetime, shutil, re, gzip + +logger = logging.getLogger(__name__) + +class _GetTimingInfo: + def __init__(self, name): + self.name = name + self.tmin = 0 + self.tmax = 0 + self.adays = 0 + +class _TimingParser: + def __init__(self, case, lid="999999-999999"): + self.case = case + self.caseroot = case.get_value("CASEROOT") + self.lid = lid + self.finlines = None + self.fout = None + self.adays=0 + self.models = {} + + def write(self, text): + self.fout.write(text) + + def prttime(self, label, offset=None, div=None, coff=-999): + if offset is None: + offset=self.models['CPL'].offset + if div is None: + div = self.adays + datalen = 20 + cstr = "<---->" + clen = len(cstr) + + minval, maxval, found = self.gettime(label) + if div >= 1.0: + mind = minval/div + maxd = maxval/div + else: + mind = minval + maxd = maxval + + pstrlen = 25 + if mind >= 0 and maxd >= 0 and found: + if coff >= 0: + zoff = pstrlen + coff + int((datalen-clen)/2) + csp = offset - coff - int((datalen-clen)/2) + self.write(" {label:<{width1}}{cstr:<{width2}} " + "{minv:8.3f}:{maxv:8.3f} \n".format(label=label, + width1=zoff, + cstr=cstr, + width2=csp, + minv=mind, + maxv=maxd)) + else: + zoff = pstrlen + offset + self.write(" {label:<{width1}} {minv:8.3f}:{maxv:8.3f} \n" + .format(label=label, width1=zoff, + minv=mind, maxv=maxd)) + + def gettime2(self, heading_padded): + nprocs = 0 + ncount = 0 + + heading = heading_padded.strip() + for line in self.finlines: + m = re.match(r'\s*%s\s*(\d+)\s*\d+\s*(\S+)'%heading, line) + if m: + nprocs = int(float(m.groups()[0])) + ncount = int(float(m.groups()[1])) + return (nprocs, ncount) + else: + m = re.match(r'\s*%s\s+(\d+)\s'%heading, line) + if m: + nprocs = 1 + ncount = int(float(m.groups()[0])) + return (nprocs, ncount) + return (0, 0) + + def gettime(self, heading_padded): + found = False + heading = heading_padded.strip() + minval = 0 + maxval = 0 + + for line in self.finlines: + m = re.match(r'\s*%s\s*\d+\s*\d+\s*\S+\s*\S+\s*(\d*\.\d+)' + r'\s*\(.*\)\s*(\d*\.\d+)\s*\(.*\)'%heading, line) + if m: + maxval = float(m.groups()[0]) + minval = float(m.groups()[1]) + found = True + return (minval, maxval, found) + return (0, 0, False) + + def getTiming(self): + components=self.case.get_value("COMP_CLASSES").split(',') + components[components.index("DRV")]="CPL" + for s in components: + self.models[s] = _GetTimingInfo(s) + atm = self.models['ATM'] + lnd = self.models['LND'] + rof = self.models['ROF'] + ice = self.models['ICE'] + ocn = self.models['OCN'] + glc = self.models['GLC'] + cpl = self.models['CPL'] + cime_model = self.case.get_value("MODEL") + caseid = self.case.get_value("CASE") + mach = self.case.get_value("MACH") + user = self.case.get_value("USER") + continue_run = self.case.get_value("CONTINUE_RUN") + rundir = self.case.get_value("RUNDIR") + run_type = self.case.get_value("RUN_TYPE") + ncpl_base_period = self.case.get_value("NCPL_BASE_PERIOD") + atm_ncpl = self.case.get_value("ATM_NCPL") + ocn_ncpl = self.case.get_value("OCN_NCPL") + ccsm_compset = self.case.get_value("CCSM_COMPSET") + if ccsm_compset is None: + ccsm_compset = "" + grid = self.case.get_value("GRID") + run_type = self.case.get_value("RUN_TYPE") + stop_option = self.case.get_value("STOP_OPTION") + stop_n = self.case.get_value("STOP_N") + cost_pes = self.case.get_value("COST_PES") + totalpes = self.case.get_value("TOTALPES") + pes_per_node = self.case.get_value("PES_PER_NODE") + + if cost_pes > 0: + pecost = cost_pes + else: + pecost = totalpes + + for m in self.models.values(): + for key in ["NTASKS", "ROOTPE", "PSTRID", "NTHRDS", "NINST"]: + if key == "NINST" and m.name == "CPL": + m.ninst = 1 + else: + setattr(m, key.lower(), + int(self.case.get_value("%s_%s" % (key, m.name)))) + + m.comp = self.case.get_value("COMP_%s" % (m.name)) + m.pemax = m.rootpe + m.ntasks * m.pstrid - 1 + + now = datetime.datetime.ctime(datetime.datetime.now()) + inittype = "FALSE" + if (run_type == "startup" or run_type == "hybrid") and \ + not continue_run: + inittype = "TRUE" + + binfilename = os.path.join(rundir, "timing", "model_timing_stats") + finfilename = os.path.join(self.caseroot, "timing", + "%s_timing_stats.%s" % (cime_model, self.lid)) + foutfilename = os.path.join(self.caseroot, "timing", + "%s_timing.%s.%s.gz" % (cime_model, caseid, self.lid)) + + timingDir = os.path.join(self.caseroot, "timing") + if not os.path.isdir(timingDir): + os.makedirs(timingDir) + + try: + shutil.copyfile(binfilename, finfilename) + except Exception, e: + if not os.path.isfile(binfilename): + logger.critical("File %s not found" % binfilename) + else: + logger.critical("Unable to cp %s to %s" % + (binfilename, finfilename)) + raise e + + os.chdir(self.caseroot) + try: + fin = open(finfilename, "r") + self.finlines = fin.readlines() + fin.close() + except Exception, e: + logger.critical("Unable to open file %s" % finfilename) + raise e + + tlen = 1.0 + if ncpl_base_period == "decade": + tlen = 3650.0 + elif ncpl_base_period == "year": + tlen = 365.0 + elif ncpl_base_period == "day": + tlen = 1.0 + elif ncpl_base_period == "hour": + tlen = 1.0/24.0 + else: + logger.warning("Unknown NCPL_BASE_PERIOD=%s" % ncpl_base_period) + + nprocs, ncount = self.gettime2('CPL:CLOCK_ADVANCE ') + nsteps = ncount / nprocs + adays = nsteps*tlen/atm_ncpl + odays = adays + if inittype == "TRUE": + odays = adays - (tlen/ocn_ncpl) + peminmax = max([m.rootpe for m in self.models.values()])+1 + if ncpl_base_period in ["decade","year","day"] and int(adays) > 0: + adays = int(adays) + if tlen % ocn_ncpl == 0: + odays = int(odays) + self.adays = adays + maxoffset = 40 + extraoff = 20 + for m in self.models.values(): + m.offset = int((maxoffset*m.rootpe)/peminmax) + extraoff + cpl.offset = 0 + try: + self.fout = gzip.open(foutfilename, "wb") + except Exception, e: + logger.critical("Could not open file for writing: %s" + % foutfilename) + raise e + + self.write("---------------- TIMING PROFILE ---------------------\n") + + self.write(" Case : %s\n" % caseid) + self.write(" LID : %s\n" % self.lid) + self.write(" Machine : %s\n" % mach) + self.write(" Caseroot : %s\n" % self.caseroot) + self.write(" Timeroot : %s/Tools\n" % self.caseroot) + self.write(" User : %s\n" % user) + self.write(" Curr Date : %s\n" % now) + + self.write(" grid : %s\n" % grid) + self.write(" compset : %s\n" % ccsm_compset) + self.write(" run_type : %s, continue_run = %s " + "(inittype = %s)\n" % + (run_type, str(continue_run).upper(), inittype)) + self.write(" stop_option : %s, stop_n = %s\n" % + (stop_option, stop_n)) + self.write(" run_length : %s days (%s for ocean)\n\n" % + (adays, odays)) + + self.write(" component comp_pes root_pe tasks " + "x threads" + " instances (stride) \n") + self.write(" --------- ------ ------- ------ " + "------ --------- ------ \n") + + for k in ['CPL', 'GLC', 'WAV', 'LND', 'ROF', 'ICE', 'ATM', 'OCN']: + m = self.models[k] + self.write(" %s = %-8s %-6u %-6u %-6u x %-6u " + "%-6u (%-6u) \n" + % (m.name.lower(), m.comp, m.ntasks, m.rootpe, + m.ntasks, m.nthrds, m.ninst, m.pstrid)) + + nmax = self.gettime(' CPL:INIT ')[1] + tmax = self.gettime(' CPL:RUN_LOOP ')[1] + wtmax = self.gettime(' CPL:TPROF_WRITE ')[1] + fmax = self.gettime(' CPL:FINAL ')[1] + for k in components: + if k != "CPL": + m = self.models[k] + m.tmin, m.tmax, _ = self.gettime(' CPL:%s_RUN ' % m.name) + otmin, otmax, _ = self.gettime(' CPL:OCNT_RUN ') + + # pick OCNT_RUN for tight coupling + if otmax > ocn.tmax: + ocn.tmin = otmin + ocn.tmax = otmax + + cpl.tmin, cpl.tmax, _ = self.gettime(' CPL:RUN ') + xmax = self.gettime(' CPL:COMM ')[1] + ocnwaittime = self.gettime(' CPL:C2O_INITWAIT')[0] + + if odays != 0: + ocnrunitime = ocn.tmax * (adays/odays - 1.0) + else: + ocnrunitime = 0.0 + + + correction = max(0, ocnrunitime - ocnwaittime) + + tmax = tmax + wtmax + correction + ocn.tmax += ocnrunitime + + for m in self.models.values(): + m.tmaxr = 0 + if m.tmax > 0: + m.tmaxr = adays*86400.0/(m.tmax*365.0) + xmaxr = 0 + if xmax > 0: + xmaxr = adays*86400.0/(xmax*365.0) + tmaxr = 0 + if tmax > 0: + tmaxr = adays*86400.0/(tmax*365.0) + + self.write("\n") + self.write(" total pes active : %s \n" % totalpes) + self.write(" pes per node : %s \n" % pes_per_node) + self.write(" pe count for cost estimate : %s \n" % pecost) + self.write("\n") + + self.write(" Overall Metrics: \n") + self.write(" Model Cost: %10.2f pe-hrs/simulated_year \n" + % ((tmax*365.0*pecost)/(3600.0*adays))) + self.write(" Model Throughput: %10.2f simulated_years/day \n" + % ((86400.0*adays)/(tmax*365.0))) + self.write("\n") + + self.write(" Init Time : %10.3f seconds \n" % nmax) + self.write(" Run Time : %10.3f seconds %10.3f seconds/day \n" + % (tmax, tmax/adays)) + self.write(" Final Time : %10.3f seconds \n" % fmax) + + self.write("\n") + + self.write(" Actual Ocn Init Wait Time : %10.3f seconds \n" + % ocnwaittime) + self.write(" Estimated Ocn Init Run Time : %10.3f seconds \n" + % ocnrunitime) + self.write(" Estimated Run Time Correction : %10.3f seconds \n" + % correction) + self.write(" (This correction has been applied to the ocean and" + " total run times) \n") + + self.write("\n") + self.write("Runs Time in total seconds, seconds/model-day, and" + " model-years/wall-day \n") + self.write("CPL Run Time represents time in CPL pes alone, " + "not including time associated with data exchange " + "with other components \n") + self.write("\n") + + self.write(" TOT Run Time: %10.3f seconds %10.3f seconds/mday " + "%10.2f myears/wday \n" % (tmax, tmax/adays, tmaxr)) + for k in ['LND', 'ROF', 'ICE', 'ATM', 'OCN', 'GLC', 'WAV', 'CPL']: + m = self.models[k] + self.write(" %s Run Time: %10.3f seconds " + "%10.3f seconds/mday %10.2f myears/wday \n" + % (k, m.tmax, m.tmax/adays, m.tmaxr)) + self.write(" CPL COMM Time: %10.3f seconds %10.3f seconds/mday " + "%10.2f myears/wday \n" % (xmax, xmax/adays, xmaxr)) + + self.write("\n\n---------------- DRIVER TIMING FLOWCHART " + "--------------------- \n\n") + + pstrlen = 25 + hoffset = 1 + self.write(" NOTE: min:max driver timers (seconds/day): \n") + + for k in ["CPL", "OCN", "LND", "ROF", "ICE", "ATM", "GLC", "WAV"]: + m = self.models[k] + xspace = (pstrlen+hoffset+m.offset) * ' ' + self.write(" %s %s (pes %u to %u) \n" + % (xspace, k, m.rootpe, m.pemax)) + self.write("\n") + + self.prttime(' CPL:CLOCK_ADVANCE ') + self.prttime(' CPL:OCNPRE1_BARRIER ') + self.prttime(' CPL:OCNPRE1 ') + self.prttime(' CPL:ATMOCN1_BARRIER ') + self.prttime(' CPL:ATMOCN1 ') + self.prttime(' CPL:OCNPREP_BARRIER ') + self.prttime(' CPL:OCNPREP ') + self.prttime(' CPL:C2O_BARRIER ', offset=ocn.offset, div=odays, + coff=cpl.offset) + self.prttime(' CPL:C2O ', offset=ocn.offset, div=odays, coff=cpl.offset) + self.prttime(' CPL:LNDPREP_BARRIER ') + self.prttime(' CPL:LNDPREP ') + self.prttime(' CPL:C2L_BARRIER ', offset=lnd.offset, coff=cpl.offset) + self.prttime(' CPL:C2L ', offset=lnd.offset, coff=cpl.offset) + self.prttime(' CPL:ICEPREP_BARRIER ') + self.prttime(' CPL:ICEPREP ') + self.prttime(' CPL:C2I_BARRIER ', offset=ice.offset, coff=cpl.offset) + self.prttime(' CPL:C2I ', offset=ice.offset, coff=cpl.offset) + self.prttime(' CPL:WAVPREP_BARRIER ') + self.prttime(' CPL:WAVPREP ') + self.prttime(' CPL:C2W_BARRIER ', offset=ice.offset, coff=cpl.offset) + self.prttime(' CPL:C2W ', offset=ice.offset, coff=cpl.offset) + self.prttime(' CPL:ROFPREP_BARRIER ') + self.prttime(' CPL:ROFPREP ') + self.prttime(' CPL:C2R_BARRIER ', offset=rof.offset, coff=cpl.offset) + self.prttime(' CPL:C2R ', offset=rof.offset, coff=cpl.offset) + self.prttime(' CPL:ICE_RUN_BARRIER ', offset=ice.offset) + self.prttime(' CPL:ICE_RUN ', offset=ice.offset) + self.prttime(' CPL:LND_RUN_BARRIER ', offset=lnd.offset) + self.prttime(' CPL:LND_RUN ', offset=lnd.offset) + self.prttime(' CPL:ROF_RUN_BARRIER ', offset=rof.offset) + self.prttime(' CPL:ROF_RUN ', offset=rof.offset) + self.prttime(' CPL:WAV_RUN_BARRIER ', offset=rof.offset) + self.prttime(' CPL:WAV_RUN ', offset=rof.offset) + self.prttime(' CPL:OCNT_RUN_BARRIER ', offset=ocn.offset, div=odays) + self.prttime(' CPL:OCNT_RUN ', offset=ocn.offset, div=odays) + self.prttime(' CPL:O2CT_BARRIER ', offset=ocn.offset, div=odays, + coff=cpl.offset) + self.prttime(' CPL:O2CT ', offset=ocn.offset, div=odays, + coff=cpl.offset) + self.prttime(' CPL:OCNPOSTT_BARRIER ') + self.prttime(' CPL:OCNPOSTT ') + self.prttime(' CPL:ATMOCNP_BARRIER ') + self.prttime(' CPL:ATMOCNP ') + self.prttime(' CPL:L2C_BARRIER ', offset=lnd.offset, coff=cpl.offset) + self.prttime(' CPL:L2C ', offset=lnd.offset, div=cpl.offset) + self.prttime(' CPL:LNDPOST_BARRIER ') + self.prttime(' CPL:LNDPOST ') + self.prttime(' CPL:GLCPREP_BARRIER ') + self.prttime(' CPL:GLCPREP ') + self.prttime(' CPL:C2G_BARRIER ', offset=glc.offset, coff=cpl.offset) + self.prttime(' CPL:C2G ', offset=glc.offset, coff=cpl.offset) + self.prttime(' CPL:R2C_BARRIER ', offset=rof.offset, coff=cpl.offset) + self.prttime(' CPL:R2C ', offset=rof.offset, coff=cpl.offset) + self.prttime(' CPL:ROFPOST_BARRIER ') + self.prttime(' CPL:ROFPOST ') + self.prttime(' CPL:BUDGET1_BARRIER ') + self.prttime(' CPL:BUDGET1 ') + self.prttime(' CPL:I2C_BARRIER ', offset=ice.offset, coff=cpl.offset) + self.prttime(' CPL:I2C ', offset=ice.offset, coff=cpl.offset) + self.prttime(' CPL:ICEPOST_BARRIER ') + self.prttime(' CPL:ICEPOST ') + self.prttime(' CPL:FRACSET_BARRIER ') + self.prttime(' CPL:FRACSET ') + self.prttime(' CPL:ATMOCN2_BARRIER ') + self.prttime(' CPL:ATMOCN2 ') + self.prttime(' CPL:OCNPRE2_BARRIER ') + self.prttime(' CPL:OCNPRE2 ') + self.prttime(' CPL:C2O2_BARRIER ', offset=ocn.offset, div=odays, + coff=cpl.offset) + self.prttime(' CPL:C2O2 ', offset=ocn.offset, div=odays, + coff=cpl.offset) + self.prttime(' CPL:ATMOCNQ_BARRIER') + self.prttime(' CPL:ATMOCNQ ') + self.prttime(' CPL:ATMPREP_BARRIER ') + self.prttime(' CPL:ATMPREP ') + self.prttime(' CPL:C2A_BARRIER ', offset=atm.offset, coff=cpl.offset) + self.prttime(' CPL:C2A ', offset=atm.offset, coff=cpl.offset) + self.prttime(' CPL:OCN_RUN_BARRIER ', offset=ocn.offset, div=odays) + self.prttime(' CPL:OCN_RUN ', offset=ocn.offset, div=odays) + self.prttime(' CPL:ATM_RUN_BARRIER ', offset=atm.offset) + self.prttime(' CPL:ATM_RUN ', offset=atm.offset) + self.prttime(' CPL:GLC_RUN_BARRIER ', offset=glc.offset) + self.prttime(' CPL:GLC_RUN ', offset=glc.offset) + self.prttime(' CPL:W2C_BARRIER ', offset=glc.offset, coff=cpl.offset) + self.prttime(' CPL:W2C ', offset=glc.offset, coff=cpl.offset) + self.prttime(' CPL:WAVPOST_BARRIER ') + self.prttime(' CPL:WAVPOST ', cpl.offset) + self.prttime(' CPL:G2C_BARRIER ', offset=glc.offset, coff=cpl.offset) + self.prttime(' CPL:G2C ', offset=glc.offset, coff=cpl.offset) + self.prttime(' CPL:GLCPOST_BARRIER ') + self.prttime(' CPL:GLCPOST ') + self.prttime(' CPL:A2C_BARRIER ', offset=atm.offset, coff=cpl.offset) + self.prttime(' CPL:A2C ', offset=atm.offset, coff=cpl.offset) + self.prttime(' CPL:ATMPOST_BARRIER ') + self.prttime(' CPL:ATMPOST ') + self.prttime(' CPL:BUDGET2_BARRIER ') + self.prttime(' CPL:BUDGET2 ') + self.prttime(' CPL:BUDGET3_BARRIER ') + self.prttime(' CPL:BUDGET3 ') + self.prttime(' CPL:BUDGETF_BARRIER ') + self.prttime(' CPL:BUDGETF ') + self.prttime(' CPL:O2C_BARRIER ', offset=ocn.offset, + div=odays, coff=cpl.offset) + self.prttime(' CPL:O2C ', offset=ocn.offset, div=odays, coff=cpl.offset) + self.prttime(' CPL:OCNPOST_BARRIER ') + self.prttime(' CPL:OCNPOST ') + self.prttime(' CPL:RESTART_BARRIER ') + self.prttime(' CPL:RESTART') + self.prttime(' CPL:HISTORY_BARRIER ') + self.prttime(' CPL:HISTORY ') + self.prttime(' CPL:TSTAMP_WRITE ') + self.prttime(' CPL:TPROF_WRITE ') + self.prttime(' CPL:RUN_LOOP_BSTOP ') + + self.write("\n\n") + self.write("More info on coupler timing:\n") + + self.write("\n") + self.prttime(' CPL:OCNPRE1 ') + self.prttime(' CPL:ocnpre1_atm2ocn ') + + self.write("\n") + self.prttime(' CPL:OCNPREP ') + self.prttime(' CPL:OCNPRE2 ') + self.prttime(' CPL:ocnprep_avg ') + self.prttime(' CPL:ocnprep_diagav ') + + self.write("\n") + self.prttime(' CPL:LNDPREP ') + self.prttime(' CPL:lndprep_atm2lnd ') + self.prttime(' CPL:lndprep_mrgx2l ') + self.prttime(' CPL:lndprep_diagav ') + + self.write("\n") + self.prttime(' CPL:ICEPREP ') + self.prttime(' CPL:iceprep_ocn2ice ') + self.prttime(' CPL:iceprep_atm2ice ') + self.prttime(' CPL:iceprep_mrgx2i ') + self.prttime(' CPL:iceprep_diagav ') + + self.write("\n") + self.prttime(' CPL:WAVPREP ') + self.prttime(' CPL:wavprep_atm2wav ') + self.prttime(' CPL:wavprep_ocn2wav ') + self.prttime(' CPL:wavprep_ice2wav ') + self.prttime(' CPL:wavprep_mrgx2w ') + self.prttime(' CPL:wavprep_diagav ') + + self.write("\n") + self.prttime(' CPL:ROFPREP ') + self.prttime(' CPL:rofprep_l2xavg ') + self.prttime(' CPL:rofprep_lnd2rof ') + self.prttime(' CPL:rofprep_mrgx2r ') + self.prttime(' CPL:rofprep_diagav ') + + self.write("\n") + self.prttime(' CPL:GLCPREP ') + self.prttime(' CPL:glcprep_avg ') + self.prttime(' CPL:glcprep_lnd2glc ') + self.prttime(' CPL:glcprep_mrgx2g ') + self.prttime(' CPL:glcprep_diagav ') + + self.write("\n") + self.prttime(' CPL:ATMPREP ') + self.prttime(' CPL:atmprep_xao2atm ') + self.prttime(' CPL:atmprep_ocn2atm ') + self.prttime(' CPL:atmprep_alb2atm ') + self.prttime(' CPL:atmprep_ice2atm ') + self.prttime(' CPL:atmprep_lnd2atm ') + self.prttime(' CPL:atmprep_mrgx2a ') + self.prttime(' CPL:atmprep_diagav ') + + self.write("\n") + self.prttime(' CPL:ATMOCNP ') + self.prttime(' CPL:ATMOCN1 ') + self.prttime(' CPL:ATMOCN2 ') + self.prttime(' CPL:atmocnp_ice2ocn ') + self.prttime(' CPL:atmocnp_wav2ocn ') + self.prttime(' CPL:atmocnp_fluxo ') + self.prttime(' CPL:atmocnp_fluxe ') + self.prttime(' CPL:atmocnp_mrgx2o ') + self.prttime(' CPL:atmocnp_accum ') + self.prttime(' CPL:atmocnp_ocnalb ') + + self.write("\n") + self.prttime(' CPL:ATMOCNQ ') + self.prttime(' CPL:atmocnq_ocn2atm ') + self.prttime(' CPL:atmocnq_fluxa ') + self.prttime(' CPL:atmocnq_atm2ocnf ') + + self.write("\n") + self.prttime(' CPL:OCNPOSTT ') + self.prttime(' CPL:OCNPOST ') + self.prttime(' CPL:ocnpost_diagav ') + + self.write("\n") + self.prttime(' CPL:LNDPOST ') + self.prttime(' CPL:lndpost_diagav ') + self.prttime(' CPL:lndpost_acc2lr ') + self.prttime(' CPL:lndpost_acc2lg ') + + self.write("\n") + self.prttime(' CPL:ROFOST ') + self.prttime(' CPL:rofpost_diagav ') + self.prttime(' CPL:rofpost_histaux ') + self.prttime(' CPL:rofpost_rof2lnd ') + self.prttime(' CPL:rofpost_rof2ice ') + self.prttime(' CPL:rofpost_rof2ocn ') + + self.write("\n") + self.prttime(' CPL:ICEPOST ') + self.prttime(' CPL:icepost_diagav ') + + self.write("\n") + self.prttime(' CPL:WAVPOST ') + self.prttime(' CPL:wavpost_diagav ') + + self.write("\n") + self.prttime(' CPL:GLCPOST ') + self.prttime(' CPL:glcpost_diagav ') + self.prttime(' CPL:glcpost_glc2lnd ') + self.prttime(' CPL:glcpost_glc2ice ') + self.prttime(' CPL:glcpost_glc2ocn ') + + self.write("\n") + self.prttime(' CPL:ATMPOST ') + self.prttime(' CPL:atmpost_diagav ') + + self.write("\n") + self.prttime(' CPL:BUDGET ') + self.prttime(' CPL:BUDGET1 ') + self.prttime(' CPL:BUDGET2 ') + self.prttime(' CPL:BUDGET3 ') + self.prttime(' CPL:BUDGETF ') + self.write("\n\n") + + self.fout.close() + +def get_timing(case, lid): + parser = _TimingParser(case, lid) + parser.getTiming() diff --git a/utils/python/CIME/provenance.py b/utils/python/CIME/provenance.py new file mode 100644 index 000000000000..e2378429ab79 --- /dev/null +++ b/utils/python/CIME/provenance.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python + +""" +Library for saving build/run provenance. +""" + +from CIME.XML.standard_module_setup import * +from CIME.utils import touch, gzip_existing_file + +import tarfile, getpass, signal, glob, shutil + +logger = logging.getLogger(__name__) + +def _get_batch_job_id(case): + mach = case.get_value("MACH") + if mach == 'titan': + return os.environ("PBS_JOBID") + elif mach in ['edison', 'corip1']: + return os.environ("SLURM_JOB_ID") + elif mach == 'mira': + return os.environ("COBALT_JOBID") + else: + return None + +def save_build_provenance_acme(case, lid=None): # pylint: disable=unused-argument + pass + +def save_build_provenance_cesm(case, lid=None): # pylint: disable=unused-argument + pass + +def save_build_provenance(case, lid=None): + model = case.get_value("MODEL") + if model == "acme": + save_build_provenance_acme(case, lid=lid) + elif model == "cesm": + save_build_provenance_cesm(case, lid=lid) + +def save_prerun_provenance_acme(case, lid=None): + if not case.get_value("SAVE_TIMING") or case.get_value("MODEL") != "acme": + return + + lid = os.environ["LID"] if lid is None else lid + timing_dir = case.get_value("SAVE_TIMING_DIR") + if timing_dir is None or timing_dir == 'UNSET': + logger.warning("ACME requires SAVE_TIMING_DIR to be set in order to save timings. Skipping save timings") + return + + logger.warn("timing dir is %s"%timing_dir) + rundir = case.get_value("RUNDIR") + caseroot = case.get_value("CASEROOT") + cimeroot = case.get_value("CIMEROOT") + base_case = case.get_value("CASEBASEID") + full_timing_dir = os.path.join(timing_dir, "performance_archive", getpass.getuser(), base_case, lid) + expect(not os.path.exists(full_timing_dir), "%s already exists" % full_timing_dir) + + os.makedirs(full_timing_dir) + mach = case.get_value("MACH") + compiler = case.get_value("COMPILER") + + # For some batch machines save queue info + job_id = _get_batch_job_id(case) + if mach == "mira": + for cmd, filename in [("qstat -lf", "qstatf"), ("qstat -lf %s" % job_id, "qstatf_jobid")]: + run_cmd_no_fail("%s > %s.%s" % (cmd, filename, lid), from_dir=full_timing_dir) + gzip_existing_file(os.path.join(full_timing_dir, filename)) + + elif mach == ["corip1", "edison"]: + for cmd, filename in [("sqs -f", "sqsf"), ("sqs -w -a", "sqsw"), ("sqs -f %s" % job_id, "sqsf_jobid"), ("squeue", "squeuef")]: + run_cmd_no_fail("%s > %s.%s" % (cmd, filename, lid), from_dir=full_timing_dir) + gzip_existing_file(os.path.join(full_timing_dir, filename)) + + elif mach == "titan": + for cmd, filename in [("xtdb2proc -f xtdb2proc", "xtdb2procf"), + ("qstat -f > qstat", "qstatf"), + ("qstat -f %s > qstatf_jobid" % job_id, "qstatf_jobid"), + ("xtnodestat > xtnodestat", "xtnodestatf"), + ("showq > showqf", "showqf")]: + run_cmd_no_fail(cmd + "." + lid, from_dir=full_timing_dir) + gzip_existing_file(os.path.join(full_timing_dir, filename + "." + lid)) + + mdiag_reduce = os.path.join(full_timing_dir, "mdiag_reduce." + lid) + run_cmd_no_fail("./mdiag_reduce.csh > %s" % mdiag_reduce, from_dir=os.path.join(caseroot, "Tools")) + gzip_existing_file(mdiag_reduce) + + # copy/tar SourceModes + source_mods_dir = os.path.join(caseroot, "SourceMods") + if os.path.isdir(source_mods_dir): + with tarfile.open(os.path.join(full_timing_dir, "SourceMods.%s.tar.gz" % lid), "w:gz") as tfd: + tfd.add(source_mods_dir) + + # Save various case configuration items + case_docs = os.path.join(full_timing_dir, "CaseDocs") + os.mkdir(case_docs) + globs_to_copy = [ + "CaseDocs/*", + "*.run", + "*.xml", + "user_nl_*", + "*env_mach_specific*", + "Macros", + "README.case", + "Depends.%s" % mach, + "Depends.%s" % compiler, + "Depends.%s.%s" % (mach, compiler), + "software_environment.txt" + ] + for glob_to_copy in globs_to_copy: + for item in glob.glob(os.path.join(caseroot, glob_to_copy)): + shutil.copy(item, os.path.join(case_docs, os.path.basename(item) + "." + lid)) + + if job_id is not None: + sample_interval = case.get_value("SYSLOG_N") + if sample_interval > 0: + archive_checkpoints = os.path.join(full_timing_dir, "checkpoints") + os.mkdir(archive_checkpoints) + touch("%s/acme.log.%s" % (rundir, lid)) + syslog_jobid = run_cmd_no_fail("./mach_syslog %d %s %s %s %s/timing/checkpoints %s/checkpoints >& /dev/null & echo $!" % + (sample_interval, job_id, lid, rundir, rundir, archive_checkpoints), + from_dir=os.path.join(caseroot, "Tools")) + with open(os.path.join(rundir, "syslog_jobid", ".%s" % job_id), "w") as fd: + fd.write("%s\n" % syslog_jobid) + + # Save state of repo + run_cmd_no_fail("git describe > %s" % os.path.join(full_timing_dir, "GIT_DESCRIBE"), from_dir=cimeroot) + +def save_prerun_provenance_cesm(case, lid=None): # pylint: disable=unused-argument + pass + +def save_prerun_provenance(case, lid=None): + model = case.get_value("MODEL") + if model == "acme": + save_prerun_provenance_acme(case, lid=lid) + elif model == "cesm": + save_prerun_provenance_cesm(case, lid=lid) + +def save_postrun_provenance_cesm(case, lid=None): + save_timing = case.get_value("SAVE_TIMING") + if save_timing: + lid = os.environ["LID"] if lid is None else lid + rundir = case.get_value("RUNDIR") + timing_dir = case.get_value("SAVE_TIMING_DIR") + timing_dir = os.path.join(timing_dir, case.get_value("CASE")) + shutil.move(os.path.join(rundir,"timing"), + os.path.join(timing_dir,"timing."+lid)) + +def save_postrun_provenance_acme(case, lid): + save_timing = case.get_value("SAVE_TIMING") + if not save_timing: + return + + lid = os.environ["LID"] if lid is None else lid + rundir = case.get_value("RUNDIR") + timing_dir = case.get_value("SAVE_TIMING_DIR") + caseroot = case.get_value("CASEROOT") + mach = case.get_value("MACH") + base_case = case.get_value("CASEBASEID") + full_timing_dir = os.path.join(timing_dir, "performance_archive", getpass.getuser(), base_case, lid) + + if not os.path.isdir(full_timing_dir): + os.makedirs(full_timing_dir) + + # Kill mach_syslog + job_id = _get_batch_job_id(case) + if job_id is not None: + syslog_jobid_path = os.path.join(rundir, "syslog_jobid", ".%s" % job_id) + if os.path.exists(syslog_jobid_path): + try: + with open(syslog_jobid_path, "r") as fd: + syslog_jobid = int(fd.read().strip()) + os.kill(syslog_jobid, signal.SIGTERM) + except (ValueError, OSError) as e: + logger.warning("Failed to kill syslog: %s" % e) + finally: + os.remove(syslog_jobid_path) + + # copy/tar timings + with tarfile.open(os.path.join(full_timing_dir, "timing.%s.tar.gz" % lid), "w:gz") as tfd: + tfd.add(os.path.join(rundir, "timing")) + + # + # save output files and logs + # + globs_to_copy = [] + if mach == "titan": + globs_to_copy.append("%s*OU" % job_id) + elif mach == "mira": + globs_to_copy.append("%s*output" % job_id) + globs_to_copy.append("%s*cobaltlog" % job_id) + elif mach in ["edison", "corip1"]: + globs_to_copy.append("%s" % case.get_value("CASE")) + + globs_to_copy.append("logs/acme.log.%s.gz" % lid) + globs_to_copy.append("logs/cpl.log.%s.gz" % lid) + globs_to_copy.append("timing/*.%s" % lid) + globs_to_copy.append("CaseStatus") + + for glob_to_copy in globs_to_copy: + for item in glob.glob(os.path.join(caseroot, glob_to_copy)): + shutil.copy(item, full_timing_dir) + +def save_postrun_provenance(case, lid=None): + model = case.get_value("MODEL") + if model == "acme": + save_postrun_provenance_acme(case, lid=lid) + elif model == "cesm": + save_postrun_provenance_cesm(case, lid=lid) + diff --git a/utils/python/CIME/test_scheduler.py b/utils/python/CIME/test_scheduler.py index faba0a5786eb..4578607213aa 100644 --- a/utils/python/CIME/test_scheduler.py +++ b/utils/python/CIME/test_scheduler.py @@ -459,7 +459,6 @@ def _xml_phase(self, test): envtest.set_initial_values(case) if self._save_timing: case.set_value("SAVE_TIMING", True) - case.set_value("CHECK_TIMING", True) return True diff --git a/utils/python/CIME/utils.py b/utils/python/CIME/utils.py index 0411055b1418..74854d7f808c 100644 --- a/utils/python/CIME/utils.py +++ b/utils/python/CIME/utils.py @@ -2,7 +2,7 @@ Common functions used by cime python scripts Warning: you cannot use CIME Classes in this module as it causes circular dependencies """ -import logging, gzip, sys, os, time, re, shutil +import logging, gzip, sys, os, time, re, shutil, glob # Return this error code if the scripts worked but tests failed TESTS_FAILED_ERR_CODE = 100 @@ -923,3 +923,24 @@ def find_system_test(testname, case): mod = import_module(path) return getattr(mod, m) +def _get_most_recent_lid_impl(files): + """ + >>> files = ['/foo/bar/acme.log.20160905_111212', '/foo/bar/acme.log.20160906_111212.gz'] + >>> _get_most_recent_lid_impl(files) + ['20160905_111212', '20160906_111212'] + """ + results = [] + for item in files: + basename = os.path.basename(item) + components = basename.split(".") + if len(components) > 2: + results.append(components[2]) + else: + logger.warning("Apparent model log file '%s' did not conform to expected name format" % item) + + return sorted(results) + +def get_lids(case): + model = case.get_value("MODEL") + rundir = case.get_value("RUNDIR") + return _get_most_recent_lid_impl(glob.glob("%s/%s.log*" % (rundir, model))) diff --git a/utils/python/tests/scripts_regression_tests.py b/utils/python/tests/scripts_regression_tests.py index b7b2579f801e..f37af457b76f 100755 --- a/utils/python/tests/scripts_regression_tests.py +++ b/utils/python/tests/scripts_regression_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import io, glob, os, re, shutil, signal, sys, tempfile, \ - threading, time, logging, unittest + threading, time, logging, unittest, getpass from xml.etree.ElementTree import ParseError @@ -11,7 +11,7 @@ import subprocess subprocess.call('/bin/rm $(find . -name "*.pyc")', shell=True, cwd=LIB_DIR) -from CIME.utils import run_cmd, run_cmd_no_fail +from CIME.utils import run_cmd, run_cmd_no_fail, get_lids import update_acme_tests import CIME.test_scheduler, CIME.wait_for_tests from CIME.test_scheduler import TestScheduler @@ -984,9 +984,10 @@ class L_TestSaveTimings(TestCreateTestCommon): ############################################################################### ########################################################################### - def test_save_timings(self): + def simple_test(self, manual_timing=False): ########################################################################### - create_test_cmd = "%s/create_test SMS_Ln9_Mmpi-serial.f19_g16_rx1.A --save-timing --walltime 0:15:00 -t %s" % (SCRIPT_DIR, self._baseline_name) + timing_flag = "" if manual_timing else "--save-timing" + create_test_cmd = "%s/create_test SMS_Ln9_Mmpi-serial.f19_g16_rx1.A %s --walltime 0:15:00 -t %s" % (SCRIPT_DIR, timing_flag, self._baseline_name) if NO_BATCH: create_test_cmd += " --no-batch" @@ -995,6 +996,33 @@ def test_save_timings(self): run_cmd_assert_result(self, "%s/wait_for_tests *%s/TestStatus" % (TOOLS_DIR, self._baseline_name), from_dir=self._testroot) + statuses = glob.glob("%s/*%s/TestStatus" % (self._testroot, self._baseline_name)) + self.assertEqual(len(statuses), 1, msg="Should have had exactly one match, found %s" % statuses) + casedir = os.path.dirname(statuses[0]) + + with Case(casedir, read_only=True) as case: + lids = get_lids(case) + timing_dir = case.get_value("SAVE_TIMING_DIR") + casename = case.get_value("CASEBASEID") + + self.assertEqual(len(lids), 1, msg="Expected one LID, found %s" % lids) + + if manual_timing: + run_cmd_assert_result(self, "cd %s && %s/save_provenance postrun" % (casedir, TOOLS_DIR)) + + provenance_dir = os.path.join(timing_dir, "performance_archive", getpass.getuser(), casename, lids[0]) + self.assertTrue(os.path.isdir(provenance_dir), msg="'%s' was missing" % provenance_dir) + + ########################################################################### + def test_save_timings(self): + ########################################################################### + self.simple_test() + + ########################################################################### + def test_save_timings_manual(self): + ########################################################################### + self.simple_test(manual_timing=True) + ############################################################################### class C_TestXMLQuery(unittest.TestCase): # Testing command line scripts