From ced19d84a03723d4bbe537184e2fa370671afe62 Mon Sep 17 00:00:00 2001
From: Michael Deakin
Date: Thu, 15 Jun 2017 12:40:50 -0600
Subject: [PATCH 1/3] Initial work on fixing branch runs. Also start work on removal of sed commands

---
# NOTE(review): this patch was received with its line breaks collapsed. Line
# boundaries were restored from the diff markers and checked against the
# hunk-header counts; blank context lines and leading indentation are
# inferred. Verify against the original commit (or apply ignoring
# whitespace) before relying on it.
# NOTE(review): the single-job submit line added here hard-codes
# --dependency=afterany:1769195; PATCH 2/3 removes it again.
# NOTE(review): the 100755 => 100644 mode change below is reverted by
# PATCH 2/3.
 run_acme.template.csh | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)
 mode change 100755 => 100644 run_acme.template.csh

diff --git a/run_acme.template.csh b/run_acme.template.csh
old mode 100755
new mode 100644
index 5cebd98f7151..7c18ddf1a76b
--- a/run_acme.template.csh
+++ b/run_acme.template.csh
@@ -1001,17 +1001,23 @@ endif
 
 mkdir -p batch_output ### Make directory that stdout and stderr will go into.
 
+set batch_options = ''
+
 
 set machine = `lowercase $machine` # Change to lowercase, just to make the following easier to read.
 if ( $machine == hopper ) then
+    # TODO: Pass the correct options to the queue through batch_options
+    set batch_options = ""
     sed -i /"#PBS \( \)*-N"/c"#PBS -N ${job_name}" ${case_run_exe}
     sed -i /"#PBS \( \)*-j oe"/a'#PBS -o batch_output/${PBS_JOBNAME}.o${PBS_JOBID}' ${case_run_exe}
+
     sed -i /"#PBS \( \)*-N"/c"#PBS -N ST+${job_name}" $shortterm_archive_script
     sed -i /"#PBS \( \)*-j oe"/a'#PBS -o batch_output/${PBS_JOBNAME}.o${PBS_JOBID}' $shortterm_archive_script
     sed -i /"#PBS \( \)*-N"/c"#PBS -N LT+${job_name}" $longterm_archive_script
     sed -i /"#PBS \( \)*-j oe"/a'#PBS -o batch_output/${PBS_JOBNAME}.o${PBS_JOBID}' $longterm_archive_script
 
 else if ( $machine == cori || $machine == edison ) then
+    set batch_options = "--job-name=${job_name} --account=${project} --output=batch_output/${case_name}.o"
     sed -i /"#SBATCH \( \)*--job-name"/c"#SBATCH --job-name=${job_name}" ${case_run_exe}
     sed -i /"#SBATCH \( \)*--job-name"/a"#SBATCH --account=${project}" ${case_run_exe}
     sed -i /"#SBATCH \( \)*--output"/c"#SBATCH --output=batch_output/"${case_name}'.o%j' ${case_run_exe}
@@ -1024,6 +1030,7 @@ else if ( $machine == cori || $machine == edison ) then
     sed -i /"#SBATCH \( \)*--output"/c'#SBATCH --output=batch_output/LT+'${case_name}'.o%j' $longterm_archive_script
 
 else if ( $machine == titan || $machine == eos ) then
+    set batch_options = ""
     sed -i /"#PBS \( \)*-N"/c"#PBS -N ${job_name}" ${case_run_exe}
     sed -i /"#PBS \( \)*-A"/c"#PBS -A ${project}" ${case_run_exe}
     sed -i /"#PBS \( \)*-j oe"/a'#PBS -o batch_output/${PBS_JOBNAME}.o${PBS_JOBID}' ${case_run_exe}
@@ -1153,6 +1160,7 @@ else if ( $model_start_type == 'branch' ) then
     acme_print '$restart_filedate = '$restart_filedate
 
     ### the next line gets the YYYY-MM of the month before the restart time. Needed for staging history files.
+    ### NOTE: This is broken for cases that have run for less than a month
     set restart_prevdate = `date -d "${restart_filedate} - 1 month" +%Y-%m`
 
     acme_print '$restart_prevdate = '$restart_prevdate
@@ -1165,9 +1173,9 @@ else if ( $model_start_type == 'branch' ) then
     cp ${restart_files_dir}/${restart_case_name}.cpl.r.${restart_filedate}-00000.nc $case_run_dir
     cp ${restart_files_dir}/${restart_case_name}.mosart.r.${restart_filedate}-00000.nc $case_run_dir
     cp ${restart_files_dir}/${restart_case_name}.mosart.rh0.${restart_filedate}-00000.nc $case_run_dir
-    cp ${restart_files_dir}/rst.mpas-cice.${restart_filedate}_00.00.00.nc $case_run_dir
-    cp ${restart_files_dir}/rst.mpas-o.${restart_filedate}_00.00.00.nc $case_run_dir
-    cp ${restart_files_dir}/${restart_case_name}.cam.h0.${restart_prevdate}.nc $case_run_dir
+    cp ${restart_files_dir}/mpascice.rst.${restart_filedate}_00000.nc $case_run_dir
+    cp ${restart_files_dir}/mpaso.rst.${restart_filedate}_00000.nc $case_run_dir
+    cp ${restart_files_dir}/${restart_case_name}.cam.h0.${restart_prevdate}-*-00000.nc $case_run_dir
     cp ${restart_files_dir}/${restart_case_name}.mosart.h0.${restart_prevdate}.nc $case_run_dir
     cp ${restart_files_dir}/${restart_case_name}.clm2.h0.${restart_prevdate}.nc $case_run_dir
     cp ${restart_files_dir}/rpointer* $case_run_dir
@@ -1176,7 +1184,9 @@ else if ( $model_start_type == 'branch' ) then
     $xmlchange_exe --id RUN_REFCASE --val $restart_case_name
     $xmlchange_exe --id RUN_REFDATE --val $restart_filedate # Model date of restart file
     $xmlchange_exe --id CONTINUE_RUN --val "FALSE"
-    $xmlchange_exe --id BRNCH_RETAIN_CASENAME --val "FALSE" ## Only TRUE if you want to continue the run with the same name (risky)!!
+    # Currently broken in CIME
+    # Only uncomment this if you want to continue the run with the same name (risky)!!
+    # $xmlchange_exe --id BRNCH_RETAIN_CASENAME --val "TRUE"
 
 else
 
@@ -1211,7 +1221,7 @@ acme_newline
 if ( `lowercase $submit_run` == 'true' ) then
     if ( $num_submits == 1 ) then
         acme_print ' SUBMITTING A SINGLE JOB.'
-        ${case_submit_exe}
+        ${case_submit_exe} --batch-args " ${batch_options} --dependency=afterany:1769195 "
     else if ( $num_submits <= 0 ) then
         acme_print '$num_submits <= 0 so NOT submitting a job.'
         acme_print '$num_submits = '$num_submits

From f420e3f09170129a0851f370d9e843e974819e2c Mon Sep 17 00:00:00 2001
From: Michael Deakin
Date: Fri, 16 Jun 2017 14:34:50 -0700
Subject: [PATCH 2/3] Tested branch runs on Edison; also cleaned up the batch options

---
# NOTE(review): line boundaries restored from the diff markers and
# hunk-header counts; blank context lines and indentation are inferred.
# NOTE(review): for cori/edison this drops --account=${project} from
# batch_options and deletes the case.run sed edit that inserted it. Confirm
# the project account still reaches the run job some other way.
# NOTE(review): the "Batch options" acme_print added here is removed again
# by PATCH 3/3.
 run_acme.template.csh | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)
 mode change 100644 => 100755 run_acme.template.csh

diff --git a/run_acme.template.csh b/run_acme.template.csh
old mode 100644
new mode 100755
index 7c18ddf1a76b..c816d034fc99
--- a/run_acme.template.csh
+++ b/run_acme.template.csh
@@ -1017,10 +1017,7 @@ if ( $machine == hopper ) then
     sed -i /"#PBS \( \)*-j oe"/a'#PBS -o batch_output/${PBS_JOBNAME}.o${PBS_JOBID}' $longterm_archive_script
 
 else if ( $machine == cori || $machine == edison ) then
-    set batch_options = "--job-name=${job_name} --account=${project} --output=batch_output/${case_name}.o"
-    sed -i /"#SBATCH \( \)*--job-name"/c"#SBATCH --job-name=${job_name}" ${case_run_exe}
-    sed -i /"#SBATCH \( \)*--job-name"/a"#SBATCH --account=${project}" ${case_run_exe}
-    sed -i /"#SBATCH \( \)*--output"/c"#SBATCH --output=batch_output/"${case_name}'.o%j' ${case_run_exe}
+    set batch_options = "--job-name=${job_name} --output=batch_output/${case_name}.o%j"
 
     sed -i /"#SBATCH \( \)*--job-name"/c"#SBATCH --job-name=ST+${job_name}" $shortterm_archive_script
     sed -i /"#SBATCH \( \)*--job-name"/a"#SBATCH --account=${project}" $shortterm_archive_script
@@ -1045,6 +1042,8 @@ else
     acme_print ' Assuming default ACME values.'
 endif
 
+acme_print "Batch options: ${batch_options}"
+
 #============================================
 # QUEUE OPTIONS
 #============================================
@@ -1221,7 +1220,7 @@ acme_newline
 if ( `lowercase $submit_run` == 'true' ) then
     if ( $num_submits == 1 ) then
         acme_print ' SUBMITTING A SINGLE JOB.'
-        ${case_submit_exe} --batch-args " ${batch_options} --dependency=afterany:1769195 "
+        ${case_submit_exe} --batch-args " ${batch_options} "
     else if ( $num_submits <= 0 ) then
         acme_print '$num_submits <= 0 so NOT submitting a job.'
         acme_print '$num_submits = '$num_submits

From 56215dd32650e6dad795872cefb056cb3b99bcf9 Mon Sep 17 00:00:00 2001
From: Michael Deakin
Date: Mon, 19 Jun 2017 12:28:35 -0700
Subject: [PATCH 3/3] Minor cleanups

Update version number
Remove hopper from consideration when updating the case.run file
---
# NOTE(review): line boundaries restored from the diff markers and
# hunk-header counts; blank context lines, indentation and the
# subject/body split of the commit message are inferred.
# NOTE(review): only the hopper branch is removed here. This patch does not
# touch the titan/eos branch, so presumably batch_options stays empty and
# the case.run sed edits remain for those machines - confirm against the
# full file.
 run_acme.template.csh | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/run_acme.template.csh b/run_acme.template.csh
index c816d034fc99..ab9fefff1fef 100755
--- a/run_acme.template.csh
+++ b/run_acme.template.csh
@@ -210,7 +210,7 @@ set cpl_hist_num = 1
 #===========================================
 # VERSION OF THIS SCRIPT
 #===========================================
-set script_ver = 3.0.8
+set script_ver = 3.0.9
 #===========================================
 # DEFINE ALIASES
 #===========================================
@@ -1005,18 +1005,7 @@ set batch_options = ''
 
 
 set machine = `lowercase $machine` # Change to lowercase, just to make the following easier to read.
-if ( $machine == hopper ) then
-    # TODO: Pass the correct options to the queue through batch_options
-    set batch_options = ""
-    sed -i /"#PBS \( \)*-N"/c"#PBS -N ${job_name}" ${case_run_exe}
-    sed -i /"#PBS \( \)*-j oe"/a'#PBS -o batch_output/${PBS_JOBNAME}.o${PBS_JOBID}' ${case_run_exe}
-
-    sed -i /"#PBS \( \)*-N"/c"#PBS -N ST+${job_name}" $shortterm_archive_script
-    sed -i /"#PBS \( \)*-j oe"/a'#PBS -o batch_output/${PBS_JOBNAME}.o${PBS_JOBID}' $shortterm_archive_script
-    sed -i /"#PBS \( \)*-N"/c"#PBS -N LT+${job_name}" $longterm_archive_script
-    sed -i /"#PBS \( \)*-j oe"/a'#PBS -o batch_output/${PBS_JOBNAME}.o${PBS_JOBID}' $longterm_archive_script
-
-else if ( $machine == cori || $machine == edison ) then
+if ( $machine == cori || $machine == edison ) then
     set batch_options = "--job-name=${job_name} --output=batch_output/${case_name}.o%j"
 
     sed -i /"#SBATCH \( \)*--job-name"/c"#SBATCH --job-name=ST+${job_name}" $shortterm_archive_script
@@ -1042,8 +1031,6 @@ else
     acme_print ' Assuming default ACME values.'
 endif
 
-acme_print "Batch options: ${batch_options}"
-
 #============================================
 # QUEUE OPTIONS
 #============================================
@@ -1366,6 +1353,7 @@ acme_newline
 # Note that this breaks compatibility with older versions of CIME
 # Also add a fix to reenable using the special acme qos queue on Edison (MD)
 # 3.0.8 2017-05-24 Fixed minor bug when $machine contained a capital letter. Bug was introduced recently. (PJC)
+# 3.0.9 2017-06-19 Fixed branch runs. Also removed sed commands for case.run and use --batch-args in case.submit (MD)
 #
 # NOTE: PJC = Philip Cameron-Smith, PMC = Peter Caldwell, CG = Chris Golaz, MD = Michael Deakin
 