From d8d72135c43d1fb44ef1be7e4de65b6f3c20d0df Mon Sep 17 00:00:00 2001 From: Ben Fitzpatrick Date: Thu, 3 Jul 2014 14:27:59 +0100 Subject: [PATCH 01/13] #119: restore runahead limit --- dev/suites/integer/one/suite.rc | 2 +- doc/suiterc.tex | 11 ++++++----- examples/future-trigger/suite.rc | 4 ++-- lib/cylc/cfgspec/suite.py | 22 +++++++++++++++++----- lib/cylc/config.py | 19 ++++++++++--------- lib/cylc/scheduler.py | 2 +- tests/cyclers/integer1/suite.rc | 2 +- tests/cylc-get-config/00-simple.t | 1 - tests/periodicals/Monthly-reorder/suite.rc | 2 +- tests/reload/runahead/suite.rc | 1 - tests/restart/reload/suite.rc | 2 +- tests/runahead/no_final/suite.rc | 2 +- tests/runahead/runahead/suite.rc | 2 +- 13 files changed, 42 insertions(+), 30 deletions(-) diff --git a/dev/suites/integer/one/suite.rc b/dev/suites/integer/one/suite.rc index 87c80c264a0..c0592d306a9 100644 --- a/dev/suites/integer/one/suite.rc +++ b/dev/suites/integer/one/suite.rc @@ -2,7 +2,7 @@ [scheduling] initial cycle point = 1 final cycle point = 16 - runahead factor = 4 + runahead limit = 12 cycling mode = integer [[special tasks]] sequential = seq diff --git a/doc/suiterc.tex b/doc/suiterc.tex index da50d0bcda3..23c1da5aecc 100644 --- a/doc/suiterc.tex +++ b/doc/suiterc.tex @@ -582,17 +582,18 @@ \subsection{[scheduling]} \end{myitemize} -\subsubsection[runahead limit]{[scheduling] $\rightarrow$ runahead factor} +\subsubsection[runahead limit]{[scheduling] $\rightarrow$ runahead limit} The suite runahead limit prevents the fastest tasks in a suite from getting too far ahead of the slowest ones, as documented in Section~\ref{RunaheadLimit}. Tasks exceeding the limit are put into a special runahead held state until slower tasks have caught up -sufficiently. The limit computed as this factor of the suite's minimum -cycling interval. +sufficiently. The default limit is computed as a factor (2) of the +suite's minimum cycling interval. \begin{myitemize} - \item {\em type:} integer multiple of the suite's minimum cycling interval - \item {\em default:} $2$ + \item {\em type:} ISO 8601 interval string e.g. \lstinline=PT12H= + for a 12 hour limit. + \item {\em default:} (twice the minimum cycling interval in the suite) \end{myitemize} \subsubsection[{[[}queues{]]}]{[scheduling] $\rightarrow$ [[queues]]} diff --git a/examples/future-trigger/suite.rc b/examples/future-trigger/suite.rc index c2ba000e495..c16e66d5d50 100644 --- a/examples/future-trigger/suite.rc +++ b/examples/future-trigger/suite.rc @@ -1,7 +1,7 @@ [scheduling] - initial cycle point = 2010080800 - runahead factor = 4 + initial cycle point = 20100808T00 + runahead limit = P1D [[special tasks]] cold-start = cold [[dependencies]] diff --git a/lib/cylc/cfgspec/suite.py b/lib/cylc/cfgspec/suite.py index 1cd55eeb7dc..e1f9d30903e 100644 --- a/lib/cylc/cfgspec/suite.py +++ b/lib/cylc/cfgspec/suite.py @@ -25,10 +25,24 @@ from parsec.config import config from isodatetime.dumpers import TimePointDumper from isodatetime.data import TimePoint -from isodatetime.parsers import TimePointParser +from isodatetime.parsers import TimePointParser, TimeIntervalParser "Define all legal items and values for cylc suite definition files." +def _coerce_cycleinterval( value, keys, args ): + """Coerce value to a cycle interval.""" + value = _strip_and_unquote( keys, value ) + if value.isdigit(): + # Old runahead limit format. + return "PT%dH" % int(value) + parser = TimeIntervalParser() + try: + parser.parse(value) + except ValueError: + raise IllegalValueError("interval", keys, value) + return value + + def _coerce_cycletime( value, keys, args ): """Coerce value to a cycle point.""" value = _strip_and_unquote( keys, value ) @@ -100,10 +114,10 @@ def _coerce_cycletime_time_zone( value, keys, args ): raise IllegalValueError("cycle point time zone format", keys, value) return value - coercers['cycletime'] = _coerce_cycletime coercers['cycletime_format'] = _coerce_cycletime_format coercers['cycletime_time_zone'] = _coerce_cycletime_time_zone +coercers['cycleinterval'] = _coerce_cycleinterval SPEC = { @@ -168,7 +182,7 @@ def _coerce_cycletime_time_zone( value, keys, args ): 'initial cycle point' : vdr(vtype='cycletime'), 'final cycle point' : vdr(vtype='cycletime'), 'cycling mode' : vdr(vtype='string', default="gregorian", options=["360day","gregorian","integer"] ), - 'runahead factor' : vdr(vtype='integer', default=2 ), + 'runahead limit' : vdr(vtype='cycleinterval' ), 'queues' : { 'default' : { 'limit' : vdr( vtype='integer', default=0), @@ -320,8 +334,6 @@ def upg( cfg, descr ): ['visualization', 'final cycle time'], ['visualization', 'final cycle point'], converter( lambda x: x, 'changed naming to reflect non-date-time cycling' ) ) - u.deprecate( '6.0.0', ['scheduling', 'runahead limit'], ['scheduling', 'runahead factor'], - converter( lambda x:'2', 'using default runahead factor' )) u.obsolete( '6.0.0', ['scheduling', 'dependencies', '__MANY__', 'daemon'] ) u.upgrade() diff --git a/lib/cylc/config.py b/lib/cylc/config.py index e817e20f029..d96591ab0ab 100644 --- a/lib/cylc/config.py +++ b/lib/cylc/config.py @@ -19,7 +19,8 @@ import re, os, sys import taskdef from cylc.cfgspec.suite import get_suitecfg -from cylc.cycling.loader import (get_point, get_interval_cls, +from cylc.cycling.loader import (get_point, + get_interval, get_interval_cls, get_sequence, get_sequence_cls, init_cyclers, INTEGER_CYCLING_TYPE, get_backwards_compatibility_mode) @@ -42,6 +43,7 @@ checking, then construct task proxy objects and graph structures. """ +AUTO_RUNAHEAD_FACTOR = 2 CLOCK_OFFSET_RE = re.compile('(\w+)\s*\(\s*([-+]*\s*[\d.]+)\s*\)') TRIGGER_TYPES = [ 'submit', 'submit-fail', 'start', 'succeed', 'fail', 'finish' ] @@ -113,6 +115,7 @@ def __init__( self, suite, fpath, template_vars=[], self.sequences = [] self.actual_first_ctime = None + self.custom_runahead_limit = None self.runahead_limit = None # runtime hierarchy dicts keyed by namespace name: @@ -545,14 +548,11 @@ def print_inheritance(self): print ' ', ' ', item, val def compute_runahead_limit( self ): - rfactor = self.cfg['scheduling']['runahead factor'] - if not rfactor: - # no runahead limit! + self.custom_runahead_limit = get_interval( + self.cfg['scheduling']['runahead limit']) + if self.custom_runahead_limit is not None: + self.runahead_limit = self.custom_runahead_limit return - try: - rfactor = int( rfactor ) - except ValueError: - raise SuiteConfigError, "ERROR, illegal runahead limit: " + str(rfactor) rlim = None intervals = [] @@ -564,7 +564,7 @@ def compute_runahead_limit( self ): offsets.append( seq.get_offset() ) if intervals: - rlim = min( intervals ) * rfactor + rlim = min( intervals ) * AUTO_RUNAHEAD_FACTOR if offsets: min_offset = min( offsets ) if min_offset < get_interval_cls().get_null(): @@ -575,6 +575,7 @@ def compute_runahead_limit( self ): rlim = abs(min_offset) + rlim self.runahead_limit = rlim + if flags.verbose: print "Runahead limit:", self.runahead_limit diff --git a/lib/cylc/scheduler.py b/lib/cylc/scheduler.py index 6136a7b0652..3912f69524e 100644 --- a/lib/cylc/scheduler.py +++ b/lib/cylc/scheduler.py @@ -1004,7 +1004,7 @@ def update_state_summary( self ): self.pool.get_min_ctime(), self.pool.get_max_ctime(), self.paused(), self.will_pause_at(), self.do_shutdown is not None, - self.will_stop_at(), self.pool.runahead_limit, + self.will_stop_at(), self.pool.runahead_limit, self.config.ns_defn_order ) diff --git a/tests/cyclers/integer1/suite.rc b/tests/cyclers/integer1/suite.rc index eddef1d24f3..390ce67cf59 100644 --- a/tests/cyclers/integer1/suite.rc +++ b/tests/cyclers/integer1/suite.rc @@ -4,7 +4,7 @@ [scheduling] initial cycle point = 1 final cycle point = 16 - runahead factor = 4 + runahead limit = 12 cycling mode = integer [[special tasks]] sequential = seq diff --git a/tests/cylc-get-config/00-simple.t b/tests/cylc-get-config/00-simple.t index 1c56ab00126..4fdf6f1e3af 100644 --- a/tests/cylc-get-config/00-simple.t +++ b/tests/cylc-get-config/00-simple.t @@ -91,7 +91,6 @@ TEST_NAME=$TEST_NAME_BASE-section1 run_ok $TEST_NAME cylc get-config --item=[scheduling] $SUITE_NAME cmp_ok $TEST_NAME.stdout - <<__OUT__ cycling mode = integer -runahead factor = 2 initial cycle point = 1 final cycle point = 1 [[queues]] diff --git a/tests/periodicals/Monthly-reorder/suite.rc b/tests/periodicals/Monthly-reorder/suite.rc index fadc3204497..adf8e115f5d 100644 --- a/tests/periodicals/Monthly-reorder/suite.rc +++ b/tests/periodicals/Monthly-reorder/suite.rc @@ -6,7 +6,7 @@ [scheduling] initial cycle time = 20130130T00 final cycle time = 20130202T00 - runahead factor = 1 # enforce sequential running + runahead limit = PT12H # Enforces sequential behaviour [[dependencies]] # (this triggers a monthly task off the last daily task each month) [[[T00]]] diff --git a/tests/reload/runahead/suite.rc b/tests/reload/runahead/suite.rc index 0897d23b459..4205cc5a90b 100644 --- a/tests/reload/runahead/suite.rc +++ b/tests/reload/runahead/suite.rc @@ -4,7 +4,6 @@ timeout = 0.2 abort on timeout = True [scheduling] - runahead factor = 2 # marker initial cycle time = 2010-01-01 final cycle time = 2010-01-05 [[dependencies]] diff --git a/tests/restart/reload/suite.rc b/tests/restart/reload/suite.rc index 6b99e5603c1..9b497ba0bfc 100644 --- a/tests/restart/reload/suite.rc +++ b/tests/restart/reload/suite.rc @@ -12,7 +12,7 @@ which should run to completion on restarting.""" [scheduling] initial cycle time = 2010080800 final cycle time = 2010080900 - runahead factor = 1 + runahead limit = 12 [[dependencies]] [[[0]]] graph = "foo => bar" diff --git a/tests/runahead/no_final/suite.rc b/tests/runahead/no_final/suite.rc index cdd83e98edc..3d1291bce61 100644 --- a/tests/runahead/no_final/suite.rc +++ b/tests/runahead/no_final/suite.rc @@ -4,7 +4,7 @@ timeout = 0.1 abort on timeout = True [scheduling] - runahead factor = 3 + runahead limit = PT18H initial cycle time = 20100101T00 [[dependencies]] [[[PT6H]]] diff --git a/tests/runahead/runahead/suite.rc b/tests/runahead/runahead/suite.rc index 8915d3acee7..ef189d1462f 100644 --- a/tests/runahead/runahead/suite.rc +++ b/tests/runahead/runahead/suite.rc @@ -4,7 +4,7 @@ timeout = 0.1 abort on timeout = True [scheduling] - runahead factor = 3 + runahead limit = PT18H initial cycle time = 20100101T00 final cycle time = 20100105T00 [[dependencies]] From 3b8e15de116adbaf7571f259a0c01139930f0248 Mon Sep 17 00:00:00 2001 From: Ben Fitzpatrick Date: Fri, 4 Jul 2014 15:20:26 +0100 Subject: [PATCH 02/13] #119: auto-detect minimum task repetition --- lib/cylc/config.py | 105 ++++++++++++++---- lib/cylc/cycling/iso8601.py | 2 +- lib/cylc/taskdef.py | 15 ++- ...ck-default.t => 01-check-default-simple.t} | 6 +- tests/runahead/02-check-default-complex.t | 46 ++++++++ tests/runahead/03-check-default-future.t | 48 ++++++++ ...2-no-final-cycle.t => 04-no-final-cycle.t} | 0 tests/runahead/default-complex/suite.rc | 24 ++++ tests/runahead/default-future/suite.rc | 23 ++++ .../{default => default-simple}/suite.rc | 7 +- 10 files changed, 239 insertions(+), 37 deletions(-) rename tests/runahead/{01-check-default.t => 01-check-default-simple.t} (93%) create mode 100644 tests/runahead/02-check-default-complex.t create mode 100644 tests/runahead/03-check-default-future.t rename tests/runahead/{02-no-final-cycle.t => 04-no-final-cycle.t} (100%) create mode 100644 tests/runahead/default-complex/suite.rc create mode 100644 tests/runahead/default-future/suite.rc rename tests/runahead/{default => default-simple}/suite.rc (59%) diff --git a/lib/cylc/config.py b/lib/cylc/config.py index d96591ab0ab..2dc06cfa609 100644 --- a/lib/cylc/config.py +++ b/lib/cylc/config.py @@ -43,8 +43,9 @@ checking, then construct task proxy objects and graph structures. """ -AUTO_RUNAHEAD_FACTOR = 2 +AUTO_RUNAHEAD_FACTOR = 2 # Factor to apply to the minimum cycling interval. CLOCK_OFFSET_RE = re.compile('(\w+)\s*\(\s*([-+]*\s*[\d.]+)\s*\)') +NUM_RUNAHEAD_SEQ_POINTS = 5 # Number of cycle points to look at per sequence. TRIGGER_TYPES = [ 'submit', 'submit-fail', 'start', 'succeed', 'fail', 'finish' ] try: @@ -554,17 +555,67 @@ def compute_runahead_limit( self ): self.runahead_limit = self.custom_runahead_limit return - rlim = None - intervals = [] - offsets = [] - for seq in self.sequences: - i = seq.get_interval() - if i: - intervals.append( i ) - offsets.append( seq.get_offset() ) + initial_point = get_point( + self.cfg['scheduling']['initial cycle point']) + + offsets = set([]) + seq_points = {} + point_tasks = {} - if intervals: - rlim = min( intervals ) * AUTO_RUNAHEAD_FACTOR + # Loop through all sequences and extract the first few points. + for seq in self.sequences: + seq_points[seq] = [] + seq_point_0 = seq.get_first_point(initial_point) + if seq_point_0 is None: + continue + seq_points[seq].append(seq_point_0) + point_tasks.setdefault(seq_point_0, set()) + iter_point = seq_point_0 + for i in range(NUM_RUNAHEAD_SEQ_POINTS - 1): + next_point = seq.get_next_point(iter_point) + if next_point is None: + break + seq_points[seq].append(next_point) + point_tasks.setdefault(next_point, set()) + iter_point = next_point + + # Loop through all tasks and log their sequences. + for name, taskdef in self.taskdefs.items(): + if taskdef.min_intercycle_offset is not None: + if taskdef.min_intercycle_offset: + offsets.add(taskdef.min_intercycle_offset) + for seq in taskdef.sequences: + for point in seq_points.get(seq, []): + point_tasks[point].add(name) + + points = sorted(point_tasks.keys()) + min_interval = None + + # Loop through all point pairs with common tasks to get the interval. + for i, point in enumerate(points): + tasks = point_tasks[point] + for other_point in points[i + 1:]: + other_tasks = point_tasks[other_point] + if point == other_point: + # Technically, hash-different points could compare equal. + continue + common_tasks = tasks.intersection(other_tasks) + if common_tasks: + # These points share the same task or tasks. + interval = abs(other_point - point) + if min_interval is None or interval < min_interval: + min_interval = interval + min_interval_task_example = common_tasks.pop() + min_interval_points = [point, other_point] + + if min_interval is None: + rlim = get_interval_cls().get_null() # Null interval. + description = "(null)" + else: + rlim = min_interval * AUTO_RUNAHEAD_FACTOR + description = "from %d * %s (min interval)" % ( + AUTO_RUNAHEAD_FACTOR, min_interval + ) if offsets: min_offset = min( offsets ) if min_offset < get_interval_cls().get_null(): @@ -573,11 +624,12 @@ def compute_runahead_limit( self ): #... that extend past the default rl # set to offsets plus one minimum interval rlim = abs(min_offset) + rlim + description += " + %s (future trigger)" % ( + abs(min_offset)) + if flags.verbose: + print "Runahead limit: %s: %s" % (rlim, description) self.runahead_limit = rlim - - if flags.verbose: - print "Runahead limit:", self.runahead_limit def get_runahead_limit( self ): # may be None (no cycling tasks) @@ -1282,6 +1334,8 @@ def generate_taskdefs( self, line, left_nodes, right, ttype, section, seq, offset_seq_map[str(offset)] = seq_offset self.taskdefs[name].add_sequence( seq_offset, is_implicit=True) + if seq_offset not in self.sequences: + self.sequences.append(seq_offset) # We don't handle implicit cycling in new-style cycling. else: self.taskdefs[ name ].add_sequence(seq) @@ -1313,20 +1367,23 @@ def generate_triggers( self, lexpression, left_nodes, right, seq, suicide ): # (GraphNodeError checked above) cycle_point = None lnode = graphnode(left, base_interval=base_interval) + ltaskdef = self.taskdefs[lnode.name] + if lnode.intercycle: - self.taskdefs[lnode.name].intercycle = True - if (self.taskdefs[lnode.name].intercycle_offset is None or ( - lnode.offset is not None and - lnode.offset > - self.taskdefs[lnode.name].intercycle_offset)): - self.taskdefs[lnode.name].intercycle_offset = lnode.offset + ltaskdef.intercycle = True + if (ltaskdef.max_intercycle_offset is None or + lnode.offset > ltaskdef.max_intercycle_offset): + ltaskdef.max_intercycle_offset = lnode.offset + if (ltaskdef.min_intercycle_offset is None or + lnode.offset < ltaskdef.min_intercycle_offset): + ltaskdef.min_intercycle_offset = lnode.offset + if lnode.offset_is_from_ict: last_point = seq.get_stop_point() - first_point = self.taskdefs[lnode.name].ict - lnode.offset + first_point = ltaskdef.ict - lnode.offset if first_point and last_point is not None: - self.taskdefs[lnode.name].intercycle_offset = (last_point - first_point) - else: - self.taskdefs[lnode.name].intercycle_offset = None + offset = (last_point - first_point) + ltaskdef.max_intercycle_offset = offset cycle_point = first_point trigger = self.set_trigger( lnode.name, right, lnode.output, lnode.offset, cycle_point, suicide, seq.get_interval() ) if not trigger: diff --git a/lib/cylc/cycling/iso8601.py b/lib/cylc/cycling/iso8601.py index 29d7d1539bd..9ac8063d559 100755 --- a/lib/cylc/cycling/iso8601.py +++ b/lib/cylc/cycling/iso8601.py @@ -381,7 +381,7 @@ def get_next_point_on_sequence(self, point): return result def get_first_point( self, point): - """Return the first point >= to poing, or None if out of bounds.""" + """Return the first point >= to point, or None if out of bounds.""" try: return ISO8601Point(self._cached_first_point_values[point.value]) except KeyError: diff --git a/lib/cylc/taskdef.py b/lib/cylc/taskdef.py index 87c5b868b84..e33e31d700d 100644 --- a/lib/cylc/taskdef.py +++ b/lib/cylc/taskdef.py @@ -63,7 +63,8 @@ def __init__( self, name, rtcfg, run_mode, ict ): # some defaults self.intercycle = False - self.intercycle_offset = get_interval_cls().get_null() + self.max_intercycle_offset = get_interval_cls().get_null() + self.min_intercycle_offset = get_interval_cls().get_null() self.sequential = False self.cycling = False self.modifiers = [] @@ -270,7 +271,7 @@ def tclass_init( sself, start_point, initial_state, stop_c_time=None, sself.startup = startup sself.submit_num = submit_num sself.exists=exists - sself.intercycle_offset = self.intercycle_offset + sself.max_intercycle_offset = self.max_intercycle_offset if self.cycling and startup: # adjust up to the first on-sequence cycle point @@ -282,10 +283,11 @@ def tclass_init( sself, start_point, initial_state, stop_c_time=None, adjusted.append( adj ) if adjusted: sself.tag = min( adjusted ) - if sself.intercycle_offset is None: + if sself.max_intercycle_offset is None: sself.cleanup_cutoff = None else: - sself.cleanup_cutoff = sself.tag + sself.intercycle_offset + sself.cleanup_cutoff = ( + sself.tag + sself.max_intercycle_offset) sself.id = TaskID.get( sself.name, str(sself.tag) ) else: sself.tag = None @@ -294,10 +296,11 @@ def tclass_init( sself, start_point, initial_state, stop_c_time=None, return else: sself.tag = start_point - if sself.intercycle_offset is None: + if sself.max_intercycle_offset is None: sself.cleanup_cutoff = None else: - sself.cleanup_cutoff = sself.tag + sself.intercycle_offset + sself.cleanup_cutoff = ( + sself.tag + sself.max_intercycle_offset) sself.id = TaskID.get( sself.name, str(sself.tag) ) sself.c_time = sself.tag diff --git a/tests/runahead/01-check-default.t b/tests/runahead/01-check-default-simple.t similarity index 93% rename from tests/runahead/01-check-default.t rename to tests/runahead/01-check-default-simple.t index 27571bdc943..98a428a62d3 100644 --- a/tests/runahead/01-check-default.t +++ b/tests/runahead/01-check-default-simple.t @@ -20,10 +20,10 @@ #------------------------------------------------------------------------------- set_test_number 4 #------------------------------------------------------------------------------- -install_suite $TEST_NAME_BASE default +install_suite $TEST_NAME_BASE default-simple #------------------------------------------------------------------------------- TEST_NAME=$TEST_NAME_BASE-validate -run_ok $TEST_NAME cylc validate $SUITE_NAME +run_ok $TEST_NAME cylc validate -v $SUITE_NAME #------------------------------------------------------------------------------- TEST_NAME=$TEST_NAME_BASE-run run_fail $TEST_NAME cylc run --debug $SUITE_NAME @@ -32,7 +32,7 @@ TEST_NAME=$TEST_NAME_BASE-check-fail DB=$(cylc get-global-config --print-run-dir)/$SUITE_NAME/cylc-suite.db RUNAHEAD=$(sqlite3 $DB "select max(cycle) from task_states") # manual comparison for the test -if [[ "$RUNAHEAD" == "20100101T0600Z" ]]; then +if [[ "$RUNAHEAD" == "20100101T1200Z" ]]; then ok $TEST_NAME else fail $TEST_NAME diff --git a/tests/runahead/02-check-default-complex.t b/tests/runahead/02-check-default-complex.t new file mode 100644 index 00000000000..602adf6338b --- /dev/null +++ b/tests/runahead/02-check-default-complex.t @@ -0,0 +1,46 @@ +#!/bin/bash +#C: THIS FILE IS PART OF THE CYLC SUITE ENGINE. +#C: Copyright (C) 2008-2014 Hilary Oliver, NIWA +#C: +#C: This program is free software: you can redistribute it and/or modify +#C: it under the terms of the GNU General Public License as published by +#C: the Free Software Foundation, either version 3 of the License, or +#C: (at your option) any later version. +#C: +#C: This program is distributed in the hope that it will be useful, +#C: but WITHOUT ANY WARRANTY; without even the implied warranty of +#C: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +#C: GNU General Public License for more details. +#C: +#C: You should have received a copy of the GNU General Public License +#C: along with this program. If not, see . +#------------------------------------------------------------------------------- +#C: Test default runahead limit behaviour is still the same +. $(dirname $0)/test_header +#------------------------------------------------------------------------------- +set_test_number 5 +#------------------------------------------------------------------------------- +install_suite $TEST_NAME_BASE default-complex +#------------------------------------------------------------------------------- +TEST_NAME=$TEST_NAME_BASE-validate +run_ok $TEST_NAME cylc validate -v $SUITE_NAME +grep_ok "Runahead limit: PT4H" $TEST_NAME.stdout +#------------------------------------------------------------------------------- +TEST_NAME=$TEST_NAME_BASE-run +run_fail $TEST_NAME cylc run --debug $SUITE_NAME +#------------------------------------------------------------------------------- +TEST_NAME=$TEST_NAME_BASE-check-fail +DB=$(cylc get-global-config --print-run-dir)/$SUITE_NAME/cylc-suite.db +RUNAHEAD=$(sqlite3 $DB "select max(cycle) from task_states") +# manual comparison for the test +if [[ "$RUNAHEAD" == "20100101T0400Z" ]]; then + ok $TEST_NAME +else + fail $TEST_NAME +fi +#------------------------------------------------------------------------------- +TEST_NAME=$TEST_NAME_BASE-check-timeout +LOG=$(cylc get-global-config --print-run-dir)/$SUITE_NAME/log/suite/log +run_ok $TEST_NAME grep 'Abort on suite timeout is set' $LOG +#------------------------------------------------------------------------------- +purge_suite $SUITE_NAME diff --git a/tests/runahead/03-check-default-future.t b/tests/runahead/03-check-default-future.t new file mode 100644 index 00000000000..68f4ea53640 --- /dev/null +++ b/tests/runahead/03-check-default-future.t @@ -0,0 +1,48 @@ +#!/bin/bash +#C: THIS FILE IS PART OF THE CYLC SUITE ENGINE. +#C: Copyright (C) 2008-2014 Hilary Oliver, NIWA +#C: +#C: This program is free software: you can redistribute it and/or modify +#C: it under the terms of the GNU General Public License as published by +#C: the Free Software Foundation, either version 3 of the License, or +#C: (at your option) any later version. +#C: +#C: This program is distributed in the hope that it will be useful, +#C: but WITHOUT ANY WARRANTY; without even the implied warranty of +#C: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +#C: GNU General Public License for more details. +#C: +#C: You should have received a copy of the GNU General Public License +#C: along with this program. If not, see . +#------------------------------------------------------------------------------- +#C: Test default runahead limit behaviour is still the same +. $(dirname $0)/test_header +#------------------------------------------------------------------------------- +set_test_number 5 +#------------------------------------------------------------------------------- +install_suite $TEST_NAME_BASE default-future +#------------------------------------------------------------------------------- +TEST_NAME=$TEST_NAME_BASE-validate +run_ok $TEST_NAME cylc validate -v $SUITE_NAME +grep_ok "Runahead limit: PT8H" $TEST_NAME.stdout +cat $TEST_NAME.stdout >/dev/tty +cat $TEST_NAME.stderr >/dev/tty +#------------------------------------------------------------------------------- +TEST_NAME=$TEST_NAME_BASE-run +run_fail $TEST_NAME cylc run --debug $SUITE_NAME +#------------------------------------------------------------------------------- +TEST_NAME=$TEST_NAME_BASE-check-fail +DB=$(cylc get-global-config --print-run-dir)/$SUITE_NAME/cylc-suite.db +RUNAHEAD=$(sqlite3 $DB "select max(cycle) from task_states") +# manual comparison for the test +if [[ "$RUNAHEAD" == "20100101T0800Z" ]]; then + ok $TEST_NAME +else + fail $TEST_NAME +fi +#------------------------------------------------------------------------------- +TEST_NAME=$TEST_NAME_BASE-check-timeout +LOG=$(cylc get-global-config --print-run-dir)/$SUITE_NAME/log/suite/log +run_ok $TEST_NAME grep 'Abort on suite timeout is set' $LOG +#------------------------------------------------------------------------------- +purge_suite $SUITE_NAME diff --git a/tests/runahead/02-no-final-cycle.t b/tests/runahead/04-no-final-cycle.t similarity index 100% rename from tests/runahead/02-no-final-cycle.t rename to tests/runahead/04-no-final-cycle.t diff --git a/tests/runahead/default-complex/suite.rc b/tests/runahead/default-complex/suite.rc new file mode 100644 index 00000000000..c1793219e77 --- /dev/null +++ b/tests/runahead/default-complex/suite.rc @@ -0,0 +1,24 @@ +[cylc] + UTC mode = True + [[event hooks]] + timeout = 0.1 + abort on timeout = True +[scheduling] + initial cycle point = 20100101T00 + final cycle point = 20100105T00 + [[dependencies]] + # T00, T07, T14, ... + [[[PT7H]]] + graph = "foo => bar" + # T00, T12, T18... 2 hours difference + [[[T00, T12, T18]]] + graph = "foo" + [[[T04]]] + graph = "run_ok" + [[[T05]]] + graph = "never_run" +[runtime] + [[foo]] + command scripting = false + [[bar]] + command scripting = true diff --git a/tests/runahead/default-future/suite.rc b/tests/runahead/default-future/suite.rc new file mode 100644 index 00000000000..396f4ddbf3d --- /dev/null +++ b/tests/runahead/default-future/suite.rc @@ -0,0 +1,23 @@ +[cylc] + UTC mode = True + [[event hooks]] + timeout = 0.1 + abort on timeout = True +[scheduling] + initial cycle point = 20100101T00 + final cycle point = 20100105T00 + [[dependencies]] + # Every hour, so would expect a 2 hour runahead limit. + [[[PT1H]]] + graph = "foo => bar" + # A 6 hour future trigger, => 2 + 6 = 8 hour runahead limit. + [[[T04/PT6H]]] + graph = """ + baz[+PT6H] => bar + baz + """ +[runtime] + [[foo]] + command scripting = false + [[bar]] + command scripting = true diff --git a/tests/runahead/default/suite.rc b/tests/runahead/default-simple/suite.rc similarity index 59% rename from tests/runahead/default/suite.rc rename to tests/runahead/default-simple/suite.rc index 0b1f05d85eb..f8af7de5c1d 100644 --- a/tests/runahead/default/suite.rc +++ b/tests/runahead/default-simple/suite.rc @@ -4,10 +4,11 @@ timeout = 0.1 abort on timeout = True [scheduling] - initial cycle time = 20100101T00 - final cycle time = 20100105T00 + initial cycle point = 20100101T00 + final cycle point = 20100105T00 [[dependencies]] - [[[PT3H]]] + # Intervals are all 24 hours, but we really have a 6 hour repetition. + [[[T00, T06, T12, T18]]] graph = "foo => bar" [runtime] [[foo]] From 828bf3b11cbebc01aeb52282ea06e3ff4d6b8304 Mon Sep 17 00:00:00 2001 From: Ben Fitzpatrick Date: Fri, 4 Jul 2014 16:23:08 +0100 Subject: [PATCH 03/13] fix integer runahead limit for integer cycling --- doc/suiterc.tex | 7 ++++--- lib/cylc/cfgspec/suite.py | 3 ++- lib/cylc/config.py | 9 +++++++-- tests/restart/02-failed.t | 2 +- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/doc/suiterc.tex b/doc/suiterc.tex index 23c1da5aecc..12843f08aeb 100644 --- a/doc/suiterc.tex +++ b/doc/suiterc.tex @@ -589,10 +589,11 @@ \subsection{[scheduling]} Section~\ref{RunaheadLimit}. Tasks exceeding the limit are put into a special runahead held state until slower tasks have caught up sufficiently. The default limit is computed as a factor (2) of the -suite's minimum cycling interval. +suite's minimum cycling interval, and is up-adjusted from there as +necessary for any longer-offset future triggering. \begin{myitemize} - \item {\em type:} ISO 8601 interval string e.g. \lstinline=PT12H= - for a 12 hour limit. + \item {\em type:} Cycle interval string e.g. \lstinline=PT12H= + for a 12 hour limit under ISO 8601 cycling. \item {\em default:} (twice the minimum cycling interval in the suite) \end{myitemize} diff --git a/lib/cylc/cfgspec/suite.py b/lib/cylc/cfgspec/suite.py index e1f9d30903e..285a9965911 100644 --- a/lib/cylc/cfgspec/suite.py +++ b/lib/cylc/cfgspec/suite.py @@ -29,12 +29,13 @@ "Define all legal items and values for cylc suite definition files." + def _coerce_cycleinterval( value, keys, args ): """Coerce value to a cycle interval.""" value = _strip_and_unquote( keys, value ) if value.isdigit(): # Old runahead limit format. - return "PT%dH" % int(value) + return value parser = TimeIntervalParser() try: parser.parse(value) diff --git a/lib/cylc/config.py b/lib/cylc/config.py index 2dc06cfa609..b0c6312d64d 100644 --- a/lib/cylc/config.py +++ b/lib/cylc/config.py @@ -23,6 +23,7 @@ get_interval, get_interval_cls, get_sequence, get_sequence_cls, init_cyclers, INTEGER_CYCLING_TYPE, + ISO8601_CYCLING_TYPE, get_backwards_compatibility_mode) from envvar import check_varnames, expandvars from copy import deepcopy, copy @@ -549,8 +550,12 @@ def print_inheritance(self): print ' ', ' ', item, val def compute_runahead_limit( self ): - self.custom_runahead_limit = get_interval( - self.cfg['scheduling']['runahead limit']) + limit = self.cfg['scheduling']['runahead limit'] + if (limit is not None and limit.isdigit() and + get_interval_cls().get_null().TYPE == ISO8601_CYCLING_TYPE): + # Backwards-compatibility for raw number of hours. + limit = "PT%sH" % limit + self.custom_runahead_limit = get_interval(limit) if self.custom_runahead_limit is not None: self.runahead_limit = self.custom_runahead_limit return diff --git a/tests/restart/02-failed.t b/tests/restart/02-failed.t index fd92fb36231..4414646ad74 100644 --- a/tests/restart/02-failed.t +++ b/tests/restart/02-failed.t @@ -35,7 +35,7 @@ fi export TEST_DIR #------------------------------------------------------------------------------- TEST_NAME=$TEST_NAME_BASE-validate -run_ok $TEST_NAME cylc validate $SUITE_NAME +run_ok $TEST_NAME cylc validate -v $SUITE_NAME cmp_ok "$TEST_NAME.stderr" Date: Tue, 8 Jul 2014 10:20:14 +0100 Subject: [PATCH 04/13] restore runahead behaviour in restart tests --- tests/restart/01-broadcast.t | 5 ----- tests/restart/02-failed.t | 5 ----- tests/restart/03-retrying.t | 9 --------- tests/restart/04-running.t | 5 ----- tests/restart/05-submit-failed.t | 4 +--- tests/restart/06-succeeded.t | 5 ----- tests/restart/07-waiting.t | 4 ---- tests/restart/failed/suite.rc | 4 ++-- 8 files changed, 3 insertions(+), 38 deletions(-) diff --git a/tests/restart/01-broadcast.t b/tests/restart/01-broadcast.t index 2909f098186..560abe610e0 100644 --- a/tests/restart/01-broadcast.t +++ b/tests/restart/01-broadcast.t @@ -94,11 +94,8 @@ grep_ok "send_a_broadcast_task|2013092300|1|1|succeeded" $TEST_DIR/states-db-pre cmp_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' broadcast_task|2013092300|0|1|waiting force_restart|2013092300|1|1|succeeded -force_restart|2013092306|0|1|waiting output_states|2013092300|1|1|running -output_states|2013092306|0|1|waiting send_a_broadcast_task|2013092300|1|1|succeeded -send_a_broadcast_task|2013092306|0|1|waiting tidy|2013092300|0|1|waiting __DB_DUMP__ cmp_ok $TEST_DIR/state-pre-restart-2013092306 <<'__STATE__' @@ -151,7 +148,6 @@ output_states|2013092300|1|1|succeeded output_states|2013092306|0|1|waiting send_a_broadcast_task|2013092300|1|1|succeeded send_a_broadcast_task|2013092306|1|1|succeeded -send_a_broadcast_task|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting __DB_DUMP__ @@ -165,7 +161,6 @@ output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|running send_a_broadcast_task|2013092300|1|1|succeeded send_a_broadcast_task|2013092306|1|1|succeeded -send_a_broadcast_task|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting __DB_DUMP__ diff --git a/tests/restart/02-failed.t b/tests/restart/02-failed.t index 4414646ad74..ca13b7c7304 100644 --- a/tests/restart/02-failed.t +++ b/tests/restart/02-failed.t @@ -78,11 +78,8 @@ __STATE__ grep_ok "failed_task|2013092300|1|1|failed" $TEST_DIR/states-db-pre-restart-2013092300 cmp_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' failed_task|2013092300|1|1|failed -failed_task|2013092306|0|1|waiting force_restart|2013092300|1|1|succeeded -force_restart|2013092306|0|1|waiting output_states|2013092300|1|1|running -output_states|2013092306|0|1|waiting tidy|2013092300|0|1|waiting __DB_DUMP__ cmp_ok $TEST_DIR/state-pre-restart-2013092306 <<'__STATE__' @@ -102,7 +99,6 @@ __STATE__ cmp_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' failed_task|2013092300|1|1|failed failed_task|2013092306|1|1|failed -failed_task|2013092312|0|1|held force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|running output_states|2013092300|1|1|succeeded @@ -113,7 +109,6 @@ __DB_DUMP__ cmp_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' failed_task|2013092300|1|1|failed failed_task|2013092306|1|1|failed -failed_task|2013092312|0|1|held force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded output_states|2013092300|1|1|succeeded diff --git a/tests/restart/03-retrying.t b/tests/restart/03-retrying.t index 61c3e0388f8..6d6a3fcf1ac 100644 --- a/tests/restart/03-retrying.t +++ b/tests/restart/03-retrying.t @@ -78,22 +78,15 @@ __STATE__ grep_ok "retrying_task|2013092300|1|2|retrying" $TEST_DIR/states-db-pre-restart-2013092300 cmp_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded -force_restart|2013092306|0|1|waiting output_states|2013092300|1|1|running -output_states|2013092306|0|1|waiting retrying_task|2013092300|2|2|retrying -retrying_task|2013092306|0|1|waiting tidy|2013092300|0|1|waiting __DB_DUMP__ cmp_ok $TEST_DIR/states-db-tidy-2013092300 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded -force_restart|2013092306|0|1|waiting output_states|2013092300|1|1|succeeded -output_states|2013092306|0|1|waiting retrying_task|2013092300|4|3|succeeded -retrying_task|2013092306|0|1|waiting tidy|2013092300|1|1|running -tidy|2013092306|0|1|waiting __DB_DUMP__ cmp_ok $TEST_DIR/state-pre-restart-2013092306 <<'__STATE__' run mode : live @@ -116,7 +109,6 @@ output_states|2013092300|1|1|succeeded output_states|2013092306|0|1|waiting retrying_task|2013092300|4|3|succeeded retrying_task|2013092306|1|2|retrying -retrying_task|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting __DB_DUMP__ @@ -127,7 +119,6 @@ output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|running retrying_task|2013092300|4|3|succeeded retrying_task|2013092306|2|2|retrying -retrying_task|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting __DB_DUMP__ diff --git a/tests/restart/04-running.t b/tests/restart/04-running.t index 70366c48cfd..834b55417e8 100644 --- a/tests/restart/04-running.t +++ b/tests/restart/04-running.t @@ -78,11 +78,8 @@ __STATE__ grep_ok "running_task|2013092300|1|1|running" $TEST_DIR/states-db-pre-restart-2013092300 cmp_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded -force_restart|2013092306|0|1|waiting output_states|2013092300|1|1|running -output_states|2013092306|0|1|waiting running_task|2013092300|1|1|running -running_task|2013092306|0|1|waiting tidy|2013092300|0|1|waiting __DB_DUMP__ cmp_ok $TEST_DIR/state-pre-restart-2013092306 <<'__STATE__' @@ -106,7 +103,6 @@ output_states|2013092300|1|1|succeeded output_states|2013092306|0|1|waiting running_task|2013092300|1|1|succeeded running_task|2013092306|1|1|running -running_task|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting __DB_DUMP__ @@ -118,7 +114,6 @@ output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|running running_task|2013092300|1|1|succeeded running_task|2013092306|1|1|succeeded -running_task|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting __DB_DUMP__ diff --git a/tests/restart/05-submit-failed.t b/tests/restart/05-submit-failed.t index 08e18f6163f..79d437eb68b 100644 --- a/tests/restart/05-submit-failed.t +++ b/tests/restart/05-submit-failed.t @@ -39,7 +39,7 @@ run_ok $TEST_NAME cylc validate $SUITE_NAME cmp_ok "$TEST_NAME.stderr" Date: Tue, 8 Jul 2014 10:44:17 +0100 Subject: [PATCH 05/13] modify test suites for runahead limit --- tests/intercycle/complex_implicit/suite.rc | 1 + tests/reload/runahead/suite.rc | 3 ++- tests/special/sequential/suite.rc | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/intercycle/complex_implicit/suite.rc b/tests/intercycle/complex_implicit/suite.rc index 41f5b848543..dd81b11e3d8 100644 --- a/tests/intercycle/complex_implicit/suite.rc +++ b/tests/intercycle/complex_implicit/suite.rc @@ -1,6 +1,7 @@ [scheduling] initial cycle time = 2014010100 final cycle time = 2014010300 + runahead limit = 24 [[dependencies]] [[[0]]] graph = "foo[T-13] => bar" diff --git a/tests/reload/runahead/suite.rc b/tests/reload/runahead/suite.rc index 4205cc5a90b..959a0beb5a8 100644 --- a/tests/reload/runahead/suite.rc +++ b/tests/reload/runahead/suite.rc @@ -6,6 +6,7 @@ [scheduling] initial cycle time = 2010-01-01 final cycle time = 2010-01-05 + runahead limit = PT12H # marker [[dependencies]] [[[T00]]] graph = "foo:fail => reloader" @@ -19,7 +20,7 @@ [[reloader]] command scripting = """ # change the dependencies section name to garbage: -perl -pi -e 's/(runahead factor = )2( # marker)/\1 3\2/' $CYLC_SUITE_DEF_PATH/suite.rc +perl -pi -e 's/(runahead limit = )PT12H( # marker)/\1 PT18H\2/' $CYLC_SUITE_DEF_PATH/suite.rc # reload cylc reload -f $CYLC_SUITE_NAME """ diff --git a/tests/special/sequential/suite.rc b/tests/special/sequential/suite.rc index 786af5bc4ba..e5c8dbf1192 100644 --- a/tests/special/sequential/suite.rc +++ b/tests/special/sequential/suite.rc @@ -5,6 +5,7 @@ [scheduling] initial cycle time = 2010010100 final cycle time = 2010010118 + runahead limit = 18 [[special tasks]] sequential = "foo" [[dependencies]] From b962aecb419b122528ffc7fa73614b2c5ee4a59c Mon Sep 17 00:00:00 2001 From: Ben Fitzpatrick Date: Tue, 8 Jul 2014 14:02:49 +0100 Subject: [PATCH 06/13] restore runahead limit to get-config test --- tests/cylc-get-config/00-simple.t | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/cylc-get-config/00-simple.t b/tests/cylc-get-config/00-simple.t index 4fdf6f1e3af..e22f985cd40 100644 --- a/tests/cylc-get-config/00-simple.t +++ b/tests/cylc-get-config/00-simple.t @@ -92,6 +92,7 @@ run_ok $TEST_NAME cylc get-config --item=[scheduling] $SUITE_NAME cmp_ok $TEST_NAME.stdout - <<__OUT__ cycling mode = integer initial cycle point = 1 +runahead limit = final cycle point = 1 [[queues]] [[[default]]] From 3830a579b6d39aa8c8bb8b119c6ad39bf15dd5b6 Mon Sep 17 00:00:00 2001 From: Ben Fitzpatrick Date: Wed, 9 Jul 2014 16:33:24 +0100 Subject: [PATCH 07/13] add new test function to test line-by-line membership --- bin/cylc-test-battery | 4 ++++ tests/lib/bash/test_header | 38 +++++++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/bin/cylc-test-battery b/bin/cylc-test-battery index ee5d47ad0b7..fb54b893d92 100755 --- a/bin/cylc-test-battery +++ b/bin/cylc-test-battery @@ -46,6 +46,10 @@ machines this may result in some test failures due to timeouts intended to catch problems that can prevent a suite from shutting down normally. Use the "-j N" option to change the amount of concurrency. +To output stderr from failed tests to the terminal, export +CYLC_TEST_DEBUG=true before running this command. To perform comparison +tests using `xxdiff -D`, export CYLC_TEST_DEBUG_CMP=true. + For more information see "Reference Tests" in the User Guide. Options: diff --git a/tests/lib/bash/test_header b/tests/lib/bash/test_header index c6f6b65310f..819861c300f 100644 --- a/tests/lib/bash/test_header +++ b/tests/lib/bash/test_header @@ -31,7 +31,8 @@ # ok TEST_NAME # echo a TAP OK message for TEST_NAME. # fail TEST_NAME -# echo a TAP fail message for TEST_NAME. +# echo a TAP fail message for TEST_NAME. If $CYLC_TEST_DEBUG is set, +# cat $TEST_NAME.stderr to stderr. # run_ok TEST_NAME COMMAND ... # Run COMMAND with any following options/arguments and store stdout # and stderr in TEST_NAME.stdout and TEST_NAME.stderr. @@ -42,8 +43,11 @@ # This is expected to have a non-zero return code, which ok's the test. # cmp_ok FILE_TEST [FILE_CONTROL] # Compare FILE_TEST with a file or stdin given by FILE_CONTROL -# (stdin if FILE_CONTROL is "-" or missing). If $TEST_DEBUG_CMP +# (stdin if FILE_CONTROL is "-" or missing). If $CYLC_TEST_DEBUG_CMP # is set, show differences using xxdiff. +# contains_ok FILE_TEST [FILE_CONTROL] +# Make sure that each line in FILE_TEST is present in FILE_CONTROL +# (stdin if FILE_CONTROL is "-" or missing). # grep_ok PATTERN FILE # Run "grep -q PATTERN FILE". # exists_ok FILE @@ -96,6 +100,10 @@ function ok() { function fail() { ((++FAILURES)) echo "not ok $((++TEST_NUMBER)) - $@" + if [[ -n ${CYLC_TEST_DEBUG:-} ]]; then + ls -l $TEST_NAME.stderr >/dev/tty + cat $TEST_NAME.stderr >/dev/tty + fi } function run_ok() { @@ -157,21 +165,36 @@ function cmp_ok() { local FILE_TEST=$1 local FILE_CONTROL=${2:--} local TEST_NAME=$(basename $FILE_TEST)-cmp-ok - local CMP_COMMAND="cmp" - if [[ -n ${TEST_DEBUG_CMP:-} ]]; then + local CMP_COMMAND="diff" + if [[ -n ${CYLC_TEST_DEBUG_CMP:-} ]]; then CMP_COMMAND="xxdiff -D" fi if $CMP_COMMAND "$FILE_TEST" "$FILE_CONTROL" \ - 1>$TEST_NAME.stdout 2>$TEST_NAME.stderr; then + 1>$TEST_NAME.stderr 2>&1; then ok $TEST_NAME return fi - diff "$FILE_TEST" "$FILE_CONTROL" 2>>$TEST_NAME.stderr mkdir -p $TEST_LOG_DIR cp $TEST_NAME.stderr $TEST_LOG_DIR/$TEST_NAME.stderr fail $TEST_NAME } +function contains_ok() { + local FILE_TEST=$1 + local FILE_CONTROL=${2:--} + local TEST_NAME=$(basename $FILE_TEST)-contains-ok + comm -13 <(sort "$FILE_TEST") <(sort "$FILE_CONTROL") \ + 1>$TEST_NAME.stdout 2>$TEST_NAME.stderr + if [[ -s $TEST_NAME.stdout ]]; then + echo "Missing lines:" >>$TEST_NAME.stderr + cat $TEST_NAME.stdout >>$TEST_NAME.stderr + cp $TEST_NAME.stderr $TEST_LOG_DIR/$TEST_NAME.stderr + fail $TEST_NAME + return + fi + ok $TEST_NAME +} + function grep_ok() { local BRE=$1 local FILE=$2 @@ -281,7 +304,8 @@ TEST_NAME_BASE=$(basename $0 .t) TEST_SOURCE_DIR=$(cd $(dirname $0) && pwd) TEST_DIR=$(mktemp -d) cd $TEST_DIR -TEST_LOG_DIR=${TMPDIR:-/tmp}/cylc-tests-"$LOGNAME"/$(basename $TEST_SOURCE_DIR) +TEST_LOG_DIR_BASE=${TMPDIR:-/tmp}/cylc-tests-"$LOGNAME"/$(basename $TEST_SOURCE_DIR) +TEST_LOG_DIR=$TEST_LOG_DIR_BASE/$TEST_NAME_BASE-$(date -u +%Y%m%dT%H%M%SZ)/ SUITE_RUN_FAILS=false # these variables should be moved to site/user config: From 52ab2583f4c5c59d0494e1fac55381be92ff4cf2 Mon Sep 17 00:00:00 2001 From: Ben Fitzpatrick Date: Wed, 9 Jul 2014 16:33:59 +0100 Subject: [PATCH 08/13] modify restart tests to use new contains_ok function --- tests/queues/01-queuesize-5.t | 1 + tests/queues/qsize/suite.rc | 3 ++- tests/restart/01-broadcast.t | 16 ++++------------ tests/restart/02-failed.t | 14 ++++---------- tests/restart/03-retrying.t | 17 +++++------------ tests/restart/04-running.t | 14 ++++---------- tests/restart/05-submit-failed.t | 14 ++++---------- tests/restart/06-succeeded.t | 14 ++++---------- tests/restart/07-waiting.t | 13 ++++--------- 9 files changed, 32 insertions(+), 74 deletions(-) diff --git a/tests/queues/01-queuesize-5.t b/tests/queues/01-queuesize-5.t index 268da3d3561..4b90b6aa4b6 100644 --- a/tests/queues/01-queuesize-5.t +++ b/tests/queues/01-queuesize-5.t @@ -26,6 +26,7 @@ TEST_NAME=$TEST_NAME_BASE-validate run_ok $TEST_NAME cylc validate -s q_size=5 $SUITE_NAME #------------------------------------------------------------------------------- TEST_NAME=$TEST_NAME_BASE-run +cylc gui $SUITE_NAME & suite_run_ok $TEST_NAME cylc run --reference-test --debug -s q_size=5 $SUITE_NAME #------------------------------------------------------------------------------- purge_suite $SUITE_NAME diff --git a/tests/queues/qsize/suite.rc b/tests/queues/qsize/suite.rc index 2bcc4acaac5..81282bb1a03 100644 --- a/tests/queues/qsize/suite.rc +++ b/tests/queues/qsize/suite.rc @@ -30,7 +30,8 @@ fi sleep 1 done - if ((TASKS > 3)); then + if ((TASKS > {{q_size}})); then + echo "[FAIL] more than {{q_size}} tasks running: $TASKS" >&2 false else true diff --git a/tests/restart/01-broadcast.t b/tests/restart/01-broadcast.t index 724cba87541..01168509833 100644 --- a/tests/restart/01-broadcast.t +++ b/tests/restart/01-broadcast.t @@ -91,7 +91,7 @@ tidy.2013092300 : status=waiting, spawned=false __STATE__ grep_ok "broadcast_task|2013092300|0|1|waiting" $TEST_DIR/states-db-pre-restart-2013092300 grep_ok "send_a_broadcast_task|2013092300|1|1|succeeded" $TEST_DIR/states-db-pre-restart-2013092300 -cmp_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' broadcast_task|2013092300|0|1|waiting force_restart|2013092300|1|1|succeeded output_states|2013092300|1|1|running @@ -140,12 +140,11 @@ send_a_broadcast_task.2013092312 : status=held, spawned=false tidy.2013092300 : status=succeeded, spawned=true tidy.2013092306 : status=waiting, spawned=false __STATE__ -cmp_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' broadcast_task|2013092300|1|1|succeeded broadcast_task|2013092306|0|1|waiting force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|running -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|0|1|waiting send_a_broadcast_task|2013092300|1|1|succeeded @@ -154,15 +153,13 @@ tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting __DB_DUMP__ -cmp_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' broadcast_task|2013092300|1|1|succeeded broadcast_task|2013092306|0|1|waiting force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|running -output_states|2013092312|0|1|held send_a_broadcast_task|2013092300|1|1|succeeded send_a_broadcast_task|2013092306|1|1|succeeded tidy|2013092300|1|1|succeeded @@ -199,22 +196,17 @@ sqlite3 $(cylc get-global-config --print-run-dir)/$SUITE_NAME/cylc-suite.db \ "select name, cycle, submit_num, try_num, status from task_states order by name, cycle;" > $TEST_DIR/states-db -cmp_ok $TEST_DIR/states-db <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db <<'__DB_DUMP__' broadcast_task|2013092300|1|1|succeeded broadcast_task|2013092306|1|1|succeeded -broadcast_task|2013092312|0|1|held force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|succeeded -output_states|2013092312|0|1|held send_a_broadcast_task|2013092300|1|1|succeeded send_a_broadcast_task|2013092306|1|1|succeeded -send_a_broadcast_task|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|1|1|succeeded -tidy|2013092312|0|1|held __DB_DUMP__ #------------------------------------------------------------------------------- purge_suite $SUITE_NAME diff --git a/tests/restart/02-failed.t b/tests/restart/02-failed.t index c0e481ed011..203f69a026c 100644 --- a/tests/restart/02-failed.t +++ b/tests/restart/02-failed.t @@ -76,7 +76,7 @@ output_states.2013092300 : status=waiting, spawned=false tidy.2013092300 : status=waiting, spawned=false __STATE__ grep_ok "failed_task|2013092300|1|1|failed" $TEST_DIR/states-db-pre-restart-2013092300 -cmp_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' failed_task|2013092300|1|1|failed force_restart|2013092300|1|1|succeeded output_states|2013092300|1|1|running @@ -97,7 +97,7 @@ output_states.2013092306 : status=waiting, spawned=false tidy.2013092300 : status=succeeded, spawned=true tidy.2013092306 : status=waiting, spawned=false __STATE__ -cmp_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' failed_task|2013092300|1|1|failed failed_task|2013092306|1|1|failed force_restart|2013092300|1|1|succeeded @@ -107,15 +107,13 @@ output_states|2013092306|0|1|waiting tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting __DB_DUMP__ -cmp_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' failed_task|2013092300|1|1|failed failed_task|2013092306|1|1|failed force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|running -output_states|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting __DB_DUMP__ @@ -136,19 +134,15 @@ sqlite3 $(cylc get-global-config --print-run-dir)/$SUITE_NAME/cylc-suite.db \ "select name, cycle, submit_num, try_num, status from task_states order by name, cycle;" > $TEST_DIR/states-db -cmp_ok $TEST_DIR/states-db <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db <<'__DB_DUMP__' failed_task|2013092300|1|1|failed failed_task|2013092306|1|1|failed -failed_task|2013092312|0|1|held force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|succeeded -output_states|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|1|1|succeeded -tidy|2013092312|0|1|held __DB_DUMP__ #------------------------------------------------------------------------------- purge_suite $SUITE_NAME diff --git a/tests/restart/03-retrying.t b/tests/restart/03-retrying.t index f4732e39eca..79d20f71c24 100644 --- a/tests/restart/03-retrying.t +++ b/tests/restart/03-retrying.t @@ -76,13 +76,13 @@ retrying_task.2013092306 : status=waiting, spawned=false tidy.2013092300 : status=waiting, spawned=false __STATE__ grep_ok "retrying_task|2013092300|1|2|retrying" $TEST_DIR/states-db-pre-restart-2013092300 -cmp_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded output_states|2013092300|1|1|running retrying_task|2013092300|2|2|retrying tidy|2013092300|0|1|waiting __DB_DUMP__ -cmp_ok $TEST_DIR/states-db-tidy-2013092300 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-tidy-2013092300 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded output_states|2013092300|1|1|succeeded retrying_task|2013092300|4|3|succeeded @@ -103,10 +103,9 @@ retrying_task.2013092312 : status=held, spawned=false tidy.2013092300 : status=succeeded, spawned=true tidy.2013092306 : status=waiting, spawned=false __STATE__ -cmp_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|running -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|0|1|waiting retrying_task|2013092300|4|3|succeeded @@ -114,13 +113,11 @@ retrying_task|2013092306|1|2|retrying tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting __DB_DUMP__ -cmp_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|running -output_states|2013092312|0|1|held retrying_task|2013092300|4|3|succeeded retrying_task|2013092306|2|2|retrying tidy|2013092300|1|1|succeeded @@ -143,19 +140,15 @@ sqlite3 $(cylc get-global-config --print-run-dir)/$SUITE_NAME/cylc-suite.db \ "select name, cycle, submit_num, try_num, status from task_states order by name, cycle;" > $TEST_DIR/states-db -cmp_ok $TEST_DIR/states-db <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|succeeded -output_states|2013092312|0|1|held retrying_task|2013092300|4|3|succeeded retrying_task|2013092306|4|3|succeeded -retrying_task|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|1|1|succeeded -tidy|2013092312|0|1|held __DB_DUMP__ #------------------------------------------------------------------------------- purge_suite $SUITE_NAME diff --git a/tests/restart/04-running.t b/tests/restart/04-running.t index 5d1ef9fbd49..ab78d86a92a 100644 --- a/tests/restart/04-running.t +++ b/tests/restart/04-running.t @@ -76,7 +76,7 @@ running_task.2013092306 : status=waiting, spawned=false tidy.2013092300 : status=waiting, spawned=false __STATE__ grep_ok "running_task|2013092300|1|1|running" $TEST_DIR/states-db-pre-restart-2013092300 -cmp_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded output_states|2013092300|1|1|running running_task|2013092300|1|1|running @@ -97,7 +97,7 @@ running_task.2013092312 : status=held, spawned=false tidy.2013092300 : status=succeeded, spawned=true tidy.2013092306 : status=waiting, spawned=false __STATE__ -cmp_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|running output_states|2013092300|1|1|succeeded @@ -108,13 +108,11 @@ tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting __DB_DUMP__ -cmp_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|running -output_states|2013092312|0|1|held running_task|2013092300|1|1|succeeded running_task|2013092306|1|1|succeeded tidy|2013092300|1|1|succeeded @@ -137,19 +135,15 @@ sqlite3 $(cylc get-global-config --print-run-dir)/$SUITE_NAME/cylc-suite.db \ "select name, cycle, submit_num, try_num, status from task_states order by name, cycle;" > $TEST_DIR/states-db -cmp_ok $TEST_DIR/states-db <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|succeeded -output_states|2013092312|0|1|held running_task|2013092300|1|1|succeeded running_task|2013092306|1|1|succeeded -running_task|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|1|1|succeeded -tidy|2013092312|0|1|held __DB_DUMP__ #------------------------------------------------------------------------------- purge_suite $SUITE_NAME diff --git a/tests/restart/05-submit-failed.t b/tests/restart/05-submit-failed.t index a488ac2eee1..65317935338 100644 --- a/tests/restart/05-submit-failed.t +++ b/tests/restart/05-submit-failed.t @@ -75,7 +75,7 @@ submit_fail_task.2013092300 : status=submit-failed, spawned=false tidy.2013092300 : status=waiting, spawned=false __STATE__ grep_ok "submit_fail_task|2013092300|1|1|submit-failed" $TEST_DIR/states-db-pre-restart-2013092300 -cmp_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded output_states|2013092300|1|1|running submit_fail_task|2013092300|1|1|submit-failed @@ -95,7 +95,7 @@ submit_fail_task.2013092306 : status=submit-failed, spawned=false tidy.2013092300 : status=succeeded, spawned=true tidy.2013092306 : status=waiting, spawned=false __STATE__ -cmp_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|running output_states|2013092300|1|1|succeeded @@ -106,13 +106,11 @@ tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting __DB_DUMP__ -cmp_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|running -output_states|2013092312|0|1|held submit_fail_task|2013092300|1|1|submit-failed submit_fail_task|2013092306|1|1|submit-failed tidy|2013092300|1|1|succeeded @@ -135,19 +133,15 @@ sqlite3 $(cylc get-global-config --print-run-dir)/$SUITE_NAME/cylc-suite.db \ "select name, cycle, submit_num, try_num, status from task_states order by name, cycle;" > $TEST_DIR/states-db -cmp_ok $TEST_DIR/states-db <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|succeeded -output_states|2013092312|0|1|held submit_fail_task|2013092300|1|1|submit-failed submit_fail_task|2013092306|1|1|submit-failed -submit_fail_task|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|1|1|succeeded -tidy|2013092312|0|1|held __DB_DUMP__ #------------------------------------------------------------------------------- purge_suite $SUITE_NAME diff --git a/tests/restart/06-succeeded.t b/tests/restart/06-succeeded.t index 9355acbea18..2dd99479006 100644 --- a/tests/restart/06-succeeded.t +++ b/tests/restart/06-succeeded.t @@ -76,7 +76,7 @@ succeed_task.2013092306 : status=waiting, spawned=false tidy.2013092300 : status=waiting, spawned=false __STATE__ grep_ok "succeed_task|2013092300|1|1|succeeded" $TEST_DIR/states-db-pre-restart-2013092300 -cmp_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded output_states|2013092300|1|1|running succeed_task|2013092300|1|1|succeeded @@ -97,7 +97,7 @@ succeed_task.2013092312 : status=held, spawned=false tidy.2013092300 : status=succeeded, spawned=true tidy.2013092306 : status=waiting, spawned=false __STATE__ -cmp_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|running output_states|2013092300|1|1|succeeded @@ -108,13 +108,11 @@ tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting __DB_DUMP__ -cmp_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|running -output_states|2013092312|0|1|held succeed_task|2013092300|1|1|succeeded succeed_task|2013092306|1|1|succeeded tidy|2013092300|1|1|succeeded @@ -137,19 +135,15 @@ sqlite3 $(cylc get-global-config --print-run-dir)/$SUITE_NAME/cylc-suite.db \ "select name, cycle, submit_num, try_num, status from task_states order by name, cycle;" > $TEST_DIR/states-db -cmp_ok $TEST_DIR/states-db <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|succeeded -output_states|2013092312|0|1|held succeed_task|2013092300|1|1|succeeded succeed_task|2013092306|1|1|succeeded -succeed_task|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|1|1|succeeded -tidy|2013092312|0|1|held __DB_DUMP__ #------------------------------------------------------------------------------- purge_suite $SUITE_NAME diff --git a/tests/restart/07-waiting.t b/tests/restart/07-waiting.t index 6fcfc6ed1d6..3f66109ddb4 100644 --- a/tests/restart/07-waiting.t +++ b/tests/restart/07-waiting.t @@ -75,7 +75,7 @@ tidy.2013092300 : status=waiting, spawned=false waiting_task.2013092300 : status=waiting, spawned=false __STATE__ grep_ok "waiting_task|2013092300|0|1|waiting" $TEST_DIR/states-db-pre-restart-2013092300 -cmp_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded output_states|2013092300|1|1|running tidy|2013092300|0|1|waiting @@ -95,7 +95,7 @@ tidy.2013092300 : status=succeeded, spawned=true tidy.2013092306 : status=waiting, spawned=false waiting_task.2013092306 : status=waiting, spawned=false __STATE__ -cmp_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|running output_states|2013092300|1|1|succeeded @@ -105,12 +105,11 @@ tidy|2013092306|0|1|waiting waiting_task|2013092300|1|1|succeeded waiting_task|2013092306|0|1|waiting __DB_DUMP__ -cmp_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|running -output_states|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|0|1|waiting waiting_task|2013092300|1|1|succeeded @@ -133,19 +132,15 @@ sqlite3 $(cylc get-global-config --print-run-dir)/$SUITE_NAME/cylc-suite.db \ "select name, cycle, submit_num, try_num, status from task_states order by name, cycle;" > $TEST_DIR/states-db -cmp_ok $TEST_DIR/states-db <<'__DB_DUMP__' +contains_ok $TEST_DIR/states-db <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|1|1|succeeded -output_states|2013092312|0|1|held tidy|2013092300|1|1|succeeded tidy|2013092306|1|1|succeeded -tidy|2013092312|0|1|held waiting_task|2013092300|1|1|succeeded waiting_task|2013092306|1|1|succeeded -waiting_task|2013092312|0|1|held __DB_DUMP__ #------------------------------------------------------------------------------- purge_suite $SUITE_NAME From 06b3150d4cc9e46f2fdbe86db003992e9235fe14 Mon Sep 17 00:00:00 2001 From: Ben Fitzpatrick Date: Mon, 14 Jul 2014 14:55:46 +0100 Subject: [PATCH 09/13] #983: first draft of runahead-by-num-cycle-points --- doc/suiterc.tex | 42 ++++++++-- lib/cylc/cfgspec/suite.py | 3 +- lib/cylc/config.py | 125 ++++++++++------------------- lib/cylc/cycling/__init__.py | 1 - lib/cylc/cycling/integer.py | 4 +- lib/cylc/cycling/iso8601.py | 1 - lib/cylc/task_pool.py | 147 +++++++++++++++++++++++++---------- 7 files changed, 187 insertions(+), 136 deletions(-) diff --git a/doc/suiterc.tex b/doc/suiterc.tex index 12843f08aeb..3f6b75372e2 100644 --- a/doc/suiterc.tex +++ b/doc/suiterc.tex @@ -583,20 +583,52 @@ \subsection{[scheduling]} \subsubsection[runahead limit]{[scheduling] $\rightarrow$ runahead limit} +\label{runahead limit} -The suite runahead limit prevents the fastest tasks in a suite from +Runahead limiting prevents the fastest tasks in a suite from getting too far ahead of the slowest ones, as documented in Section~\ref{RunaheadLimit}. Tasks exceeding the limit are put into a special runahead held state until slower tasks have caught up -sufficiently. The default limit is computed as a factor (2) of the -suite's minimum cycling interval, and is up-adjusted from there as -necessary for any longer-offset future triggering. +sufficiently. + +The default behaviour is to limit the number of tasks +by only allowing up to \lstinline=N= (default 3) active cycle points at +any time, up-adjusted if necessary for any future triggering. + +\lstinline=runahead limit= is deprecated in favour of specifying this +maximum number of active cycle points (\ref{max active cycle points}). + \begin{myitemize} \item {\em type:} Cycle interval string e.g. \lstinline=PT12H= for a 12 hour limit under ISO 8601 cycling. - \item {\em default:} (twice the minimum cycling interval in the suite) + \item {\em default:} (none) +\end{myitemize} + + +\subsubsection[max active cycle points]{[scheduling] $\rightarrow$ + max active cycle points} +\label{max active cycle points} + +Runahead limiting prevents the fastest tasks in a suite from +getting too far ahead of the slowest ones, as documented in +Section~\ref{RunaheadLimit}. Tasks exceeding the limit are put into +a special runahead held state until slower tasks have caught up +sufficiently. + +The default behaviour is to limit the number of tasks +by only allowing up to \lstinline=N= (default 3) active cycle points at +any time, up-adjusted if necessary for any future triggering. + +This input allows you to change \lstinline=N=, the maximum +number of cycle points that can be active at once. This supersedes +\ref{runahead limit}. + +\begin{myitemize} + \item {\em type:} integer + \item {\em default:} 3 \end{myitemize} + \subsubsection[{[[}queues{]]}]{[scheduling] $\rightarrow$ [[queues]]} Configuration of internal queues, by which the number of simultaneously diff --git a/lib/cylc/cfgspec/suite.py b/lib/cylc/cfgspec/suite.py index 285a9965911..9ce9174ed7f 100644 --- a/lib/cylc/cfgspec/suite.py +++ b/lib/cylc/cfgspec/suite.py @@ -183,7 +183,8 @@ def _coerce_cycletime_time_zone( value, keys, args ): 'initial cycle point' : vdr(vtype='cycletime'), 'final cycle point' : vdr(vtype='cycletime'), 'cycling mode' : vdr(vtype='string', default="gregorian", options=["360day","gregorian","integer"] ), - 'runahead limit' : vdr(vtype='cycleinterval' ), + 'runahead limit' : vdr(vtype='cycleinterval' ), + 'max active cycle points' : vdr(vtype='integer', default=3), 'queues' : { 'default' : { 'limit' : vdr( vtype='integer', default=0), diff --git a/lib/cylc/config.py b/lib/cylc/config.py index b0c6312d64d..0ccd59b0e5f 100644 --- a/lib/cylc/config.py +++ b/lib/cylc/config.py @@ -118,7 +118,7 @@ def __init__( self, suite, fpath, template_vars=[], self.actual_first_ctime = None self.custom_runahead_limit = None - self.runahead_limit = None + self.max_num_active_cycle_points = None # runtime hierarchy dicts keyed by namespace name: self.runtime = { @@ -286,7 +286,7 @@ def __init__( self, suite, fpath, template_vars=[], if not self.graph_found: raise SuiteConfigError, 'No suite dependency graph defined.' - self.compute_runahead_limit() + self.compute_runahead_limits() self.configure_queues() @@ -309,8 +309,10 @@ def __init__( self, suite, fpath, template_vars=[], self.cfg['visualization']['initial cycle point'] = vict vict_rh = None - if vict and self.runahead_limit: - vict_rh = str( get_point( vict ) + self.runahead_limit ) + v_runahead_limit = ( + self.custom_runahead_limit or self.minimum_runahead_limit) + if vict and v_runahead_limit: + vict_rh = str( get_point( vict ) + v_runahead_limit ) vfct = self.cfg['visualization']['final cycle point'] or vict_rh or vict self.cfg['visualization']['final cycle point'] = vfct @@ -549,96 +551,55 @@ def print_inheritance(self): for item, val in self.runtime[foo].items(): print ' ', ' ', item, val - def compute_runahead_limit( self ): + def compute_runahead_limits( self ): + """Extract the custom and the minimum runahead limits.""" + + self.max_num_active_cycle_points = self.cfg['scheduling'][ + 'max active cycle points'] + limit = self.cfg['scheduling']['runahead limit'] if (limit is not None and limit.isdigit() and get_interval_cls().get_null().TYPE == ISO8601_CYCLING_TYPE): # Backwards-compatibility for raw number of hours. limit = "PT%sH" % limit - self.custom_runahead_limit = get_interval(limit) - if self.custom_runahead_limit is not None: - self.runahead_limit = self.custom_runahead_limit - return - initial_point = get_point( - self.cfg['scheduling']['initial cycle point']) - - offsets = set([]) - seq_points = {} - point_tasks = {} + # The custom runahead limit is None if not user-configured. + self.custom_runahead_limit = get_interval(limit) - # Loop through all sequences and extract the first few points. - for seq in self.sequences: - seq_points[seq] = [] - seq_point_0 = seq.get_first_point(initial_point) - if seq_point_0 is None: - continue - seq_points[seq].append(seq_point_0) - point_tasks.setdefault(seq_point_0, set()) - iter_point = seq_point_0 - for i in range(NUM_RUNAHEAD_SEQ_POINTS - 1): - next_point = seq.get_next_point(iter_point) - if next_point is None: - break - seq_points[seq].append(next_point) - point_tasks.setdefault(next_point, set()) - iter_point = next_point + # Find the minimum runahead limit necessary for any future triggers. + self.minimum_runahead_limit = None - # Loop through all tasks and log their sequences. + offsets = set() for name, taskdef in self.taskdefs.items(): - if taskdef.min_intercycle_offset is not None: - if taskdef.min_intercycle_offset: - offsets.add(taskdef.min_intercycle_offset) - for seq in taskdef.sequences: - for point in seq_points.get(seq, []): - point_tasks[point].add(name) - - points = sorted(point_tasks.keys()) - min_interval = None - - # Loop through all point pairs with common tasks to get the interval. - for i, point in enumerate(points): - tasks = point_tasks[point] - for other_point in points[i + 1:]: - other_tasks = point_tasks[other_point] - if point == other_point: - # Technically, hash-different points could compare equal. - continue - common_tasks = tasks.intersection(other_tasks) - if common_tasks: - # These points share the same task or tasks. - interval = abs(other_point - point) - if min_interval is None or interval < min_interval: - min_interval = interval - min_interval_task_example = common_tasks.pop() - min_interval_points = [point, other_point] - - if min_interval is None: - rlim = get_interval_cls().get_null() # Null interval. - description = "(null)" - else: - rlim = min_interval * AUTO_RUNAHEAD_FACTOR - description = "from %d * %s (min interval)" % ( - AUTO_RUNAHEAD_FACTOR, min_interval - ) + if taskdef.min_intercycle_offset: + offsets.add(taskdef.min_intercycle_offset) + if offsets: - min_offset = min( offsets ) - if min_offset < get_interval_cls().get_null(): - # future triggers... - if abs(min_offset) >= rlim: - #... that extend past the default rl - # set to offsets plus one minimum interval - rlim = abs(min_offset) + rlim - description += " + %s (future trigger)" % ( - abs(min_offset)) - if flags.verbose: - print "Runahead limit: %s: %s" % (rlim, description) + min_offset = min(offsets) + if min_offset < get_interval_cls().get_null(): + # A negative offset comes from future triggering. + self.minimum_runahead_limit = abs(min_offset) + if (self.custom_runahead_limit is not None and + self.custom_runahead_limit < + self.minimum_runahead_limit): + print >> sys.stderr, ( + ' WARNING, custom runahead limit of %s is less than ' + 'future triggering offset %s: suite may stall.' % + (self.custom_runahead_limit, + self.minimum_runahead_limit) + ) + + def get_custom_runahead_limit( self ): + """Return the custom runahead limit (may be None).""" + return self.custom_runahead_limit - self.runahead_limit = rlim + def get_max_num_active_cycle_points( self ): + """Return the maximum allowed number of pool cycle points.""" + return self.max_num_active_cycle_points - def get_runahead_limit( self ): - # may be None (no cycling tasks) - return self.runahead_limit + def get_minimum_runahead_limit( self ): + """Return the minimum runahead limit to apply.""" + return self.minimum_runahead_limit def get_config( self, args, sparse=False ): return self.pcfg.get( args, sparse ) diff --git a/lib/cylc/cycling/__init__.py b/lib/cylc/cycling/__init__.py index a16766290f6..9b84cab2113 100644 --- a/lib/cylc/cycling/__init__.py +++ b/lib/cylc/cycling/__init__.py @@ -117,7 +117,6 @@ def __abs__( self ): raise NotImplementedError() def __mul__( self, m ): - # the suite runahead limit is a multiple of the smallest sequence interval raise NotImplementedError() def __nonzero__(self): diff --git a/lib/cylc/cycling/integer.py b/lib/cylc/cycling/integer.py index b61d5dd5ea4..139c9868098 100755 --- a/lib/cylc/cycling/integer.py +++ b/lib/cylc/cycling/integer.py @@ -44,8 +44,7 @@ # absolute, or relative to some context, so a special character 'c' # is used to signify that context is required. '?' can be used for # the period in one-off (no-repeat) expressions, otherwise an arbitrary -# given value will be ignored (an arbitrary interval is not stored as -# it may affect the default runahead limit calculation). +# given value will be ignored. # # 1) REPEAT/START/PERIOD: R[n]/[c]i/Pi # missing n means repeat indefinitely @@ -131,7 +130,6 @@ def __int__(self): return int(self.value.replace("P", "")) def __mul__(self, m): - # the suite runahead limit is a multiple of the smallest sequence interval return IntegerInterval(int(self) * m) def __nonzero__(self): diff --git a/lib/cylc/cycling/iso8601.py b/lib/cylc/cycling/iso8601.py index 9ac8063d559..52d12950981 100755 --- a/lib/cylc/cycling/iso8601.py +++ b/lib/cylc/cycling/iso8601.py @@ -189,7 +189,6 @@ def __abs__(self): self._iso_interval_abs(self.value, self.NULL_INTERVAL_STRING)) def __mul__(self, m): - # the suite runahead limit is a multiple of the smallest sequence interval return ISO8601Interval(self._iso_interval_mul(self.value, m)) def __nonzero__(self): diff --git a/lib/cylc/task_pool.py b/lib/cylc/task_pool.py index 699bd07fe31..3b9e39798ce 100644 --- a/lib/cylc/task_pool.py +++ b/lib/cylc/task_pool.py @@ -59,9 +59,13 @@ def __init__( self, suite, db, stop_point, config, pyro, log, run_mode ): self.reconfiguring = False self.db = db - self.runahead_limit = config.get_runahead_limit() + self.custom_runahead_limit = config.get_custom_runahead_limit() + self.minimum_runahead_limit = config.get_minimum_runahead_limit() + self.max_num_active_cycle_points = ( + config.get_max_num_active_cycle_points()) self.config = config + self.pool = {} self.runahead_pool = {} self.myq = {} self.queues = {} @@ -139,7 +143,8 @@ def add_to_runahead_pool( self, itask ): itask.reset_state_held() # add to the runahead pool - self.runahead_pool[itask.id] = itask + self.runahead_pool.setdefault(itask.c_time, {}) + self.runahead_pool[itask.c_time][itask.id] = itask self.rhpool_changed = True return True @@ -157,42 +162,76 @@ def release_runahead_tasks( self ): if not self.runahead_pool: return - runahead_base = None - for itask in self.get_tasks(all=True): - if itask.state.is_currently('failed', 'succeeded'): - continue - if not runahead_base or itask.c_time < runahead_base: - runahead_base = itask.c_time - - # release tasks below the limit - if runahead_base: - for itask in self.runahead_pool.values(): - if not self.runahead_limit or \ - itask.c_time - self.runahead_limit <= runahead_base: - # release task to the appropriate queue - # (no runahead limit implies R1 tasks only) - queue = self.myq[itask.name] - if queue not in self.queues: - self.queues[queue] = {} - self.queues[queue][itask.id] = itask - self.pool_changed = True - flags.pflag = True - itask.log('DEBUG', "released to the task pool" ) - del self.runahead_pool[itask.id] - self.rhpool_changed = True - try: - self.pyro.connect( itask.message_queue, itask.id ) - except Exception, x: - if flags.debug: - raise - print >> sys.stderr, x - self.log.warning( itask.id + ' cannot be added (use --debug and see stderr)' ) - return False + points = set() + for point, itasks in self.get_tasks_by_point(all=True): + has_ok_itasks = False + for itask in itasks: + if not itask.state.is_currently('failed', 'succeeded'): + has_ok_itasks = True + break + if has_ok_itasks: + points.add(point) + + if not points: + return + + runahead_base_point = min(points) + + if self.custom_runahead_limit is None: + # Calculate which tasks to release based on a maximum number of + # active cycle points (active meaning non-finished tasks). + limit = self.max_num_active_cycle_points + latest_allowed_point = sorted(points)[:limit][-1] + if self.minimum_runahead_limit is not None: + latest_allowed_point = max([ + latest_allowed_point, + runahead_base_point + self.minimum_runahead_limit + ]) + else: + # Calculate which tasks to release based on a maximum duration + # measured from the oldest non-finished task. + latest_allowed_point = ( + runahead_base_point + self.custom_runahead_limit) + + for point, itask_id_map in self.runahead_pool.values(): + if point <= latest_allowed_point: + for itask in itask_id_map.values(): + self.release_runahead_task(itask) + + def release_runahead_task(itask): + """Release itask to the appropriate queue in the active pool.""" + queue = self.myq[itask.name] + if queue not in self.queues: + self.queues[queue] = {} + self.queues[queue][itask.id] = itask + self.pool.setdefault(itask.c_time, {}) + self.pool[itask.c_time][itask.id] = itask + self.pool_changed = True + flags.pflag = True + itask.log('DEBUG', "released to the task pool" ) + del self.runahead_pool[itask.c_time][itask.id] + if not self.runahead_pool[itask.c_time]: + self.runahead_pool.pop(itask.c_time) + self.rhpool_changed = True + try: + self.pyro.connect( itask.message_queue, itask.id ) + except Exception, x: + if flags.debug: + raise + print >> sys.stderr, x + self.log.warning( + '%s cannot be added (use --debug and see stderr)' % itask.id) + return False def remove( self, itask, reason=None ): - if itask.id in self.runahead_pool: - del self.runahead_pool[itask.id] + try: + del self.runahead_pool[itask.c_time][itask.id] + except KeyError: + pass + else: + if not self.runahead_pool[itask.c_time]: + self.runahead_pool.pop(itask.c_time) self.rhpool_changed = True return @@ -209,6 +248,9 @@ def remove( self, itask, reason=None ): # remove from queue queue = self.myq[itask.name] del self.queues[queue][itask.id] + del self.pool[itask.c_time][itask.id] + if not self.pool[itask.c_time]: + self.pool.pop(itask_c_time) self.pool_changed = True msg = "task proxy removed" if reason: @@ -235,17 +277,33 @@ def get_tasks( self, all=False ): if all: if self.rhpool_changed: self.rhpool_changed = False - self.rhpool_list = self.runahead_pool.values() + self.rhpool_list = [] + for itask_id_maps in self.runahead_pool.values(): + self.rhpool_list.extend(itask_id_maps.values()) return self.rhpool_list + self.pool_list else: return self.pool_list + def get_tasks_by_point( self, all=False ): + """Return a map of task proxies by cycle point.""" + point_itasks = {} + for point, itask_id_map in self.cycle_point_itask_map.items(): + point_itasks[point] = itask_id_map.values() + + if not all: + return point_itasks + + for point, itask_id_map in self.runahead_pool.items(): + point_itasks.setdefault(point, []) + point_itasks[point].extend(itask_id_map.values()) + return point_itasks def id_exists( self, id ): """Check if task id is in the runahead_pool or pool""" - if id in self.runahead_pool: - return True + for point, itask_ids in self.runahead_pool.items(): + if id in itask_ids: + return True for queue in self.queues: if id in self.queues[queue]: return True @@ -340,16 +398,16 @@ def set_runahead( self, interval=None ): if interval is None: # No limit self.log.warning( "setting NO runahead limit" ) - self.runahead_limit = None + self.custom_runahead_limit = None else: self.log.info( "setting runahead limit to " + str(interval) ) - self.runahead_limit = interval + self.custom_runahead_limit = interval self.release_runahead_tasks() def get_min_ctime( self ): """Return the minimum cycle currently in the pool.""" - cycles = [ t.c_time for t in self.get_tasks() ] + cycles = self.pool.keys() minc = None if cycles: minc = min(cycles) @@ -358,7 +416,7 @@ def get_min_ctime( self ): def get_max_ctime( self ): """Return the maximum cycle currently in the pool.""" - cycles = [ t.c_time for t in self.get_tasks() ] + cycles = self.pool.keys() maxc = None if cycles: maxc = max(cycles) @@ -369,7 +427,10 @@ def reconfigure( self, config, stop_point ): self.reconfiguring = True - self.runahead_limit = config.get_runahead_limit() + self.custom_runahead_limit = config.get_custom_runahead_limit() + self.minimum_runahead_limit = config.get_minimum_runahead_limit() + self.max_num_active_cycle_points = ( + config.get_max_num_active_cycle_points()) self.config = config self.stop_point = stop_point # TODO: Any point in using set_stop_point? From 742e4d5a46e6237edbcdacf0378ddcdc48f09b57 Mon Sep 17 00:00:00 2001 From: Ben Fitzpatrick Date: Mon, 14 Jul 2014 17:22:29 +0100 Subject: [PATCH 10/13] #983: fix bugs in new runahead implementation --- lib/cylc/cycling/iso8601.py | 3 ++ lib/cylc/scheduler.py | 2 +- lib/cylc/task_pool.py | 34 +++++++++++------------ tests/cylc-get-config/00-simple.t | 7 +++-- tests/queues/01-queuesize-5.t | 1 - tests/runahead/02-check-default-complex.t | 5 ++-- tests/runahead/03-check-default-future.t | 7 ++--- tests/runahead/default-complex/suite.rc | 4 ++- tests/runahead/default-future/suite.rc | 4 +-- 9 files changed, 34 insertions(+), 33 deletions(-) diff --git a/lib/cylc/cycling/iso8601.py b/lib/cylc/cycling/iso8601.py index 52d12950981..0694fba9cd1 100755 --- a/lib/cylc/cycling/iso8601.py +++ b/lib/cylc/cycling/iso8601.py @@ -113,6 +113,9 @@ def sub(self, other): return ISO8601Point( self._iso_point_sub_interval(self.value, other.value)) + def __hash__(self): + return hash(self.value) + @staticmethod @memoize def _iso_point_add(point_string, interval_string): diff --git a/lib/cylc/scheduler.py b/lib/cylc/scheduler.py index c30d47edd60..edddb237916 100644 --- a/lib/cylc/scheduler.py +++ b/lib/cylc/scheduler.py @@ -1004,7 +1004,7 @@ def update_state_summary( self ): self.pool.get_min_ctime(), self.pool.get_max_ctime(), self.paused(), self.will_pause_at(), self.do_shutdown is not None, - self.will_stop_at(), self.pool.runahead_limit, + self.will_stop_at(), self.pool.custom_runahead_limit, self.config.ns_defn_order ) diff --git a/lib/cylc/task_pool.py b/lib/cylc/task_pool.py index 3b9e39798ce..317c1660258 100644 --- a/lib/cylc/task_pool.py +++ b/lib/cylc/task_pool.py @@ -162,19 +162,19 @@ def release_runahead_tasks( self ): if not self.runahead_pool: return - points = set() - for point, itasks in self.get_tasks_by_point(all=True): - has_ok_itasks = False + points = [] + for point, itasks in sorted( + self.get_tasks_by_point(all=True).items()): + has_unfinished_itasks = False for itask in itasks: if not itask.state.is_currently('failed', 'succeeded'): - has_ok_itasks = True + has_unfinished_itasks = True break - if has_ok_itasks: - points.add(point) - - if not points: - return - + if not points and not has_unfinished_itasks: + # We need to begin with an unfinished cycle point. + continue + points.append(point) + runahead_base_point = min(points) if self.custom_runahead_limit is None: @@ -193,12 +193,12 @@ def release_runahead_tasks( self ): latest_allowed_point = ( runahead_base_point + self.custom_runahead_limit) - for point, itask_id_map in self.runahead_pool.values(): + for point, itask_id_map in self.runahead_pool.items(): if point <= latest_allowed_point: for itask in itask_id_map.values(): self.release_runahead_task(itask) - def release_runahead_task(itask): + def release_runahead_task(self, itask): """Release itask to the appropriate queue in the active pool.""" queue = self.myq[itask.name] if queue not in self.queues: @@ -211,7 +211,7 @@ def release_runahead_task(itask): itask.log('DEBUG', "released to the task pool" ) del self.runahead_pool[itask.c_time][itask.id] if not self.runahead_pool[itask.c_time]: - self.runahead_pool.pop(itask.c_time) + del self.runahead_pool[itask.c_time] self.rhpool_changed = True try: self.pyro.connect( itask.message_queue, itask.id ) @@ -231,7 +231,7 @@ def remove( self, itask, reason=None ): pass else: if not self.runahead_pool[itask.c_time]: - self.runahead_pool.pop(itask.c_time) + del self.runahead_pool[itask.c_time] self.rhpool_changed = True return @@ -250,7 +250,7 @@ def remove( self, itask, reason=None ): del self.queues[queue][itask.id] del self.pool[itask.c_time][itask.id] if not self.pool[itask.c_time]: - self.pool.pop(itask_c_time) + del self.pool[itask.c_time] self.pool_changed = True msg = "task proxy removed" if reason: @@ -288,7 +288,7 @@ def get_tasks( self, all=False ): def get_tasks_by_point( self, all=False ): """Return a map of task proxies by cycle point.""" point_itasks = {} - for point, itask_id_map in self.cycle_point_itask_map.items(): + for point, itask_id_map in self.pool.items(): point_itasks[point] = itask_id_map.values() if not all: @@ -297,7 +297,7 @@ def get_tasks_by_point( self, all=False ): for point, itask_id_map in self.runahead_pool.items(): point_itasks.setdefault(point, []) point_itasks[point].extend(itask_id_map.values()) - return point_itasks + return point_itasks def id_exists( self, id ): """Check if task id is in the runahead_pool or pool""" diff --git a/tests/cylc-get-config/00-simple.t b/tests/cylc-get-config/00-simple.t index aa6b4540176..6b84b4179a9 100644 --- a/tests/cylc-get-config/00-simple.t +++ b/tests/cylc-get-config/00-simple.t @@ -91,9 +91,12 @@ TEST_NAME=$TEST_NAME_BASE-section1 run_ok $TEST_NAME cylc get-config --item=[scheduling] $SUITE_NAME cmp_ok $TEST_NAME.stdout - <<__OUT__ cycling mode = integer +max active cycle points = 3 initial cycle point = 1 -runahead limit = final cycle point = 1 +runahead limit = +[[dependencies]] + graph = OPS:finish-all => VAR [[queues]] [[[default]]] limit = 0 @@ -105,8 +108,6 @@ final cycle point = 1 sequential = clock-triggered = exclude at start-up = -[[dependencies]] - graph = OPS:finish-all => VAR __OUT__ cmp_ok $TEST_NAME.stderr - /dev/tty -cat $TEST_NAME.stderr >/dev/tty #------------------------------------------------------------------------------- TEST_NAME=$TEST_NAME_BASE-run run_fail $TEST_NAME cylc run --debug $SUITE_NAME @@ -35,7 +32,7 @@ TEST_NAME=$TEST_NAME_BASE-check-fail DB=$(cylc get-global-config --print-run-dir)/$SUITE_NAME/cylc-suite.db RUNAHEAD=$(sqlite3 $DB "select max(cycle) from task_states") # manual comparison for the test -if [[ "$RUNAHEAD" == "20100101T0800Z" ]]; then +if [[ "$RUNAHEAD" == "20100101T1000Z" ]]; then ok $TEST_NAME else fail $TEST_NAME diff --git a/tests/runahead/default-complex/suite.rc b/tests/runahead/default-complex/suite.rc index c1793219e77..7ae430f9f37 100644 --- a/tests/runahead/default-complex/suite.rc +++ b/tests/runahead/default-complex/suite.rc @@ -10,11 +10,13 @@ # T00, T07, T14, ... [[[PT7H]]] graph = "foo => bar" - # T00, T12, T18... 2 hours difference + # T00, T12, T18... [[[T00, T12, T18]]] graph = "foo" + # T04... [[[T04]]] graph = "run_ok" + # T05... [[[T05]]] graph = "never_run" [runtime] diff --git a/tests/runahead/default-future/suite.rc b/tests/runahead/default-future/suite.rc index 396f4ddbf3d..d10cfa5c413 100644 --- a/tests/runahead/default-future/suite.rc +++ b/tests/runahead/default-future/suite.rc @@ -7,10 +7,10 @@ initial cycle point = 20100101T00 final cycle point = 20100105T00 [[dependencies]] - # Every hour, so would expect a 2 hour runahead limit. + # T00, T01, T02, ... [[[PT1H]]] graph = "foo => bar" - # A 6 hour future trigger, => 2 + 6 = 8 hour runahead limit. + # T04 (depending on T10) [[[T04/PT6H]]] graph = """ baz[+PT6H] => bar From 079adb63a3b00efd8c1f8054bc8d9990c432c0cf Mon Sep 17 00:00:00 2001 From: Ben Fitzpatrick Date: Thu, 17 Jul 2014 16:42:54 +0100 Subject: [PATCH 11/13] #983: use sequence-generated cycle points for runahead calculation --- doc/suiterc.tex | 14 ++++++----- lib/cylc/task_pool.py | 32 ++++++++++++++++++++++-- tests/runahead/03-check-default-future.t | 2 +- 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/doc/suiterc.tex b/doc/suiterc.tex index e58f5bf868b..97a08f65c4c 100644 --- a/doc/suiterc.tex +++ b/doc/suiterc.tex @@ -604,8 +604,9 @@ \subsection{[scheduling]} sufficiently. The default behaviour is to limit the number of tasks -by only allowing up to \lstinline=N= (default 3) active cycle points at -any time, up-adjusted if necessary for any future triggering. +by only allowing up to \lstinline=N= (default 3) consecutive cycle +points to be active at any time, up-adjusted if necessary for any +future triggering. \lstinline=runahead limit= is deprecated in favour of specifying this maximum number of active cycle points (\ref{max active cycle points}). @@ -628,12 +629,13 @@ \subsection{[scheduling]} sufficiently. The default behaviour is to limit the number of tasks -by only allowing up to \lstinline=N= (default 3) active cycle points at -any time, up-adjusted if necessary for any future triggering. +by only allowing up to \lstinline=N= (default 3) consecutive cycle +points to be active at any time, up-adjusted if necessary for any +future triggering. This input allows you to change \lstinline=N=, the maximum -number of cycle points that can be active at once. This supersedes -\ref{runahead limit}. +number of consecutive cycle points that can be active at once. This +supersedes \ref{runahead limit}. \begin{myitemize} \item {\em type:} integer diff --git a/lib/cylc/task_pool.py b/lib/cylc/task_pool.py index f1bddd2750a..02271ffeb01 100644 --- a/lib/cylc/task_pool.py +++ b/lib/cylc/task_pool.py @@ -63,6 +63,9 @@ def __init__( self, suite, db, stop_point, config, pyro, log, run_mode ): self.minimum_runahead_limit = config.get_minimum_runahead_limit() self.max_num_active_cycle_points = ( config.get_max_num_active_cycle_points()) + self._prev_runahead_base_point = None + self._prev_runahead_sequence_points = None + self.config = config self.pool = {} @@ -162,6 +165,8 @@ def release_runahead_tasks( self ): if not self.runahead_pool: return + limit = self.max_num_active_cycle_points + points = [] for point, itasks in sorted( self.get_tasks_by_point(all=True).items()): @@ -174,13 +179,36 @@ def release_runahead_tasks( self ): # We need to begin with an unfinished cycle point. continue points.append(point) - + + if not points: + return + + # Get the earliest point with unfinished tasks. runahead_base_point = min(points) + # Get all cycling points possible after the runahead base point. + if (self._prev_runahead_base_point is not None and + runahead_base_point == self._prev_runahead_base_point): + # Cache for speed. + sequence_points = self._prev_runahead_sequence_points + else: + sequence_points = [] + for sequence in self.config.sequences: + point = runahead_base_point + for i in range(limit): + point = sequence.get_next_point(point) + if point is None: + break + sequence_points.append(point) + sequence_points = set(sequence_points) + self._prev_runahead_sequence_points = sequence_points + self._prev_runahead_base_point = runahead_base_point + + points = set(points).union(sequence_points) + if self.custom_runahead_limit is None: # Calculate which tasks to release based on a maximum number of # active cycle points (active meaning non-finished tasks). - limit = self.max_num_active_cycle_points latest_allowed_point = sorted(points)[:limit][-1] if self.minimum_runahead_limit is not None: latest_allowed_point = max([ diff --git a/tests/runahead/03-check-default-future.t b/tests/runahead/03-check-default-future.t index 9e490fd4e19..4315c7337c0 100644 --- a/tests/runahead/03-check-default-future.t +++ b/tests/runahead/03-check-default-future.t @@ -32,7 +32,7 @@ TEST_NAME=$TEST_NAME_BASE-check-fail DB=$(cylc get-global-config --print-run-dir)/$SUITE_NAME/cylc-suite.db RUNAHEAD=$(sqlite3 $DB "select max(cycle) from task_states") # manual comparison for the test -if [[ "$RUNAHEAD" == "20100101T1000Z" ]]; then +if [[ "$RUNAHEAD" == "20100101T0600Z" ]]; then ok $TEST_NAME else fail $TEST_NAME From 4cfedfbc750810f1c075e3ef69c1463b663ece42 Mon Sep 17 00:00:00 2001 From: Ben Fitzpatrick Date: Mon, 21 Jul 2014 14:24:25 +0100 Subject: [PATCH 12/13] Add busy example suite --- dev/suites/busy/suite.rc | 49 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 dev/suites/busy/suite.rc diff --git a/dev/suites/busy/suite.rc b/dev/suites/busy/suite.rc new file mode 100644 index 00000000000..66e6af0831e --- /dev/null +++ b/dev/suites/busy/suite.rc @@ -0,0 +1,49 @@ +#!jinja2 +title = Suite-1000-x3 +description = A suite of 1000 tasks per cycle +[cylc] + UTC mode = True # Ignore DST +[scheduling] + initial cycle point = 20130101T00 + final cycle point = 20130103T00 + runahead limit = PT12H + [[dependencies]] + [[[T00]]] + graph=""" + {% for i0 in range(10) -%} + {% for i1 in range(10) -%} + {% for i2 in range(1) -%} + t{{i0}}{{i1}}{{i2}}9[-P1D] => t{{i0}}{{i1}}{{i2}}0 + t{{i0}}{{i1}}{{i2}}0 => t{{i0}}{{i1}}{{i2}}1 => \ + t{{i0}}{{i1}}{{i2}}2 => t{{i0}}{{i1}}{{i2}}3 + t{{i0}}{{i1}}{{i2}}3 => t{{i0}}{{i1}}{{i2}}4 => \ + t{{i0}}{{i1}}{{i2}}5 => t{{i0}}{{i1}}{{i2}}6 + t{{i0}}{{i1}}{{i2}}6 => t{{i0}}{{i1}}{{i2}}7 => \ + t{{i0}}{{i1}}{{i2}}8 => t{{i0}}{{i1}}{{i2}}9 + {% endfor -%} + {% endfor -%} + {% endfor -%} + """ +[runtime] + [[root]] + command scripting = sleep 1 + [[[job submission]]] + method = at + [[[event hooks]]] + succeeded handler = true + failed handler = true + retry handler = true + submission failed handler = true + submission timeout handler = true + execution timeout handler = true + execution timeout = PT6M + submission timeout = PT1M +{% for i0 in range(10) -%} +{% for i1 in range(10) -%} +{% for i2 in range(1) -%} +{% for i3 in range(10) -%} + [[t{{i0}}{{i1}}{{i2}}{{i3}}]] +{% endfor -%} +{% endfor -%} +{% endfor -%} +{% endfor -%} From a13ee12418275f3dc0fe88c8c893d0900feb0208 Mon Sep 17 00:00:00 2001 From: Ben Fitzpatrick Date: Tue, 22 Jul 2014 11:45:27 +0100 Subject: [PATCH 13/13] #983: fix test failures due to update --- tests/lib/bash/test_header | 8 +++++--- tests/restart/02-failed.t | 1 - tests/restart/04-running.t | 12 ------------ tests/restart/05-submit-failed.t | 1 - tests/restart/06-succeeded.t | 1 - tests/restart/07-waiting.t | 1 - tests/restart/pre-init/ref-state | 1 + 7 files changed, 6 insertions(+), 19 deletions(-) diff --git a/tests/lib/bash/test_header b/tests/lib/bash/test_header index 819861c300f..ae06ed7cceb 100644 --- a/tests/lib/bash/test_header +++ b/tests/lib/bash/test_header @@ -101,8 +101,9 @@ function fail() { ((++FAILURES)) echo "not ok $((++TEST_NUMBER)) - $@" if [[ -n ${CYLC_TEST_DEBUG:-} ]]; then - ls -l $TEST_NAME.stderr >/dev/tty - cat $TEST_NAME.stderr >/dev/tty + echo >/dev/tty + echo "$TEST_NAME_BASE $TEST_NAME" >/dev/tty + cat "$TEST_NAME.stderr" >/dev/tty fi } @@ -186,6 +187,7 @@ function contains_ok() { comm -13 <(sort "$FILE_TEST") <(sort "$FILE_CONTROL") \ 1>$TEST_NAME.stdout 2>$TEST_NAME.stderr if [[ -s $TEST_NAME.stdout ]]; then + mkdir -p $TEST_LOG_DIR echo "Missing lines:" >>$TEST_NAME.stderr cat $TEST_NAME.stdout >>$TEST_NAME.stderr cp $TEST_NAME.stderr $TEST_LOG_DIR/$TEST_NAME.stderr @@ -305,7 +307,7 @@ TEST_SOURCE_DIR=$(cd $(dirname $0) && pwd) TEST_DIR=$(mktemp -d) cd $TEST_DIR TEST_LOG_DIR_BASE=${TMPDIR:-/tmp}/cylc-tests-"$LOGNAME"/$(basename $TEST_SOURCE_DIR) -TEST_LOG_DIR=$TEST_LOG_DIR_BASE/$TEST_NAME_BASE-$(date -u +%Y%m%dT%H%M%SZ)/ +TEST_LOG_DIR=$TEST_LOG_DIR_BASE/$TEST_NAME_BASE-$(date -u +%Y%m%dT%H%M%SZ) SUITE_RUN_FAILS=false # these variables should be moved to site/user config: diff --git a/tests/restart/02-failed.t b/tests/restart/02-failed.t index e0ec65a87fd..203f69a026c 100644 --- a/tests/restart/02-failed.t +++ b/tests/restart/02-failed.t @@ -102,7 +102,6 @@ failed_task|2013092300|1|1|failed failed_task|2013092306|1|1|failed force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|running -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|0|1|waiting tidy|2013092300|1|1|succeeded diff --git a/tests/restart/04-running.t b/tests/restart/04-running.t index 6c8fbe287d3..2509cc8fb19 100644 --- a/tests/restart/04-running.t +++ b/tests/restart/04-running.t @@ -78,7 +78,6 @@ __STATE__ grep_ok "running_task|2013092300|1|1|running" $TEST_DIR/states-db-pre-restart-2013092300 contains_ok $TEST_DIR/states-db-post-restart-2013092300 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded -output_states|2013092300|1|1|running running_task|2013092300|1|1|running tidy|2013092300|0|1|waiting __DB_DUMP__ @@ -100,7 +99,6 @@ __STATE__ contains_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|running -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|0|1|waiting running_task|2013092300|1|1|succeeded @@ -113,12 +111,7 @@ contains_ok $TEST_DIR/states-db-post-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded output_states|2013092300|1|1|succeeded -<<<<<<< HEAD -output_states|2013092306|1|1|running -======= output_states|2013092306|2|1|running -output_states|2013092312|0|1|held ->>>>>>> c4a1c85afb063877cef367008f1b64954d248b6a running_task|2013092300|1|1|succeeded running_task|2013092306|1|1|succeeded tidy|2013092300|1|1|succeeded @@ -145,12 +138,7 @@ contains_ok $TEST_DIR/states-db <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|succeeded output_states|2013092300|1|1|succeeded -<<<<<<< HEAD -output_states|2013092306|1|1|succeeded -======= output_states|2013092306|2|1|succeeded -output_states|2013092312|0|1|held ->>>>>>> c4a1c85afb063877cef367008f1b64954d248b6a running_task|2013092300|1|1|succeeded running_task|2013092306|1|1|succeeded tidy|2013092300|1|1|succeeded diff --git a/tests/restart/05-submit-failed.t b/tests/restart/05-submit-failed.t index a758a77669d..65317935338 100644 --- a/tests/restart/05-submit-failed.t +++ b/tests/restart/05-submit-failed.t @@ -98,7 +98,6 @@ __STATE__ contains_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|running -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|0|1|waiting submit_fail_task|2013092300|1|1|submit-failed diff --git a/tests/restart/06-succeeded.t b/tests/restart/06-succeeded.t index e71a98b1df3..2dd99479006 100644 --- a/tests/restart/06-succeeded.t +++ b/tests/restart/06-succeeded.t @@ -100,7 +100,6 @@ __STATE__ contains_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|running -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|0|1|waiting succeed_task|2013092300|1|1|succeeded diff --git a/tests/restart/07-waiting.t b/tests/restart/07-waiting.t index 9121131c76b..3f66109ddb4 100644 --- a/tests/restart/07-waiting.t +++ b/tests/restart/07-waiting.t @@ -98,7 +98,6 @@ __STATE__ contains_ok $TEST_DIR/states-db-pre-restart-2013092306 <<'__DB_DUMP__' force_restart|2013092300|1|1|succeeded force_restart|2013092306|1|1|running -force_restart|2013092312|0|1|held output_states|2013092300|1|1|succeeded output_states|2013092306|0|1|waiting tidy|2013092300|1|1|succeeded diff --git a/tests/restart/pre-init/ref-state b/tests/restart/pre-init/ref-state index 2c8109f6407..bc13cf30d17 100644 --- a/tests/restart/pre-init/ref-state +++ b/tests/restart/pre-init/ref-state @@ -1,4 +1,5 @@ bar|2010080800|succeeded +bar|2010080900|held foo|2010080800|succeeded foo|2010080900|held p1|2010080800|succeeded