Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Record and reuse warmstart points for restarts #2296

Merged
merged 1 commit into from
May 23, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion lib/cylc/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ def __init__(self, is_restart, options, args):
self.config = None

self.is_restart = is_restart
if self.is_restart:
self.restart_warm_point = None
self._cli_initial_point_string = None
self._cli_start_point_string = None
start_point_str = None
Expand Down Expand Up @@ -349,7 +351,8 @@ def configure(self):
self.initial_point,
self.final_point,
self.pool.is_held,
self.config.cfg['cylc']['cycle point format'])
self.config.cfg['cylc']['cycle point format'],
self._cli_start_point_string)
self.suite_db_mgr.put_suite_template_vars(self.template_vars)
self.configure_suite_environment()

Expand Down Expand Up @@ -403,6 +406,8 @@ def load_tasks_for_restart(self):
"""Load tasks for restart."""
self.suite_db_mgr.pri_dao.select_suite_params(
self._load_suite_params, self.options.checkpoint)
if self.restart_warm_point:
self.start_point = self.restart_warm_point
self.suite_db_mgr.pri_dao.select_broadcast_states(
BroadcastServer.get_inst().load_db_broadcast_states,
self.options.checkpoint)
Expand Down Expand Up @@ -900,6 +905,13 @@ def _load_initial_cycle_point(self, _, row):
self._cli_initial_point_string = value
self.do_process_tasks = True

def _load_warm_cycle_point(self, _, row):
"""Load previous warm start point on restart"""
key, value = row
if key == "warm_point":
self._cli_start_point_string = value
self.restart_warm_point = value

def _load_template_vars(self, _, row):
"""Load suite start up template variables."""
key, value = row
Expand All @@ -918,6 +930,7 @@ def configure_suite(self, reconfigure=False):
pri_dao = self.suite_db_mgr.get_pri_dao()
pri_dao.select_suite_params(self._load_initial_cycle_point)
pri_dao.select_suite_template_vars(self._load_template_vars)
pri_dao.select_suite_params(self._load_warm_cycle_point)
# Take checkpoint and commit immediately so that checkpoint can be
# copied to the public database.
pri_dao.take_checkpoints("restart")
Expand Down
6 changes: 5 additions & 1 deletion lib/cylc/suite_db_mgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def put_runtime_inheritance(self, config):

def put_suite_params(
self, run_mode, initial_point, final_point, is_held,
cycle_point_format=None):
cycle_point_format=None, warm_point=None):
"""Put run mode, initial/final cycle point in runtime database.

This method queues the relevant insert statements.
Expand All @@ -214,6 +214,10 @@ def put_suite_params(
if is_held:
self.db_inserts_map[self.TABLE_SUITE_PARAMS].append(
{"key": "is_held", "value": 1})
if warm_point:
self.db_inserts_map[self.TABLE_SUITE_PARAMS].append(
{"key": "warm_point", "value": str(warm_point)}
)

def put_suite_template_vars(self, template_vars):
"""Put template_vars in runtime database.
Expand Down
59 changes: 59 additions & 0 deletions tests/pre-initial/12-warm-restart.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/bin/bash
# THIS FILE IS PART OF THE CYLC SUITE ENGINE.
# Copyright (C) 2008-2017 NIWA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#-------------------------------------------------------------------------------
# Test that a warm start point persists across restarts.
#
# Steps:
#   1. Validate the "warm-start" suite.
#   2. Run it held with "cylc run --warm ... 20130101T12", then stop it.
#   3. Restart it and check that "foo.20130101T1200Z" lists no prerequisites.
#   4. Stop it again and check that the "warm_point" entry in the
#      "suite_params" table of the suite database holds the warm start point.
. "$(dirname "$0")/test_header"
#-------------------------------------------------------------------------------
set_test_number 6
#-------------------------------------------------------------------------------
install_suite "${TEST_NAME_BASE}" warm-start
#-------------------------------------------------------------------------------
TEST_NAME="${TEST_NAME_BASE}-validate"
run_ok "${TEST_NAME}" cylc validate "${SUITE_NAME}"
#-------------------------------------------------------------------------------
TEST_NAME="${TEST_NAME_BASE}-run-hold"
suite_run_ok "${TEST_NAME}" cylc run --warm "${SUITE_NAME}" 20130101T12 --hold
#-------------------------------------------------------------------------------
cylc stop --max-polls=10 --interval=2 "${SUITE_NAME}"
#-------------------------------------------------------------------------------
TEST_NAME="${TEST_NAME_BASE}-run-hold-restart"
suite_run_ok "${TEST_NAME}" cylc restart "${SUITE_NAME}"
# Ensure suite has started
poll ! test -f "${SUITE_RUN_DIR}/.service/contact"
#-------------------------------------------------------------------------------
# Check pre-reqs: the expected output is empty.
TEST_NAME="${TEST_NAME_BASE}-check-prereq"
run_ok "${TEST_NAME}" cylc show "${SUITE_NAME}" foo.20130101T1200Z --list-prereqs
cmp_ok "${TEST_NAME}.stdout" <<'__OUT__'
__OUT__
#-------------------------------------------------------------------------------
# Stop suite
cylc stop --max-polls=10 --interval=2 "${SUITE_NAME}"
#-------------------------------------------------------------------------------
# Check the database entry. The query and its comparison count as one test, so
# when sqlite3 is unavailable exactly one test is skipped and nothing else in
# this section runs (otherwise the comparison would fail on a missing file).
DB_FILE="$(cylc get-global-config '--print-run-dir')/${SUITE_NAME}/log/db"
NAME='database-entry'
if command -v sqlite3 >/dev/null 2>&1; then
    # Single quotes make 'warm_point' an SQL string literal; a double-quoted
    # token is parsed as an identifier first.
    sqlite3 "${DB_FILE}" \
        "SELECT value FROM suite_params WHERE key = 'warm_point'" >"${NAME}"
    cmp_ok "${NAME}" <<'__SELECT__'
20130101T12
__SELECT__
else
    skip 1 "sqlite3 not installed?"
fi
#-------------------------------------------------------------------------------
purge_suite "${SUITE_NAME}"