Skip to content

Commit

Permalink
Fix broadcast submission timeout restart
Browse files Browse the repository at this point in the history
Suites were dying on restart when a submission timeout had been
broadcast. On restart, the submission timeout values became unicode strings,
which caused the job submission timeout logic to die. This fix ensures that
the values are converted back to float.
  • Loading branch information
matthewrmshin committed Mar 2, 2017
1 parent d6c4d33 commit b839795
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 7 deletions.
14 changes: 7 additions & 7 deletions lib/cylc/task_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,11 +900,11 @@ def job_submission_succeeded(self):
db_event='submission succeeded')

if self.state.set_submit_succeeded():
submit_timeout = self._get_events_conf('submission timeout')
if submit_timeout:
try:
self.state.submission_timer_timeout = (
self.summary['submitted_time'] + submit_timeout)
else:
self.summary['submitted_time'] +
float(self._get_events_conf('submission timeout')))
except (TypeError, ValueError):
self.state.submission_timer_timeout = None
self._set_next_poll_time(self.KEY_SUBMIT)

Expand Down Expand Up @@ -1397,10 +1397,10 @@ def process_incoming_message(
execution_timeout = self.summary['execution_time_limit']
else:
execution_timeout = self._get_events_conf('execution timeout')
if execution_timeout:
try:
self.state.execution_timer_timeout = (
self.summary['started_time'] + execution_timeout)
else:
self.summary['started_time'] + float(execution_timeout))
except (TypeError, ValueError):
self.state.execution_timer_timeout = None

# submission was successful so reset submission try number
Expand Down
31 changes: 31 additions & 0 deletions tests/restart/27-broadcast-timeout.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash
# THIS FILE IS PART OF THE CYLC SUITE ENGINE.
# Copyright (C) 2008-2017 NIWA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#-------------------------------------------------------------------------------
# Test restart with broadcast to "[events]submission timeout".
#
# Regression test: the suite is run once so that a task broadcasts a
# submission timeout and stops the suite; the suite is then restarted with
# that broadcast still recorded in the suite run database.
. "$(dirname "$0")/test_header"
# Presumably one counted test per run_ok / suite_run_ok / cmp_ok call below:
# validate, run, database comparison, restart.
set_test_number 4
install_suite "${TEST_NAME_BASE}" "${TEST_NAME_BASE}"

run_ok "${TEST_NAME_BASE}-validate" cylc validate "${SUITE_NAME}"
# First run: task "foo" in suite.rc issues the broadcast and stops the suite.
suite_run_ok "${TEST_NAME_BASE}-run" cylc run "${SUITE_NAME}" --debug
# The broadcast is set as "PT1M" in suite.rc; the broadcast_states table is
# expected to hold it as "60.0".
# NOTE(review): the exit status of sqlite3 itself does not appear to be
# checked here; a failure would only show up through cmp_ok - confirm.
sqlite3 "${SUITE_RUN_DIR}/log/db" \
'SELECT * FROM broadcast_states' >'sqlite3.out'
cmp_ok 'sqlite3.out' <<<'*|root|[events]submission timeout|60.0'
# Restart with the stored broadcast in place and compare against the
# reference log. This is the step that guards against the suite dying on
# restart when a submission timeout has been broadcast.
suite_run_ok "${TEST_NAME_BASE}-restart" \
cylc restart "${SUITE_NAME}" --debug --reference-test
purge_suite "${SUITE_NAME}"
exit
3 changes: 3 additions & 0 deletions tests/restart/27-broadcast-timeout/reference.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
2017-03-02T20:25:55Z INFO - Initial point: 1
2017-03-02T20:25:55Z INFO - Final point: 1
2017-03-02T20:25:55Z INFO - [bar.1] -triggered off ['foo.1']
14 changes: 14 additions & 0 deletions tests/restart/27-broadcast-timeout/suite.rc
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Suite for the broadcast-timeout restart test.
# "foo" broadcasts a submission timeout and then asks the suite to stop, so
# that "bar" (which depends on "foo") is presumably left to run after restart.
[cylc]
UTC mode = True
cycle point format = %Y
[scheduling]
[[dependencies]]
graph = foo => bar
[runtime]
# The broadcast below is given no cycle point or namespace options; the test
# script expects it to be stored against "*" and "root" with value 60.0.
[[foo]]
script="""
cylc broadcast "${CYLC_SUITE_NAME}" --set='[events]submission timeout=PT1M'
cylc stop "${CYLC_SUITE_NAME}"
"""
# Trivial task that only needs to be submitted and succeed.
[[bar]]
script=true

0 comments on commit b839795

Please sign in to comment.