Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix NN for remote jobs #1089

Merged
merged 4 commits into from
Aug 18, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/cylc/cfgspec/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
},

'test battery' : {
'remote host with shared fs' : vdr( vtype='string' ),
'remote host' : vdr( vtype='string' ),
'directives' : {
'loadleveler host' : vdr( vtype='string' ),
Expand Down
29 changes: 17 additions & 12 deletions lib/cylc/job_submission/background.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,16 @@
#C:
#C: You should have received a copy of the GNU General Public License
#C: along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Implement background job submission."""

from job_submit import JobSubmit
from cylc.job_submission.job_submit import JobSubmit
from cylc.command_env import pr_scripting_sl
import os
from signal import SIGKILL
from subprocess import Popen, PIPE

class background( JobSubmit ):

class background(JobSubmit):
"""
Background 'job submission' runs the task directly in the background
(with '&') so that we can get the job PID (with $!) but then uses
Expand All @@ -32,14 +34,16 @@ class background( JobSubmit ):
% ssh user@host 'job-script & echo $!; wait'
(We have to override the general command templates to achieve this)."""

LOCAL_COMMAND_TEMPLATE = ( "( %(command)s & echo $!; wait )" )
LOCAL_COMMAND_TEMPLATE = "( %(command)s & echo $!; wait )"

REMOTE_COMMAND_TEMPLATE = (
" '" +
pr_scripting_sl +
"; " +
# Retry "mkdir" once to avoid race to create log/job/CYCLE/
" (mkdir -p %(jobfile_dir)s || mkdir -p %(jobfile_dir)s)" +
" && rm -f $(dirname %(jobfile_dir)s)/NN"
" && ln -s $(basename %(jobfile_dir)s) $(dirname %(jobfile_dir)s)/NN"
" && cat >%(jobfile_path)s.tmp" +
" && mv %(jobfile_path)s.tmp %(jobfile_path)s" +
" && chmod +x %(jobfile_path)s" +
Expand All @@ -50,32 +54,33 @@ class background( JobSubmit ):
# N.B. The perl command ensures that the job script is executed in its own
# process group, which allows the job script and its child processes to be
# killed correctly.
COMMAND_TEMPLATE = ("perl -e \"setpgrp(0,0);exec(@ARGV)\" %s " +
"</dev/null 1>%s 2>%s")
COMMAND_TEMPLATE = (
"perl -e \"setpgrp(0,0);exec(@ARGV)\" %s </dev/null 1>%s 2>%s")

def construct_job_submit_command( self ):
def construct_job_submit_command(self):
"""
Construct a command to submit this job to run.
"""
command_template = self.job_submit_command_template
if not command_template:
command_template = self.__class__.COMMAND_TEMPLATE
self.command = command_template % ( self.jobfile_path,
self.stdout_file,
self.stderr_file )
self.command = command_template % (
self.jobfile_path, self.stdout_file, self.stderr_file)

def get_id( self, out, err ):
def get_id(self, out, err):
"""
Extract the job process ID from job submission command
output. For background jobs the submission command simply
echoes the process ID to stdout as described above.
"""
return out.strip()

def kill( self, jid, st_file=None ):
@classmethod
def kill(cls, jid, _=None):
"""Kill the job."""
os.killpg(int(jid), SIGKILL)

def poll( self, jid ):
@classmethod
def poll(cls, jid):
"""Return the exit status of 'ps jid': 0 if the process exists, non-zero otherwise."""
return Popen(["ps", jid], stdout=PIPE).wait()
2 changes: 2 additions & 0 deletions lib/cylc/job_submission/job_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ class JobSubmit(object):
"; " +
# Retry "mkdir" once to avoid race to create log/job/CYCLE/
" (mkdir -p %(jobfile_dir)s || mkdir -p %(jobfile_dir)s)" +
" && rm -f $(dirname %(jobfile_dir)s)/NN"
" && ln -s $(basename %(jobfile_dir)s) $(dirname %(jobfile_dir)s)/NN"
" && cat >%(jobfile_path)s.tmp" +
" && mv %(jobfile_path)s.tmp %(jobfile_path)s" +
" && chmod +x %(jobfile_path)s" +
Expand Down
Empty file modified tests/job-submission/00-user.t
100644 → 100755
Empty file.
32 changes: 32 additions & 0 deletions tests/job-submission/01-job-nn-localhost.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
#C: THIS FILE IS PART OF THE CYLC SUITE ENGINE.
#C: Copyright (C) 2008-2014 Hilary Oliver, NIWA
#C:
#C: This program is free software: you can redistribute it and/or modify
#C: it under the terms of the GNU General Public License as published by
#C: the Free Software Foundation, either version 3 of the License, or
#C: (at your option) any later version.
#C:
#C: This program is distributed in the hope that it will be useful,
#C: but WITHOUT ANY WARRANTY; without even the implied warranty of
#C: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#C: GNU General Public License for more details.
#C:
#C: You should have received a copy of the GNU General Public License
#C: along with this program. If not, see <http://www.gnu.org/licenses/>.
#-------------------------------------------------------------------------------
# Test localhost job log NN link correctness.
# The header path is quoted so that it is still sourced correctly when the
# test directory path contains whitespace or glob characters.
. "$(dirname "$0")/test_header"
#-------------------------------------------------------------------------------
# Two results are reported below: the "validate" step and the "run" step.
set_test_number 2
#-------------------------------------------------------------------------------
install_suite "$TEST_NAME_BASE" "$TEST_NAME_BASE"
#-------------------------------------------------------------------------------
# Step 1: the suite definition must pass validation.
TEST_NAME="$TEST_NAME_BASE-validate"
run_ok "$TEST_NAME" cylc validate "$SUITE_NAME"
#-------------------------------------------------------------------------------
# Step 2: run the suite as a reference test with debug output enabled.
TEST_NAME="$TEST_NAME_BASE-run"
suite_run_ok "$TEST_NAME" cylc run --reference-test --debug "$SUITE_NAME"
#-------------------------------------------------------------------------------
# Clean up the installed suite; the bare "exit" propagates the status of the
# last command executed.
purge_suite "$SUITE_NAME"
exit
Loading