Skip to content

Commit

Permalink
Merge pull request #1089 from matthewrmshin/fix-remote-nn
Browse files Browse the repository at this point in the history
Fix NN for remote jobs
  • Loading branch information
hjoliver committed Aug 18, 2014
2 parents 36fdc27 + 07623a8 commit 2a3fadd
Show file tree
Hide file tree
Showing 11 changed files with 889 additions and 12 deletions.
1 change: 1 addition & 0 deletions lib/cylc/cfgspec/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
},

'test battery' : {
'remote host with shared fs' : vdr( vtype='string' ),
'remote host' : vdr( vtype='string' ),
'directives' : {
'loadleveler host' : vdr( vtype='string' ),
Expand Down
29 changes: 17 additions & 12 deletions lib/cylc/job_submission/background.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,16 @@
#C:
#C: You should have received a copy of the GNU General Public License
#C: along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Implement background job submission."""

from job_submit import JobSubmit
from cylc.job_submission.job_submit import JobSubmit
from cylc.command_env import pr_scripting_sl
import os
from signal import SIGKILL
from subprocess import Popen, PIPE

class background( JobSubmit ):

class background(JobSubmit):
"""
Background 'job submission' runs the task directly in the background
(with '&') so that we can get the job PID (with $!) but then uses
Expand All @@ -32,14 +34,16 @@ class background( JobSubmit ):
% ssh user@host 'job-script & echo $!; wait'
(We have to override the general command templates to achieve this)."""

LOCAL_COMMAND_TEMPLATE = ( "( %(command)s & echo $!; wait )" )
LOCAL_COMMAND_TEMPLATE = "( %(command)s & echo $!; wait )"

REMOTE_COMMAND_TEMPLATE = (
" '" +
pr_scripting_sl +
"; " +
# Retry "mkdir" once to avoid race to create log/job/CYCLE/
" (mkdir -p %(jobfile_dir)s || mkdir -p %(jobfile_dir)s)" +
" && rm -f $(dirname %(jobfile_dir)s)/NN"
" && ln -s $(basename %(jobfile_dir)s) $(dirname %(jobfile_dir)s)/NN"
" && cat >%(jobfile_path)s.tmp" +
" && mv %(jobfile_path)s.tmp %(jobfile_path)s" +
" && chmod +x %(jobfile_path)s" +
Expand All @@ -50,32 +54,33 @@ class background( JobSubmit ):
# N.B. The perl command ensures that the job script is executed in its own
# process group, which allows the job script and its child processes to be
# killed correctly.
COMMAND_TEMPLATE = ("perl -e \"setpgrp(0,0);exec(@ARGV)\" %s " +
"</dev/null 1>%s 2>%s")
COMMAND_TEMPLATE = (
"perl -e \"setpgrp(0,0);exec(@ARGV)\" %s </dev/null 1>%s 2>%s")

def construct_job_submit_command( self ):
def construct_job_submit_command(self):
"""
Construct a command to submit this job to run.
"""
command_template = self.job_submit_command_template
if not command_template:
command_template = self.__class__.COMMAND_TEMPLATE
self.command = command_template % ( self.jobfile_path,
self.stdout_file,
self.stderr_file )
self.command = command_template % (
self.jobfile_path, self.stdout_file, self.stderr_file)

def get_id( self, out, err ):
def get_id(self, out, err):
"""
Extract the job process ID from job submission command
output. For background jobs the submission command simply
echoes the process ID to stdout as described above.
"""
return out.strip()

def kill( self, jid, st_file=None ):
@classmethod
def kill(cls, jid, _=None):
"""Kill the job."""
os.killpg(int(jid), SIGKILL)

def poll( self, jid ):
@classmethod
def poll(cls, jid):
"""Return 0 if jid is in the queueing system, 1 otherwise."""
return Popen(["ps", jid], stdout=PIPE).wait()
2 changes: 2 additions & 0 deletions lib/cylc/job_submission/job_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ class JobSubmit(object):
"; " +
# Retry "mkdir" once to avoid race to create log/job/CYCLE/
" (mkdir -p %(jobfile_dir)s || mkdir -p %(jobfile_dir)s)" +
" && rm -f $(dirname %(jobfile_dir)s)/NN"
" && ln -s $(basename %(jobfile_dir)s) $(dirname %(jobfile_dir)s)/NN"
" && cat >%(jobfile_path)s.tmp" +
" && mv %(jobfile_path)s.tmp %(jobfile_path)s" +
" && chmod +x %(jobfile_path)s" +
Expand Down
Empty file modified tests/job-submission/00-user.t
100644 → 100755
Empty file.
32 changes: 32 additions & 0 deletions tests/job-submission/01-job-nn-localhost.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
#C: THIS FILE IS PART OF THE CYLC SUITE ENGINE.
#C: Copyright (C) 2008-2014 Hilary Oliver, NIWA
#C:
#C: This program is free software: you can redistribute it and/or modify
#C: it under the terms of the GNU General Public License as published by
#C: the Free Software Foundation, either version 3 of the License, or
#C: (at your option) any later version.
#C:
#C: This program is distributed in the hope that it will be useful,
#C: but WITHOUT ANY WARRANTY; without even the implied warranty of
#C: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#C: GNU General Public License for more details.
#C:
#C: You should have received a copy of the GNU General Public License
#C: along with this program. If not, see <http://www.gnu.org/licenses/>.
#-------------------------------------------------------------------------------
# Test localhost job log NN link correctness.
#
# Installs the suite named after this test, validates it, and then runs it
# as a reference test in debug mode.
#
# The path to the test header is quoted so that sourcing still works when the
# test lives under a directory whose name contains whitespace or glob
# characters.
. "$(dirname "$0")/test_header"
#-------------------------------------------------------------------------------
# Two checks follow (validate + run); presumably each helper call below
# reports exactly one test result -- confirm against test_header.
set_test_number 2
#-------------------------------------------------------------------------------
# Source suite name and installed test name are both the test base name.
install_suite "$TEST_NAME_BASE" "$TEST_NAME_BASE"
#-------------------------------------------------------------------------------
# Check 1: the suite definition must validate.
TEST_NAME="$TEST_NAME_BASE-validate"
run_ok "$TEST_NAME" cylc validate "$SUITE_NAME"
#-------------------------------------------------------------------------------
# Check 2: the suite must run to completion as a reference test.
TEST_NAME="$TEST_NAME_BASE-run"
suite_run_ok "$TEST_NAME" cylc run --reference-test --debug "$SUITE_NAME"
#-------------------------------------------------------------------------------
# Clean up the suite installed above.
purge_suite "$SUITE_NAME"
exit
Loading

0 comments on commit 2a3fadd

Please sign in to comment.