Skip to content
This repository has been archived by the owner on Mar 20, 2023. It is now read-only.

Commit

Permalink
Proxy non-native task execution via script
Browse files Browse the repository at this point in the history
- Resolves #235
  • Loading branch information
alfpark committed Jun 18, 2019
1 parent a16e125 commit bc4be6d
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 54 deletions.
164 changes: 122 additions & 42 deletions convoy/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4256,6 +4256,17 @@ def _generate_non_native_env_dump(env_vars, envfile):
return 'env | {}'.format(envfile)


def _generate_non_native_env_var(env_vars):
# type: (dict, str) -> str
"""Generate env dump command for non-native tasks
:param dict env_vars: env vars
"""
exclude = [
'^{}='.format(x) for x in _ENV_EXCLUDE_LINUX if x not in env_vars
]
return '{}'.format('|'.join(exclude))


def _construct_task(
batch_client, blob_client, keyvault_client, config, federation_id,
bxfile, bs, native, is_windows, tempdisk, allow_run_on_missing,
Expand Down Expand Up @@ -4404,64 +4415,110 @@ def _construct_task(
# set application command
if native:
task_commands = [task.command]
elif is_singularity:
# add env vars
else:
task_commands = []
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_SINGULARITY_COMMAND',
value='singularity {} {} {}'.format(
task.singularity_cmd,
' '.join(task.run_options),
task.singularity_image,
)
name='SHIPYARD_ENV_EXCLUDE',
value=_generate_non_native_env_var(env_vars)
)
)
# singularity command is passed as-is for multi-instance
task_commands = [
_generate_non_native_env_dump(env_vars, task.envfile),
'{}'.format(' ' + task.command) if task.command else ''
]
else:
task_commands = [
_generate_non_native_env_dump(env_vars, task.envfile),
'{} {} {} {}'.format(
task.docker_exec_cmd,
' '.join(task.docker_exec_options),
task.name,
task.command,
),
]
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_ENV_FILE',
value=task.envfile,
)
)
if is_singularity:
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_SINGULARITY_COMMAND',
value='singularity {} {} {}'.format(
task.singularity_cmd,
' '.join(task.run_options),
task.singularity_image,
)
)
)
else:
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_RUNTIME_CMD_OPTS',
value=' '.join(task.docker_exec_options)
)
)
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_RUNTIME',
value='docker',
)
)
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_RUNTIME_CMD',
value='exec',
)
)
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_CONTAINER_IMAGE_NAME',
value=task.name, # docker exec requires task name
)
)
else:
if native:
task_commands = [
'{}'.format(' ' + task.command) if task.command else ''
]
elif is_singularity:
task_commands = [
_generate_non_native_env_dump(env_vars, task.envfile),
'singularity {} {} {}{}'.format(
task.singularity_cmd,
' '.join(task.run_options),
task.singularity_image,
'{}'.format(' ' + task.command) if task.command else '',
)
]
else:
task_commands = [
_generate_non_native_env_dump(env_vars, task.envfile),
'{} {} {}{}'.format(
task.docker_run_cmd,
' '.join(task.run_options),
task.docker_image,
'{}'.format(' ' + task.command) if task.command else ''),
]
task_commands = []
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_ENV_EXCLUDE',
value=_generate_non_native_env_var(env_vars)
)
)
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_ENV_FILE',
value=task.envfile,
)
)
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_RUNTIME_CMD_OPTS',
value=' '.join(task.run_options)
)
)
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_RUNTIME',
value='singularity' if is_singularity else 'docker',
)
)
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_RUNTIME_CMD',
value=task.singularity_cmd if is_singularity else 'run',
)
)
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_CONTAINER_IMAGE_NAME',
value=(
task.singularity_image if is_singularity else
task.docker_image
),
)
)
output_files = None
# get registry login if missing images
if (not native and allow_run_on_missing and
(len(docker_missing_images) > 0 or
len(singularity_missing_images) > 0)):
taskenv, logincmd = generate_docker_login_settings(config)
loginenv, logincmd = generate_docker_login_settings(config)
logincmd.extend(task_commands)
taskenv = util.merge_dict(taskenv, loginenv)
task_commands = logincmd
# digest any input_data
addlcmds = data.process_input_data(config, bxfile, _task, on_task=True)
Expand All @@ -4471,6 +4528,16 @@ def _construct_task(
'input_data at task-level is not supported on '
'native container pools')
task_commands.insert(0, addlcmds)
if not native:
if util.is_not_empty(task_commands):
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_USER_PROLOGUE_CMD',
value=util.wrap_commands_in_shell(
task_commands, windows=is_windows),
)
)
task_commands = []
# digest any output data
addlcmds = data.process_output_data(config, bxfile, _task)
if addlcmds is not None:
Expand All @@ -4479,6 +4546,19 @@ def _construct_task(
else:
task_commands.append(addlcmds)
del addlcmds
if not native:
if util.is_not_empty(task_commands):
taskenv.append(
batchmodels.EnvironmentSetting(
name='SHIPYARD_USER_EPILOGUE_CMD',
value=util.wrap_commands_in_shell(
task_commands, windows=is_windows),
)
)
task_commands = [
'$AZ_BATCH_NODE_STARTUP_DIR/wd/shipyard_task_runner.sh {}'.format(
task.command)
]
# always add env vars in (host) task to be dumped into container
# task (if non-native)
if util.is_not_empty(env_vars):
Expand Down
6 changes: 6 additions & 0 deletions convoy/fleet.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,10 @@
'scripts/windows/shipyard_nodeprep_nativedocker.ps1'
)
)
_TASK_RUNNER_FILE = (
'shipyard_task_runner.sh',
pathlib.Path(_ROOT_PATH, 'scripts/shipyard_task_runner.sh')
)
_GLUSTERPREP_FILE = (
'shipyard_glusterfs_on_compute.sh',
pathlib.Path(_ROOT_PATH, 'scripts/shipyard_glusterfs_on_compute.sh')
Expand Down Expand Up @@ -1362,6 +1366,8 @@ def _construct_pool_object(
)
else:
_rflist.append(_NODEPREP_FILE)
if not native:
_rflist.append(_TASK_RUNNER_FILE)
# create start task commandline
start_task.append(
('{npf}{a}{b}{c}{d}{e}{f}{g}{i}{j}{k}{lis}{m}{n}{o}{p}{q}{r}{s}'
Expand Down
16 changes: 4 additions & 12 deletions convoy/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,7 @@
'environment_variables', 'environment_variables_keyvault_secret_id',
'envfile', 'resource_files', 'command', 'infiniband', 'gpu',
'depends_on', 'depends_on_range', 'max_task_retries', 'max_wall_time',
'retention_time', 'docker_run_cmd', 'docker_exec_cmd',
'multi_instance', 'default_exit_options',
'retention_time', 'multi_instance', 'default_exit_options',
]
)
MultiInstanceSettings = collections.namedtuple(
Expand Down Expand Up @@ -3981,14 +3980,10 @@ def task_settings(
('cannot initialize a gpu task on nodes without '
'gpus: pool={} vm_size={}').format(pool_id, vm_size))
# set docker commands with nvidia docker wrapper
docker_run_cmd = 'nvidia-docker run'
docker_exec_cmd = 'nvidia-docker exec'
if util.is_not_empty(singularity_image):
run_opts.append('--nv')
else:
# set normal run and exec commands
docker_run_cmd = 'docker run'
docker_exec_cmd = 'docker exec'
else:
run_opts.append('--runtime=nvidia')
# infiniband
infiniband = _kv_read(conf, 'infiniband')
if infiniband is None:
Expand Down Expand Up @@ -4157,8 +4152,7 @@ def task_settings(
cc_args = None
else:
cc_args = [
'{} {} {}{}'.format(
docker_run_cmd,
'docker run {} {}{}'.format(
' '.join(run_opts),
docker_image,
coordination_command),
Expand Down Expand Up @@ -4239,8 +4233,6 @@ def task_settings(
gpu=gpu,
depends_on=depends_on,
depends_on_range=depends_on_range,
docker_run_cmd=docker_run_cmd,
docker_exec_cmd=docker_exec_cmd,
singularity_cmd=singularity_cmd,
run_elevated=run_elevated,
multi_instance=MultiInstanceSettings(
Expand Down
5 changes: 5 additions & 0 deletions scripts/shipyard_nodeprep.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1687,6 +1687,11 @@ if [ $azureblob -eq 1 ]; then
mount_azureblob_container
fi

# prep task execute helper script in non-native mode
if [ "$native_mode" -eq 0 ]; then
chmod 755 shipyard_task_runner.sh
fi

# check if we're coming up from a reboot
if [ -f "$cascadefailed" ]; then
log ERROR "$cascadefailed file exists, assuming cascade failure during node prep"
Expand Down
44 changes: 44 additions & 0 deletions scripts/shipyard_task_runner.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bash

set -e
set -o pipefail

# environment variables used
# SHIPYARD_USER_PROLOGUE_CMD: pre-exec user cmd
# SHIPYARD_USER_EPILOGUE_CMD: post-exec user cmd
# SHIPYARD_ENV_EXCLUDE: environment vars to exclude
# SHIPYARD_ENV_FILE: env file
# SHIPYARD_RUNTIME: docker or singularity
# SHIPYARD_RUNTIME_CMD: run or exec
# SHIPYARD_RUNTIME_CMD_OPTS: options
# SHIPYARD_CONTAINER_IMAGE_NAME: container name

## PRE-EXEC
if [ -n "$SHIPYARD_USER_PROLOGUE_CMD" ]; then
eval "$SHIPYARD_USER_PROLOGUE_CMD"
fi

## TASK EXEC
# dump env for envfile
if [ -n "$SHIPYARD_ENV_EXCLUDE" ]; then
env | grep -vE "$SHIPYARD_ENV_EXCLUDE" > "$SHIPYARD_ENV_FILE"
else
env > "$SHIPYARD_ENV_FILE"
fi

cat "$SHIPYARD_ENV_FILE"

SHIPYARD_RUNTIME_CMD_OPTS=$(eval echo "${SHIPYARD_RUNTIME_CMD_OPTS}")

if [ -n "$SHIPYARD_RUNTIME" ]; then
# shellcheck disable=SC2086
"$SHIPYARD_RUNTIME" "$SHIPYARD_RUNTIME_CMD" $SHIPYARD_RUNTIME_CMD_OPTS \
"$SHIPYARD_CONTAINER_IMAGE_NAME" "$@"
else
"$@"
fi

## POST EXEC
if [ -n "$SHIPYARD_USER_EPILOGUE_CMD" ]; then
eval "$SHIPYARD_USER_EPILOGUE_CMD"
fi

0 comments on commit bc4be6d

Please sign in to comment.