Skip to content

Commit

Permalink
runner: Centralize handling of hostenv and pass it via extra_env
Browse files Browse the repository at this point in the history
The hostenv concept in the runner code predates the extra_env concept,
and, unlike extra_env which is centralized thru the runner dispatcher,
hostenv was handled independently by each runner.¹

By centralizing hostenv handling in the runner dispatcher and making it
just another (automatic) part of extra_env, we simplify the runners,
improve the security/visibility of the values in hostenv thru the recent
improvements to extra_env handling, and make it easier to eventually
deprecate/remove hostenv entirely.

¹ The reasoning at the time was that hostenv forwarding was only needed
  for containerized runners, and so the ambient (née native) runner
  didn't need to use it.
  • Loading branch information
tsibley committed Jun 16, 2023
1 parent cd09323 commit 38a5ddf
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 47 deletions.
9 changes: 5 additions & 4 deletions nextstrain/cli/runner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
ambient as __ambient,
aws_batch as __aws_batch,
)
from .. import config, env
from .. import config, env, hostenv
from ..argparse import DirectoryPath, SKIP_AUTO_DEFAULT_IN_HELP
from ..errors import UserError
from ..types import Env, Options, RunnerModule
Expand Down Expand Up @@ -258,10 +258,11 @@ def run(opts: Options, working_volume: NamedVolume = None, extra_env: Env = {},
if opts.__runner__ is singularity and opts.image is docker.DEFAULT_IMAGE: # type: ignore
opts.image = singularity.DEFAULT_IMAGE # type: ignore

# Add values from --envdir and --env to extra_env without overriding values
# explicitly set by our commands' own internals (i.e. the callers of this
# function).
# Add env from automatically forwarded vars, from --envdir, and from --env
# without overriding values explicitly set by our commands' own internals
# (i.e. the callers of this function).
extra_env = {
**dict(hostenv.forwarded_values()),
**dict(env.from_dirs(opts.envdir)),
**dict(env.from_vars(opts.env)),
**extra_env,
Expand Down
33 changes: 2 additions & 31 deletions nextstrain/cli/runner/aws_batch/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from copy import deepcopy
from operator import itemgetter
from time import time
from typing import Callable, Generator, Iterable, Mapping, List, Optional
from ... import hostenv, aws
from typing import Callable, Generator, Iterable, Mapping, Optional
from ... import aws
from ...errors import UserError
from ...util import split_image_name
from . import logs, s3
Expand Down Expand Up @@ -173,7 +173,6 @@ def submit(name: str,
"name": "NEXTSTRAIN_AWS_BATCH_WORKDIR_URL",
"value": s3.object_url(workdir),
},
*forwarded_environment(),
*[{"name": name, "value": value} for name, value in env.items()]
],
"resourceRequirements": [
Expand Down Expand Up @@ -203,34 +202,6 @@ def lookup(job_id: str) -> JobState:
return job


def forwarded_environment() -> List[dict]:
"""
Return a list of Batch job environment entries for the ambient local host
environment we want to forward along.
"""

# XXX TODO: This isn't great from a security perspective as it makes the
# secrets visible in the AWS Batch job descriptions and possibly even the
# underlying Docker invocations in the process list on our
# dynamically-provisioned EC2 instances. It probably ends up in logs
# somewhere too. Naturally, Amazon recommends against it:
#
# https://docs.aws.amazon.com/batch/latest/userguide/job_definition_parameters.html#containerProperties
#
# A better approach to implement in the future would be writing an env dir,
# shipping it over S3 like the rest of the job context, and adding envdir
# into the entrypoint exec-chain.
#
# I'm punting on that in the immediate-term as the risk/threat seems low,
# especially as our AWS Batch queue will only be used by ourselves. (Other
# people will have to setup their own Batch queue.)
# -trs, 17 Sept 2018
return [
{ "name": name, "value": value }
for name, value in hostenv.forwarded_values()
]


def override_definition(base_definition_name: str, image: str) -> str:
"""
Find or create a job definition based on *base_definition_name* but which
Expand Down
7 changes: 2 additions & 5 deletions nextstrain/cli/runner/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from tempfile import TemporaryDirectory
from textwrap import dedent
from typing import Iterable, List
from .. import hostenv, config, env
from .. import config, env
from ..errors import UserError
from ..types import Env, RunnerSetupStatus, RunnerTestResults, RunnerTestResultStatus, RunnerUpdateStatus
from ..util import warn, colored, capture_output, exec_or_return, split_image_name
Expand Down Expand Up @@ -147,10 +147,7 @@ def run(opts, argv, working_volume = None, extra_env: Env = {}, cpus: int = None
# Change the default working directory if requested
*(["--workdir=/nextstrain/%s" % working_volume.name] if working_volume else []),

# Pass through certain environment variables
*["--env=%s" % name for name in hostenv.forwarded_names],

# Plus any extra environment variables provided by us (not via an env.d)
# Pass thru any extra environment variables provided by us (not via an env.d)
*["--env=%s" % name for name, value in extra_env.items() if value is not None],

# Set resource limits if any
Expand Down
8 changes: 1 addition & 7 deletions nextstrain/cli/runner/singularity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pathlib import Path
from typing import Iterable, List, Optional
from urllib.parse import urlsplit
from .. import config, hostenv
from .. import config
from ..errors import UserError
from ..paths import RUNTIMES
from ..types import Env, RunnerSetupStatus, RunnerTestResults, RunnerUpdateStatus
Expand Down Expand Up @@ -177,12 +177,6 @@ def run(opts, argv, working_volume = None, extra_env: Env = {}, cpus: int = None
# prefixing with SINGULARITYENV_….¹
#
# ¹ <https://docs.sylabs.io/guides/3.0/user-guide/environment_and_metadata.html#environment>
#
# Pass through certain environment variables
**{f"SINGULARITYENV_{k}": v
for k, v in hostenv.forwarded_values() },

# Plus any extra environment variables provided by us
**{f"SINGULARITYENV_{k}": v
for k, v in extra_env.items()
if v is not None },
Expand Down

0 comments on commit 38a5ddf

Please sign in to comment.