Skip to content

Commit

Permalink
Avoid bind-mounts for docker environments on macOS (pantsbuild#18225)
Browse files Browse the repository at this point in the history
Bind mounts in Docker for macOS have been implemented in a variety of
ways over time, and are still in flux
(https://www.cncf.io/blog/2023/02/02/docker-on-macos-is-slow-and-how-to-fix-it/
is a good overview of the chaos). But the `gRPC FUSE` implementation,
which is the default in the most recently released version at the time
of writing (`4.16.2`), can suffer from race conditions where files that
are created on the host inside a bind mount may not be visible to the
container. This causes issues like pantsbuild#18162.

To avoid race conditions for file inputs, this change introduces a
"pipe" IO strategy for Docker inputs, which uses a tar-pipe (from a
tar-file stream written by the `Store`, to an `exec` of `tar` inside the
container) to write process inputs. This strategy is used by default on
macOS (for now), but the choice of strategy can be overridden.

The `pipe` IO strategy is about 30% slower for test running than the
`mount` strategy. As we gather feedback from macOS users, we should be
able to gain a clearer picture of which Docker for macOS versions and
filesystem implementations can safely use the `mount` strategy.

Fixes pantsbuild#18162.
  • Loading branch information
stuhood authored Feb 14, 2023
1 parent f862de1 commit c478a13
Show file tree
Hide file tree
Showing 23 changed files with 650 additions and 219 deletions.
1 change: 1 addition & 0 deletions src/python/pants/engine/internals/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ def __init__(
local_parallelism=execution_options.process_execution_local_parallelism,
local_enable_nailgun=execution_options.process_execution_local_enable_nailgun,
remote_parallelism=execution_options.process_execution_remote_parallelism,
docker_strategy=execution_options.docker_strategy.value,
child_max_memory=execution_options.process_total_child_memory_usage or 0,
child_default_memory=execution_options.process_per_child_memory_usage,
graceful_shutdown_timeout=execution_options.process_execution_graceful_shutdown_timeout,
Expand Down
10 changes: 8 additions & 2 deletions src/python/pants/engine/internals/scheduler_test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,17 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import os
from dataclasses import replace
from pathlib import Path

from pants.engine.internals.native_engine import PyExecutor
from pants.engine.internals.scheduler import Scheduler, SchedulerSession
from pants.engine.unions import UnionMembership
from pants.option.global_options import DEFAULT_EXECUTION_OPTIONS, DEFAULT_LOCAL_STORE_OPTIONS
from pants.option.global_options import (
DEFAULT_EXECUTION_OPTIONS,
DEFAULT_LOCAL_STORE_OPTIONS,
DockerStrategy,
)
from pants.util.contextutil import temporary_file_path
from pants.util.dirutil import safe_mkdtemp
from pants.util.logging import LogLevel
Expand Down Expand Up @@ -35,6 +40,7 @@ def mk_scheduler(

local_execution_root_dir = os.path.realpath(safe_mkdtemp())
named_caches_dir = os.path.realpath(safe_mkdtemp())
execution_options = replace(DEFAULT_EXECUTION_OPTIONS, docker_strategy=DockerStrategy.mount)
scheduler = Scheduler(
ignore_patterns=[],
use_gitignore=False,
Expand All @@ -45,7 +51,7 @@ def mk_scheduler(
rules=rules,
union_membership=UnionMembership({}),
executor=self._executor,
execution_options=DEFAULT_EXECUTION_OPTIONS,
execution_options=execution_options,
local_store_options=DEFAULT_LOCAL_STORE_OPTIONS,
include_trace_on_error=include_trace_on_error,
)
Expand Down
11 changes: 8 additions & 3 deletions src/python/pants/engine/rules_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import re
from dataclasses import dataclass
from dataclasses import dataclass, replace
from enum import Enum
from pathlib import Path
from textwrap import dedent
Expand All @@ -28,14 +28,19 @@
rule_helper,
)
from pants.engine.unions import UnionMembership
from pants.option.global_options import DEFAULT_EXECUTION_OPTIONS, DEFAULT_LOCAL_STORE_OPTIONS
from pants.option.global_options import (
DEFAULT_EXECUTION_OPTIONS,
DEFAULT_LOCAL_STORE_OPTIONS,
DockerStrategy,
)
from pants.testutil.rule_runner import MockGet, run_rule_with_mocks
from pants.util.enums import match
from pants.util.logging import LogLevel


def create_scheduler(rules, validate=True):
"""Create a Scheduler."""
execution_options = replace(DEFAULT_EXECUTION_OPTIONS, docker_strategy=DockerStrategy.mount)
return Scheduler(
ignore_patterns=[],
use_gitignore=False,
Expand All @@ -46,7 +51,7 @@ def create_scheduler(rules, validate=True):
rules=rules,
union_membership=UnionMembership({}),
executor=PyExecutor(core_threads=2, max_threads=4),
execution_options=DEFAULT_EXECUTION_OPTIONS,
execution_options=execution_options,
local_store_options=DEFAULT_LOCAL_STORE_OPTIONS,
validate_reachability=validate,
)
Expand Down
3 changes: 0 additions & 3 deletions src/python/pants/init/engine_initializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
from pants.init import specs_calculator
from pants.init.bootstrap_scheduler import BootstrapStatus
from pants.option.global_options import (
DEFAULT_EXECUTION_OPTIONS,
DynamicRemoteOptions,
ExecutionOptions,
GlobalOptions,
Expand Down Expand Up @@ -237,8 +236,6 @@ def setup_graph_extended(
union_membership: UnionMembership
registered_target_types = RegisteredTargetTypes.create(build_configuration.target_types)

execution_options = execution_options or DEFAULT_EXECUTION_OPTIONS

@rule
def parser_singleton() -> Parser:
return Parser(
Expand Down
64 changes: 62 additions & 2 deletions src/python/pants/option/global_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from pants.engine.env_vars import CompleteEnvironmentVars
from pants.engine.fs import FileContent
from pants.engine.internals.native_engine import PyExecutor
from pants.engine.platform import Platform
from pants.option.custom_types import memory_size
from pants.option.errors import OptionsError
from pants.option.option_types import (
Expand Down Expand Up @@ -130,6 +131,14 @@ class KeepSandboxes(Enum):
never = "never"


@enum.unique
class DockerStrategy(Enum):
    """An enum for the global option `docker_strategy`.

    Selects how process inputs are provided to Docker containers. Values are unique
    (enforced by `@enum.unique`) so that no member can silently alias another.
    """

    # Defer the choice to `GlobalOptions.resolve_docker_strategy`, which picks a
    # concrete strategy based on the platform that Pants is running on.
    auto = "auto"
    # Provide inputs via bind mounts.
    mount = "mount"
    # Provide inputs by tar-pipe'ing them into the container.
    pipe = "pipe"


@enum.unique
class AuthPluginState(Enum):
OK = "ok"
Expand Down Expand Up @@ -491,6 +500,8 @@ class ExecutionOptions:
process_execution_graceful_shutdown_timeout: int
cache_content_behavior: CacheContentBehavior

docker_strategy: DockerStrategy

process_total_child_memory_usage: int | None
process_per_child_memory_usage: int

Expand Down Expand Up @@ -535,6 +546,7 @@ def from_options(
process_execution_cache_namespace=bootstrap_options.process_execution_cache_namespace,
process_execution_graceful_shutdown_timeout=bootstrap_options.process_execution_graceful_shutdown_timeout,
process_execution_local_enable_nailgun=bootstrap_options.process_execution_local_enable_nailgun,
docker_strategy=GlobalOptions.resolve_docker_strategy(bootstrap_options),
cache_content_behavior=bootstrap_options.cache_content_behavior,
process_total_child_memory_usage=bootstrap_options.process_total_child_memory_usage,
process_per_child_memory_usage=bootstrap_options.process_per_child_memory_usage,
Expand Down Expand Up @@ -620,6 +632,7 @@ def from_options(cls, options: OptionValueContainer) -> LocalStoreOptions:
keep_sandboxes=KeepSandboxes.never,
local_cache=True,
cache_content_behavior=CacheContentBehavior.fetch,
docker_strategy=DockerStrategy.auto,
process_execution_local_enable_nailgun=True,
process_execution_graceful_shutdown_timeout=3,
# Remote store setup.
Expand Down Expand Up @@ -1042,7 +1055,11 @@ class BootstrapOptions:
help=softwrap(
"""
The maximum number of threads to use to execute `@rule` logic. Defaults to
a small multiple of `--rule-threads-core`.
`16 * --rule-threads-core`.
Note that setting too low a `--rule-threads-max` value can lead to deadlocks: Pants
uses blocking operations internally for a few use cases, and if the pool of blocking
threads is exhausted, those use cases will wait.
"""
),
)
Expand Down Expand Up @@ -1185,6 +1202,24 @@ class BootstrapOptions:
"""
),
)
process_execution_docker_strategy = EnumOption(
default=DEFAULT_EXECUTION_OPTIONS.docker_strategy,
help=softwrap(
"""
The strategy used to provide inputs to Docker containers when the `docker_environment`
target is in use.
The `mount` strategy provides inputs via bind mounts. The `pipe` strategy provides
inputs by tar-pipe'ing them into the container.
The default value of `auto` will choose the fastest known-consistent strategy for the
platform that Pants is running on, which generally means using the `pipe` strategy when
Docker is implemented via virtualization (on macOS and Windows in particular). See
https://github.com/docker/roadmap/issues/7 for more information on macOS filesystem
virtualization status.
"""
),
)
cache_content_behavior = EnumOption(
advanced=True,
default=DEFAULT_EXECUTION_OPTIONS.cache_content_behavior,
Expand Down Expand Up @@ -1823,10 +1858,22 @@ def validate_remote_headers(opt_name: str) -> None:

@staticmethod
def create_py_executor(bootstrap_options: OptionValueContainer) -> PyExecutor:
# NB: See the `--rule-threads-max` option help for a warning on setting this too low.
#
# This value is chosen somewhat arbitrarily, but has a few concerns at play:
# * When set too low, tasks using `Executor::spawn_blocking` on the Rust side can deadlock
# when too many blocking operations are already running.
# * Higher thread counts mean less bounded access to the LMDB store (which is the primary user
# of blocking tasks), which is good up to a point, but then begins to increase kernel time
# as many threads are blocked waiting for IO and locks.
#
# The value 16 was chosen to avoid deadlocks with all current `spawn_blocking` calls, and based
# on the observation that performance drops off by 2-3 percentage points (on my machine!) with
# multiples of 32 and 64.
rule_threads_max = (
bootstrap_options.rule_threads_max
if bootstrap_options.rule_threads_max
else 4 * bootstrap_options.rule_threads_core
else 16 * bootstrap_options.rule_threads_core
)
return PyExecutor(
core_threads=bootstrap_options.rule_threads_core, max_threads=rule_threads_max
Expand All @@ -1853,6 +1900,19 @@ def resolve_keep_sandboxes(
else:
raise TypeError(f"Unexpected option value for `keep_sandboxes`: {resolved_value}")

@staticmethod
def resolve_docker_strategy(
    bootstrap_options: OptionValueContainer,
) -> DockerStrategy:
    """Resolve the `process_execution_docker_strategy` option to the strategy to use.

    An explicitly chosen strategy is returned unchanged. `DockerStrategy.auto` is
    resolved from the local platform: `pipe` on macOS, `mount` everywhere else.
    """
    requested = cast(DockerStrategy, bootstrap_options.process_execution_docker_strategy)

    # Anything other than `auto` is an explicit user choice: honor it as-is.
    if requested != DockerStrategy.auto:
        return requested

    # `auto`: only macOS is switched to the tar-pipe strategy.
    if Platform.create_for_localhost().is_macos:
        return DockerStrategy.pipe
    return DockerStrategy.mount

@staticmethod
def compute_pants_ignore(buildroot, global_options):
"""Computes the merged value of the `--pants-ignore` flag.
Expand Down
15 changes: 14 additions & 1 deletion src/rust/engine/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/rust/engine/fs/store/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ prost-types = "0.9"
serde = "1.0"
serde_derive = "1.0"
sharded_lmdb = { path = "../../sharded_lmdb" }
tar = { version = "0.4", default-features = false }
task_executor = { path = "../../task_executor" }
tempfile = "3"
tokio-rustls = "0.23"
Expand Down
Loading

0 comments on commit c478a13

Please sign in to comment.