Skip to content

Commit

Permalink
Run python sources with a VenvPex (pantsbuild#17700)
Browse files Browse the repository at this point in the history
This change makes it so we use a `VenvPex` to run Python sources, which is a speed boost (I measure a gain of about ~500ms, which is also quoted in `pex.py`).

In order to make this work (specifically ensuring we don't revert the fix for pantsbuild#12055) we now have to weave the complete pex environment through to `VenvPexRequest`.
  • Loading branch information
thejcannon committed Dec 9, 2022
1 parent 54712e1 commit 802ca32
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 48 deletions.
14 changes: 9 additions & 5 deletions src/python/pants/backend/codegen/protobuf/python/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,17 @@ async def generate_python_from_protobuf(
protoc_gen_mypy_script = "protoc-gen-mypy"
protoc_gen_mypy_grpc_script = "protoc-gen-mypy_grpc"
mypy_pex = None
complete_pex_env = pex_environment.in_sandbox(working_directory=None)

if python_protobuf_subsystem.mypy_plugin:
mypy_request = python_protobuf_mypy_plugin.to_pex_request()
mypy_pex = await Get(
VenvPex,
VenvPexRequest(bin_names=[protoc_gen_mypy_script], pex_request=mypy_request),
VenvPexRequest(
pex_request=mypy_request,
complete_pex_env=complete_pex_env,
bin_names=[protoc_gen_mypy_script],
),
)

if request.protocol_target.get(ProtobufGrpcToggleField).value:
Expand All @@ -117,8 +122,9 @@ async def generate_python_from_protobuf(
mypy_pex = await Get(
VenvPex,
VenvPexRequest(
bin_names=[protoc_gen_mypy_script, protoc_gen_mypy_grpc_script],
pex_request=mypy_request,
complete_pex_env=complete_pex_env,
bin_names=[protoc_gen_mypy_script, protoc_gen_mypy_grpc_script],
),
)

Expand Down Expand Up @@ -178,9 +184,7 @@ async def generate_python_from_protobuf(
description=f"Generating Python sources from {request.protocol_target.address}.",
level=LogLevel.DEBUG,
output_directories=(output_dir,),
append_only_caches=pex_environment.in_sandbox(
working_directory=None
).append_only_caches,
append_only_caches=complete_pex_env.append_only_caches,
),
)

Expand Down
69 changes: 34 additions & 35 deletions src/python/pants/backend/python/goals/run_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).
from __future__ import annotations

import dataclasses
import os
import textwrap
from typing import Iterable, Optional
from typing import Optional

from pants.backend.python.subsystems.debugpy import DebugPy
from pants.backend.python.target_types import (
Expand All @@ -15,7 +16,7 @@
)
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.backend.python.util_rules.local_dists import LocalDistsPex, LocalDistsPexRequest
from pants.backend.python.util_rules.pex import Pex, PexRequest
from pants.backend.python.util_rules.pex import Pex, PexRequest, VenvPex, VenvPexRequest
from pants.backend.python.util_rules.pex_environment import PexEnvironment
from pants.backend.python.util_rules.pex_from_targets import (
InterpreterConstraintsRequest,
Expand Down Expand Up @@ -45,46 +46,44 @@ async def _create_python_source_run_request(
pex_env: PexEnvironment,
run_in_sandbox: bool,
console_script: Optional[ConsoleScript] = None,
additional_pex_args: Iterable[str] = (),
) -> RunRequest:
addresses = [address]
entry_point, transitive_targets = await MultiGet(
interpreter_constraints, entry_point, transitive_targets = await MultiGet(
Get(InterpreterConstraints, InterpreterConstraintsRequest(addresses)),
Get(
ResolvedPexEntryPoint,
ResolvePexEntryPointRequest(entry_point_field),
),
Get(TransitiveTargets, TransitiveTargetsRequest(addresses)),
)

interpreter_constraints = await Get(
InterpreterConstraints, InterpreterConstraintsRequest(addresses)
)

pex_filename = (
address.generated_name.replace(".", "_") if address.generated_name else address.target_name
)
pex_get = Get(
Pex,
PexFromTargetsRequest(
addresses,
output_filename=f"{pex_filename}.pex",
internal_only=True,
include_source_files=False,
# `PEX_EXTRA_SYS_PATH` should contain this entry_point's module.
main=console_script or entry_point.val,
additional_args=(
*additional_pex_args,
# N.B.: Since we cobble together the runtime environment via PEX_EXTRA_SYS_PATH
# below, it's important for any app that re-executes itself that these environment
# variables are not stripped.
"--no-strip-pex-env",

pex_request, sources = await MultiGet(
Get(
PexRequest,
PexFromTargetsRequest(
addresses,
output_filename=f"{pex_filename}.pex",
internal_only=True,
include_source_files=False,
# `PEX_EXTRA_SYS_PATH` should contain this entry_point's module.
main=console_script or entry_point.val,
additional_args=(
# N.B.: Since we cobble together the runtime environment via PEX_EXTRA_SYS_PATH
# below, it's important for any app that re-executes itself that these environment
# variables are not stripped.
"--no-strip-pex-env",
),
),
),
Get(
PythonSourceFiles,
PythonSourceFilesRequest(transitive_targets.closure, include_files=True),
),
)
sources_get = Get(
PythonSourceFiles, PythonSourceFilesRequest(transitive_targets.closure, include_files=True)
)
pex, sources = await MultiGet(pex_get, sources_get)

local_dists = await Get(
LocalDistsPex,
Expand All @@ -95,10 +94,14 @@ async def _create_python_source_run_request(
sources=sources,
),
)
pex_request = dataclasses.replace(
pex_request, pex_path=(*pex_request.pex_path, local_dists.pex)
)

complete_pex_environment = pex_env.in_workspace()
venv_pex = await Get(VenvPex, VenvPexRequest(pex_request, complete_pex_environment))
input_digests = [
pex.digest,
local_dists.pex.digest,
venv_pex.digest,
# Note regarding not-in-sandbox mode: You might think that the sources don't need to be copied
# into the chroot when using inline sources. But they do, because some of them might be
# codegenned, and those won't exist in the inline source tree. Rather than incurring the
Expand All @@ -109,9 +112,6 @@ async def _create_python_source_run_request(
]
merged_digest = await Get(Digest, MergeDigests(input_digests))

complete_pex_env = pex_env.in_workspace()
args = complete_pex_env.create_argv(_in_chroot(pex.name), python=pex.python)

chrooted_source_roots = [_in_chroot(sr) for sr in sources.source_roots]
# The order here is important: we want the in-repo sources to take precedence over their
# copies in the sandbox (see above for why those copies exist even in non-sandboxed mode).
Expand All @@ -120,14 +120,13 @@ async def _create_python_source_run_request(
*chrooted_source_roots,
]
extra_env = {
**pex_env.in_workspace().environment_dict(python_configured=pex.python is not None),
"PEX_PATH": _in_chroot(local_dists.pex.name),
**complete_pex_environment.environment_dict(python_configured=venv_pex.python is not None),
"PEX_EXTRA_SYS_PATH": os.pathsep.join(source_roots),
}

return RunRequest(
digest=merged_digest,
args=args,
args=[_in_chroot(venv_pex.pex.argv0)],
extra_env=extra_env,
)

Expand Down
2 changes: 0 additions & 2 deletions src/python/pants/backend/python/goals/run_python_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ async def create_python_source_run_request(
entry_point_field=PexEntryPointField(field_set.source.value, field_set.address),
pex_env=pex_env,
run_in_sandbox=run_goal_use_sandbox,
# Setting --venv is kosher because the PEX we create is just for the thirdparty deps.
additional_pex_args=["--venv"],
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,4 +222,34 @@ def test_local_dist() -> None:
f"{tmpdir}/foo/main.py",
]
result = run_pants(args)
assert result.stdout == "LOCAL DIST\n"
assert result.stdout == "LOCAL DIST\n", result.stderr


def test_runs_in_venv() -> None:
# NB: We aren't just testing an implementation detail, users can and should expect their code to
# be run just as if they ran their code in a virtualenv (as is common in the Python ecosystem).
sources = {
"src/app.py": dedent(
"""\
import os
import sys
if __name__ == "__main__":
sys.exit(0 if "VIRTUAL_ENV" in os.environ else 1)
"""
),
"src/BUILD": dedent(
"""\
python_sources(name="lib")
"""
),
}
with setup_tmpdir(sources) as tmpdir:
args = [
"--backend-packages=pants.backend.python",
f"--source-root-patterns=['/{tmpdir}/src']",
"run",
f"{tmpdir}/src/app.py",
]
result = run_pants(args)
assert result.exit_code == 0
3 changes: 3 additions & 0 deletions src/python/pants/backend/python/lint/pylint/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
VenvPexProcess,
VenvPexRequest,
)
from pants.backend.python.util_rules.pex_environment import PexEnvironment
from pants.backend.python.util_rules.pex_from_targets import RequirementsPexRequest
from pants.backend.python.util_rules.python_sources import (
PythonSourceFiles,
Expand Down Expand Up @@ -118,6 +119,7 @@ async def run_pylint(
request: PylintRequest.Batch[PylintFieldSet, PartitionMetadata],
pylint: Pylint,
first_party_plugins: PylintFirstPartyPlugins,
pex_environment: PexEnvironment,
) -> LintResult:
assert request.partition_metadata is not None

Expand Down Expand Up @@ -181,6 +183,7 @@ async def run_pylint(
internal_only=True,
pex_path=[pylint_pex, requirements_pex],
),
pex_environment.in_sandbox(working_directory=None),
# Astroid < 2.9.1 had a regression that prevented the use of symlinks:
# https://github.com/PyCQA/pylint/issues/1470
site_packages_copies=(astroid_info.version < packaging.version.Version("2.9.1")),
Expand Down
17 changes: 12 additions & 5 deletions src/python/pants/backend/python/util_rules/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,13 +667,12 @@ class VenvScriptWriter:

@classmethod
def create(
cls, pex_environment: PexEnvironment, pex: Pex, venv_rel_dir: PurePath
cls, complete_pex_env: CompletePexEnvironment, pex: Pex, venv_rel_dir: PurePath
) -> VenvScriptWriter:
# N.B.: We don't know the working directory that will be used in any given
# invocation of the venv scripts; so we deal with working_directory once in an
# `adjust_relative_paths` function inside the script to save rule authors from having to do
# CWD offset math in every rule for all the relative paths their process depends on.
complete_pex_env = pex_environment.in_sandbox(working_directory=None)
venv_dir = complete_pex_env.pex_root / venv_rel_dir
return cls(complete_pex_env=complete_pex_env, pex=pex, venv_dir=venv_dir)

Expand Down Expand Up @@ -795,33 +794,39 @@ class VenvPex:
@dataclass(unsafe_hash=True)
class VenvPexRequest:
pex_request: PexRequest
complete_pex_env: CompletePexEnvironment
bin_names: tuple[str, ...] = ()
site_packages_copies: bool = False

def __init__(
self,
pex_request: PexRequest,
complete_pex_env: CompletePexEnvironment,
bin_names: Iterable[str] = (),
site_packages_copies: bool = False,
) -> None:
"""A request for a PEX that runs in a venv and optionally exposes select venv `bin` scripts.
:param pex_request: The details of the desired PEX.
:param complete_pex_env: The complete PEX environment the pex will be run in.
:param bin_names: The names of venv `bin` scripts to expose for execution.
:param site_packages_copies: `True` to use copies (hardlinks when possible) of PEX
dependencies when installing them in the venv site-packages directory. By default this
is `False` and symlinks are used instead which is a win in the time and space dimensions
but results in a non-standard venv structure that does trip up some libraries.
"""
self.pex_request = pex_request
self.complete_pex_env = complete_pex_env
self.bin_names = tuple(bin_names)
self.site_packages_copies = site_packages_copies


@rule
def wrap_venv_prex_request(pex_request: PexRequest) -> VenvPexRequest:
def wrap_venv_prex_request(
pex_request: PexRequest, pex_environment: PexEnvironment
) -> VenvPexRequest:
# Allow creating a VenvPex from a plain PexRequest when no extra bin scripts need to be exposed.
return VenvPexRequest(pex_request)
return VenvPexRequest(pex_request, pex_environment.in_sandbox(working_directory=None))


@rule
Expand Down Expand Up @@ -877,7 +882,9 @@ async def create_venv_pex(
venv_rel_dir = abs_pex_path.relative_to(abs_pex_root).parent

venv_script_writer = VenvScriptWriter.create(
pex_environment=pex_environment, pex=venv_pex_result.create_pex(), venv_rel_dir=venv_rel_dir
complete_pex_env=request.complete_pex_env,
pex=venv_pex_result.create_pex(),
venv_rel_dir=venv_rel_dir,
)
pex = venv_script_writer.exe(bash)
python = venv_script_writer.python(bash)
Expand Down

0 comments on commit 802ca32

Please sign in to comment.