Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run python sources with a VenvPex #17700

Merged
merged 3 commits into from
Dec 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions src/python/pants/backend/codegen/protobuf/python/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,17 @@ async def generate_python_from_protobuf(
protoc_gen_mypy_script = "protoc-gen-mypy"
protoc_gen_mypy_grpc_script = "protoc-gen-mypy_grpc"
mypy_pex = None
complete_pex_env = pex_environment.in_sandbox(working_directory=None)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A drive-by fix?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, below (as you may have seen) we now need to pass complete_pex_env into VenvPexRequest. We could default it, but I like the explicitness of forcing callers to choose.


if python_protobuf_subsystem.mypy_plugin:
mypy_request = python_protobuf_mypy_plugin.to_pex_request()
mypy_pex = await Get(
VenvPex,
VenvPexRequest(bin_names=[protoc_gen_mypy_script], pex_request=mypy_request),
VenvPexRequest(
pex_request=mypy_request,
complete_pex_env=complete_pex_env,
bin_names=[protoc_gen_mypy_script],
),
)

if request.protocol_target.get(ProtobufGrpcToggleField).value:
Expand All @@ -117,8 +122,9 @@ async def generate_python_from_protobuf(
mypy_pex = await Get(
VenvPex,
VenvPexRequest(
bin_names=[protoc_gen_mypy_script, protoc_gen_mypy_grpc_script],
pex_request=mypy_request,
complete_pex_env=complete_pex_env,
bin_names=[protoc_gen_mypy_script, protoc_gen_mypy_grpc_script],
),
)

Expand Down Expand Up @@ -178,9 +184,7 @@ async def generate_python_from_protobuf(
description=f"Generating Python sources from {request.protocol_target.address}.",
level=LogLevel.DEBUG,
output_directories=(output_dir,),
append_only_caches=pex_environment.in_sandbox(
working_directory=None
).append_only_caches,
append_only_caches=complete_pex_env.append_only_caches,
),
)

Expand Down
69 changes: 34 additions & 35 deletions src/python/pants/backend/python/goals/run_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).
from __future__ import annotations

import dataclasses
import os
import textwrap
from typing import Iterable, Optional
from typing import Optional

from pants.backend.python.subsystems.debugpy import DebugPy
from pants.backend.python.target_types import (
Expand All @@ -15,7 +16,7 @@
)
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.backend.python.util_rules.local_dists import LocalDistsPex, LocalDistsPexRequest
from pants.backend.python.util_rules.pex import Pex, PexRequest
from pants.backend.python.util_rules.pex import Pex, PexRequest, VenvPex, VenvPexRequest
from pants.backend.python.util_rules.pex_environment import PexEnvironment
from pants.backend.python.util_rules.pex_from_targets import (
InterpreterConstraintsRequest,
Expand Down Expand Up @@ -45,46 +46,44 @@ async def _create_python_source_run_request(
pex_env: PexEnvironment,
run_in_sandbox: bool,
console_script: Optional[ConsoleScript] = None,
additional_pex_args: Iterable[str] = (),
) -> RunRequest:
addresses = [address]
entry_point, transitive_targets = await MultiGet(
interpreter_constraints, entry_point, transitive_targets = await MultiGet(
Get(InterpreterConstraints, InterpreterConstraintsRequest(addresses)),
Get(
ResolvedPexEntryPoint,
ResolvePexEntryPointRequest(entry_point_field),
),
Get(TransitiveTargets, TransitiveTargetsRequest(addresses)),
)

interpreter_constraints = await Get(
InterpreterConstraints, InterpreterConstraintsRequest(addresses)
)

pex_filename = (
address.generated_name.replace(".", "_") if address.generated_name else address.target_name
)
pex_get = Get(
Pex,
PexFromTargetsRequest(
addresses,
output_filename=f"{pex_filename}.pex",
internal_only=True,
include_source_files=False,
# `PEX_EXTRA_SYS_PATH` should contain this entry_point's module.
main=console_script or entry_point.val,
additional_args=(
*additional_pex_args,
# N.B.: Since we cobble together the runtime environment via PEX_EXTRA_SYS_PATH
# below, it's important for any app that re-executes itself that these environment
# variables are not stripped.
"--no-strip-pex-env",

pex_request, sources = await MultiGet(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the example I was thinking of. AFAICT this diff is entirely due to inlining the two Gets into the MultiGet?

Copy link
Member Author

@thejcannon thejcannon Dec 5, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah, sorry that'd a byproduct of several refactors where it was part of the earlier MultiGet (on line 51) which has them inlined.

I wish we had better lint tools and we could choose a convention 😮‍💨

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a big deal at all, just something to think about in the future. I often do little cleanups like this in PRs and then realize it's contributing to the diff in ways that aren't directly pertinent to the change at hand...

Get(
PexRequest,
PexFromTargetsRequest(
addresses,
output_filename=f"{pex_filename}.pex",
internal_only=True,
include_source_files=False,
# `PEX_EXTRA_SYS_PATH` should contain this entry_point's module.
main=console_script or entry_point.val,
additional_args=(
# N.B.: Since we cobble together the runtime environment via PEX_EXTRA_SYS_PATH
# below, it's important for any app that re-executes itself that these environment
# variables are not stripped.
"--no-strip-pex-env",
),
),
),
Get(
PythonSourceFiles,
PythonSourceFilesRequest(transitive_targets.closure, include_files=True),
),
)
sources_get = Get(
PythonSourceFiles, PythonSourceFilesRequest(transitive_targets.closure, include_files=True)
)
pex, sources = await MultiGet(pex_get, sources_get)

local_dists = await Get(
LocalDistsPex,
Expand All @@ -95,10 +94,14 @@ async def _create_python_source_run_request(
sources=sources,
),
)
pex_request = dataclasses.replace(
pex_request, pex_path=(*pex_request.pex_path, local_dists.pex)
)

complete_pex_environment = pex_env.in_workspace()
venv_pex = await Get(VenvPex, VenvPexRequest(pex_request, complete_pex_environment))
input_digests = [
pex.digest,
local_dists.pex.digest,
venv_pex.digest,
# Note regarding not-in-sandbox mode: You might think that the sources don't need to be copied
# into the chroot when using inline sources. But they do, because some of them might be
# codegenned, and those won't exist in the inline source tree. Rather than incurring the
Expand All @@ -109,9 +112,6 @@ async def _create_python_source_run_request(
]
merged_digest = await Get(Digest, MergeDigests(input_digests))

complete_pex_env = pex_env.in_workspace()
args = complete_pex_env.create_argv(_in_chroot(pex.name), python=pex.python)

chrooted_source_roots = [_in_chroot(sr) for sr in sources.source_roots]
# The order here is important: we want the in-repo sources to take precedence over their
# copies in the sandbox (see above for why those copies exist even in non-sandboxed mode).
Expand All @@ -120,14 +120,13 @@ async def _create_python_source_run_request(
*chrooted_source_roots,
]
extra_env = {
**pex_env.in_workspace().environment_dict(python_configured=pex.python is not None),
"PEX_PATH": _in_chroot(local_dists.pex.name),
**complete_pex_environment.environment_dict(python_configured=venv_pex.python is not None),
"PEX_EXTRA_SYS_PATH": os.pathsep.join(source_roots),
}

return RunRequest(
digest=merged_digest,
args=args,
args=[_in_chroot(venv_pex.pex.argv0)],
extra_env=extra_env,
)

Expand Down
2 changes: 0 additions & 2 deletions src/python/pants/backend/python/goals/run_python_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ async def create_python_source_run_request(
entry_point_field=PexEntryPointField(field_set.source.value, field_set.address),
pex_env=pex_env,
run_in_sandbox=run_goal_use_sandbox,
# Setting --venv is kosher because the PEX we create is just for the thirdparty deps.
additional_pex_args=["--venv"],
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,4 +222,34 @@ def test_local_dist() -> None:
f"{tmpdir}/foo/main.py",
]
result = run_pants(args)
assert result.stdout == "LOCAL DIST\n"
assert result.stdout == "LOCAL DIST\n", result.stderr


def test_runs_in_venv() -> None:
# NB: We aren't just testing an implementation detail, users can and should expect their code to
# be run just as if they ran their code in a virtualenv (as is common in the Python ecosystem).
sources = {
"src/app.py": dedent(
"""\
import os
import sys

if __name__ == "__main__":
sys.exit(0 if "VIRTUAL_ENV" in os.environ else 1)
"""
),
"src/BUILD": dedent(
"""\
python_sources(name="lib")
"""
),
}
with setup_tmpdir(sources) as tmpdir:
args = [
"--backend-packages=pants.backend.python",
f"--source-root-patterns=['/{tmpdir}/src']",
"run",
f"{tmpdir}/src/app.py",
]
result = run_pants(args)
assert result.exit_code == 0
3 changes: 3 additions & 0 deletions src/python/pants/backend/python/lint/pylint/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
VenvPexProcess,
VenvPexRequest,
)
from pants.backend.python.util_rules.pex_environment import PexEnvironment
from pants.backend.python.util_rules.pex_from_targets import RequirementsPexRequest
from pants.backend.python.util_rules.python_sources import (
PythonSourceFiles,
Expand Down Expand Up @@ -118,6 +119,7 @@ async def run_pylint(
request: PylintRequest.Batch[PylintFieldSet, PartitionMetadata],
pylint: Pylint,
first_party_plugins: PylintFirstPartyPlugins,
pex_environment: PexEnvironment,
) -> LintResult:
assert request.partition_metadata is not None

Expand Down Expand Up @@ -181,6 +183,7 @@ async def run_pylint(
internal_only=True,
pex_path=[pylint_pex, requirements_pex],
),
pex_environment.in_sandbox(working_directory=None),
# Astroid < 2.9.1 had a regression that prevented the use of symlinks:
# https://github.com/PyCQA/pylint/issues/1470
site_packages_copies=(astroid_info.version < packaging.version.Version("2.9.1")),
Expand Down
17 changes: 12 additions & 5 deletions src/python/pants/backend/python/util_rules/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,13 +667,12 @@ class VenvScriptWriter:

@classmethod
def create(
cls, pex_environment: PexEnvironment, pex: Pex, venv_rel_dir: PurePath
cls, complete_pex_env: CompletePexEnvironment, pex: Pex, venv_rel_dir: PurePath
) -> VenvScriptWriter:
# N.B.: We don't know the working directory that will be used in any given
# invocation of the venv scripts; so we deal with working_directory once in an
# `adjust_relative_paths` function inside the script to save rule authors from having to do
# CWD offset math in every rule for all the relative paths their process depends on.
complete_pex_env = pex_environment.in_sandbox(working_directory=None)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like you stripped the comment of its claim. The now CompletePexEnvironment may have a known working directory, which seems to suggest an assert complete_pex_env._working_directory is None, but that's obviously begging why let people pass this in at all?

Copy link
Member Author

@thejcannon thejcannon Dec 3, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I'm only partially grokking the comment, but it still feels true, albeit missing a qualifier.

Should it not start with "We may not know the ..."?

EDIT: No, if we may not know, then we don't know... so I think it's still OK

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh and "why let people pass this in at all?": Because in the case of run, working_directory very much is set to something: the build root!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha, missed that obvious case given the PR subject. As to the comment, it seems like "in any given invocation" -> "in any given sandbox invocation" then makes it True again with the creation of the complete_pex_env now delegated upstream. Not worth another CI burn though.

venv_dir = complete_pex_env.pex_root / venv_rel_dir
return cls(complete_pex_env=complete_pex_env, pex=pex, venv_dir=venv_dir)

Expand Down Expand Up @@ -795,33 +794,39 @@ class VenvPex:
@dataclass(unsafe_hash=True)
class VenvPexRequest:
pex_request: PexRequest
complete_pex_env: CompletePexEnvironment
bin_names: tuple[str, ...] = ()
site_packages_copies: bool = False

def __init__(
self,
pex_request: PexRequest,
complete_pex_env: CompletePexEnvironment,
bin_names: Iterable[str] = (),
site_packages_copies: bool = False,
) -> None:
"""A request for a PEX that runs in a venv and optionally exposes select venv `bin` scripts.

:param pex_request: The details of the desired PEX.
:param complete_pex_env: The complete PEX environment the pex will be run in.
:param bin_names: The names of venv `bin` scripts to expose for execution.
:param site_packages_copies: `True` to use copies (hardlinks when possible) of PEX
dependencies when installing them in the venv site-packages directory. By default this
is `False` and symlinks are used instead which is a win in the time and space dimensions
but results in a non-standard venv structure that does trip up some libraries.
"""
self.pex_request = pex_request
self.complete_pex_env = complete_pex_env
self.bin_names = tuple(bin_names)
self.site_packages_copies = site_packages_copies


@rule
def wrap_venv_prex_request(pex_request: PexRequest) -> VenvPexRequest:
def wrap_venv_prex_request(
pex_request: PexRequest, pex_environment: PexEnvironment
) -> VenvPexRequest:
# Allow creating a VenvPex from a plain PexRequest when no extra bin scripts need to be exposed.
return VenvPexRequest(pex_request)
return VenvPexRequest(pex_request, pex_environment.in_sandbox(working_directory=None))


@rule
Expand Down Expand Up @@ -877,7 +882,9 @@ async def create_venv_pex(
venv_rel_dir = abs_pex_path.relative_to(abs_pex_root).parent

venv_script_writer = VenvScriptWriter.create(
pex_environment=pex_environment, pex=venv_pex_result.create_pex(), venv_rel_dir=venv_rel_dir
complete_pex_env=request.complete_pex_env,
pex=venv_pex_result.create_pex(),
venv_rel_dir=venv_rel_dir,
)
pex = venv_script_writer.exe(bash)
python = venv_script_writer.python(bash)
Expand Down