Skip to content

Commit

Permalink
Add path to cache key for Rust dep inference, for relative imports (c…
Browse files Browse the repository at this point in the history
…herry-pick of #19630) (#19640)

This adds the file path for the file being dependency-inferred to the
cache key for the Rust dep inference. Without this, the same file
contents appearing multiple times in different places will give the
wrong results for relative imports, because the dep inference process
mashes together the file path and the relative import.

The circumstances here seem most likely to occur in the real world if a
file is moved, with the inference results from before the move reused
for the file _after_ the move too.

I've tested this manually on the reproducer in
#19618 (comment),
in addition to comparing the before (fails) and after (passes) behaviour
of the new test.

To try to make this code more resilient to this sort of mistake, I've
removed the top level `PreparedInferenceRequest.path` key, in favour of
using the new `input_file_path` on the request protobuf struct, at the
cost of an additional `String` -> `PathBuf` conversion when doing the
inference.

Fixes #19618 

This is a cherry pick of #19630, but is essentially a slightly weird
rewrite that partially cherry-picks #19001 too, by copying over the
whole `DependencyInferenceRequest` protobuf type as is (even with some
fields that are unused) because that's the easiest way to generate
appropriate bytes for turning into the digest for use in the cache-key.
I think it's okay to have this "weird" behaviour in the 2.17 branch,
with the real/normal code in main/2.18?
  • Loading branch information
huonw authored Aug 22, 2023
1 parent c576de6 commit feeb60a
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 2 deletions.
44 changes: 44 additions & 0 deletions src/python/pants/backend/python/dependency_inference/rules_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,50 @@ def test_infer_python_strict_multiple_resolves(imports_rule_runner: PythonRuleRu
)


def test_infer_python_identical_files_with_relative_imports_should_be_treated_differently(
imports_rule_runner: PythonRuleRunner,
) -> None:
# dependency inference shouldn't cache _just_ based on file contents, because this can break
# relative imports. When b reused a's results, b/__init__.py was incorrectly depending on
# a/file.py (https://github.com/pantsbuild/pants/issues/19618).
contents = "from . import file"
imports_rule_runner.write_files(
{
"a/BUILD": "python_sources()",
"a/__init__.py": contents,
"a/file.py": "",
"b/BUILD": "python_sources()",
"b/__init__.py": contents,
"b/file.py": "",
}
)

def get_deps(directory: str) -> InferredDependencies:
tgt = imports_rule_runner.get_target(
Address(directory, target_name=directory, relative_file_path="__init__.py")
)

return imports_rule_runner.request(
InferredDependencies,
[InferPythonImportDependencies(PythonImportDependenciesInferenceFieldSet.create(tgt))],
)

# first, seed the cache with the deps for "a"
assert get_deps("a") == InferredDependencies(
[
Address("a", target_name="a", relative_file_path="file.py"),
]
)

# then, run with "b", which _shouldn't_ reuse the cache from the previous run to give
# "a/file.py:a" (as it did previously, see #19618), and should instead give "b/file.py:b"
assert get_deps("b") == InferredDependencies(
[
Address("b", target_name="b", relative_file_path="file.py"),
]
)


class TestCategoriseImportsInfo:
address = Address("sample/path")
import_cases = {
Expand Down
21 changes: 21 additions & 0 deletions src/rust/engine/protos/protos/pants/cache.proto
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,27 @@ message CacheKey {
build.bazel.remote.execution.v2.Digest digest = 2;
}

message DependencyInferenceRequest {
build.bazel.remote.execution.v2.Digest input_file_digest = 1;
oneof metadata {
JavascriptInferenceMetadata js = 2;
}
// Ensure using this as a cache key reflects everything that might influence the output: inference
// implementation inside Pants, and the input's file location (if there's any relative imports)
string impl_hash = 3;
string input_file_path = 4;
}


message JavascriptInferenceMetadata {
message ImportPattern {
string pattern = 1;
repeated string replacements = 2;
}
string package_root = 1;
repeated ImportPattern import_patterns = 2;
}

// A URL and Digest tuple, which is itself digested and used as a CacheKey. ObservedURLs
// collectively represent the set of digests that we have ever observed for a particular URL:
// their cache value is always empty.
Expand Down
11 changes: 9 additions & 2 deletions src/rust/engine/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ use crate::tasks::Intrinsic;
use crate::types::Types;
use crate::Failure;
use dep_inference::python::get_dependencies;
use protos::gen::pants::cache::{CacheKey, CacheKeyType};
use grpc_util::prost::MessageExt;
use protos::gen::pants::cache::{CacheKey, CacheKeyType, DependencyInferenceRequest};

use bytes::Bytes;
use futures::future::{BoxFuture, FutureExt, TryFutureExt};
Expand Down Expand Up @@ -783,9 +784,15 @@ fn parse_python_deps(context: Context, args: Vec<Value>) -> BoxFuture<'static, N
Level::Debug,
desc = Some(format!("Determine Python dependencies for {path:?}")),
|_workunit| async move {
let request = DependencyInferenceRequest {
input_file_digest: Some(digest.into()),
metadata: None,
impl_hash: "fixed value for 2.17".into(),
input_file_path: path.to_str().expect("must have UTF-8 path").into(),
};
let cache_key = CacheKey {
key_type: CacheKeyType::DepInferenceRequest.into(),
digest: Some(digest.into()),
digest: Some(Digest::of_bytes(&request.to_bytes()).into()),
};
let cached_result = core.local_cache.load(&cache_key).await?;

Expand Down

0 comments on commit feeb60a

Please sign in to comment.