Skip to content

Commit

Permalink
Add path to cache key for Rust dep inference, for relative imports (#…
Browse files Browse the repository at this point in the history
…19630)

This adds the file path for the file being dependency-inferred to the
cache key for the Rust dep inference. Without this, the same file
contents appearing multiple times in different places will give the
wrong results for relative imports, because the dep inference process
mashes together the file path and the relative import.

The circumstances here seem most likely to occur in the real world if a
file is moved, with the inference results from before the move reused
for the file _after_ the move too.

I've tested this manually on the reproducer in
#19618 (comment),
in addition to comparing the before (fails) and after (passes) behaviour
of the new test.

To try to make this code more resilient to this sort of mistake, I've
removed the top level `PreparedInferenceRequest.path` key, in favour of
using the new `input_file_path` on the request protobuf struct, at the
cost of an additional `String` -> `PathBuf` conversion when doing the
inference.

Fixes #19618
  • Loading branch information
huonw authored and WorkerPants committed Aug 29, 2023
1 parent f1ec650 commit f5cb420
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 7 deletions.
44 changes: 44 additions & 0 deletions src/python/pants/backend/python/dependency_inference/rules_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,50 @@ def test_infer_python_strict_multiple_resolves(imports_rule_runner: PythonRuleRu
)


def test_infer_python_identical_files_with_relative_imports_should_be_treated_differently(
imports_rule_runner: PythonRuleRunner,
) -> None:
# dependency inference shouldn't cache _just_ based on file contents, because this can break
# relative imports. When b reused a's results, b/__init__.py was incorrectly depending on
# a/file.py (https://github.com/pantsbuild/pants/issues/19618).
contents = "from . import file"
imports_rule_runner.write_files(
{
"a/BUILD": "python_sources()",
"a/__init__.py": contents,
"a/file.py": "",
"b/BUILD": "python_sources()",
"b/__init__.py": contents,
"b/file.py": "",
}
)

def get_deps(directory: str) -> InferredDependencies:
tgt = imports_rule_runner.get_target(
Address(directory, target_name=directory, relative_file_path="__init__.py")
)

return imports_rule_runner.request(
InferredDependencies,
[InferPythonImportDependencies(PythonImportDependenciesInferenceFieldSet.create(tgt))],
)

# first, seed the cache with the deps for "a"
assert get_deps("a") == InferredDependencies(
[
Address("a", target_name="a", relative_file_path="file.py"),
]
)

# then, run with "b", which _shouldn't_ reuse the cache from the previous run to give
# "a/file.py:a" (as it did previously, see #19618), and should instead give "b/file.py:b"
assert get_deps("b") == InferredDependencies(
[
Address("b", target_name="b", relative_file_path="file.py"),
]
)


class TestCategoriseImportsInfo:
address = Address("sample/path")
import_cases = {
Expand Down
3 changes: 3 additions & 0 deletions src/rust/engine/protos/protos/pants/cache.proto
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ message DependencyInferenceRequest {
oneof metadata {
JavascriptInferenceMetadata js = 2;
}
// Ensure using this as a cache key reflects everything that might influence the output: inference
// implementation inside Pants, and the input's file location (if there's any relative imports)
string impl_hash = 3;
string input_file_path = 4;
}


Expand Down
20 changes: 13 additions & 7 deletions src/rust/engine/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -765,8 +765,11 @@ fn docker_resolve_image(
}

struct PreparedInferenceRequest {
pub path: PathBuf,
pub digest: Digest,
digest: Digest,
/// The request that's guaranteed to have been constructed via ::prepare().
///
/// NB. this `inner` value is used as the cache key, so anything that can influence the dep
/// inference should (also) be inside it, not just a key on the outer struct
inner: DependencyInferenceRequest,
}

Expand All @@ -783,11 +786,12 @@ impl PreparedInferenceRequest {
} = Python::with_gil(|py| (*args[0]).as_ref(py).extract())?;

let (path, digest) = Self::find_one_file(directory_digest, store, backend).await?;
let str_path = path.display().to_string();

Ok(Self {
path,
digest,
inner: DependencyInferenceRequest {
input_file_path: str_path,
input_file_digest: Some(digest.into()),
metadata,
impl_hash: impl_hash.to_string(),
Expand Down Expand Up @@ -851,14 +855,16 @@ fn parse_python_deps(context: Context, args: Vec<Value>) -> BoxFuture<'static, N
Level::Debug,
desc = Some(format!(
"Determine Python dependencies for {:?}",
&prepared_inference_request.path
&prepared_inference_request.inner.input_file_path
)),
|_workunit| async move {
let result: ParsedPythonDependencies = get_or_create_inferred_dependencies(
core,
&store,
prepared_inference_request,
|content, request| python::get_dependencies(content, request.path),
|content, request| {
python::get_dependencies(content, request.inner.input_file_path.into())
},
)
.await?;

Expand Down Expand Up @@ -896,7 +902,7 @@ fn parse_javascript_deps(
Level::Debug,
desc = Some(format!(
"Determine Javascript dependencies for {:?}",
prepared_inference_request.path
prepared_inference_request.inner.input_file_path
)),
|_workunit| async move {
let result: ParsedJavascriptDependencies = get_or_create_inferred_dependencies(
Expand All @@ -907,7 +913,7 @@ fn parse_javascript_deps(
if let Some(dependency_inference_request::Metadata::Js(metadata)) =
request.inner.metadata
{
javascript::get_dependencies(content, request.path, metadata)
javascript::get_dependencies(content, request.inner.input_file_path.into(), metadata)
} else {
Err(format!(
"{:?} is not valid metadata for Javascript dependency inference",
Expand Down

0 comments on commit f5cb420

Please sign in to comment.