From ec77fa9346dd612006dbdba24e5d823b7733da46 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 22 Aug 2023 10:43:31 +1000 Subject: [PATCH] Add path to cache key for Rust dep inference, for relative imports (#19630) This adds the file path for the file being dependency-inferred to the cache key for the Rust dep inference. Without this, the same file contents appearing multiple times in different places will give the wrong results for relative imports, because the dep inference process mashes together the file path and the relative import. The circumstances here seem most likely to occur in the real world if a file is moved, with the inference results from before the move reused for the file _after_ the move too. I've tested this manually on the reproducer in https://github.com/pantsbuild/pants/issues/19618#issuecomment-1685631591, in addition to comparing the before (fails) and after (passes) behaviour of the new test. To try to make this code more resilient to this sort of mistake, I've removed the top level `PreparedInferenceRequest.path` key, in favour of using the new `input_file_path` on the request protobuf struct, at the cost of an additional `String` -> `PathBuf` conversion when doing the inference. Fixes #19618 --- .../python/dependency_inference/rules_test.py | 44 +++++++++++++++++++ .../engine/protos/protos/pants/cache.proto | 3 ++ src/rust/engine/src/intrinsics.rs | 20 ++++++--- 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/src/python/pants/backend/python/dependency_inference/rules_test.py b/src/python/pants/backend/python/dependency_inference/rules_test.py index d73d320fcb7..dbb684235a4 100644 --- a/src/python/pants/backend/python/dependency_inference/rules_test.py +++ b/src/python/pants/backend/python/dependency_inference/rules_test.py @@ -744,6 +744,50 @@ def test_infer_python_strict_multiple_resolves(imports_rule_runner: PythonRuleRu ) +def test_infer_python_identical_files_with_relative_imports_should_be_treated_differently( + imports_rule_runner: PythonRuleRunner, +) -> None: + # dependency inference shouldn't cache _just_ based on file contents, because this can break + # relative imports. When b reused a's results, b/__init__.py was incorrectly depending on + # a/file.py (https://github.com/pantsbuild/pants/issues/19618). + contents = "from . import file" + imports_rule_runner.write_files( + { + "a/BUILD": "python_sources()", + "a/__init__.py": contents, + "a/file.py": "", + "b/BUILD": "python_sources()", + "b/__init__.py": contents, + "b/file.py": "", + } + ) + + def get_deps(directory: str) -> InferredDependencies: + tgt = imports_rule_runner.get_target( + Address(directory, target_name=directory, relative_file_path="__init__.py") + ) + + return imports_rule_runner.request( + InferredDependencies, + [InferPythonImportDependencies(PythonImportDependenciesInferenceFieldSet.create(tgt))], + ) + + # first, seed the cache with the deps for "a" + assert get_deps("a") == InferredDependencies( + [ + Address("a", target_name="a", relative_file_path="file.py"), + ] + ) + + # then, run with "b", which _shouldn't_ reuse the cache from the previous run to give + # "a/file.py:a" (as it did previously, see #19618), and should instead give "b/file.py:b" + assert get_deps("b") == InferredDependencies( + [ + Address("b", target_name="b", relative_file_path="file.py"), + ] + ) + + class TestCategoriseImportsInfo: address = Address("sample/path") import_cases = { diff --git a/src/rust/engine/protos/protos/pants/cache.proto b/src/rust/engine/protos/protos/pants/cache.proto index 5734fa86967..8d666ac3f61 100644 --- a/src/rust/engine/protos/protos/pants/cache.proto +++ b/src/rust/engine/protos/protos/pants/cache.proto @@ -22,7 +22,10 @@ message DependencyInferenceRequest { oneof metadata { JavascriptInferenceMetadata js = 2; } + // Ensure using this as a cache key reflects everything that might influence the output: inference + // implementation inside Pants, and the input's file location (if there's any relative imports) string impl_hash = 3; + string input_file_path = 4; } diff --git a/src/rust/engine/src/intrinsics.rs b/src/rust/engine/src/intrinsics.rs index 094c0d546dd..c00448cfde3 100644 --- a/src/rust/engine/src/intrinsics.rs +++ b/src/rust/engine/src/intrinsics.rs @@ -765,8 +765,11 @@ fn docker_resolve_image( } struct PreparedInferenceRequest { - pub path: PathBuf, - pub digest: Digest, + digest: Digest, + /// The request that's guaranteed to have been constructed via ::prepare(). + /// + /// NB. this `inner` value is used as the cache key, so anything that can influence the dep + /// inference should (also) be inside it, not just a key on the outer struct inner: DependencyInferenceRequest, } @@ -783,11 +786,12 @@ impl PreparedInferenceRequest { } = Python::with_gil(|py| (*args[0]).as_ref(py).extract())?; let (path, digest) = Self::find_one_file(directory_digest, store, backend).await?; + let str_path = path.display().to_string(); Ok(Self { - path, digest, inner: DependencyInferenceRequest { + input_file_path: str_path, input_file_digest: Some(digest.into()), metadata, impl_hash: impl_hash.to_string(), @@ -851,14 +855,16 @@ fn parse_python_deps(context: Context, args: Vec) -> BoxFuture<'static, N Level::Debug, desc = Some(format!( "Determine Python dependencies for {:?}", - &prepared_inference_request.path + &prepared_inference_request.inner.input_file_path )), |_workunit| async move { let result: ParsedPythonDependencies = get_or_create_inferred_dependencies( core, &store, prepared_inference_request, - |content, request| python::get_dependencies(content, request.path), + |content, request| { + python::get_dependencies(content, request.inner.input_file_path.into()) + }, ) .await?; @@ -896,7 +902,7 @@ fn parse_javascript_deps( Level::Debug, desc = Some(format!( "Determine Javascript dependencies for {:?}", - prepared_inference_request.path + prepared_inference_request.inner.input_file_path )), |_workunit| async move { let result: ParsedJavascriptDependencies = get_or_create_inferred_dependencies( @@ -907,7 +913,7 @@ fn parse_javascript_deps( if let Some(dependency_inference_request::Metadata::Js(metadata)) = request.inner.metadata { - javascript::get_dependencies(content, request.path, metadata) + javascript::get_dependencies(content, request.inner.input_file_path.into(), metadata) } else { Err(format!( "{:?} is not valid metadata for Javascript dependency inference",