Add path to cache key for Rust dep inference, for relative imports (#…

…19630) This adds the file path for the file being dependency-inferred to the cache key for the Rust dep inference. Without this, the same file contents appearing multiple times in different places will give the wrong results for relative imports, because the dep inference process mashes together the file path and the relative import. The circumstances here seem most likely to occur in the real world if a file is moved, with the inference results from before the move reused for the file _after_ the move too. I've tested this manually on the reproducer in #19618 (comment), in addition to comparing the before (fails) and after (passes) behaviour of the new test. To try to make this code more resilient to this sort of mistake, I've removed the top level `PreparedInferenceRequest.path` key, in favour of using the new `input_file_path` on the request protobuf struct, at the cost of an additional `String` -> `PathBuf` conversion when doing the inference. Fixes #19618
pantsbuild · Aug 29, 2023 · f5cb420 · f5cb420
1 parent f1ec650
commit f5cb420
Show file tree

Hide file tree

Showing 3 changed files with 60 additions and 7 deletions.
diff --git a/src/python/pants/backend/python/dependency_inference/rules_test.py b/src/python/pants/backend/python/dependency_inference/rules_test.py
@@ -744,6 +744,50 @@ def test_infer_python_strict_multiple_resolves(imports_rule_runner: PythonRuleRu
         )
 
 
+def test_infer_python_identical_files_with_relative_imports_should_be_treated_differently(
+    imports_rule_runner: PythonRuleRunner,
+) -> None:
+    # dependency inference shouldn't cache _just_ based on file contents, because this can break
+    # relative imports. When b reused a's results, b/__init__.py was incorrectly depending on
+    # a/file.py (https://github.com/pantsbuild/pants/issues/19618).
+    contents = "from . import file"
+    imports_rule_runner.write_files(
+        {
+            "a/BUILD": "python_sources()",
+            "a/__init__.py": contents,
+            "a/file.py": "",
+            "b/BUILD": "python_sources()",
+            "b/__init__.py": contents,
+            "b/file.py": "",
+        }
+    )
+
+    def get_deps(directory: str) -> InferredDependencies:
+        tgt = imports_rule_runner.get_target(
+            Address(directory, target_name=directory, relative_file_path="__init__.py")
+        )
+
+        return imports_rule_runner.request(
+            InferredDependencies,
+            [InferPythonImportDependencies(PythonImportDependenciesInferenceFieldSet.create(tgt))],
+        )
+
+    # first, seed the cache with the deps for "a"
+    assert get_deps("a") == InferredDependencies(
+        [
+            Address("a", target_name="a", relative_file_path="file.py"),
+        ]
+    )
+
+    # then, run with "b", which _shouldn't_ reuse the cache from the previous run to give
+    # "a/file.py:a" (as it did previously, see #19618), and should instead give "b/file.py:b"
+    assert get_deps("b") == InferredDependencies(
+        [
+            Address("b", target_name="b", relative_file_path="file.py"),
+        ]
+    )
+
+
 class TestCategoriseImportsInfo:
     address = Address("sample/path")
     import_cases = {

diff --git a/src/rust/engine/protos/protos/pants/cache.proto b/src/rust/engine/protos/protos/pants/cache.proto
@@ -22,7 +22,10 @@ message DependencyInferenceRequest {
   oneof metadata {
     JavascriptInferenceMetadata js = 2;
   }
+  // Ensure using this as a cache key reflects everything that might influence the output: inference
+  // implementation inside Pants, and the input's file location (if there's any relative imports)
   string impl_hash = 3;
+  string input_file_path = 4;
 }
 
 

diff --git a/src/rust/engine/src/intrinsics.rs b/src/rust/engine/src/intrinsics.rs
@@ -765,8 +765,11 @@ fn docker_resolve_image(
 }
 
 struct PreparedInferenceRequest {
-  pub path: PathBuf,
-  pub digest: Digest,
+  digest: Digest,
+  /// The request that's guaranteed to have been constructed via ::prepare().
+  ///
+  /// NB. this `inner` value is used as the cache key, so anything that can influence the dep
+  /// inference should (also) be inside it, not just a key on the outer struct
   inner: DependencyInferenceRequest,
 }
 
@@ -783,11 +786,12 @@ impl PreparedInferenceRequest {
     } = Python::with_gil(|py| (*args[0]).as_ref(py).extract())?;
 
     let (path, digest) = Self::find_one_file(directory_digest, store, backend).await?;
+    let str_path = path.display().to_string();
 
     Ok(Self {
-      path,
       digest,
       inner: DependencyInferenceRequest {
+        input_file_path: str_path,
         input_file_digest: Some(digest.into()),
         metadata,
         impl_hash: impl_hash.to_string(),
@@ -851,14 +855,16 @@ fn parse_python_deps(context: Context, args: Vec<Value>) -> BoxFuture<'static, N
       Level::Debug,
       desc = Some(format!(
         "Determine Python dependencies for {:?}",
-        &prepared_inference_request.path
+        &prepared_inference_request.inner.input_file_path
       )),
       |_workunit| async move {
         let result: ParsedPythonDependencies = get_or_create_inferred_dependencies(
           core,
           &store,
           prepared_inference_request,
-          |content, request| python::get_dependencies(content, request.path),
+          |content, request| {
+            python::get_dependencies(content, request.inner.input_file_path.into())
+          },
         )
         .await?;
 
@@ -896,7 +902,7 @@ fn parse_javascript_deps(
       Level::Debug,
       desc = Some(format!(
         "Determine Javascript dependencies for {:?}",
-        prepared_inference_request.path
+        prepared_inference_request.inner.input_file_path
       )),
       |_workunit| async move {
         let result: ParsedJavascriptDependencies = get_or_create_inferred_dependencies(
@@ -907,7 +913,7 @@ fn parse_javascript_deps(
             if let Some(dependency_inference_request::Metadata::Js(metadata)) =
               request.inner.metadata
             {
-              javascript::get_dependencies(content, request.path, metadata)
+              javascript::get_dependencies(content, request.inner.input_file_path.into(), metadata)
             } else {
               Err(format!(
                 "{:?} is not valid metadata for Javascript dependency inference",