Fix caching when mounting a cached stage with COPY/ADD

A comment states that avoidLookingCache is set when a previous stage that executed as part of the build is referenced by --mount, to avoid reusing content from an older build of the stage: // Only attempt to find cache if its needed, this part // so that if a step is using RUN --mount and mounts // previous stages then it uses the freshly built stage // of re-using the older stage from the store. However, stages consisting of COPY/ADD seem to be flagged with didExecute even if they were fetched from cache instead. I believe this is an oversight, and these stages should not prevent subsequent caching. Also, avoidLookingCache would prevent a cache push, but I think it should only prevent cache lookups, since populating the cache is still useful in these caess. It's very possible I'm misunderstanding something, but I believe the RUN step in test case I've added wrongly skips cache, and I'd appreciate some pointers in the right direction if what I've proposed here isn't the right solution. Signed-off-by: Aaron Lehmann <alehmann@netflix.com>
containers · Mar 31, 2024 · b413360 · b413360
1 parent 39ea15c
commit b413360
Show file tree

Hide file tree

Showing 4 changed files with 53 additions and 2 deletions.
diff --git a/imagebuildah/stage_executor.go b/imagebuildah/stage_executor.go
@@ -1400,7 +1400,7 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
 			}
 		}
 
-		needsCacheKey := (len(s.executor.cacheFrom) != 0 || len(s.executor.cacheTo) != 0) && !avoidLookingCache
+		needsCacheKey := (len(s.executor.cacheFrom) != 0 && !avoidLookingCache) || len(s.executor.cacheTo) != 0
 
 		// If we have to commit for this instruction, only assign the
 		// stage's configured output name to the last layer.
@@ -1431,7 +1431,6 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
 			// and copy the content.
 			canMatchCacheOnlyAfterRun = (step.Command == command.Add || step.Command == command.Copy)
 			if canMatchCacheOnlyAfterRun {
-				s.didExecute = true
 				if err = ib.Run(step, s, noRunsRemaining); err != nil {
 					logrus.Debugf("Error building at step %+v: %v", *step, err)
 					return "", nil, false, fmt.Errorf("building at STEP \"%s\": %w", step.Message, err)
@@ -1468,6 +1467,9 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
 					}
 				}
 			}
+			if canMatchCacheOnlyAfterRun && cacheID == "" {
+				s.didExecute = true
+			}
 		}
 
 		// If we didn't find a cache entry, or we need to add content

diff --git a/tests/bud.bats b/tests/bud.bats
@@ -5271,6 +5271,49 @@ _EOF
 
 }
 
+@test "build test run mounting stage cached from remote cache source" {
+  _prefetch alpine
+
+  start_registry
+  run_buildah login --tls-verify=false --authfile ${TEST_SCRATCH_DIR}/test.auth --username testuser --password testpassword localhost:${REGISTRY_PORT}
+
+  # ------ Test case ------ #
+  # prepare expected output beforehand
+  run printf "STEP 2/2: COPY / /\n--> Pushing cache"
+  step1_2=$output
+  run printf "COMMIT test\n--> Pushing cache"
+  step2_2=$output
+
+  # actually run build
+  run_buildah build $WITH_POLICY_JSON --tls-verify=false --authfile ${TEST_SCRATCH_DIR}/test.auth --layers --cache-to localhost:${REGISTRY_PORT}/temp -t test -f $BUDFILES/cache-from-stage/Containerfile $BUDFILES/cache-from-stage
+  expect_output --substring "$step1_2"
+  #expect_output --substring "$step2_2"
+
+  # clean all cache and intermediate images
+  # to make sure that we are only using cache
+  # from remote repo and not the local storage.
+
+  # Important side-note: don't use `run_buildah rmi --all -f`
+  # since on podman-remote test this will remove prefetched alpine
+  # and it will try to pull alpine from docker.io with
+  # completely different digest (ruining our cache logic).
+  run_buildah rmi test
+
+  # ------ Test case ------ #
+  # expect cache to be pushed on remote stream
+  # now a build on clean slate must pull cache
+  # from remote instead of actually computing the
+  # run steps
+  run printf "STEP 2/2: COPY / /\n--> Using cache"
+  step1_2=$output
+  run printf "STEP 2/2: RUN --mount=type=bind,from=stage1,target=/mnt echo hi > test\n--> Cache pulled from remote"
+  step2_2=$output
+  run_buildah build $WITH_POLICY_JSON --tls-verify=false --authfile ${TEST_SCRATCH_DIR}/test.auth --layers --cache-from localhost:${REGISTRY_PORT}/temp --cache-to localhost:${REGISTRY_PORT}/temp -t test -f $BUDFILES/cache-from-stage/Containerfile $BUDFILES/cache-from-stage
+  expect_output --substring "$step1_2"
+  expect_output --substring "$step2_2"
+
+}
+
 @test "bud with undefined build arg directory" {
   _prefetch alpine
   mytmpdir=${TEST_SCRATCH_DIR}/my-dir1

diff --git a/tests/bud/cache-from-stage/Containerfile b/tests/bud/cache-from-stage/Containerfile
@@ -0,0 +1,5 @@
+FROM scratch AS stage1
+COPY / /
+
+FROM alpine
+RUN --mount=type=bind,from=stage1,target=/mnt echo hi > test
diff --git a/tests/bud/cache-from-stage/testfile b/tests/bud/cache-from-stage/testfile
@@ -0,0 +1 @@
+Hi, I'm a test file.  Enjoy the test.