From ecb600dbad8850a77f626b8dac31dab58b315bd4 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Sun, 1 May 2022 10:16:01 +0200 Subject: [PATCH] Create output directories for remote execution By explicitly declaring output directories as inputs to a remote action, this commit ensures that these directories are created by the remote executor prior to the execution of the action. This brings the behavior of remote execution regarding tree artifacts in line with that of local or sandboxed execution. Getting the tests to pass required modifying a check in Bazel's own remote worker implementation: Previously, the worker explicitly verified that output directories don't exist after the inputs have been staged. This behavior is not backed by the spec and has thus been modified: Now, it is only checked that the output directories either don't exist or are directories. --- .../lib/remote/RemoteExecutionService.java | 34 ++++++++++--- .../merkletree/DirectoryTreeBuilder.java | 28 +++-------- .../bazel/remote/remote_execution_test.sh | 50 +++++++++++++++++++ .../build/remote/worker/ExecutionServer.java | 5 +- 4 files changed, 87 insertions(+), 30 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java index e274afcac6e410..89b7d8b96e3de8 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java @@ -139,6 +139,7 @@ import java.util.Objects; import java.util.Set; import java.util.SortedMap; +import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.Executor; @@ -305,8 +306,23 @@ public boolean mayBeExecutedRemotely(Spawn spawn) { && Spawns.mayBeExecutedRemotely(spawn); } + private SortedMap buildOutputDirMap(Spawn spawn) { + TreeMap outputDirMap = new TreeMap<>(); + for (ActionInput output : spawn.getOutputFiles()) { + if (output instanceof Artifact && ((Artifact) output).isTreeArtifact()) { + outputDirMap.put(PathFragment.create(remotePathResolver.getWorkingDirectory()) + .getRelative(remotePathResolver.localPathToOutputPath(output.getExecPath())), output); + } + } + return outputDirMap; + } + private MerkleTree buildInputMerkleTree(Spawn spawn, SpawnExecutionContext context) throws IOException, ForbiddenActionInputException { + // Add output directories to inputs so that they are created as empty directories by the + // executor. The spec only requires the executor to create the parent directory of an output + // directory, which differs from the behavior of both local and sandboxed execution. + SortedMap outputDirMap = buildOutputDirMap(spawn); if (remoteOptions.remoteMerkleTreeCache) { MetadataProvider metadataProvider = context.getMetadataProvider(); ConcurrentLinkedQueue subMerkleTrees = new ConcurrentLinkedQueue<>(); @@ -316,9 +332,20 @@ private MerkleTree buildInputMerkleTree(Spawn spawn, SpawnExecutionContext conte (Object nodeKey, InputWalker walker) -> { subMerkleTrees.add(buildMerkleTreeVisitor(nodeKey, walker, metadataProvider)); }); + if (!outputDirMap.isEmpty()) { + subMerkleTrees.add(MerkleTree.build(outputDirMap, metadataProvider, execRoot, digestUtil)); + } return MerkleTree.merge(subMerkleTrees, digestUtil); } else { SortedMap inputMap = remotePathResolver.getInputMapping(context); + if (!outputDirMap.isEmpty()) { + // The map returned by getInputMapping is mutable, but must not be mutated here as it is + // shared with all other strategies. + SortedMap newInputMap = new TreeMap<>(); + newInputMap.putAll(inputMap); + newInputMap.putAll(outputDirMap); + inputMap = newInputMap; + } return MerkleTree.build(inputMap, context.getMetadataProvider(), execRoot, digestUtil); } } @@ -938,13 +965,6 @@ public InMemoryOutput downloadOutputs(RemoteAction action, RemoteActionResult re // Check that all mandatory outputs are created. for (ActionInput output : action.getSpawn().getOutputFiles()) { if (action.getSpawn().isMandatoryOutput(output)) { - // Don't check output that is tree artifact since spawn could generate nothing under that - // directory. Remote server typically doesn't create directory ahead of time resulting in - // empty tree artifact missing from action cache entry. - if (output instanceof Artifact && ((Artifact) output).isTreeArtifact()) { - continue; - } - Path localPath = execRoot.getRelative(output.getExecPath()); if (!metadata.files.containsKey(localPath) && !metadata.directories.containsKey(localPath) diff --git a/src/main/java/com/google/devtools/build/lib/remote/merkletree/DirectoryTreeBuilder.java b/src/main/java/com/google/devtools/build/lib/remote/merkletree/DirectoryTreeBuilder.java index c3d0223151c1cb..f188b234ef18d6 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/merkletree/DirectoryTreeBuilder.java +++ b/src/main/java/com/google/devtools/build/lib/remote/merkletree/DirectoryTreeBuilder.java @@ -24,7 +24,6 @@ import com.google.devtools.build.lib.remote.merkletree.DirectoryTree.DirectoryNode; import com.google.devtools.build.lib.remote.merkletree.DirectoryTree.FileNode; import com.google.devtools.build.lib.remote.util.DigestUtil; -import com.google.devtools.build.lib.skyframe.TreeArtifactValue; import com.google.devtools.build.lib.vfs.Dirent; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; @@ -35,7 +34,6 @@ import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; -import javax.annotation.Nullable; /** Builder for directory trees. */ class DirectoryTreeBuilder { @@ -97,7 +95,6 @@ private static int buildFromPaths( Map tree) throws IOException { return build( - null, inputs, tree, (input, path, currDir) -> { @@ -126,7 +123,6 @@ private static int buildFromActionInputs( Map tree) throws IOException { return build( - metadataProvider, inputs, tree, (input, path, currDir) -> { @@ -182,7 +178,6 @@ private static int buildFromActionInputs( } private static int build( - @Nullable MetadataProvider metadataProvider, SortedMap inputs, Map tree, FileNodeVisitor fileNodeVisitor) @@ -200,22 +195,13 @@ private static int build( T input = e.getValue(); if (input instanceof DerivedArtifact && ((DerivedArtifact) input).isTreeArtifact()) { - DerivedArtifact artifact = (DerivedArtifact) input; - // MetadataProvider is provided by all callers for which T is a superclass of - // DerivedArtifact. - Preconditions.checkNotNull(metadataProvider); - FileArtifactValue metadata = - Preconditions.checkNotNull( - metadataProvider.getMetadata(artifact), - "missing metadata for '%s'", - artifact.getExecPathString()); - Preconditions.checkState(metadata.equals(TreeArtifactValue.empty().getMetadata()), - "Encountered non-empty TreeArtifact '%s' with metadata '%s', which should have" - + " been expanded by SpawnInputExpander. This is a bug.", - path, metadata); - // Create an empty directory and its parent directories but don't visit the TreeArtifact - // input itself: A TreeArtifact's metadata has type REGULAR_FILE, not DIRECTORY, and would - // thus lead to an empty file being created in the buildFromActionInputs visitor. + // SpawnInputExpander has already expanded non-empty tree artifacts into a collection of + // TreeFileArtifacts. Thus, at this point, tree artifacts represent empty directories, which + // we create together with their parents. + // Note: This also handles output directories of actions, which are explicitly included as + // inputs so that they are created by the executor before the action executes. Since such a + // directory must remain writeable, MetadataProvider#getMetadata must not be called on the + // tree artifact here as it would have the side effect of making it read only. DirectoryNode emptyDir = new DirectoryNode(path.getBaseName()); tree.put(path, emptyDir); createParentDirectoriesIfNotExist(path, emptyDir, tree); diff --git a/src/test/shell/bazel/remote/remote_execution_test.sh b/src/test/shell/bazel/remote/remote_execution_test.sh index 34d5c3bc253e3e..8e54c99fa81feb 100755 --- a/src/test/shell/bazel/remote/remote_execution_test.sh +++ b/src/test/shell/bazel/remote/remote_execution_test.sh @@ -3119,6 +3119,56 @@ EOF //pkg:a &>$TEST_log || fail "expected build to succeed with sibling repository layout" } +function test_create_tree_artifact_outputs() { + touch WORKSPACE + mkdir -p pkg + + cat > pkg/def.bzl <<'EOF' +def _r(ctx): + empty_dir = ctx.actions.declare_directory("%s/empty_dir" % ctx.label.name) + ctx.actions.run_shell( + outputs = [empty_dir], + command = "cd %s && pwd" % empty_dir.path, + ) + non_empty_dir = ctx.actions.declare_directory("%s/non_empty_dir" % ctx.label.name) + ctx.actions.run_shell( + outputs = [non_empty_dir], + command = "cd %s && pwd && touch out" % non_empty_dir.path, + ) + return [DefaultInfo(files = depset([empty_dir, non_empty_dir]))] + +r = rule(implementation = _r) +EOF + +cat > pkg/BUILD <<'EOF' +load(":def.bzl", "r") + +r(name = "a") +EOF + + bazel build \ + --spawn_strategy=remote \ + --remote_executor=grpc://localhost:${worker_port} \ + //pkg:a &>$TEST_log || fail "expected build to succeed" + [[ -f bazel-bin/pkg/a/non_empty_dir/out ]] || fail "expected tree artifact to contain a file" + + bazel clean --expunge + bazel build \ + --spawn_strategy=remote \ + --remote_executor=grpc://localhost:${worker_port} \ + --experimental_remote_merkle_tree_cache \ + //pkg:a &>$TEST_log || fail "expected build to succeed with Merkle tree cache" + [[ -f bazel-bin/pkg/a/non_empty_dir/out ]] || fail "expected tree artifact to contain a file" + + bazel clean --expunge + bazel build \ + --spawn_strategy=remote \ + --remote_executor=grpc://localhost:${worker_port} \ + --experimental_sibling_repository_layout \ + //pkg:a &>$TEST_log || fail "expected build to succeed with sibling repository layout" + [[ -f bazel-bin/pkg/a/non_empty_dir/out ]] || fail "expected tree artifact to contain a file" +} + # Runs coverage with `cc_test` and RE then checks the coverage file is returned. # Older versions of gcov are not supported with bazel coverage and so will be skipped. # See the above `test_java_rbe_coverage_produces_report` for more information. diff --git a/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java b/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java index af325e3b3b1a8b..2088a58915e2a6 100644 --- a/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java +++ b/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java @@ -283,8 +283,9 @@ private ActionResult execute( } for (String output : command.getOutputDirectoriesList()) { Path file = workingDirectory.getRelative(output); - if (file.exists()) { - throw new FileAlreadyExistsException("Output directory/file already exists: " + file); + if (file.exists() && !file.isDirectory()) { + throw new FileAlreadyExistsException( + "Non-directory exists at output directory path: " + file); } file.getParentDirectory().createDirectoryAndParents(); outputs.add(file);