Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Suppress last-ditch download exceptions w/cleanup #10029

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
Expand All @@ -53,6 +54,7 @@
import com.google.devtools.build.lib.remote.RemoteCache.ActionResultMetadata.DirectoryMetadata;
import com.google.devtools.build.lib.remote.RemoteCache.ActionResultMetadata.FileMetadata;
import com.google.devtools.build.lib.remote.RemoteCache.ActionResultMetadata.SymlinkMetadata;
import com.google.devtools.build.lib.remote.common.CacheNotFoundException;
import com.google.devtools.build.lib.remote.common.RemoteCacheClient;
import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey;
import com.google.devtools.build.lib.remote.options.RemoteOptions;
Expand Down Expand Up @@ -260,6 +262,36 @@ private static Path toTmpDownloadPath(Path actualPath) {
return actualPath.getParentDirectory().getRelative(actualPath.getBaseName() + ".tmp");
}

/**
* Exception which represents a collection of IOExceptions for the purpose
* of distinguishing remote communication exceptions from those which occur
* on filesystems locally. This exception serves as a trace point for the actual
* download, so that the intented operation can be observed in a stack, with all
* constituent exceptions available for observation.
*/
static class DownloadException extends Exception {
werkt marked this conversation as resolved.
Show resolved Hide resolved
// true since no empty DownloadException is ever thrown
boolean allCacheNotFoundException = true;
werkt marked this conversation as resolved.
Show resolved Hide resolved

DownloadException() {
}

DownloadException(IOException e) {
add(e);
}

void add(IOException e) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not override addSuppressed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted a filter on this, so that it couldn't contain/suppress anything but IOExceptions. That is probably unnecessary, but I felt it needed scope.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And as it turns out, addSuppressed is final

if (allCacheNotFoundException) {
allCacheNotFoundException = e instanceof CacheNotFoundException;
}
addSuppressed(e);
}

boolean onlyCausedByCacheNotFoundException() {
return allCacheNotFoundException;
}
}

/**
* Download the output files and directory trees of a remotely executed action to the local
* machine, as well stdin / stdout to the given files.
Expand All @@ -276,8 +308,13 @@ public void download(
Path execRoot,
FileOutErr origOutErr,
OutputFilesLocker outputFilesLocker)
throws ExecException, IOException, InterruptedException {
ActionResultMetadata metadata = parseActionResultMetadata(result, execRoot);
throws ExecException, DownloadException, IOException, InterruptedException {
ActionResultMetadata metadata;
try {
metadata = parseActionResultMetadata(result, execRoot);
} catch (IOException e) {
throw new DownloadException(e);
}

List<ListenableFuture<FileMetadata>> downloads =
Stream.concat(
Expand All @@ -299,9 +336,8 @@ public void download(
// Subsequently we need to wait for *every* download to finish, even if we already know that
// one failed. That's so that when exiting this method we can be sure that all downloads have
// finished and don't race with the cleanup routine.
// TODO(buchgr): Look into cancellation.

IOException downloadException = null;
DownloadException downloadException = null;
InterruptedException interruptedException = null;
FileOutErr tmpOutErr = null;
try {
Expand All @@ -310,25 +346,29 @@ public void download(
}
downloads.addAll(downloadOutErr(result, tmpOutErr));
} catch (IOException e) {
downloadException = e;
downloadException = new DownloadException();
downloadException.add(e);
}

boolean cancelling = false;
boolean interrupted = Thread.currentThread().isInterrupted();
for (ListenableFuture<FileMetadata> download : downloads) {
try {
// Wait for all downloads to finish.
getFromFuture(download);
if (!cancelling || download.isDone()) {
// Wait for all downloads to finish.
getFromFuture(download);
} else {
download.cancel(true);
}
} catch (IOException e) {
if (downloadException == null) {
downloadException = e;
} else if (e != downloadException) {
downloadException.addSuppressed(e);
downloadException = new DownloadException();
}
downloadException.add(e);
} catch (InterruptedException e) {
if (interruptedException == null) {
interruptedException = e;
} else if (e != interruptedException) {
interruptedException.addSuppressed(e);
}
interrupted = Thread.interrupted() || interrupted;
interruptedException = e;
cancelling = true;
}
}

Expand All @@ -348,12 +388,7 @@ public void download(
tmpOutErr.clearErr();
}
} catch (IOException e) {
if (downloadException != null && e != downloadException) {
e.addSuppressed(downloadException);
}
if (interruptedException != null) {
e.addSuppressed(interruptedException);
}
downloadException.add(e);
werkt marked this conversation as resolved.
Show resolved Hide resolved

// If deleting of output files failed, we abort the build with a decent error message as
// any subsequent local execution failure would likely be incomprehensible.
Expand All @@ -363,6 +398,9 @@ public void download(
}

if (interruptedException != null) {
if (downloadException != null) {
interruptedException.addSuppressed(downloadException);
}
throw interruptedException;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
import com.google.devtools.build.lib.profiler.SilentCloseable;
import com.google.devtools.build.lib.remote.RemoteCache.DownloadException;
import com.google.devtools.build.lib.remote.common.CacheNotFoundException;
import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey;
import com.google.devtools.build.lib.remote.merkletree.MerkleTree;
Expand Down Expand Up @@ -169,6 +170,8 @@ public CacheHandle lookup(Spawn spawn, SpawnExecutionContext context)
prof.profile(ProfilerTask.REMOTE_DOWNLOAD, "download outputs")) {
remoteCache.download(
result, execRoot, context.getFileOutErr(), context::lockOutputFiles);
} catch (DownloadException e) {
throw new IOException(e);
werkt marked this conversation as resolved.
Show resolved Hide resolved
}
} else {
PathFragment inMemoryOutputPath = getInMemoryOutputPath(spawn);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
import com.google.devtools.build.lib.profiler.SilentCloseable;
import com.google.devtools.build.lib.remote.RemoteCache.DownloadException;
import com.google.devtools.build.lib.remote.common.CacheNotFoundException;
import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey;
import com.google.devtools.build.lib.remote.merkletree.MerkleTree;
Expand Down Expand Up @@ -102,8 +103,9 @@ public class RemoteSpawnRunner implements SpawnRunner {
private static final String VIOLATION_TYPE_MISSING = "MISSING";

private static boolean retriableExecErrors(Exception e) {
if (e instanceof CacheNotFoundException || e.getCause() instanceof CacheNotFoundException) {
return true;
if (e.getCause() instanceof DownloadException) {
DownloadException downloadException = (DownloadException) e.getCause();
return downloadException.onlyCausedByCacheNotFoundException();
}
if (!RemoteRetrierUtils.causedByStatus(e, Code.FAILED_PRECONDITION)) {
return false;
Expand Down Expand Up @@ -242,7 +244,10 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
try {
return downloadAndFinalizeSpawnResult(
cachedResult, /* cacheHit= */ true, spawn, context, remoteOutputsMode);
} catch (CacheNotFoundException e) {
} catch (DownloadException e) {
if (!e.onlyCausedByCacheNotFoundException()) {
throw new IOException(e);
}
// No cache hit, so we fall through to local or remote execution.
// We set acceptCachedResult to false in order to force the action re-execution.
acceptCachedResult = false;
Expand Down Expand Up @@ -302,11 +307,13 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
try {
return downloadAndFinalizeSpawnResult(
actionResult, reply.getCachedResult(), spawn, context, remoteOutputsMode);
} catch (CacheNotFoundException e) {
// No cache hit, so if we retry this execution, we must no longer accept
// cached results, it must be reexecuted
requestBuilder.setSkipCacheLookup(true);
throw e;
} catch (DownloadException e) {
if (e.onlyCausedByCacheNotFoundException()) {
// No cache hit, so if we retry this execution, we must no longer accept
// cached results, it must be reexecuted
requestBuilder.setSkipCacheLookup(true);
}
throw new IOException(e);
}
});
} catch (IOException e) {
Expand All @@ -324,7 +331,7 @@ private SpawnResult downloadAndFinalizeSpawnResult(
Spawn spawn,
SpawnExecutionContext context,
RemoteOutputsMode remoteOutputsMode)
throws ExecException, IOException, InterruptedException {
throws DownloadException, ExecException, IOException, InterruptedException {
boolean downloadOutputs =
shouldDownloadAllSpawnOutputs(
remoteOutputsMode,
Expand Down Expand Up @@ -434,14 +441,19 @@ private SpawnResult execLocallyAndUploadOrFail(
private SpawnResult handleError(
IOException exception, FileOutErr outErr, ActionKey actionKey, SpawnExecutionContext context)
throws ExecException, InterruptedException, IOException {
boolean remoteCacheFailed = false;
if (exception.getCause() instanceof ExecutionStatusException) {
ExecutionStatusException e = (ExecutionStatusException) exception.getCause();
if (e.getResponse() != null) {
ExecuteResponse resp = e.getResponse();
maybeDownloadServerLogs(resp, actionKey);
if (resp.hasResult()) {
// We try to download all (partial) results even on server error, for debuggability.
remoteCache.download(resp.getResult(), execRoot, outErr, context::lockOutputFiles);
try {
// We try to download all (partial) results even on server error, for debuggability.
remoteCache.download(resp.getResult(), execRoot, outErr, context::lockOutputFiles);
} catch (DownloadException downloadEx) {
exception.addSuppressed(downloadEx);
}
}
}
if (e.isExecutionTimeout()) {
Expand All @@ -451,11 +463,14 @@ private SpawnResult handleError(
.setExitCode(POSIX_TIMEOUT_EXIT_CODE)
.build();
}
} else if (exception.getCause() instanceof DownloadException) {
DownloadException e = (DownloadException) exception.getCause();
remoteCacheFailed = e.onlyCausedByCacheNotFoundException();
}
final Status status;
if (RemoteRetrierUtils.causedByStatus(exception, Code.UNAVAILABLE)) {
status = Status.EXECUTION_FAILED_CATASTROPHICALLY;
} else if (exception instanceof CacheNotFoundException) {
} else if (remoteCacheFailed) {
status = Status.REMOTE_CACHE_FAILED;
} else {
status = Status.EXECUTION_FAILED;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import com.google.devtools.build.lib.actions.cache.MetadataInjector;
import com.google.devtools.build.lib.actions.util.ActionsTestUtil;
import com.google.devtools.build.lib.clock.JavaClock;
import com.google.devtools.build.lib.remote.RemoteCache.DownloadException;
import com.google.devtools.build.lib.remote.RemoteCache.OutputFilesLocker;
import com.google.devtools.build.lib.remote.RemoteCache.UploadManifest;
import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey;
Expand Down Expand Up @@ -643,7 +644,7 @@ public void downloadFailureMaintainsDirectories() throws Exception {
OutputFile.newBuilder().setPath("outputdir/outputfile").setDigest(outputFileDigest));
result.addOutputFiles(OutputFile.newBuilder().setPath("otherfile").setDigest(otherFileDigest));
assertThrows(
IOException.class, () -> cache.download(result.build(), execRoot, null, outputFilesLocker));
DownloadException.class, () -> cache.download(result.build(), execRoot, null, outputFilesLocker));
assertThat(cache.getNumFailedDownloads()).isEqualTo(1);
assertThat(execRoot.getRelative("outputdir").exists()).isTrue();
assertThat(execRoot.getRelative("outputdir/outputfile").exists()).isFalse();
Expand Down Expand Up @@ -672,15 +673,17 @@ public void onErrorWaitForRemainingDownloadsToComplete() throws Exception {
.addOutputFiles(OutputFile.newBuilder().setPath("file2").setDigest(digest2))
.addOutputFiles(OutputFile.newBuilder().setPath("file3").setDigest(digest3))
.build();
IOException e =
DownloadException downloadException =
assertThrows(
IOException.class,
DownloadException.class,
() ->
cache.download(
result, execRoot, new FileOutErr(stdout, stderr), outputFilesLocker));
assertThat(e.getSuppressed()).isEmpty();
assertThat(downloadException.getSuppressed()).hasLength(1);
assertThat(cache.getNumSuccessfulDownloads()).isEqualTo(2);
assertThat(cache.getNumFailedDownloads()).isEqualTo(1);
assertThat(downloadException.getSuppressed()[0]).isInstanceOf(IOException.class);
IOException e = (IOException) downloadException.getSuppressed()[0];
assertThat(Throwables.getRootCause(e)).hasMessageThat().isEqualTo("download failed");
verify(outputFilesLocker, never()).lock();
}
Expand All @@ -702,17 +705,18 @@ public void downloadWithMultipleErrorsAddsThemAsSuppressed() throws Exception {
.addOutputFiles(OutputFile.newBuilder().setPath("file2").setDigest(digest2))
.addOutputFiles(OutputFile.newBuilder().setPath("file3").setDigest(digest3))
.build();
IOException e =
DownloadException e =
assertThrows(
IOException.class,
DownloadException.class,
() ->
cache.download(
result, execRoot, new FileOutErr(stdout, stderr), outputFilesLocker));

assertThat(e.getSuppressed()).hasLength(1);
assertThat(e.getSuppressed()).hasLength(2);
assertThat(e.getSuppressed()[0]).isInstanceOf(IOException.class);
assertThat(e.getSuppressed()[0]).hasMessageThat().isEqualTo("file3 failed");
assertThat(Throwables.getRootCause(e)).hasMessageThat().isEqualTo("file2 failed");
assertThat(e.getSuppressed()[0]).hasMessageThat().isAnyOf("file2 failed", "file3 failed");
assertThat(e.getSuppressed()[1]).isInstanceOf(IOException.class);
assertThat(e.getSuppressed()[1]).hasMessageThat().isAnyOf("file2 failed", "file3 failed");
}

@Test
Expand All @@ -733,15 +737,18 @@ public void downloadWithDuplicateIOErrorsDoesNotSuppress() throws Exception {
.addOutputFiles(OutputFile.newBuilder().setPath("file2").setDigest(digest2))
.addOutputFiles(OutputFile.newBuilder().setPath("file3").setDigest(digest3))
.build();
IOException e =
DownloadException downloadException =
assertThrows(
IOException.class,
DownloadException.class,
() ->
cache.download(
result, execRoot, new FileOutErr(stdout, stderr), outputFilesLocker));

assertThat(e.getSuppressed()).isEmpty();
assertThat(Throwables.getRootCause(e)).hasMessageThat().isEqualTo("reused io exception");
for (Throwable t : downloadException.getSuppressed()) {
assertThat(t).isInstanceOf(IOException.class);
IOException e = (IOException) t;
assertThat(Throwables.getRootCause(e)).hasMessageThat().isEqualTo("reused io exception");
}
}

@Test
Expand Down Expand Up @@ -838,7 +845,7 @@ public void testDownloadWithStdoutStderrOnFailure() throws Exception {
.setStderrDigest(digestStderr)
.build();
assertThrows(
IOException.class, () -> cache.download(result, execRoot, spyOutErr, outputFilesLocker));
DownloadException.class, () -> cache.download(result, execRoot, spyOutErr, outputFilesLocker));
verify(spyOutErr, Mockito.times(2)).childOutErr();
verify(spyChildOutErr).clearOut();
verify(spyChildOutErr).clearErr();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
import com.google.devtools.build.lib.exec.SpawnRunner;
import com.google.devtools.build.lib.exec.SpawnRunner.SpawnExecutionContext;
import com.google.devtools.build.lib.exec.util.FakeOwner;
import com.google.devtools.build.lib.remote.RemoteCache.DownloadException;
import com.google.devtools.build.lib.remote.common.CacheNotFoundException;
import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey;
import com.google.devtools.build.lib.remote.options.RemoteOptions;
Expand Down Expand Up @@ -592,7 +593,7 @@ public void cacheDownloadFailureTriggersRemoteExecution() throws Exception {

ActionResult cachedResult = ActionResult.newBuilder().setExitCode(0).build();
when(cache.downloadActionResult(any(ActionKey.class))).thenReturn(cachedResult);
Exception downloadFailure = new CacheNotFoundException(Digest.getDefaultInstance());
Exception downloadFailure = new DownloadException(new CacheNotFoundException(Digest.getDefaultInstance()));
doThrow(downloadFailure)
.when(cache)
.download(eq(cachedResult), any(Path.class), any(FileOutErr.class), any());
Expand Down Expand Up @@ -629,7 +630,7 @@ public void resultsDownloadFailureTriggersRemoteExecutionWithSkipCacheLookup() t
when(executor.executeRemotely(any(ExecuteRequest.class)))
.thenReturn(cachedResponse)
.thenReturn(executedResponse);
Exception downloadFailure = new CacheNotFoundException(Digest.getDefaultInstance());
Exception downloadFailure = new DownloadException(new CacheNotFoundException(Digest.getDefaultInstance()));
doThrow(downloadFailure)
.when(cache)
.download(eq(cachedResult), any(Path.class), any(FileOutErr.class), any());
Expand Down