Skip to content

Commit

Permalink
Fix remote action wall time in the case of action timeout
Browse files Browse the repository at this point in the history
Closes #18843.

Change-Id: Ifa07522df85cbbb39193e60e87de525efc51f492
PiperOrigin-RevId: 546031541
  • Loading branch information
exoson authored and copybara-github committed Jul 6, 2023
1 parent c231b7d commit 70e66eb
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -512,22 +512,22 @@ private SpawnResult execLocallyAndUploadOrFail(
if (remoteOptions.remoteLocalFallback && !RemoteRetrierUtils.causedByExecTimeout(cause)) {
return execLocallyAndUpload(action, spawn, context, uploadLocalResults);
}
return handleError(action, cause, context);
return handleError(action, cause);
}

private SpawnResult handleError(
RemoteAction action, IOException exception, SpawnExecutionContext context)
private SpawnResult handleError(RemoteAction action, IOException exception)
throws ExecException, InterruptedException, IOException {
boolean remoteCacheFailed = BulkTransferException.allCausedByCacheNotFoundException(exception);
if (exception.getCause() instanceof ExecutionStatusException) {
ExecutionStatusException e = (ExecutionStatusException) exception.getCause();
RemoteActionResult result = null;
if (e.getResponse() != null) {
ExecuteResponse resp = e.getResponse();
maybeDownloadServerLogs(action, resp);
if (resp.hasResult()) {
result = RemoteActionResult.createFromResponse(resp);
try {
remoteExecutionService.downloadOutputs(
action, RemoteActionResult.createFromResponse(resp));
remoteExecutionService.downloadOutputs(action, result);
} catch (BulkTransferException bulkTransferEx) {
exception.addSuppressed(bulkTransferEx);
}
Expand All @@ -536,18 +536,33 @@ private SpawnResult handleError(
if (e.isExecutionTimeout()) {
maybePrintExecutionMessages(
action.getSpawn(), e.getResponse().getMessage(), /* success= */ false);
return new SpawnResult.Builder()
.setRunnerName(getName())
.setStatus(Status.TIMEOUT)
.setExitCode(SpawnResult.POSIX_TIMEOUT_EXIT_CODE)
.setFailureDetail(
FailureDetail.newBuilder()
.setMessage("remote spawn timed out")
.setSpawn(
FailureDetails.Spawn.newBuilder()
.setCode(FailureDetails.Spawn.Code.TIMEOUT))
.build())
.build();
SpawnResult.Builder resultBuilder =
new SpawnResult.Builder()
.setRunnerName(getName())
.setStatus(Status.TIMEOUT)
.setExitCode(SpawnResult.POSIX_TIMEOUT_EXIT_CODE)
.setFailureDetail(
FailureDetail.newBuilder()
.setMessage("remote spawn timed out")
.setSpawn(
FailureDetails.Spawn.newBuilder()
.setCode(FailureDetails.Spawn.Code.TIMEOUT))
.build());
if (result != null) {
resultBuilder
.setWallTimeInMs(
(int)
Duration.between(
Utils.timestampToInstant(
result.getExecutionMetadata().getExecutionStartTimestamp()),
Utils.timestampToInstant(
result.getExecutionMetadata().getExecutionCompletedTimestamp()))
.toMillis())
.setStartTime(
Utils.timestampToInstant(
result.getExecutionMetadata().getExecutionStartTimestamp()));
}
return resultBuilder.build();
}
}
final Status status;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ public static SpawnResult createSpawnResult(
return builder.build();
}

private static Instant timestampToInstant(Timestamp timestamp) {
public static Instant timestampToInstant(Timestamp timestamp) {
return Instant.ofEpochSecond(timestamp.getSeconds(), timestamp.getNanos());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -890,6 +891,64 @@ public void testRemoteExecutionTimeout() throws Exception {
verify(service).downloadOutputs(any(), eq(RemoteActionResult.createFromResponse(resp)));
}

@Test
public void testRemoteExecutionTimeoutTimings() throws Exception {
// If remote execution times out the SpawnResult should still have the start and wall times
// reported correctly.

remoteOptions.remoteLocalFallback = false;

RemoteSpawnRunner runner = newSpawnRunner();
RemoteExecutionService service = runner.getRemoteExecutionService();

com.google.protobuf.Duration oneSecond = Durations.fromMillis(1000);
Timestamp executionStart = Timestamp.getDefaultInstance();
Timestamp executionCompleted = Timestamps.add(executionStart, oneSecond);
ExecutedActionMetadata executedMetadata =
ExecutedActionMetadata.newBuilder()
.setExecutionStartTimestamp(executionStart)
.setExecutionCompletedTimestamp(executionCompleted)
.build();

ActionResult cachedResult =
ActionResult.newBuilder().setExitCode(0).setExecutionMetadata(executedMetadata).build();
when(cache.downloadActionResult(
any(RemoteActionExecutionContext.class),
any(ActionKey.class),
/* inlineOutErr= */ eq(false)))
.thenReturn(null);
ExecuteResponse resp =
ExecuteResponse.newBuilder()
.setResult(cachedResult)
.setStatus(
com.google.rpc.Status.newBuilder()
.setCode(Code.DEADLINE_EXCEEDED.getNumber())
.build())
.build();
when(executor.executeRemotely(
any(RemoteActionExecutionContext.class),
any(ExecuteRequest.class),
any(OperationObserver.class)))
.thenThrow(new IOException(new ExecutionStatusException(resp.getStatus(), resp)));

Spawn spawn = newSimpleSpawn();

SpawnExecutionContext policy = getSpawnContext(spawn);

SpawnResult res = runner.exec(spawn, policy);
assertThat(res.status()).isEqualTo(Status.TIMEOUT);
assertThat(res.getWallTimeInMs()).isEqualTo(1000);
assertThat(res.getStartTime())
.isEqualTo(Instant.ofEpochSecond(executionStart.getSeconds(), executionStart.getNanos()));

verify(executor)
.executeRemotely(
any(RemoteActionExecutionContext.class),
any(ExecuteRequest.class),
any(OperationObserver.class));
verify(service).downloadOutputs(any(), eq(RemoteActionResult.createFromResponse(resp)));
}

@Test
public void testRemoteExecutionTimeoutDoesNotTriggerFallback() throws Exception {
// If remote execution times out the SpawnResult status should be TIMEOUT, regardess of local
Expand Down

0 comments on commit 70e66eb

Please sign in to comment.