diff --git a/src/main/java/com/google/devtools/build/lib/buildeventservice/BuildEventServiceUploader.java b/src/main/java/com/google/devtools/build/lib/buildeventservice/BuildEventServiceUploader.java index 610c33993b0503..af0becde4ad032 100644 --- a/src/main/java/com/google/devtools/build/lib/buildeventservice/BuildEventServiceUploader.java +++ b/src/main/java/com/google/devtools/build/lib/buildeventservice/BuildEventServiceUploader.java @@ -93,12 +93,6 @@ public final class BuildEventServiceUploader implements Runnable { private static final GoogleLogger logger = GoogleLogger.forEnclosingClass(); - /** Configuration knobs related to RPC retries. Values chosen by good judgement. */ - private static final int MAX_NUM_RETRIES = - Integer.parseInt(System.getProperty("BAZEL_BES_NUM_RETRIES_ON_RPC_FAILURE", "4")); - - private static final int DELAY_MILLIS = 1000; - private final BuildEventServiceClient besClient; private final BuildEventArtifactUploader buildEventUploader; private final BuildEventServiceProtoUtil besProtoUtil; @@ -544,7 +538,7 @@ private void publishBuildEvents() BuildProgress.Code.BES_STREAM_NOT_RETRYING_FAILURE, message); } - if (retryAttempt == MAX_NUM_RETRIES) { + if (retryAttempt == buildEventProtocolOptions.besUploadMaxRetries) { String message = String.format( "Not retrying publishBuildEvents, no more attempts left: status='%s'", @@ -629,7 +623,7 @@ private void publishLifecycleEvent(PublishLifecycleEventRequest request) throws DetailedStatusException, InterruptedException { int retryAttempt = 0; StatusException cause = null; - while (retryAttempt <= MAX_NUM_RETRIES) { + while (retryAttempt <= this.buildEventProtocolOptions.besUploadMaxRetries) { try { besClient.publish(request); return; @@ -656,7 +650,7 @@ private void publishLifecycleEvent(PublishLifecycleEventRequest request) throw withFailureDetail( cause, BuildProgress.Code.BES_UPLOAD_RETRY_LIMIT_EXCEEDED_FAILURE, - "All retry attempts failed."); + String.format("All %d retry attempts failed.", retryAttempt - 1)); } private void ensureUploadThreadStarted() { @@ -723,9 +717,12 @@ private static boolean shouldRetryStatus(Status status) { && !status.getCode().equals(Code.FAILED_PRECONDITION); } - private static long retrySleepMillis(int attempt) { + private long retrySleepMillis(int attempt) { + Preconditions.checkArgument(attempt >= 0, "attempt must be nonnegative: %s", attempt); // This somewhat matches the backoff used for gRPC connection backoffs. - return (long) (DELAY_MILLIS * Math.pow(1.6, attempt)); + return (long) + (this.buildEventProtocolOptions.besUploadRetryInitialDelay.toMillis() + * Math.pow(1.6, attempt)); } private DetailedStatusException withFailureDetail( diff --git a/src/main/java/com/google/devtools/build/lib/buildeventstream/BuildEventProtocolOptions.java b/src/main/java/com/google/devtools/build/lib/buildeventstream/BuildEventProtocolOptions.java index 0333f0c30bb99b..76c3fa0d3b5023 100644 --- a/src/main/java/com/google/devtools/build/lib/buildeventstream/BuildEventProtocolOptions.java +++ b/src/main/java/com/google/devtools/build/lib/buildeventstream/BuildEventProtocolOptions.java @@ -18,6 +18,7 @@ import com.google.devtools.common.options.OptionDocumentationCategory; import com.google.devtools.common.options.OptionEffectTag; import com.google.devtools.common.options.OptionsBase; +import java.time.Duration; /** Options used to configure the build event protocol. */ public class BuildEventProtocolOptions extends OptionsBase { @@ -34,14 +35,31 @@ public class BuildEventProtocolOptions extends OptionsBase { public boolean legacyImportantOutputs; @Option( - name = "experimental_build_event_upload_strategy", - defaultValue = "null", - documentationCategory = OptionDocumentationCategory.LOGGING, - effectTags = {OptionEffectTag.AFFECTS_OUTPUTS}, - help = "Selects how to upload artifacts referenced in the build event protocol." - ) + name = "experimental_build_event_upload_strategy", + defaultValue = "null", + documentationCategory = OptionDocumentationCategory.LOGGING, + effectTags = {OptionEffectTag.AFFECTS_OUTPUTS}, + help = "Selects how to upload artifacts referenced in the build event protocol.") public String buildEventUploadStrategy; + @Option( + name = "experimental_build_event_upload_max_retries", + defaultValue = "4", + documentationCategory = OptionDocumentationCategory.LOGGING, + effectTags = {OptionEffectTag.BAZEL_INTERNAL_CONFIGURATION}, + help = "The maximum number of times Bazel should retry uploading a build event.") + public int besUploadMaxRetries; + + @Option( + name = "experimental_build_event_upload_retry_minimum_delay", + defaultValue = "1s", + documentationCategory = OptionDocumentationCategory.LOGGING, + effectTags = {OptionEffectTag.BAZEL_INTERNAL_CONFIGURATION}, + help = + "Initial, minimum delay for exponential backoff retries when BEP upload fails. (exponent:" + + " 1.6)") + public Duration besUploadRetryInitialDelay; + @Option( name = "experimental_stream_log_file_uploads", defaultValue = "false", diff --git a/src/test/java/com/google/devtools/build/lib/buildeventservice/BazelBuildEventServiceModuleTest.java b/src/test/java/com/google/devtools/build/lib/buildeventservice/BazelBuildEventServiceModuleTest.java index 8063ae0ab239d0..50b5433da0c635 100644 --- a/src/test/java/com/google/devtools/build/lib/buildeventservice/BazelBuildEventServiceModuleTest.java +++ b/src/test/java/com/google/devtools/build/lib/buildeventservice/BazelBuildEventServiceModuleTest.java @@ -232,6 +232,16 @@ public void testCreatesStreamerForBesTransport() throws Exception { .isInstanceOf(BuildEventServiceTransport.class); } + @Test + public void testRetryCount() throws Exception { + runBuildWithOptions( + "--bes_backend=does.not.exist:1234", "--experimental_build_event_upload_max_retries=3"); + afterBuildCommand(); + + events.assertContainsError( + "The Build Event Protocol upload failed: All 3 retry attempts failed"); + } + @Test public void testConnectivityFailureDisablesBesStreaming() throws Exception { class FailingConnectivityStatusProvider extends BlazeModule