From df70e3258c0ff18b5cf33fc4cfefc8aee6039a10 Mon Sep 17 00:00:00 2001
From: Raven Black
Date: Mon, 6 May 2024 14:25:01 +0000
Subject: [PATCH 1/2] Increase flake threshold for hotrestart_handoff_test

Signed-off-by: Raven Black
---
 test/integration/python/hotrestart_handoff_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/integration/python/hotrestart_handoff_test.py b/test/integration/python/hotrestart_handoff_test.py
index 9627b24b2696..4b7e3b6298e3 100644
--- a/test/integration/python/hotrestart_handoff_test.py
+++ b/test/integration/python/hotrestart_handoff_test.py
@@ -33,7 +33,7 @@ def random_loopback_host():
 # This is a timeout that must be long enough that the hot restarted
 # instance can reliably be fully started up within this many seconds, or the
 # test will be flaky. 3 seconds is enough on a not-busy host with a non-tsan
-# non-coverage build; 10 seconds should be enough to be not flaky in most
+# non-coverage build; 15 seconds should be enough to be not flaky in most
 # configurations.
 #
 # Unfortunately, because the test is verifying the behavior of a connection
@@ -44,7 +44,7 @@ def random_loopback_host():
 # Ideally this would be adjusted (3x) for tsan and coverage runs, but making that
 # possible for python is outside the scope of this test, so we're stuck using the
 # 3x value for all tests.
-STARTUP_TOLERANCE_SECONDS = 10
+STARTUP_TOLERANCE_SECONDS = 15
 
 # We send multiple requests in parallel and require them all to function correctly
 # - this makes it so if something is flaky we're more likely to encounter it, and

From 7668d1b85447c22de7b0efa79565836ffca48138 Mon Sep 17 00:00:00 2001
From: Raven Black
Date: Thu, 9 May 2024 17:43:36 +0000
Subject: [PATCH 2/2] Actually *lower* the value instead.

Signed-off-by: Raven Black
---
 test/integration/python/BUILD                      |  4 +++-
 test/integration/python/hotrestart_handoff_test.py | 13 ++++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/test/integration/python/BUILD b/test/integration/python/BUILD
index 8d824bbdad99..5f1e42517986 100644
--- a/test/integration/python/BUILD
+++ b/test/integration/python/BUILD
@@ -7,7 +7,9 @@ envoy_package()
 
 envoy_py_test(
     name = "hotrestart_handoff_test",
-    size = "medium",
+    # This is not actually large, but setting large makes the test skip for
+    # asan and tsan.
+    size = "large",
     srcs = select({
         "//bazel:disable_hot_restart_or_admin": ["null_test.py"],
         "//conditions:default": ["hotrestart_handoff_test.py"],
diff --git a/test/integration/python/hotrestart_handoff_test.py b/test/integration/python/hotrestart_handoff_test.py
index 4b7e3b6298e3..913ed959940a 100644
--- a/test/integration/python/hotrestart_handoff_test.py
+++ b/test/integration/python/hotrestart_handoff_test.py
@@ -33,7 +33,7 @@ def random_loopback_host():
 # This is a timeout that must be long enough that the hot restarted
 # instance can reliably be fully started up within this many seconds, or the
 # test will be flaky. 3 seconds is enough on a not-busy host with a non-tsan
-# non-coverage build; 15 seconds should be enough to be not flaky in most
+# non-coverage build; 6 seconds should be enough to be not flaky in most
 # configurations.
 #
 # Unfortunately, because the test is verifying the behavior of a connection
@@ -41,10 +41,13 @@ def random_loopback_host():
 # so increasing this value increases the duration of the test. For this
 # reason we want to keep it as low as possible without causing flaky failure.
 #
-# Ideally this would be adjusted (3x) for tsan and coverage runs, but making that
-# possible for python is outside the scope of this test, so we're stuck using the
-# 3x value for all tests.
-STARTUP_TOLERANCE_SECONDS = 15
+# If this goes longer than 10 seconds connections start timing out which
+# causes the test to get stuck and time out. Unfortunately for tsan or asan
+# runs the "long enough to start up" constraint fights with the "too long for
+# connections to be idle" constraint. Ideally this test would be disabled for
+# those slower test contexts, but we don't currently have infrastructure for
+# that.
+STARTUP_TOLERANCE_SECONDS = 6
 
 # We send multiple requests in parallel and require them all to function correctly
 # - this makes it so if something is flaky we're more likely to encounter it, and
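
Note on the constraint described in the new comment: the drain-spanning request has to stay open for the full startup tolerance, yet finish before the roughly 10-second point at which connections start timing out. The Python sketch below only illustrates that relationship; the constant names and the use of asyncio are assumptions for illustration, not the test's actual implementation.

import asyncio

# The ~10 second figure is taken from the comment above; 6 is the value the
# second patch settles on. Everything else here is illustrative only.
CONNECTION_IDLE_LIMIT_SECONDS = 10
STARTUP_TOLERANCE_SECONDS = 6


async def drain_spanning_request() -> str:
    # Stand-in for the long-lived request: it is held open for the full
    # startup tolerance while the hot restart completes.
    await asyncio.sleep(STARTUP_TOLERANCE_SECONDS)
    return "ok"


async def main() -> None:
    # The request only succeeds if the tolerance fits under the idle limit;
    # pushing the tolerance past ~10 seconds would make this (and the real
    # test) hang until the outer timeout fires.
    reply = await asyncio.wait_for(
        drain_spanning_request(), timeout=CONNECTION_IDLE_LIMIT_SECONDS)
    print(reply)


if __name__ == "__main__":
    asyncio.run(main())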
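
For readers outside the test, the rough shape of what STARTUP_TOLERANCE_SECONDS budgets for is sketched below. This is not the test's actual code; the helper name, the health-check URL argument, and the polling interval are all hypothetical, and stand in for "wait until the hot-restarted instance is serving, or give up after the tolerance".

import time
import urllib.request

STARTUP_TOLERANCE_SECONDS = 6  # value set by the second patch


def wait_for_hot_restarted_instance(
        url: str, timeout: float = STARTUP_TOLERANCE_SECONDS) -> bool:
    """Polls `url` until it responds or `timeout` seconds elapse.

    Returns True if the new instance came up within the tolerance.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=1) as response:
                if response.status == 200:
                    return True
        except OSError:
            pass  # Not accepting requests yet; keep polling until the deadline.
        time.sleep(0.25)
    return False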