From e3965cf35aac00d4e24998c8a3d0093ae1d98bd3 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sun, 25 Feb 2024 22:48:33 +0100 Subject: [PATCH] server: tests - slow inference causes timeout on the CI (#5715) * server: tests - longer inference timeout for CI --- common/sampling.cpp | 2 +- examples/server/tests/features/steps/steps.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index de4331a1182d6..e67096bea6932 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -266,7 +266,7 @@ static llama_token llama_sampling_sample_impl( // } //} - LOG("sampled token: %5d: '%s'\n", id, llama_token_to_piece(ctx_main, id).c_str()); + //LOG("sampled token: %5d: '%s'\n", id, llama_token_to_piece(ctx_main, id).c_str()); } } diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 8e4babf204f8a..ad87fcb820aa8 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -699,6 +699,8 @@ async def wait_for_health_status(context, if context.debug: print(f"Starting checking for health for expected_health_status={expected_health_status}") timeout = 3 # seconds + if expected_health_status == 'ok': + timeout = 10 # CI slow inference interval = 0.5 counter = 0 async with aiohttp.ClientSession() as session: @@ -736,7 +738,7 @@ async def wait_for_health_status(context, if n_completions > 0: return - assert False, 'timeout exceeded' + assert False, f'{expected_health_status} timeout exceeded {counter}s>={timeout}' def assert_embeddings(embeddings):