Skip to content

Commit

Permalink
Update new chunked prefill distributed test to include non-Ray
Browse files: browse the repository at this point in the history
  • Loading branch information
njhill committed Apr 15, 2024
1 parent 1938c35 commit 56a1ad4
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions tests/distributed/test_chunked_prefill_distributed.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,6 +27,7 @@
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("max_tokens", [5])
@pytest.mark.parametrize("chunked_prefill_token_size", [16])
@pytest.mark.parametrize("worker_use_ray", [False, True])
def test_models(
hf_runner,
vllm_runner,
Expand All @@ -35,6 +36,7 @@ def test_models(
dtype: str,
max_tokens: int,
chunked_prefill_token_size: int,
worker_use_ray: bool,
) -> None:
# Add a chunked prefill config.
max_num_seqs = min(chunked_prefill_token_size, 256)
Expand All @@ -53,6 +55,7 @@ def test_models(
max_num_seqs=max_num_seqs,
enable_chunked_prefill=enable_chunked_prefill,
max_num_batched_tokens=max_num_batched_tokens,
worker_use_ray=worker_use_ray,
)
vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
del vllm_model
Expand Down

0 comments on commit 56a1ad4

Please sign in to comment.