From 8bf54f72ba905154920f0662b1aaf6459a5ec0c0 Mon Sep 17 00:00:00 2001 From: Tyler Osterberg Date: Fri, 9 Feb 2024 13:58:21 -0800 Subject: [PATCH] [ci] Fix gpt-j timeout issues in inf2 integration (#1535) --- tests/integration/llm/prepare.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/tests/integration/llm/prepare.py b/tests/integration/llm/prepare.py index 7246d458d..c656e15f9 100644 --- a/tests/integration/llm/prepare.py +++ b/tests/integration/llm/prepare.py @@ -381,8 +381,7 @@ "option.tensor_parallel_degree": 2, "option.n_positions": 512, "option.dtype": "fp16", - "option.model_loading_timeout": 600, - "option.enable_streaming": False + "option.model_loading_timeout": 600 }, "gpt2-quantize": { "option.model_id": "gpt2", @@ -391,8 +390,7 @@ "option.n_positions": 512, "option.dtype": "fp16", "option.model_loading_timeout": 600, - "option.quantize": "static_int8", - "option.enable_streaming": False + "option.quantize": "static_int8" }, "opt-1.3b": { "option.model_id": "s3://djl-llm/opt-1.3b/", @@ -400,17 +398,15 @@ "option.tensor_parallel_degree": 4, "option.n_positions": 512, "option.dtype": "fp16", - "option.model_loading_timeout": 600, - "option.enable_streaming": False + "option.model_loading_timeout": 600 }, "gpt-j-6b": { "option.model_id": "s3://djl-llm/gpt-j-6b/", "batch_size": 4, "option.tensor_parallel_degree": 8, - "option.n_positions": 1024, + "option.n_positions": 512, "option.dtype": "fp32", - "option.model_loading_timeout": 900, - "option.enable_streaming": False + "option.model_loading_timeout": 1200 }, "pythia-2.8b": { "option.model_id": "s3://djl-llm/pythia-2.8b/", @@ -418,8 +414,7 @@ "option.tensor_parallel_degree": 2, "option.n_positions": 512, "option.dtype": "fp16", - "option.model_loading_timeout": 900, - "option.enable_streaming": False + "option.model_loading_timeout": 900 }, "open-llama-7b": { "option.model_id": "s3://djl-llm/open-llama-7b/", @@ -428,8 +423,7 @@ "option.n_positions": 512, "option.dtype": "fp16", "option.neuron_optimize_level": 1, - "option.model_loading_timeout": 1200, - "option.enable_streaming": False + "option.model_loading_timeout": 1200 }, "bloom-7b1": { "option.model_id": "s3://djl-llm/bloom-7b1/", @@ -437,8 +431,7 @@ "option.tensor_parallel_degree": 4, "option.n_positions": 256, "option.dtype": "fp16", - "option.model_loading_timeout": 720, - "option.enable_streaming": False + "option.model_loading_timeout": 720 }, "llama-7b-split": { "option.model_id": "s3://djl-llm/llama-2-7b-split-inf2/split-model/",