diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 5b4a786305e1f..be8807df0b098 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -140,14 +140,13 @@ steps:
   working_dir: "/vllm-workspace/examples"
   mirror_hardwares: [amd]
   commands:
-  # install aws cli for llava_example.py
   # install tensorizer for tensorize_vllm_model.py
   - pip install awscli tensorizer
   - python3 offline_inference.py
   - python3 cpu_offload.py
   - python3 offline_inference_with_prefix.py
   - python3 llm_engine_example.py
-  - python3 llava_example.py
+  - python3 offline_inference_vision_language.py
   - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors

 - label: Inputs Test
diff --git a/docs/source/models/vlm.rst b/docs/source/models/vlm.rst
index ef4ce0d44a162..a385605c9f8f6 100644
--- a/docs/source/models/vlm.rst
+++ b/docs/source/models/vlm.rst
@@ -73,7 +73,7 @@ To pass an image to the model, note the following in :class:`vllm.inputs.PromptI
     generated_text = o.outputs[0].text
     print(generated_text)

-A code example can be found in `examples/llava_example.py `_.
+A code example can be found in `examples/offline_inference_vision_language.py `_.

 Online OpenAI Vision API Compatible Inference
 ---------------------------------------------
diff --git a/examples/api_client.py b/examples/api_client.py
index 27a2a08b7b0c3..49a085febdc57 100644
--- a/examples/api_client.py
+++ b/examples/api_client.py
@@ -31,7 +31,10 @@ def post_http_request(prompt: str,
         "max_tokens": 16,
         "stream": stream,
     }
-    response = requests.post(api_url, headers=headers, json=pload, stream=True)
+    response = requests.post(api_url,
+                             headers=headers,
+                             json=pload,
+                             stream=stream)
     return response