Commit f336c59

[serve][doc] update serve vllm openai example for latest vllm version (#50192)


## Why are these changes needed?

The example at https://docs.ray.io/en/latest/serve/tutorials/vllm-example.html currently doesn't work out of the box with the latest vLLM versions: recent releases moved `LoRAModulePath` and `PromptAdapterPath` into `vllm.entrypoints.openai.serving_models`, and `OpenAIServingChat` is now constructed from an `OpenAIServingModels` instance instead of a list of served model names.
## Related issue number
N/A

## Checks

- [x] I've signed off every commit (by using the -s flag, i.e., `git
commit -s`) in this PR.
- [x] I've run `scripts/format.sh` to lint the changes in this PR.
- [x] I've included any doc changes needed for
https://docs.ray.io/en/master/.
- [ ] I've added any new APIs to the API Reference. For example, if I
added a method in Tune, I've added it in `doc/source/tune/api/` under
the corresponding `.rst` file.
- [x] I've made sure the tests are passing. Note that there might be a
few flaky tests, see the recent failures at https://flakey-tests.ray.io/
- Testing Strategy
   - [ ] Unit tests
   - [ ] Release tests
   - [x] This PR is not tested :(

---------

Signed-off-by: Eric Tang <erictang000@gmail.com>
erictang000 authored Feb 6, 2025
1 parent 403096a commit f336c59
Showing 1 changed file with 20 additions and 8 deletions.
doc/source/serve/doc_code/vllm_openai_example.py
```diff
@@ -17,7 +17,13 @@
     ErrorResponse,
 )
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
-from vllm.entrypoints.openai.serving_engine import LoRAModulePath, PromptAdapterPath
+from vllm.entrypoints.openai.serving_models import (
+    BaseModelPath,
+    LoRAModulePath,
+    PromptAdapterPath,
+    OpenAIServingModels,
+)
+
 from vllm.utils import FlexibleArgumentParser
 from vllm.entrypoints.logger import RequestLogger
```
```diff
@@ -67,19 +73,25 @@ async def create_chat_completion(
         if not self.openai_serving_chat:
             model_config = await self.engine.get_model_config()
             # Determine the name of the served model for the OpenAI client.
-            if self.engine_args.served_model_name is not None:
-                served_model_names = self.engine_args.served_model_name
-            else:
-                served_model_names = [self.engine_args.model]
-            self.openai_serving_chat = OpenAIServingChat(
+            models = OpenAIServingModels(
                 self.engine,
                 model_config,
-                served_model_names,
-                self.response_role,
+                [
+                    BaseModelPath(
+                        name=self.engine_args.model, model_path=self.engine_args.model
+                    )
+                ],
                 lora_modules=self.lora_modules,
                 prompt_adapters=self.prompt_adapters,
+            )
+            self.openai_serving_chat = OpenAIServingChat(
+                self.engine,
+                model_config,
+                models,
+                self.response_role,
                 request_logger=self.request_logger,
                 chat_template=self.chat_template,
+                chat_template_content_format="auto",
             )
         logger.info(f"Request: {request}")
         generator = await self.openai_serving_chat.create_chat_completion(
```
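As a sanity check, a sketch of how the updated example can be exercised once deployed (this assumes the Serve app from the tutorial is running locally on the default port 8000 and exposes the OpenAI-compatible `/v1` route; the `base_url`, `api_key`, and model name are placeholder values, and the model must match what was passed as `engine_args.model`):

```python
from openai import OpenAI

# Hypothetical values: adjust base_url and model to match your deployment.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="fake-key")
response = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # must match engine_args.model
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.choices[0].message.content)
```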
