[Core] Support Lora lineage and base model metadata management #6315

Merged (7 commits) on Sep 20, 2024
Changes from 1 commit
Fix unit tests reference on served_model_names
Jeffwan committed Sep 19, 2024
commit 0b8b2cacfbeb0b34ef08a47a4361a5917b83364f
tests/entrypoints/openai/test_serving_chat.py: 5 additions & 2 deletions
@@ -7,11 +7,14 @@
 from vllm.engine.multiprocessing.client import MQLLMEngineClient
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
+from vllm.entrypoints.openai.serving_engine import BaseModelPath
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
 MODEL_NAME = "openai-community/gpt2"
 CHAT_TEMPLATE = "Dummy chat template for testing {}"
 
+BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
+
 
 @dataclass
 class MockModelConfig:
@@ -37,7 +40,7 @@ async def _async_serving_chat_init():
 
     serving_completion = OpenAIServingChat(engine,
                                            model_config,
-                                           served_model_names=[MODEL_NAME],
+                                           BASE_MODEL_PATHS,
                                            response_role="assistant",
                                            chat_template=CHAT_TEMPLATE,
                                            lora_modules=None,
@@ -58,7 +61,7 @@ def test_serving_chat_should_set_correct_max_tokens():
 
     serving_chat = OpenAIServingChat(mock_engine,
                                      MockModelConfig(),
-                                     served_model_names=[MODEL_NAME],
+                                     BASE_MODEL_PATHS,
                                      response_role="assistant",
                                      chat_template=CHAT_TEMPLATE,
                                      lora_modules=None,
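For orientation, this commit replaces the bare served_model_names list with BaseModelPath entries that pair each public model name with the path it is served from. A minimal standalone sketch of that pairing, using only the BaseModelPath(name=..., model_path=...) constructor visible in this diff (the alias entry is hypothetical and not part of the test):

from vllm.entrypoints.openai.serving_engine import BaseModelPath

MODEL_NAME = "openai-community/gpt2"

# As in the test above: the served name and the underlying model path coincide.
BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]

# A deployment could instead expose an alias while keeping the same weights
# (hypothetical name, for illustration only).
ALIAS_PATHS = [BaseModelPath(name="gpt2-alias", model_path=MODEL_NAME)]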
vllm/entrypoints/openai/run_batch.py: 6 additions & 1 deletion
@@ -20,6 +20,7 @@
 # yapf: enable
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
+from vllm.entrypoints.openai.serving_engine import BaseModelPath
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import FlexibleArgumentParser, random_uuid
 from vllm.version import __version__ as VLLM_VERSION
@@ -196,6 +197,10 @@ async def main(args):
         engine_args, usage_context=UsageContext.OPENAI_BATCH_RUNNER)
 
     model_config = await engine.get_model_config()
+    base_model_paths = [
+        BaseModelPath(name=name, model_path=args.model)
+        for name in served_model_names
+    ]
 
     if args.disable_log_requests:
         request_logger = None
@@ -206,7 +211,7 @@
     openai_serving_chat = OpenAIServingChat(
         engine,
         model_config,
-        served_model_names,
+        base_model_paths,
         args.response_role,
         lora_modules=None,
         prompt_adapters=None,
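The new list comprehension in main() maps every entry of served_model_names onto the single args.model path, so if several served names are configured they all resolve to the same underlying model. A standalone sketch of the same expansion (the values below are hypothetical stand-ins for args.model and served_model_names):

from vllm.entrypoints.openai.serving_engine import BaseModelPath

# Hypothetical inputs standing in for args.model and served_model_names.
model = "openai-community/gpt2"
served_model_names = ["gpt2", "gpt2-batch"]

base_model_paths = [
    BaseModelPath(name=name, model_path=model) for name in served_model_names
]
# Both entries share model_path="openai-community/gpt2"; only the public name differs.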
vllm/entrypoints/openai/serving_engine.py: 0 additions & 1 deletion
@@ -495,4 +495,3 @@ async def unload_lora_adapter(
 
     def _is_model_supported(self, model_name):
         return any(model.name == model_name for model in self.base_model_paths)
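With that, _is_model_supported matches an incoming model name against the name field of each BaseModelPath instead of a plain list of strings. A self-contained sketch of the lookup, with BaseModelPath approximated as a two-field dataclass (the real class lives in serving_engine.py and may carry more metadata):

from dataclasses import dataclass
from typing import List


@dataclass
class BaseModelPath:
    # Approximation of the fields this PR relies on.
    name: str        # public model name that clients send in requests
    model_path: str  # underlying model the name resolves to


def is_model_supported(model_name: str, base_model_paths: List[BaseModelPath]) -> bool:
    # Mirrors the _is_model_supported method shown above.
    return any(model.name == model_name for model in base_model_paths)


paths = [BaseModelPath(name="gpt2-alias", model_path="openai-community/gpt2")]
assert is_model_supported("gpt2-alias", paths)
assert not is_model_supported("unknown-model", paths)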