From 3d11a18054c7b13fdcada64956df6ab66f0c13ff Mon Sep 17 00:00:00 2001 From: Vladislav Kruglikov Date: Mon, 9 Sep 2024 09:53:55 +0000 Subject: [PATCH 1/4] Create initial commit --- vllm/model_executor/models/commandr.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/commandr.py b/vllm/model_executor/models/commandr.py index be7f19d15b623..5c296a721c0f7 100644 --- a/vllm/model_executor/models/commandr.py +++ b/vllm/model_executor/models/commandr.py @@ -47,6 +47,8 @@ from vllm.model_executor.utils import set_weight_attrs from vllm.sequence import IntermediateTensors +from .interfaces import SupportsLoRA + @torch.compile def layer_norm_func(hidden_states, weight, variance_epsilon): @@ -292,7 +294,8 @@ def forward( return hidden_states -class CohereForCausalLM(nn.Module): +class CohereForCausalLM(nn.Module, SupportsLoRA): + supports_lora = True packed_modules_mapping = { "qkv_proj": [ From 2830c7afda5a2c1b65a160742c7d38f993dd61b4 Mon Sep 17 00:00:00 2001 From: Vladislav Kruglikov Date: Mon, 9 Sep 2024 12:45:05 +0000 Subject: [PATCH 2/4] Correct adapter usage for jamba model --- vllm/model_executor/models/jamba.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py index 73be7ffed0f89..578fc18ebae9c 100644 --- a/vllm/model_executor/models/jamba.py +++ b/vllm/model_executor/models/jamba.py @@ -37,6 +37,8 @@ from vllm.sequence import IntermediateTensors from vllm.worker.model_runner import (_BATCH_SIZES_TO_CAPTURE, _get_graph_batch_size) +from .interfaces import SupportsLoRA + KVCache = Tuple[torch.Tensor, torch.Tensor] @@ -539,7 +541,9 @@ def forward( return hidden_states -class JambaForCausalLM(nn.Module, HasInnerState): +class JambaForCausalLM(nn.Module, HasInnerState, SupportsLoRA): + supports_lora = True + packed_modules_mapping = { "qkv_proj": [ "q_proj", From c5c91135a8fec7054638bdfed339a825696faf2b Mon Sep 17 00:00:00 2001 From: Vladislav Kruglikov Date: Mon, 9 Sep 2024 12:49:06 +0000 Subject: [PATCH 3/4] Make pretty --- vllm/model_executor/models/jamba.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py index 578fc18ebae9c..a9b5d7b3b38b9 100644 --- a/vllm/model_executor/models/jamba.py +++ b/vllm/model_executor/models/jamba.py @@ -37,8 +37,8 @@ from vllm.sequence import IntermediateTensors from vllm.worker.model_runner import (_BATCH_SIZES_TO_CAPTURE, _get_graph_batch_size) -from .interfaces import SupportsLoRA +from .interfaces import SupportsLoRA KVCache = Tuple[torch.Tensor, torch.Tensor] From dfd8cfd9790e4db1529fb7a985b9c1856ef608da Mon Sep 17 00:00:00 2001 From: Vladislav Kruglikov Date: Mon, 9 Sep 2024 13:09:44 +0000 Subject: [PATCH 4/4] Remove duplicate code --- vllm/model_executor/models/commandr.py | 2 -- vllm/model_executor/models/jamba.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/vllm/model_executor/models/commandr.py b/vllm/model_executor/models/commandr.py index 5c296a721c0f7..649dc798d22dc 100644 --- a/vllm/model_executor/models/commandr.py +++ b/vllm/model_executor/models/commandr.py @@ -295,8 +295,6 @@ def forward( class CohereForCausalLM(nn.Module, SupportsLoRA): - supports_lora = True - packed_modules_mapping = { "qkv_proj": [ "q_proj", diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py index a9b5d7b3b38b9..29dd09afac5ad 100644 --- a/vllm/model_executor/models/jamba.py +++ b/vllm/model_executor/models/jamba.py @@ -542,8 +542,6 @@ def forward( class JambaForCausalLM(nn.Module, HasInnerState, SupportsLoRA): - supports_lora = True - packed_modules_mapping = { "qkv_proj": [ "q_proj",