
Commit

Fix model loading
WoosukKwon committed Jul 15, 2024
1 parent 976de20 commit ec15140
Showing 2 changed files with 3 additions and 7 deletions.
6 changes: 3 additions & 3 deletions vllm/model_executor/layers/fused_moe/layer.py
@@ -84,9 +84,9 @@ def forward_cuda(
         router_logits: torch.Tensor,
         top_k: int,
         renormalize: bool,
-        use_grouped_topk: bool = False,
-        num_expert_group: Optional[int] = None,
-        topk_group: Optional[int] = None,
+        use_grouped_topk: bool,
+        num_expert_group: Optional[int],
+        topk_group: Optional[int],
     ) -> torch.Tensor:
         from vllm.model_executor.layers.fused_moe.fused_moe import fused_moe
         return fused_moe(x,
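
With the defaults removed, forward_cuda no longer silently falls back to use_grouped_topk=False and None for the grouping parameters; every call site must pass them explicitly. A minimal runnable sketch of that pattern, assuming a simplified stand-in for the method (only the parameters visible in this hunk are reproduced; the tensor shapes and trivial body are hypothetical, not taken from the commit):

    from typing import Optional

    import torch


    def forward_cuda_sketch(
        x: torch.Tensor,
        router_logits: torch.Tensor,
        top_k: int,
        renormalize: bool,
        use_grouped_topk: bool,            # no default any more
        num_expert_group: Optional[int],   # no default any more
        topk_group: Optional[int],         # no default any more
    ) -> torch.Tensor:
        # Stand-in body; the real method dispatches to fused_moe(x, ...).
        return x


    hidden_states = torch.randn(4, 16)
    logits = torch.randn(4, 8)
    out = forward_cuda_sketch(
        x=hidden_states,
        router_logits=logits,
        top_k=2,
        renormalize=True,
        use_grouped_topk=False,  # callers must now spell these out
        num_expert_group=None,
        topk_group=None,
    )
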
4 changes: 0 additions & 4 deletions vllm/model_executor/model_loader/loader.py
@@ -279,10 +279,6 @@ def load_model(self, *, model_config: ModelConfig,
                 quant_method = getattr(module, "quant_method", None)
                 if quant_method is not None:
                     quant_method.process_weights_after_loading(module)
-                # FIXME: Remove this after Mixtral is updated
-                # to use quant_method.
-                if hasattr(module, "process_weights_after_loading"):
-                    module.process_weights_after_loading()
         return model.eval()


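With the module-level fallback removed, weight post-processing in load_model goes exclusively through each module's quant_method hook. A small runnable sketch of that contract, assuming hypothetical DummyQuantMethod and DummyLinear classes (only the process_weights_after_loading hook name and the post-loading loop come from the diff above):

    import torch


    class DummyQuantMethod:
        # Illustrative quantization method exposing the hook the loader calls.
        def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
            # e.g. repack or cast weights once loading has finished
            for param in layer.parameters():
                param.data = param.data.contiguous()


    class DummyLinear(torch.nn.Linear):
        def __init__(self, in_features: int, out_features: int) -> None:
            super().__init__(in_features, out_features)
            self.quant_method = DummyQuantMethod()


    # What the remaining loader code does for every module after loading weights:
    model = torch.nn.Sequential(DummyLinear(8, 8), torch.nn.ReLU())
    for module in model.modules():
        quant_method = getattr(module, "quant_method", None)
        if quant_method is not None:
            quant_method.process_weights_after_loading(module)
    model = model.eval()
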
