diff --git a/tests/entrypoints/llm/test_encode.py b/tests/entrypoints/llm/test_encode.py index 41163809237e9..3906ad766e0b6 100644 --- a/tests/entrypoints/llm/test_encode.py +++ b/tests/entrypoints/llm/test_encode.py @@ -105,3 +105,10 @@ def test_multiple_pooling_params(llm: LLM): # pooling_params is None, default params should be applied outputs = llm.encode(PROMPTS, pooling_params=None) assert len(PROMPTS) == len(outputs) + + +@pytest.mark.skip_global_cleanup +def test_right_side_truncation(llm: LLM): + # Embeddings models should truncate the end of the prompt + tokenizer = llm.get_tokenizer() + assert tokenizer.truncation_side == "right" diff --git a/vllm/config.py b/vllm/config.py index 65cb0d85f172a..4ffc13a05e026 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -357,6 +357,10 @@ def __init__(self, supported_tasks, task = self._resolve_task(task, self.hf_config) self.supported_tasks = supported_tasks self.task: Final = task + if self.task in ("draft", "generate"): + self.truncation_side = "left" + else: + self.truncation_side = "right" self.pooler_config = self._init_pooler_config(override_pooler_config) self.logits_processor_pattern = logits_processor_pattern diff --git a/vllm/transformers_utils/tokenizer_group/__init__.py b/vllm/transformers_utils/tokenizer_group/__init__.py index d400276796996..09569c564a58d 100644 --- a/vllm/transformers_utils/tokenizer_group/__init__.py +++ b/vllm/transformers_utils/tokenizer_group/__init__.py @@ -24,7 +24,8 @@ def init_tokenizer_from_configs(model_config: ModelConfig, max_input_length=None, tokenizer_mode=model_config.tokenizer_mode, trust_remote_code=model_config.trust_remote_code, - revision=model_config.tokenizer_revision) + revision=model_config.tokenizer_revision, + truncation_side=model_config.truncation_side) return get_tokenizer_group(parallel_config.tokenizer_pool_config, **init_kwargs)