Skip tokenize/detokenize when it is disabled by arg --skip-tokenizer-init (#367)

* switching detokenize flag to be False
* detokenize = False for benchmarks
* restoring default in main vllm code for detokenize
* removing extra spaces
* moving detokenize to flag
* adding support for token ids

---------

Co-authored-by: maleksan85 <maleksan@amd.com>
ROCm · Jan 22, 2025 · b5839a1 · b5839a1
1 parent 78d7d30
commit b5839a1
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 2 deletions.
diff --git a/benchmarks/profiling/benchmark_throughput.py b/benchmarks/profiling/benchmark_throughput.py
@@ -272,7 +272,8 @@ def main(args: argparse.Namespace):
         args.tokenizer, trust_remote_code=args.trust_remote_code)
     if args.dataset is None:
         # Synthesize a prompt with the given input length.
-        prompt = "hi" * (args.input_len - 1)
+        prompt = { "prompt_token_ids" : [42] * (args.input_len - 1) } \
+            if args.skip_tokenizer_init else "hi" * (args.input_len - 1)
         requests = [(prompt, args.input_len, args.output_len)
                     for _ in range(args.num_prompts)]
     else:

diff --git a/vllm/engine/output_processor/multi_step.py b/vllm/engine/output_processor/multi_step.py
@@ -144,7 +144,7 @@ def process_outputs(self,
     def _process_decode_and_stop(self, seq: Sequence,
                                  sampling_params: SamplingParams) -> None:
         new_char_count = 0
-        if sampling_params.detokenize:
+        if sampling_params.detokenize and self.detokenizer:
             new_char_count = self.detokenizer.decode_sequence_inplace(
                 seq, sampling_params)