From 4cdfa2e94003014640d0034122faab5400898dd1 Mon Sep 17 00:00:00 2001
From: Cody Yu
Date: Mon, 5 Aug 2024 15:07:45 -0700
Subject: [PATCH] from_numpy

---
 vllm/attention/backends/flash_attn.py | 2 +-
 vllm/attention/backends/flashinfer.py | 2 +-
 vllm/attention/backends/utils.py      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/attention/backends/flash_attn.py b/vllm/attention/backends/flash_attn.py
index 5b733ba624e62..8a895bbdc2dd7 100644
--- a/vllm/attention/backends/flash_attn.py
+++ b/vllm/attention/backends/flash_attn.py
@@ -310,7 +310,7 @@ def build(self, seq_lens: List[int], query_lens: List[int],
             for i, block_table in enumerate(self.block_tables):
                 if block_table:
                     input_block_tables[i, :len(block_table)] = block_table
-            block_tables = torch.tensor(input_block_tables).to(
+            block_tables = torch.from_numpy(input_block_tables).to(
                 device=device, non_blocking=True)
         else:
             block_tables = make_tensor_with_pad(
diff --git a/vllm/attention/backends/flashinfer.py b/vllm/attention/backends/flashinfer.py
index 77447a272a6c8..03188164a9637 100644
--- a/vllm/attention/backends/flashinfer.py
+++ b/vllm/attention/backends/flashinfer.py
@@ -357,7 +357,7 @@ def build(self, seq_lens: List[int], query_lens: List[int],
             for i, block_table in enumerate(self.block_tables):
                 if block_table:
                     input_block_tables[i, :len(block_table)] = block_table
-            block_tables = torch.tensor(input_block_tables).to(
+            block_tables = torch.from_numpy(input_block_tables).to(
                 device, non_blocking=True)
 
         last_paged_kv_indptr = self.paged_kv_indptr[-1]
diff --git a/vllm/attention/backends/utils.py b/vllm/attention/backends/utils.py
index 7302dad2e3885..f7cb2ee996501 100644
--- a/vllm/attention/backends/utils.py
+++ b/vllm/attention/backends/utils.py
@@ -181,7 +181,7 @@ def build(self, seq_lens: List[int], query_lens: List[int],
             for i, block_table in enumerate(self.block_tables):
                 if block_table:
                     input_block_tables[i, :len(block_table)] = block_table
-            block_tables = torch.tensor(input_block_tables).to(
+            block_tables = torch.from_numpy(input_block_tables).to(
                 device, non_blocking=True)
         else:
             block_tables = make_tensor_with_pad(
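
The swap from torch.tensor() to torch.from_numpy() matters here because
torch.tensor(ndarray) always copies the numpy buffer into a fresh CPU
tensor, while torch.from_numpy(ndarray) wraps the existing buffer with
zero copy, leaving the host-to-device .to(..., non_blocking=True) call as
the only data movement. Below is a minimal sketch of the difference; the
buffer name, shape, and dtype are illustrative assumptions, not values
taken from the vLLM code above:

    import numpy as np
    import torch

    # Stand-in for the runner's pre-allocated block-table buffer
    # (shape and dtype are assumptions for illustration only).
    input_block_tables = np.zeros((256, 128), dtype=np.int32)

    # torch.tensor() copies the data into a new CPU tensor first,
    # so the tensor's storage is a different allocation.
    copied = torch.tensor(input_block_tables)
    assert copied.data_ptr() != input_block_tables.ctypes.data

    # torch.from_numpy() shares the numpy buffer: no CPU-side copy.
    shared = torch.from_numpy(input_block_tables)
    assert shared.data_ptr() == input_block_tables.ctypes.data

    # In either case the GPU copy happens inside .to(); from_numpy()
    # just avoids the redundant CPU-to-CPU copy beforehand, e.g.:
    # block_tables = shared.to("cuda", non_blocking=True)

Because from_numpy() aliases the buffer, later writes to the numpy array
would be visible through the CPU tensor; that is safe here since the
tensor is immediately transferred to the device.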