Removing the contiguous requirement, as the kernel supports arbitrary strides

Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
vllm-project · Feb 13, 2025 · 2205c07 · 2205c07
1 parent bbab81f
commit 2205c07
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -477,7 +477,7 @@ def w8a8_block_fp8_matmul(
     assert triton.cdiv(A.shape[-1], block_k) == As.shape[-1]
     M = A.numel() // A.shape[-1]
 
-    assert B.ndim == 2 and B.is_contiguous() and Bs.ndim == 2
+    assert B.ndim == 2 and Bs.ndim == 2
     N, K = B.shape
     assert triton.cdiv(N, block_n) == Bs.shape[0]
     assert triton.cdiv(K, block_k) == Bs.shape[1]