Skip to content

Commit

Permalink
Remove F32 casts of q/k/v and the attention mask in the text model's self-attention
Browse files Browse the repository at this point in the history
  • Loading branch information
EricLBuehler committed Oct 24, 2024
1 parent f34fdb9 commit 5b627f1
Showing 1 changed file with 4 additions and 6 deletions.
10 changes: 4 additions & 6 deletions mistralrs-core/src/vision_models/mllama/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,10 @@ impl MLlamaTextSelfAttention {

let mut attn_output = Sdpa
.run_attention(
&q.contiguous()?.to_dtype(DType::F32)?,
&k.contiguous()?.to_dtype(DType::F32)?,
&v.contiguous()?.to_dtype(DType::F32)?,
attention_mask
.map(|m| m.to_dtype(DType::F32).unwrap())
.as_ref(),
&q.contiguous()?,
&k.contiguous()?,
&v.contiguous()?,
attention_mask,
None,
&self.sdpa_params,
)?
Expand Down

0 comments on commit 5b627f1

Please sign in to comment.