diff --git a/mistralrs-core/src/vision_models/phi3_inputs_processor.rs b/mistralrs-core/src/vision_models/phi3_inputs_processor.rs
index 6b66af3bc9..4a80562320 100644
--- a/mistralrs-core/src/vision_models/phi3_inputs_processor.rs
+++ b/mistralrs-core/src/vision_models/phi3_inputs_processor.rs
@@ -379,6 +379,15 @@ impl ImagePreProcessor for Phi3InputsProcessor {
 
             let hd_image = Self::hd_transform(image, config.num_crops.expect("Need `num_crops`"));
 
+            let transforms_hd2 = Transforms {
+                input: &ToTensor,
+                inner_transforms: &[],
+            };
+
+            // (3,h,w)
+            let hd_image2 = hd_image.apply(transforms_hd2, device)?;
+            dbg!(hd_image2);
+
             // Both hd and global have a normalization
             // Transforms for the HD image
             let transforms_hd = Transforms {
@@ -394,7 +403,7 @@ impl ImagePreProcessor for Phi3InputsProcessor {
 
             // Resize with bicubic interpolation
             // (3,336,336)
-            let global_image = hd_image.unsqueeze(0)?.interpolate2d(336, 336)?.squeeze(0)?;
+            let global_image = hd_image.unsqueeze(0)?.interpolate2d(336, 336)?;
 
             let (_, h, w) = hd_image.dims3()?;
             let num_image_tokens = ((h as f32 / 336. * w as f32 / 336. + 1.) * 144.
diff --git a/mistralrs-vision/src/transforms.rs b/mistralrs-vision/src/transforms.rs
index a4aeb27961..f6c462a5a1 100644
--- a/mistralrs-vision/src/transforms.rs
+++ b/mistralrs-vision/src/transforms.rs
@@ -20,7 +20,7 @@ impl ToTensor {
                 )
             }
             let row = Tensor::cat(&row_accum, 0)?;
-            accum.push(row.reshape((row.dim(1)?, ()))?.unsqueeze(1)?);
+            accum.push(row.t()?.unsqueeze(1)?);
         }
         let t = Tensor::cat(&accum, 1)?.to_device(device)?;
         // Rescale to between 0 and 1