From 505e3361479e83574cac51d6a61b40ff17c533f4 Mon Sep 17 00:00:00 2001
From: Cameron Hart <cameron.hart@gmail.com>
Date: Sun, 3 Dec 2023 22:01:48 +1300
Subject: [PATCH] Use BVec4A as the Vec4 mask type even when SIMD is
 unavailable.

When the scalar-math feature is enabled, BVec4 is still used as the mask type.
---
 codegen/templates/vec.rs.tera | 16 ++++++++++-----
 src/f32/coresimd/vec4.rs      |  4 +++-
 src/f32/scalar/vec4.rs        | 37 ++++++++++++++++++++---------------
 src/f32/sse2/vec4.rs          |  4 +++-
 src/f32/wasm32/vec4.rs        |  4 +++-
 5 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/codegen/templates/vec.rs.tera b/codegen/templates/vec.rs.tera
index 6938f557..41e470f5 100644
--- a/codegen/templates/vec.rs.tera
+++ b/codegen/templates/vec.rs.tera
@@ -2,9 +2,8 @@
 
 // Generated from {{template_path}} template. Edit the template, not the generated file.
 
-{% if is_scalar %}
-    {% set mask_t = "BVec" ~ dim %}
-{% else %}
+{% set mask_t = "BVec" ~ dim ~ "A" %}
+{% if not is_scalar %}
     {% set is_simd = true %}
     {% if is_sse2 %}
         {% set simd_t = "__m128" %}
@@ -13,7 +12,6 @@
     {% elif is_coresimd %}
         {% set simd_t = "f32x4" %}
     {% endif %}
-    {% set mask_t = "BVec" ~ dim ~ "A" %}
 {% endif %}
 
 {% if scalar_t == "f32" or scalar_t == "f64" %}
@@ -120,8 +118,16 @@
     {% set zero = "0" %}
 {% endif %}
 
+{% if mask_t == "BVec4A" and is_scalar %}
+    #[cfg(feature = "scalar-math")]
+    use crate::BVec4 as BVec4A;
+    #[cfg(not(feature = "scalar-math"))]
+    use crate::BVec4A;
+{% else %}
+    use crate::{{ mask_t }};
+{% endif %}
+
 use crate::{
-    {{ mask_t }},
     {% if self_t != vec2_t %}
         {{ vec2_t }},
     {% endif %}
diff --git a/src/f32/coresimd/vec4.rs b/src/f32/coresimd/vec4.rs
index bbfa0d5a..dcf29722 100644
--- a/src/f32/coresimd/vec4.rs
+++ b/src/f32/coresimd/vec4.rs
@@ -1,6 +1,8 @@
 // Generated from vec.rs.tera template. Edit the template, not the generated file.
 
-use crate::{coresimd::*, f32::math, BVec4A, Vec2, Vec3, Vec3A};
+use crate::BVec4A;
+
+use crate::{coresimd::*, f32::math, Vec2, Vec3, Vec3A};
 
 #[cfg(not(target_arch = "spirv"))]
 use core::fmt;
diff --git a/src/f32/scalar/vec4.rs b/src/f32/scalar/vec4.rs
index 450d8b78..ecc92e2c 100644
--- a/src/f32/scalar/vec4.rs
+++ b/src/f32/scalar/vec4.rs
@@ -1,6 +1,11 @@
 // Generated from vec.rs.tera template. Edit the template, not the generated file.
 
-use crate::{f32::math, BVec4, Vec2, Vec3, Vec3A};
+#[cfg(feature = "scalar-math")]
+use crate::BVec4 as BVec4A;
+#[cfg(not(feature = "scalar-math"))]
+use crate::BVec4A;
+
+use crate::{f32::math, Vec2, Vec3, Vec3A};
 
 #[cfg(not(target_arch = "spirv"))]
 use core::fmt;
@@ -109,7 +114,7 @@ impl Vec4 {
     /// A true element in the mask uses the corresponding element from `if_true`, and false
     /// uses the element from `if_false`.
     #[inline]
-    pub fn select(mask: BVec4, if_true: Self, if_false: Self) -> Self {
+    pub fn select(mask: BVec4A, if_true: Self, if_false: Self) -> Self {
         Self {
             x: if mask.x { if_true.x } else { if_false.x },
             y: if mask.y { if_true.y } else { if_false.y },
@@ -237,8 +242,8 @@ impl Vec4 {
     /// In other words, this computes `[self.x == rhs.x, self.y == rhs.y, ..]` for all
     /// elements.
     #[inline]
-    pub fn cmpeq(self, rhs: Self) -> BVec4 {
-        BVec4::new(
+    pub fn cmpeq(self, rhs: Self) -> BVec4A {
+        BVec4A::new(
             self.x.eq(&rhs.x),
             self.y.eq(&rhs.y),
             self.z.eq(&rhs.z),
@@ -252,8 +257,8 @@ impl Vec4 {
     /// In other words this computes `[self.x != rhs.x, self.y != rhs.y, ..]` for all
     /// elements.
     #[inline]
-    pub fn cmpne(self, rhs: Self) -> BVec4 {
-        BVec4::new(
+    pub fn cmpne(self, rhs: Self) -> BVec4A {
+        BVec4A::new(
             self.x.ne(&rhs.x),
             self.y.ne(&rhs.y),
             self.z.ne(&rhs.z),
@@ -267,8 +272,8 @@ impl Vec4 {
     /// In other words this computes `[self.x >= rhs.x, self.y >= rhs.y, ..]` for all
     /// elements.
     #[inline]
-    pub fn cmpge(self, rhs: Self) -> BVec4 {
-        BVec4::new(
+    pub fn cmpge(self, rhs: Self) -> BVec4A {
+        BVec4A::new(
             self.x.ge(&rhs.x),
             self.y.ge(&rhs.y),
             self.z.ge(&rhs.z),
@@ -282,8 +287,8 @@ impl Vec4 {
     /// In other words this computes `[self.x > rhs.x, self.y > rhs.y, ..]` for all
     /// elements.
     #[inline]
-    pub fn cmpgt(self, rhs: Self) -> BVec4 {
-        BVec4::new(
+    pub fn cmpgt(self, rhs: Self) -> BVec4A {
+        BVec4A::new(
             self.x.gt(&rhs.x),
             self.y.gt(&rhs.y),
             self.z.gt(&rhs.z),
@@ -297,8 +302,8 @@ impl Vec4 {
     /// In other words this computes `[self.x <= rhs.x, self.y <= rhs.y, ..]` for all
     /// elements.
     #[inline]
-    pub fn cmple(self, rhs: Self) -> BVec4 {
-        BVec4::new(
+    pub fn cmple(self, rhs: Self) -> BVec4A {
+        BVec4A::new(
             self.x.le(&rhs.x),
             self.y.le(&rhs.y),
             self.z.le(&rhs.z),
@@ -312,8 +317,8 @@ impl Vec4 {
     /// In other words this computes `[self.x < rhs.x, self.y < rhs.y, ..]` for all
     /// elements.
     #[inline]
-    pub fn cmplt(self, rhs: Self) -> BVec4 {
-        BVec4::new(
+    pub fn cmplt(self, rhs: Self) -> BVec4A {
+        BVec4A::new(
             self.x.lt(&rhs.x),
             self.y.lt(&rhs.y),
             self.z.lt(&rhs.z),
@@ -387,8 +392,8 @@ impl Vec4 {
     ///
     /// In other words, this computes `[x.is_nan(), y.is_nan(), z.is_nan(), w.is_nan()]`.
     #[inline]
-    pub fn is_nan_mask(self) -> BVec4 {
-        BVec4::new(
+    pub fn is_nan_mask(self) -> BVec4A {
+        BVec4A::new(
             self.x.is_nan(),
             self.y.is_nan(),
             self.z.is_nan(),
diff --git a/src/f32/sse2/vec4.rs b/src/f32/sse2/vec4.rs
index 6e406f1f..3d0246a6 100644
--- a/src/f32/sse2/vec4.rs
+++ b/src/f32/sse2/vec4.rs
@@ -1,6 +1,8 @@
 // Generated from vec.rs.tera template. Edit the template, not the generated file.
 
-use crate::{f32::math, sse2::*, BVec4A, Vec2, Vec3, Vec3A};
+use crate::BVec4A;
+
+use crate::{f32::math, sse2::*, Vec2, Vec3, Vec3A};
 
 #[cfg(not(target_arch = "spirv"))]
 use core::fmt;
diff --git a/src/f32/wasm32/vec4.rs b/src/f32/wasm32/vec4.rs
index 7d98c7db..63ea0d57 100644
--- a/src/f32/wasm32/vec4.rs
+++ b/src/f32/wasm32/vec4.rs
@@ -1,6 +1,8 @@
 // Generated from vec.rs.tera template. Edit the template, not the generated file.
 
-use crate::{f32::math, wasm32::*, BVec4A, Vec2, Vec3, Vec3A};
+use crate::BVec4A;
+
+use crate::{f32::math, wasm32::*, Vec2, Vec3, Vec3A};
 
 #[cfg(not(target_arch = "spirv"))]
 use core::fmt;