From 505e3361479e83574cac51d6a61b40ff17c533f4 Mon Sep 17 00:00:00 2001 From: Cameron Hart Date: Sun, 3 Dec 2023 22:01:48 +1300 Subject: [PATCH] Use BVec4A as the Vec4 mask type even when SIMD is unavailable. When the scalar-math feature is enabled BVec4 is still used. --- codegen/templates/vec.rs.tera | 16 ++++++++++----- src/f32/coresimd/vec4.rs | 4 +++- src/f32/scalar/vec4.rs | 37 ++++++++++++++++++++--------------- src/f32/sse2/vec4.rs | 4 +++- src/f32/wasm32/vec4.rs | 4 +++- 5 files changed, 41 insertions(+), 24 deletions(-) diff --git a/codegen/templates/vec.rs.tera b/codegen/templates/vec.rs.tera index 6938f557..41e470f5 100644 --- a/codegen/templates/vec.rs.tera +++ b/codegen/templates/vec.rs.tera @@ -2,9 +2,8 @@ // Generated from {{template_path}} template. Edit the template, not the generated file. -{% if is_scalar %} - {% set mask_t = "BVec" ~ dim %} -{% else %} +{% set mask_t = "BVec" ~ dim ~ "A" %} +{% if not is_scalar %} {% set is_simd = true %} {% if is_sse2 %} {% set simd_t = "__m128" %} @@ -13,7 +12,6 @@ {% elif is_coresimd %} {% set simd_t = "f32x4" %} {% endif %} - {% set mask_t = "BVec" ~ dim ~ "A" %} {% endif %} {% if scalar_t == "f32" or scalar_t == "f64" %} @@ -120,8 +118,16 @@ {% set zero = "0" %} {% endif %} +{% if mask_t == "BVec4A" and is_scalar %} + #[cfg(feature = "scalar-math")] + use crate::BVec4 as BVec4A; + #[cfg(not(feature = "scalar-math"))] + use crate::BVec4A; +{% else %} + use crate::{{ mask_t }}; +{% endif %} + use crate::{ - {{ mask_t }}, {% if self_t != vec2_t %} {{ vec2_t }}, {% endif %} diff --git a/src/f32/coresimd/vec4.rs b/src/f32/coresimd/vec4.rs index bbfa0d5a..dcf29722 100644 --- a/src/f32/coresimd/vec4.rs +++ b/src/f32/coresimd/vec4.rs @@ -1,6 +1,8 @@ // Generated from vec.rs.tera template. Edit the template, not the generated file. -use crate::{coresimd::*, f32::math, BVec4A, Vec2, Vec3, Vec3A}; +use crate::BVec4A; + +use crate::{coresimd::*, f32::math, Vec2, Vec3, Vec3A}; #[cfg(not(target_arch = "spirv"))] use core::fmt; diff --git a/src/f32/scalar/vec4.rs b/src/f32/scalar/vec4.rs index 450d8b78..ecc92e2c 100644 --- a/src/f32/scalar/vec4.rs +++ b/src/f32/scalar/vec4.rs @@ -1,6 +1,11 @@ // Generated from vec.rs.tera template. Edit the template, not the generated file. -use crate::{f32::math, BVec4, Vec2, Vec3, Vec3A}; +#[cfg(feature = "scalar-math")] +use crate::BVec4 as BVec4A; +#[cfg(not(feature = "scalar-math"))] +use crate::BVec4A; + +use crate::{f32::math, Vec2, Vec3, Vec3A}; #[cfg(not(target_arch = "spirv"))] use core::fmt; @@ -109,7 +114,7 @@ impl Vec4 { /// A true element in the mask uses the corresponding element from `if_true`, and false /// uses the element from `if_false`. #[inline] - pub fn select(mask: BVec4, if_true: Self, if_false: Self) -> Self { + pub fn select(mask: BVec4A, if_true: Self, if_false: Self) -> Self { Self { x: if mask.x { if_true.x } else { if_false.x }, y: if mask.y { if_true.y } else { if_false.y }, @@ -237,8 +242,8 @@ impl Vec4 { /// In other words, this computes `[self.x == rhs.x, self.y == rhs.y, ..]` for all /// elements. #[inline] - pub fn cmpeq(self, rhs: Self) -> BVec4 { - BVec4::new( + pub fn cmpeq(self, rhs: Self) -> BVec4A { + BVec4A::new( self.x.eq(&rhs.x), self.y.eq(&rhs.y), self.z.eq(&rhs.z), @@ -252,8 +257,8 @@ impl Vec4 { /// In other words this computes `[self.x != rhs.x, self.y != rhs.y, ..]` for all /// elements. #[inline] - pub fn cmpne(self, rhs: Self) -> BVec4 { - BVec4::new( + pub fn cmpne(self, rhs: Self) -> BVec4A { + BVec4A::new( self.x.ne(&rhs.x), self.y.ne(&rhs.y), self.z.ne(&rhs.z), @@ -267,8 +272,8 @@ impl Vec4 { /// In other words this computes `[self.x >= rhs.x, self.y >= rhs.y, ..]` for all /// elements. #[inline] - pub fn cmpge(self, rhs: Self) -> BVec4 { - BVec4::new( + pub fn cmpge(self, rhs: Self) -> BVec4A { + BVec4A::new( self.x.ge(&rhs.x), self.y.ge(&rhs.y), self.z.ge(&rhs.z), @@ -282,8 +287,8 @@ impl Vec4 { /// In other words this computes `[self.x > rhs.x, self.y > rhs.y, ..]` for all /// elements. #[inline] - pub fn cmpgt(self, rhs: Self) -> BVec4 { - BVec4::new( + pub fn cmpgt(self, rhs: Self) -> BVec4A { + BVec4A::new( self.x.gt(&rhs.x), self.y.gt(&rhs.y), self.z.gt(&rhs.z), @@ -297,8 +302,8 @@ impl Vec4 { /// In other words this computes `[self.x <= rhs.x, self.y <= rhs.y, ..]` for all /// elements. #[inline] - pub fn cmple(self, rhs: Self) -> BVec4 { - BVec4::new( + pub fn cmple(self, rhs: Self) -> BVec4A { + BVec4A::new( self.x.le(&rhs.x), self.y.le(&rhs.y), self.z.le(&rhs.z), @@ -312,8 +317,8 @@ impl Vec4 { /// In other words this computes `[self.x < rhs.x, self.y < rhs.y, ..]` for all /// elements. #[inline] - pub fn cmplt(self, rhs: Self) -> BVec4 { - BVec4::new( + pub fn cmplt(self, rhs: Self) -> BVec4A { + BVec4A::new( self.x.lt(&rhs.x), self.y.lt(&rhs.y), self.z.lt(&rhs.z), @@ -387,8 +392,8 @@ impl Vec4 { /// /// In other words, this computes `[x.is_nan(), y.is_nan(), z.is_nan(), w.is_nan()]`. #[inline] - pub fn is_nan_mask(self) -> BVec4 { - BVec4::new( + pub fn is_nan_mask(self) -> BVec4A { + BVec4A::new( self.x.is_nan(), self.y.is_nan(), self.z.is_nan(), diff --git a/src/f32/sse2/vec4.rs b/src/f32/sse2/vec4.rs index 6e406f1f..3d0246a6 100644 --- a/src/f32/sse2/vec4.rs +++ b/src/f32/sse2/vec4.rs @@ -1,6 +1,8 @@ // Generated from vec.rs.tera template. Edit the template, not the generated file. -use crate::{f32::math, sse2::*, BVec4A, Vec2, Vec3, Vec3A}; +use crate::BVec4A; + +use crate::{f32::math, sse2::*, Vec2, Vec3, Vec3A}; #[cfg(not(target_arch = "spirv"))] use core::fmt; diff --git a/src/f32/wasm32/vec4.rs b/src/f32/wasm32/vec4.rs index 7d98c7db..63ea0d57 100644 --- a/src/f32/wasm32/vec4.rs +++ b/src/f32/wasm32/vec4.rs @@ -1,6 +1,8 @@ // Generated from vec.rs.tera template. Edit the template, not the generated file. -use crate::{f32::math, wasm32::*, BVec4A, Vec2, Vec3, Vec3A}; +use crate::BVec4A; + +use crate::{f32::math, wasm32::*, Vec2, Vec3, Vec3A}; #[cfg(not(target_arch = "spirv"))] use core::fmt;