Introduce verification of integer address type widths. #10209

Merged
3 changes: 3 additions & 0 deletions cranelift/codegen/meta/src/gen_inst.rs
@@ -1233,6 +1233,9 @@ fn gen_builder(

There is also a method per instruction format. These methods all
return an `Inst`.

When an address to a load or store is specified, its integer
size is required to be equal to the platform's pointer width.
"#,
);
fmt.line("pub trait InstBuilder<'f>: InstBuilderBase<'f> {");
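
As an aside, here is a minimal, standalone Rust sketch of the rule this doc comment states, mirroring the width comparison the verifier change below performs; the helper name address_width_ok and the exact crate paths are illustrative assumptions rather than code from this PR.

    // Sketch only: an address operand's integer type must be as wide as the
    // target's pointers. Assumes the cranelift-codegen and target-lexicon
    // crates; the method names (Triple::pointer_width, PointerWidth::bits,
    // Type::bits) are taken from the verifier code in this PR.
    use cranelift_codegen::ir::{types, Type};
    use target_lexicon::Triple;

    fn address_width_ok(addr_ty: Type, triple: &Triple) -> bool {
        match triple.pointer_width() {
            Ok(pw) => pw.bits() as u32 == addr_ty.bits(),
            // Unknown pointer width: the real verifier propagates an error here.
            Err(()) => false,
        }
    }

    fn main() {
        let t64: Triple = "x86_64-unknown-linux-gnu".parse().unwrap();
        let t32: Triple = "i686-unknown-linux-gnu".parse().unwrap();
        assert!(address_width_ok(types::I64, &t64)); // i64 address on a 64-bit target
        assert!(!address_width_ok(types::I32, &t64)); // i32 address is rejected there
        assert!(address_width_ok(types::I32, &t32)); // but accepted on a 32-bit target
    }
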
59 changes: 59 additions & 0 deletions cranelift/codegen/src/verifier/mod.rs
@@ -663,6 +663,40 @@ impl<'a> Verifier<'a> {
} => {
    self.verify_bitcast(inst, flags, arg, errors)?;
}
LoadNoOffset { opcode, arg, .. } if opcode.can_load() => {
    self.verify_is_address(inst, arg, errors)?;
}
Load { opcode, arg, .. } if opcode.can_load() => {
    self.verify_is_address(inst, arg, errors)?;
}
AtomicCas {
    opcode,
    args: [p, _, _],
    ..
} if opcode.can_load() || opcode.can_store() => {
    self.verify_is_address(inst, p, errors)?;
}
AtomicRmw {
    opcode,
    args: [p, _],
    ..
} if opcode.can_load() || opcode.can_store() => {
    self.verify_is_address(inst, p, errors)?;
}
Store {
    opcode,
    args: [_, p],
Contributor Author

I initially assumed that p, the first value of the (undocumented) Store format, was the pointer, just like in the other operations. Only after I checked store did I realise it uses the second argument as the pointer. I wonder if the capitalised flavours should be hidden from the documentation?

Member

That's somewhat surprising, but I think this is probably outside the scope of this PR -- ideally we'd unify the arg order, not try to selectively hide documentation.

Contributor Author

Hmm, so my confusion was that store and Store exist, but only store has useful docs and parameter names. Clicking Source for store shows:

    /// Store ``x`` to memory at ``p + Offset``.
    ///
    /// This is a polymorphic instruction that can store any value type with a
    /// memory representation.
    ///
    /// Inputs:
    ///
    /// - MemFlags: Memory operation flags
    /// - x: Value to be stored
    /// - p: An integer address type
    /// - Offset: Byte offset from base address
    #[allow(non_snake_case)]
    fn store<T1: Into<ir::MemFlags>, T2: Into<ir::immediates::Offset32>>(self, MemFlags: T1, x: ir::Value, p: ir::Value, Offset: T2) -> Inst {
        let MemFlags = MemFlags.into();
        let Offset = Offset.into();
        let ctrl_typevar = self.data_flow_graph().value_type(x);
        self.Store(Opcode::Store, ctrl_typevar, MemFlags, Offset, x, p).0
    }

This calls into the uppercase Store. I needed to look at this generated code to confirm that the second argument (arg1) of Store is the address. It's not the end of the world, because the unit tests just failed the other way around, and most people who need to know the order are likely spelunking in the code anyway.

I see now that the lowercase store does determine the ctrl_typevar, so store and Store do different things; I glossed over that initially. Maybe Store should just get more extensive documentation at some point, or link to store for it. Anyway, this is definitely beyond the scope of this PR, but hopefully it clarifies my train of thought as an outsider :)
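
For reference, a small cranelift-frontend sketch (not part of this thread or the PR) that issues the lowercase store discussed above; the surrounding setup is an assumption, but the argument order and the resulting Store format layout follow the generated code quoted in the previous comment.

    // Sketch: build `store x, p` through the generated lowercase builder.
    // Assumes the cranelift-codegen and cranelift-frontend crates.
    use cranelift_codegen::ir::{
        types, AbiParam, Function, InstBuilder, MemFlags, Signature, UserFuncName,
    };
    use cranelift_codegen::isa::CallConv;
    use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};

    fn main() {
        let mut sig = Signature::new(CallConv::SystemV);
        sig.params.push(AbiParam::new(types::F64)); // x: value to be stored
        sig.params.push(AbiParam::new(types::I64)); // p: address
        let mut func = Function::with_name_signature(UserFuncName::user(0, 0), sig);
        let mut fn_ctx = FunctionBuilderContext::new();
        let mut b = FunctionBuilder::new(&mut func, &mut fn_ctx);

        let block = b.create_block();
        b.append_block_params_for_function_params(block);
        b.switch_to_block(block);
        let x = b.block_params(block)[0];
        let p = b.block_params(block)[1];

        // Lowercase `store`: (flags, value, address, offset). The controlling
        // type variable is taken from `x` inside the generated method, and the
        // uppercase `Store` format it emits carries args = [x, p], i.e. the
        // address is the second argument -- which is what the verifier's
        // `Store { args: [_, p], .. }` pattern above relies on.
        b.ins().store(MemFlags::new(), x, p, 0);
        b.ins().return_(&[]);

        b.seal_all_blocks();
        b.finalize();
        println!("{}", func.display());
    }
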

Member

Yeah, the distinction is that Store names an instruction format, and store-the-opcode is one of the instructions that uses Store-the-format. Perhaps we could clean this up later, thanks.

    ..
} if opcode.can_store() => {
    self.verify_is_address(inst, p, errors)?;
}
StoreNoOffset {
    opcode,
    args: [_, p],
    ..
} if opcode.can_store() => {
    self.verify_is_address(inst, p, errors)?;
}
UnaryConst {
    opcode: opcode @ (Opcode::Vconst | Opcode::F128const),
    constant_handle,
@@ -1046,6 +1080,31 @@ impl<'a> Verifier<'a> {
}
}

fn verify_is_address(
    &self,
    loc_inst: Inst,
    v: Value,
    errors: &mut VerifierErrors,
) -> VerifierStepResult {
    if let Some(isa) = self.isa {
        let pointer_width = isa.triple().pointer_width()?;
        let value_type = self.func.dfg.value_type(v);
        let expected_width = pointer_width.bits() as u32;
        let value_width = value_type.bits();
        if expected_width != value_width {
            errors.nonfatal((
                loc_inst,
                self.context(loc_inst),
                format!("invalid pointer width (got {value_width}, expected {expected_width}) encountered {v}"),
            ))
        } else {
            Ok(())
        }
    } else {
        Ok(())
    }
}

fn domtree_integrity(
&self,
domtree: &DominatorTree,
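
As a hedged end-to-end sketch of what the new verify_is_address check rejects (not taken from this PR's test suite), the following parses a CLIF function whose load address is i32 and verifies it against a 64-bit ISA. It assumes the cranelift-reader crate and a cranelift-codegen version in which isa::Builder::finish returns a Result; adjust to your version as needed.

    use cranelift_codegen::isa::{self, TargetIsa};
    use cranelift_codegen::settings::{self, Flags};
    use cranelift_codegen::verifier::verify_function;
    use cranelift_reader::parse_functions;
    use target_lexicon::Triple;

    fn main() {
        // An i32 address on a 64-bit target: the new check should flag the load.
        let clif = "
            function %bad() -> f64 {
                ss0 = explicit_slot 8
            block0:
                v0 = stack_addr.i32 ss0
                v1 = load.f64 v0
                return v1
            }";
        let func = parse_functions(clif).expect("parse CLIF").remove(0);

        let triple: Triple = "x86_64-unknown-linux-gnu".parse().unwrap();
        let owned_isa = isa::lookup(triple)
            .expect("x86_64 backend not enabled")
            .finish(Flags::new(settings::builder()))
            .expect("build ISA");
        let isa_ref: &dyn TargetIsa = owned_isa.as_ref();

        // With this PR the verifier should report something like:
        //   invalid pointer width (got 32, expected 64) encountered v0
        match verify_function(&func, isa_ref) {
            Ok(()) => println!("function verified"),
            Err(errors) => println!("verifier errors:\n{errors}"),
        }
    }
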
18 changes: 0 additions & 18 deletions cranelift/filetests/filetests/runtests/fdemote.clif
@@ -73,21 +73,3 @@ block0(v0: f64):
; run: %fdemote_is_nan(-sNaN:0x1) == 1
; run: %fdemote_is_nan(+sNaN:0x4000000000001) == 1
; run: %fdemote_is_nan(-sNaN:0x4000000000001) == 1

Contributor Author

Most of the changes to the unit tests are very mechanical. I kept as many test cases as possible in the existing file, and split the 32- and 64-bit-specific unit tests out into fdemote_32.clif and fdemote_64.clif respectively.

Most often the change is the width in the stack_addr.<size> instruction, I think... this could definitely do with a bit of scrutiny, as I'm pretty inexperienced with these very low-level constructs.


;; Tests a fdemote+load combo which some backends may optimize
function %fdemote_load(i64, f64) -> f32 {
ss0 = explicit_slot 16

block0(v1: i64, v2: f64):
v3 = stack_addr.i64 ss0
store.f64 v2, v3
v4 = load.f64 v3
v5 = fdemote.f32 v4
return v5
}
Contributor Author

The v1 input argument to this block seems to be unused, but I did make it platform-specific anyway.

; run: %fdemote_load(0, 0x0.0) == 0x0.0
; run: %fdemote_load(1, 0x0.1) == 0x0.1
; run: %fdemote_load(2, 0x0.2) == 0x0.2
; run: %fdemote_load(3, 0x3.2) == 0x3.2
; run: %fdemote_load(0x8, 0x3.2) == 0x3.2
21 changes: 21 additions & 0 deletions cranelift/filetests/filetests/runtests/fdemote_32.clif
@@ -0,0 +1,21 @@
test interpret
test run
target pulley32
target pulley32be

;; Tests a fdemote+load combo which some backends may optimize
function %fdemote_load(i32, f64) -> f32 {
ss0 = explicit_slot 16

block0(v1: i32, v2: f64):
v3 = stack_addr.i32 ss0
store.f64 v2, v3
v4 = load.f64 v3
v5 = fdemote.f32 v4
return v5
}
; run: %fdemote_load(0, 0x0.0) == 0x0.0
; run: %fdemote_load(1, 0x0.1) == 0x0.1
; run: %fdemote_load(2, 0x0.2) == 0x0.2
; run: %fdemote_load(3, 0x3.2) == 0x3.2
; run: %fdemote_load(0x8, 0x3.2) == 0x3.2
26 changes: 26 additions & 0 deletions cranelift/filetests/filetests/runtests/fdemote_64.clif
@@ -0,0 +1,26 @@
test interpret
test run
target x86_64
target x86_64 has_avx
target s390x
target aarch64
target riscv64
target pulley64
target pulley64be

;; Tests a fdemote+load combo which some backends may optimize
function %fdemote_load(i64, f64) -> f32 {
ss0 = explicit_slot 16

block0(v1: i64, v2: f64):
v3 = stack_addr.i64 ss0
store.f64 v2, v3
v4 = load.f64 v3
v5 = fdemote.f32 v4
return v5
}
; run: %fdemote_load(0, 0x0.0) == 0x0.0
; run: %fdemote_load(1, 0x0.1) == 0x0.1
; run: %fdemote_load(2, 0x0.2) == 0x0.2
; run: %fdemote_load(3, 0x3.2) == 0x3.2
; run: %fdemote_load(0x8, 0x3.2) == 0x3.2
17 changes: 0 additions & 17 deletions cranelift/filetests/filetests/runtests/fpromote.clif
@@ -83,20 +83,3 @@ block0(v0: f32):
; run: %fpromote_is_nan(+sNaN:0x200001) == 1
; run: %fpromote_is_nan(-sNaN:0x200001) == 1


;; Tests a fpromote+load combo which some backends may optimize
function %fpromote_load(i64, f32) -> f64 {
ss0 = explicit_slot 16

block0(v1: i64, v2: f32):
v3 = stack_addr.i64 ss0
store.f32 v2, v3
v4 = load.f32 v3
v5 = fpromote.f64 v4
return v5
}
; run: %fpromote_load(0, 0x0.0) == 0x0.0
; run: %fpromote_load(1, 0x0.1) == 0x0.1
; run: %fpromote_load(2, 0x0.2) == 0x0.2
; run: %fpromote_load(3, 0x3.2) == 0x3.2
; run: %fpromote_load(0xC, 0x3.2) == 0x3.2
21 changes: 21 additions & 0 deletions cranelift/filetests/filetests/runtests/fpromote_32.clif
@@ -0,0 +1,21 @@
test interpret
test run
target pulley32
target pulley32be

;; Tests a fpromote+load combo which some backends may optimize
function %fpromote_load(i64, f32) -> f64 {
ss0 = explicit_slot 16

block0(v1: i64, v2: f32):
v3 = stack_addr.i32 ss0
store.f32 v2, v3
v4 = load.f32 v3
v5 = fpromote.f64 v4
return v5
}
; run: %fpromote_load(0, 0x0.0) == 0x0.0
; run: %fpromote_load(1, 0x0.1) == 0x0.1
; run: %fpromote_load(2, 0x0.2) == 0x0.2
; run: %fpromote_load(3, 0x3.2) == 0x3.2
; run: %fpromote_load(0xC, 0x3.2) == 0x3.2
27 changes: 27 additions & 0 deletions cranelift/filetests/filetests/runtests/fpromote_64.clif
@@ -0,0 +1,27 @@
test interpret
test run
target x86_64
target x86_64 has_avx
target s390x
target aarch64
target riscv64
target riscv64 has_c has_zcb
target pulley64
target pulley64be

;; Tests a fpromote+load combo which some backends may optimize
function %fpromote_load(i64, f32) -> f64 {
ss0 = explicit_slot 16

block0(v1: i64, v2: f32):
v3 = stack_addr.i64 ss0
store.f32 v2, v3
v4 = load.f32 v3
v5 = fpromote.f64 v4
return v5
}
; run: %fpromote_load(0, 0x0.0) == 0x0.0
; run: %fpromote_load(1, 0x0.1) == 0x0.1
; run: %fpromote_load(2, 0x0.2) == 0x0.2
; run: %fpromote_load(3, 0x3.2) == 0x3.2
; run: %fpromote_load(0xC, 0x3.2) == 0x3.2
66 changes: 0 additions & 66 deletions cranelift/filetests/filetests/runtests/simd-extractlane.clif
@@ -43,72 +43,6 @@ block0(v0: i64x2):
}
; run: %extractlane_1([0 4294967297]) == 4294967297

function %extractlane_i8x16_through_stack(i8x16) -> i8 {
ss0 = explicit_slot 8
block0(v0: i8x16):
v2 = stack_addr.i64 ss0
v3 = extractlane v0, 1
store v3, v2
v4 = load.i8 v2
return v4
}
; run: %extractlane_i8x16_through_stack([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == 2

function %extractlane_i16x8_through_stack(i16x8) -> i16 {
ss0 = explicit_slot 8
block0(v0: i16x8):
v2 = stack_addr.i64 ss0
v3 = extractlane v0, 2
store v3, v2
v4 = load.i16 v2
return v4
}
; run: %extractlane_i16x8_through_stack([1 2 3 4 5 6 7 8]) == 3

function %extractlane_i32x4_through_stack(i32x4) -> i32 {
ss0 = explicit_slot 8
block0(v0: i32x4):
v2 = stack_addr.i64 ss0
v3 = extractlane v0, 3
store v3, v2
v4 = load.i32 v2
return v4
}
; run: %extractlane_i32x4_through_stack([1 2 3 4]) == 4

function %extractlane_i64x2_through_stack(i64x2) -> i64 {
ss0 = explicit_slot 8
block0(v0: i64x2):
v2 = stack_addr.i64 ss0
v3 = extractlane v0, 0
store v3, v2
v4 = load.i64 v2
return v4
}
; run: %extractlane_i64x2_through_stack([1 2]) == 1

function %extractlane_f32x4_through_stack(f32x4) -> f32 {
ss0 = explicit_slot 8
block0(v0: f32x4):
v2 = stack_addr.i64 ss0
v3 = extractlane v0, 3
store v3, v2
v4 = load.f32 v2
return v4
}
; run: %extractlane_f32x4_through_stack([0x1.0 0x2.0 0x3.0 0x4.0]) == 0x4.0

function %extractlane_f64x2_through_stack(f64x2) -> f64 {
ss0 = explicit_slot 8
block0(v0: f64x2):
v2 = stack_addr.i64 ss0
v3 = extractlane v0, 0
store v3, v2
v4 = load.f64 v2
return v4
}
; run: %extractlane_f64x2_through_stack([0x1.0 0x2.0]) == 0x1.0


function %unaligned_extractlane() -> f64 {
ss0 = explicit_slot 24
70 changes: 70 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-extractlane_32.clif
@@ -0,0 +1,70 @@
test interpret
test run
target pulley32
target pulley32be

function %extractlane_i8x16_through_stack(i8x16) -> i8 {
ss0 = explicit_slot 8
block0(v0: i8x16):
v2 = stack_addr.i32 ss0
v3 = extractlane v0, 1
store v3, v2
v4 = load.i8 v2
return v4
}
; run: %extractlane_i8x16_through_stack([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == 2

function %extractlane_i16x8_through_stack(i16x8) -> i16 {
ss0 = explicit_slot 8
block0(v0: i16x8):
v2 = stack_addr.i32 ss0
v3 = extractlane v0, 2
store v3, v2
v4 = load.i16 v2
return v4
}
; run: %extractlane_i16x8_through_stack([1 2 3 4 5 6 7 8]) == 3

function %extractlane_i32x4_through_stack(i32x4) -> i32 {
ss0 = explicit_slot 8
block0(v0: i32x4):
v2 = stack_addr.i32 ss0
v3 = extractlane v0, 3
store v3, v2
v4 = load.i32 v2
return v4
}
; run: %extractlane_i32x4_through_stack([1 2 3 4]) == 4

function %extractlane_i64x2_through_stack(i64x2) -> i64 {
ss0 = explicit_slot 8
block0(v0: i64x2):
v2 = stack_addr.i32 ss0
v3 = extractlane v0, 0
store v3, v2
v4 = load.i64 v2
return v4
}
; run: %extractlane_i64x2_through_stack([1 2]) == 1

function %extractlane_f32x4_through_stack(f32x4) -> f32 {
ss0 = explicit_slot 8
block0(v0: f32x4):
v2 = stack_addr.i32 ss0
v3 = extractlane v0, 3
store v3, v2
v4 = load.f32 v2
return v4
}
; run: %extractlane_f32x4_through_stack([0x1.0 0x2.0 0x3.0 0x4.0]) == 0x4.0

function %extractlane_f64x2_through_stack(f64x2) -> f64 {
ss0 = explicit_slot 8
block0(v0: f64x2):
v2 = stack_addr.i32 ss0
v3 = extractlane v0, 0
store v3, v2
v4 = load.f64 v2
return v4
}
; run: %extractlane_f64x2_through_stack([0x1.0 0x2.0]) == 0x1.0