diff --git a/cranelift/codegen/meta/src/gen_inst.rs b/cranelift/codegen/meta/src/gen_inst.rs
index 97829f666219..82e80c722fc8 100644
--- a/cranelift/codegen/meta/src/gen_inst.rs
+++ b/cranelift/codegen/meta/src/gen_inst.rs
@@ -1233,6 +1233,9 @@ fn gen_builder(

         There is also a method per instruction format. These methods all
         return an `Inst`.
+
+        When an address for a load or store is specified, its integer
+        width is required to be equal to the platform's pointer width.
     "#,
     );
     fmt.line("pub trait InstBuilder<'f>: InstBuilderBase<'f> {");
diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs
index 33d9132f8e68..ac95cf0cc2bb 100644
--- a/cranelift/codegen/src/verifier/mod.rs
+++ b/cranelift/codegen/src/verifier/mod.rs
@@ -663,6 +663,40 @@ impl<'a> Verifier<'a> {
             } => {
                 self.verify_bitcast(inst, flags, arg, errors)?;
             }
+            LoadNoOffset { opcode, arg, .. } if opcode.can_load() => {
+                self.verify_is_address(inst, arg, errors)?;
+            }
+            Load { opcode, arg, .. } if opcode.can_load() => {
+                self.verify_is_address(inst, arg, errors)?;
+            }
+            AtomicCas {
+                opcode,
+                args: [p, _, _],
+                ..
+            } if opcode.can_load() || opcode.can_store() => {
+                self.verify_is_address(inst, p, errors)?;
+            }
+            AtomicRmw {
+                opcode,
+                args: [p, _],
+                ..
+            } if opcode.can_load() || opcode.can_store() => {
+                self.verify_is_address(inst, p, errors)?;
+            }
+            Store {
+                opcode,
+                args: [_, p],
+                ..
+            } if opcode.can_store() => {
+                self.verify_is_address(inst, p, errors)?;
+            }
+            StoreNoOffset {
+                opcode,
+                args: [_, p],
+                ..
+            } if opcode.can_store() => {
+                self.verify_is_address(inst, p, errors)?;
+            }
             UnaryConst {
                 opcode: opcode @ (Opcode::Vconst | Opcode::F128const),
                 constant_handle,
@@ -1046,6 +1080,31 @@ impl<'a> Verifier<'a> {
         }
     }

+    fn verify_is_address(
+        &self,
+        loc_inst: Inst,
+        v: Value,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult {
+        if let Some(isa) = self.isa {
+            let pointer_width = isa.triple().pointer_width()?;
+            let value_type = self.func.dfg.value_type(v);
+            let expected_width = pointer_width.bits() as u32;
+            let value_width = value_type.bits();
+            if expected_width != value_width {
+                errors.nonfatal((
+                    loc_inst,
+                    self.context(loc_inst),
+                    format!("invalid pointer width (got {value_width}, expected {expected_width}) encountered {v}"),
+                ))
+            } else {
+                Ok(())
+            }
+        } else {
+            Ok(())
+        }
+    }
+
     fn domtree_integrity(
         &self,
         domtree: &DominatorTree,
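The verifier hook above reduces to a single width comparison, so the rule is easy to state in isolation. Here is a minimal, dependency-free sketch of that rule as a reviewer's aid; `check_address_width` and the `main` driver are illustrative names, not code from this patch:

```rust
/// Mirrors the check in `verify_is_address`: the address operand of a
/// load, store, or atomic instruction is well-formed only if its integer
/// width equals the target's pointer width.
fn check_address_width(value_width: u32, pointer_width_bits: u32) -> Result<(), String> {
    if value_width != pointer_width_bits {
        // Same shape as the verifier message asserted by the tests below.
        Err(format!(
            "invalid pointer width (got {value_width}, expected {pointer_width_bits})"
        ))
    } else {
        Ok(())
    }
}

fn main() {
    // On a 32-bit target such as pulley32, an i64 address is rejected...
    assert!(check_address_width(64, 32).is_err());
    // ...while an i32 address is accepted.
    assert!(check_address_width(32, 32).is_ok());
}
```

Note that the patch reports a mismatch via `errors.nonfatal(...)` rather than a fatal error, so verification continues and every offending address in a function is reported in one pass; that is what lets the pointer_width_32.clif and pointer_width_64.clif tests below annotate several `; error:` lines per function. The remaining hunks split the affected runtests so that each file only takes addresses of the width its targets expect.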
diff --git a/cranelift/filetests/filetests/runtests/fdemote.clif b/cranelift/filetests/filetests/runtests/fdemote.clif
index 74bc4c9cb03a..916d43872364 100644
--- a/cranelift/filetests/filetests/runtests/fdemote.clif
+++ b/cranelift/filetests/filetests/runtests/fdemote.clif
@@ -73,21 +73,3 @@ block0(v0: f64):
 ; run: %fdemote_is_nan(-sNaN:0x1) == 1
 ; run: %fdemote_is_nan(+sNaN:0x4000000000001) == 1
 ; run: %fdemote_is_nan(-sNaN:0x4000000000001) == 1
-
-
-;; Tests a fdemote+load combo which some backends may optimize
-function %fdemote_load(i64, f64) -> f32 {
-    ss0 = explicit_slot 16
-
-block0(v1: i64, v2: f64):
-    v3 = stack_addr.i64 ss0
-    store.f64 v2, v3
-    v4 = load.f64 v3
-    v5 = fdemote.f32 v4
-    return v5
-}
-; run: %fdemote_load(0, 0x0.0) == 0x0.0
-; run: %fdemote_load(1, 0x0.1) == 0x0.1
-; run: %fdemote_load(2, 0x0.2) == 0x0.2
-; run: %fdemote_load(3, 0x3.2) == 0x3.2
-; run: %fdemote_load(0x8, 0x3.2) == 0x3.2
diff --git a/cranelift/filetests/filetests/runtests/fdemote_32.clif b/cranelift/filetests/filetests/runtests/fdemote_32.clif
new file mode 100644
index 000000000000..497bb9cc2b1f
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/fdemote_32.clif
@@ -0,0 +1,21 @@
+test interpret
+test run
+target pulley32
+target pulley32be
+
+;; Tests a fdemote+load combo which some backends may optimize
+function %fdemote_load(i32, f64) -> f32 {
+    ss0 = explicit_slot 16
+
+block0(v1: i32, v2: f64):
+    v3 = stack_addr.i32 ss0
+    store.f64 v2, v3
+    v4 = load.f64 v3
+    v5 = fdemote.f32 v4
+    return v5
+}
+; run: %fdemote_load(0, 0x0.0) == 0x0.0
+; run: %fdemote_load(1, 0x0.1) == 0x0.1
+; run: %fdemote_load(2, 0x0.2) == 0x0.2
+; run: %fdemote_load(3, 0x3.2) == 0x3.2
+; run: %fdemote_load(0x8, 0x3.2) == 0x3.2
diff --git a/cranelift/filetests/filetests/runtests/fdemote_64.clif b/cranelift/filetests/filetests/runtests/fdemote_64.clif
new file mode 100644
index 000000000000..37710bd149ac
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/fdemote_64.clif
@@ -0,0 +1,26 @@
+test interpret
+test run
+target x86_64
+target x86_64 has_avx
+target s390x
+target aarch64
+target riscv64
+target pulley64
+target pulley64be
+
+;; Tests a fdemote+load combo which some backends may optimize
+function %fdemote_load(i64, f64) -> f32 {
+    ss0 = explicit_slot 16
+
+block0(v1: i64, v2: f64):
+    v3 = stack_addr.i64 ss0
+    store.f64 v2, v3
+    v4 = load.f64 v3
+    v5 = fdemote.f32 v4
+    return v5
+}
+; run: %fdemote_load(0, 0x0.0) == 0x0.0
+; run: %fdemote_load(1, 0x0.1) == 0x0.1
+; run: %fdemote_load(2, 0x0.2) == 0x0.2
+; run: %fdemote_load(3, 0x3.2) == 0x3.2
+; run: %fdemote_load(0x8, 0x3.2) == 0x3.2
diff --git a/cranelift/filetests/filetests/runtests/fpromote.clif b/cranelift/filetests/filetests/runtests/fpromote.clif
index 37ba3970e8cb..7c13fcf4e919 100644
--- a/cranelift/filetests/filetests/runtests/fpromote.clif
+++ b/cranelift/filetests/filetests/runtests/fpromote.clif
@@ -83,20 +83,3 @@ block0(v0: f32):
 ; run: %fpromote_is_nan(+sNaN:0x200001) == 1
 ; run: %fpromote_is_nan(-sNaN:0x200001) == 1

-
-;; Tests a fpromote+load combo which some backends may optimize
-function %fpromote_load(i64, f32) -> f64 {
-    ss0 = explicit_slot 16
-
-block0(v1: i64, v2: f32):
-    v3 = stack_addr.i64 ss0
-    store.f32 v2, v3
-    v4 = load.f32 v3
-    v5 = fpromote.f64 v4
-    return v5
-}
-; run: %fpromote_load(0, 0x0.0) == 0x0.0
-; run: %fpromote_load(1, 0x0.1) == 0x0.1
-; run: %fpromote_load(2, 0x0.2) == 0x0.2
-; run: %fpromote_load(3, 0x3.2) == 0x3.2
-; run: %fpromote_load(0xC, 0x3.2) == 0x3.2
diff --git a/cranelift/filetests/filetests/runtests/fpromote_32.clif b/cranelift/filetests/filetests/runtests/fpromote_32.clif
new file mode 100644
index 000000000000..f5e3dcb6fbbd
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/fpromote_32.clif
@@ -0,0 +1,21 @@
+test interpret
+test run
+target pulley32
+target pulley32be
+
+;; Tests a fpromote+load combo which some backends may optimize
+function %fpromote_load(i64, f32) -> f64 {
+    ss0 = explicit_slot 16
+
+block0(v1: i64, v2: f32):
+    v3 = stack_addr.i32 ss0
+    store.f32 v2, v3
+    v4 = load.f32 v3
+    v5 = fpromote.f64 v4
+    return v5
+}
+; run: %fpromote_load(0, 0x0.0) == 0x0.0
+; run: %fpromote_load(1, 0x0.1) == 0x0.1
+; run: %fpromote_load(2, 0x0.2) == 0x0.2
+; run: %fpromote_load(3, 0x3.2) == 0x3.2
+; run: %fpromote_load(0xC, 0x3.2) == 0x3.2
diff --git a/cranelift/filetests/filetests/runtests/fpromote_64.clif b/cranelift/filetests/filetests/runtests/fpromote_64.clif
new file mode 100644
index 000000000000..6b0b3daab294
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/fpromote_64.clif
@@ -0,0 +1,27 @@
+test interpret
+test run
+target x86_64
+target x86_64 has_avx
+target s390x
+target aarch64
+target riscv64
+target riscv64 has_c has_zcb
+target pulley64
+target pulley64be
+
+;; Tests a fpromote+load combo which some backends may optimize
+function %fpromote_load(i64, f32) -> f64 {
+    ss0 = explicit_slot 16
+
+block0(v1: i64, v2: f32):
+    v3 = stack_addr.i64 ss0
+    store.f32 v2, v3
+    v4 = load.f32 v3
+    v5 = fpromote.f64 v4
+    return v5
+}
+; run: %fpromote_load(0, 0x0.0) == 0x0.0
+; run: %fpromote_load(1, 0x0.1) == 0x0.1
+; run: %fpromote_load(2, 0x0.2) == 0x0.2
+; run: %fpromote_load(3, 0x3.2) == 0x3.2
+; run: %fpromote_load(0xC, 0x3.2) == 0x3.2
diff --git a/cranelift/filetests/filetests/runtests/simd-extractlane.clif b/cranelift/filetests/filetests/runtests/simd-extractlane.clif
index 0d35960ac752..ce90b6897a67 100644
--- a/cranelift/filetests/filetests/runtests/simd-extractlane.clif
+++ b/cranelift/filetests/filetests/runtests/simd-extractlane.clif
@@ -43,72 +43,6 @@ block0(v0: i64x2):
 }
 ; run: %extractlane_1([0 4294967297]) == 4294967297

-function %extractlane_i8x16_through_stack(i8x16) -> i8 {
-    ss0 = explicit_slot 8
-block0(v0: i8x16):
-    v2 = stack_addr.i64 ss0
-    v3 = extractlane v0, 1
-    store v3, v2
-    v4 = load.i8 v2
-    return v4
-}
-; run: %extractlane_i8x16_through_stack([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == 2
-
-function %extractlane_i16x8_through_stack(i16x8) -> i16 {
-    ss0 = explicit_slot 8
-block0(v0: i16x8):
-    v2 = stack_addr.i64 ss0
-    v3 = extractlane v0, 2
-    store v3, v2
-    v4 = load.i16 v2
-    return v4
-}
-; run: %extractlane_i16x8_through_stack([1 2 3 4 5 6 7 8]) == 3
-
-function %extractlane_i32x4_through_stack(i32x4) -> i32 {
-    ss0 = explicit_slot 8
-block0(v0: i32x4):
-    v2 = stack_addr.i64 ss0
-    v3 = extractlane v0, 3
-    store v3, v2
-    v4 = load.i32 v2
-    return v4
-}
-; run: %extractlane_i32x4_through_stack([1 2 3 4]) == 4
-
-function %extractlane_i64x2_through_stack(i64x2) -> i64 {
-    ss0 = explicit_slot 8
-block0(v0: i64x2):
-    v2 = stack_addr.i64 ss0
-    v3 = extractlane v0, 0
-    store v3, v2
-    v4 = load.i64 v2
-    return v4
-}
-; run: %extractlane_i64x2_through_stack([1 2]) == 1
-
-function %extractlane_f32x4_through_stack(f32x4) -> f32 {
-    ss0 = explicit_slot 8
-block0(v0: f32x4):
-    v2 = stack_addr.i64 ss0
-    v3 = extractlane v0, 3
-    store v3, v2
-    v4 = load.f32 v2
-    return v4
-}
-; run: %extractlane_f32x4_through_stack([0x1.0 0x2.0 0x3.0 0x4.0]) == 0x4.0
-
-function %extractlane_f64x2_through_stack(f64x2) -> f64 {
-    ss0 = explicit_slot 8
-block0(v0: f64x2):
-    v2 = stack_addr.i64 ss0
-    v3 = extractlane v0, 0
-    store v3, v2
-    v4 = load.f64 v2
-    return v4
-}
-; run: %extractlane_f64x2_through_stack([0x1.0 0x2.0]) == 0x1.0
-
 function %unaligned_extractlane() -> f64 {
     ss0 = explicit_slot 24
diff --git a/cranelift/filetests/filetests/runtests/simd-extractlane_32.clif b/cranelift/filetests/filetests/runtests/simd-extractlane_32.clif
new file mode 100644
index 000000000000..32c7177781a5
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-extractlane_32.clif
@@ -0,0 +1,70 @@
+test interpret
+test run
+target pulley32
+target pulley32be
+
+function %extractlane_i8x16_through_stack(i8x16) -> i8 {
+    ss0 = explicit_slot 8
+block0(v0: i8x16):
+    v2 = stack_addr.i32 ss0
+    v3 = extractlane v0, 1
+    store v3, v2
+    v4 = load.i8 v2
+    return v4
+}
+; run: %extractlane_i8x16_through_stack([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == 2
+
+function %extractlane_i16x8_through_stack(i16x8) -> i16 {
+    ss0 = explicit_slot 8
+block0(v0: i16x8):
+    v2 = stack_addr.i32 ss0
+    v3 = extractlane v0, 2
+    store v3, v2
+    v4 = load.i16 v2
+    return v4
+}
+; run: %extractlane_i16x8_through_stack([1 2 3 4 5 6 7 8]) == 3
+
+function %extractlane_i32x4_through_stack(i32x4) -> i32 {
+    ss0 = explicit_slot 8
+block0(v0: i32x4):
+    v2 = stack_addr.i32 ss0
+    v3 = extractlane v0, 3
+    store v3, v2
+    v4 = load.i32 v2
+    return v4
+}
+; run: %extractlane_i32x4_through_stack([1 2 3 4]) == 4
+
+function %extractlane_i64x2_through_stack(i64x2) -> i64 {
+    ss0 = explicit_slot 8
+block0(v0: i64x2):
+    v2 = stack_addr.i32 ss0
+    v3 = extractlane v0, 0
+    store v3, v2
+    v4 = load.i64 v2
+    return v4
+}
+; run: %extractlane_i64x2_through_stack([1 2]) == 1
+
+function %extractlane_f32x4_through_stack(f32x4) -> f32 {
+    ss0 = explicit_slot 8
+block0(v0: f32x4):
+    v2 = stack_addr.i32 ss0
+    v3 = extractlane v0, 3
+    store v3, v2
+    v4 = load.f32 v2
+    return v4
+}
+; run: %extractlane_f32x4_through_stack([0x1.0 0x2.0 0x3.0 0x4.0]) == 0x4.0
+
+function %extractlane_f64x2_through_stack(f64x2) -> f64 {
+    ss0 = explicit_slot 8
+block0(v0: f64x2):
+    v2 = stack_addr.i32 ss0
+    v3 = extractlane v0, 0
+    store v3, v2
+    v4 = load.f64 v2
+    return v4
+}
+; run: %extractlane_f64x2_through_stack([0x1.0 0x2.0]) == 0x1.0
diff --git a/cranelift/filetests/filetests/runtests/simd-extractlane_64.clif b/cranelift/filetests/filetests/runtests/simd-extractlane_64.clif
new file mode 100644
index 000000000000..4a7e542c430c
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-extractlane_64.clif
@@ -0,0 +1,79 @@
+test interpret
+test run
+target aarch64
+target s390x
+target x86_64
+target x86_64 sse41
+target x86_64 sse42
+target x86_64 sse42 has_avx
+set enable_multi_ret_implicit_sret
+target riscv64 has_v
+target riscv64 has_v has_c has_zcb
+target pulley64
+target pulley64be
+
+function %extractlane_i8x16_through_stack(i8x16) -> i8 {
+    ss0 = explicit_slot 8
+block0(v0: i8x16):
+    v2 = stack_addr.i64 ss0
+    v3 = extractlane v0, 1
+    store v3, v2
+    v4 = load.i8 v2
+    return v4
+}
+; run: %extractlane_i8x16_through_stack([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == 2
+
+function %extractlane_i16x8_through_stack(i16x8) -> i16 {
+    ss0 = explicit_slot 8
+block0(v0: i16x8):
+    v2 = stack_addr.i64 ss0
+    v3 = extractlane v0, 2
+    store v3, v2
+    v4 = load.i16 v2
+    return v4
+}
+; run: %extractlane_i16x8_through_stack([1 2 3 4 5 6 7 8]) == 3
+
+function %extractlane_i32x4_through_stack(i32x4) -> i32 {
+    ss0 = explicit_slot 8
+block0(v0: i32x4):
+    v2 = stack_addr.i64 ss0
+    v3 = extractlane v0, 3
+    store v3, v2
+    v4 = load.i32 v2
+    return v4
+}
+; run: %extractlane_i32x4_through_stack([1 2 3 4]) == 4
+
+function %extractlane_i64x2_through_stack(i64x2) -> i64 {
+    ss0 = explicit_slot 8
+block0(v0: i64x2):
+    v2 = stack_addr.i64 ss0
+    v3 = extractlane v0, 0
+    store v3, v2
+    v4 = load.i64 v2
+    return v4
+}
+; run: %extractlane_i64x2_through_stack([1 2]) == 1
+
+function %extractlane_f32x4_through_stack(f32x4) -> f32 {
+    ss0 = explicit_slot 8
+block0(v0: f32x4):
+    v2 = stack_addr.i64 ss0
+    v3 = extractlane v0, 3
+    store v3, v2
+    v4 = load.f32 v2
+    return v4
+}
+; run: %extractlane_f32x4_through_stack([0x1.0 0x2.0 0x3.0 0x4.0]) == 0x4.0
+
+function %extractlane_f64x2_through_stack(f64x2) -> f64 {
+    ss0 = explicit_slot 8
+block0(v0: f64x2):
+    v2 = stack_addr.i64 ss0
+    v3 = extractlane v0, 0
+    store v3, v2
+    v4 = load.f64 v2
+    return v4
+}
+; run: %extractlane_f64x2_through_stack([0x1.0 0x2.0]) == 0x1.0
diff --git a/cranelift/filetests/filetests/runtests/simd-insertlane.clif b/cranelift/filetests/filetests/runtests/simd-insertlane.clif
index 7792f735f224..304f5e74f94d 100644
--- a/cranelift/filetests/filetests/runtests/simd-insertlane.clif
+++ b/cranelift/filetests/filetests/runtests/simd-insertlane.clif
@@ -84,95 +84,6 @@ block0(v0: f64x2, v1: f64):
 }
 ; run: %insertlane_1_in_f64x2([0x1.0 0x2.0], 0x3.0) == [0x1.0 0x3.0]

-function %insertlane_i8x16_through_stack(i8x16, i8) -> i8x16 {
-    ss0 = explicit_slot 8
-block0(v0: i8x16, v1: i8):
-    v2 = stack_addr.i64 ss0
-    store v1, v2
-    v3 = load.i8 v2
-    v4 = insertlane v0, v3, 1
-    return v4
-}
-; run: %insertlane_i8x16_through_stack([1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1], 2) == [1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
-
-function %insertlane_i16x8_through_stack(i16x8, i16) -> i16x8 {
-    ss0 = explicit_slot 8
-block0(v0: i16x8, v1: i16):
-    v2 = stack_addr.i64 ss0
-    store v1, v2
-    v3 = load.i16 v2
-    v4 = insertlane v0, v3, 2
-    return v4
-}
-; run: %insertlane_i16x8_through_stack([1 1 1 1 1 1 1 1], 2) == [1 1 2 1 1 1 1 1]
-
-function %insertlane_i32x4_through_stack(i32x4, i32) -> i32x4 {
-    ss0 = explicit_slot 8
-block0(v0: i32x4, v1: i32):
-    v2 = stack_addr.i64 ss0
-    store v1, v2
-    v3 = load.i32 v2
-    v4 = insertlane v0, v3, 3
-    return v4
-}
-; run: %insertlane_i32x4_through_stack([1 1 1 1], 2) == [1 1 1 2]
-
-function %insertlane_i64x2_through_stack(i64x2, i64) -> i64x2 {
-    ss0 = explicit_slot 8
-block0(v0: i64x2, v1: i64):
-    v2 = stack_addr.i64 ss0
-    store v1, v2
-    v3 = load.i64 v2
-    v4 = insertlane v0, v3, 0
-    return v4
-}
-; run: %insertlane_i64x2_through_stack([1 1], 2) == [2 1]
-
-function %insertlane_f32x4_through_stack(f32x4, f32) -> f32x4 {
-    ss0 = explicit_slot 8
-block0(v0: f32x4, v1: f32):
-    v2 = stack_addr.i64 ss0
-    store v1, v2
-    v3 = load.f32 v2
-    v4 = insertlane v0, v3, 3
-    return v4
-}
-; run: %insertlane_f32x4_through_stack([0x1.0 0x1.0 0x1.0 0x1.0], 0x2.0) == [0x1.0 0x1.0 0x1.0 0x2.0]
-
-function %insertlane_f32x4_through_stack2(f32x4, f32) -> f32x4 {
-    ss0 = explicit_slot 8
-block0(v0: f32x4, v1: f32):
-    v2 = stack_addr.i64 ss0
-    store v1, v2
-    v3 = load.f32 v2
-    v4 = insertlane v0, v3, 0
-    return v4
-}
-; run: %insertlane_f32x4_through_stack2([0x1.0 0x1.0 0x1.0 0x1.0], 0x2.0) == [0x2.0 0x1.0 0x1.0 0x1.0]
-
-function %insertlane_f64x2_through_stack(f64x2, f64) -> f64x2 {
-    ss0 = explicit_slot 8
-block0(v0: f64x2, v1: f64):
-    v2 = stack_addr.i64 ss0
-    store v1, v2
-    v3 = load.f64 v2
-    v4 = insertlane v0, v3, 0
-    return v4
-}
-; run: %insertlane_f64x2_through_stack([0x1.0 0x1.0], 0x2.0) == [0x2.0 0x1.0]
-
-function %insertlane_f64x2_through_stack2(f64x2, f64) -> f64x2 {
-    ss0 = explicit_slot 8
-block0(v0: f64x2, v1: f64):
-    v2 = stack_addr.i64 ss0
-    store v1, v2
-    v3 = load.f64 v2
-    v4 = insertlane v0, v3, 1
-    return v4
-}
-; run: %insertlane_f64x2_through_stack2([0x1.0 0x1.0], 0x2.0) == [0x1.0 0x2.0]
-
-
 function %insertlane_const_15(i8x16) -> i8x16 {
 block0(v0: i8x16):
     v1 = iconst.i8 10
diff --git a/cranelift/filetests/filetests/runtests/simd-insertlane_32.clif b/cranelift/filetests/filetests/runtests/simd-insertlane_32.clif
new file mode 100644
index 000000000000..e54a00784cdb
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-insertlane_32.clif
@@ -0,0 +1,93 @@
+test interpret
+test run
+target pulley32
+target pulley32be
+
+function %insertlane_i8x16_through_stack(i8x16, i8) -> i8x16 {
+    ss0 = explicit_slot 8
+block0(v0: i8x16, v1: i8):
+    v2 = stack_addr.i32 ss0
+    store v1, v2
+    v3 = load.i8 v2
+    v4 = insertlane v0, v3, 1
+    return v4
+}
+; run: %insertlane_i8x16_through_stack([1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1], 2) == [1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+
+function %insertlane_i16x8_through_stack(i16x8, i16) -> i16x8 {
+    ss0 = explicit_slot 8
+block0(v0: i16x8, v1: i16):
+    v2 = stack_addr.i32 ss0
+    store v1, v2
+    v3 = load.i16 v2
+    v4 = insertlane v0, v3, 2
+    return v4
+}
+; run: %insertlane_i16x8_through_stack([1 1 1 1 1 1 1 1], 2) == [1 1 2 1 1 1 1 1]
+
+function %insertlane_i32x4_through_stack(i32x4, i32) -> i32x4 {
+    ss0 = explicit_slot 8
+block0(v0: i32x4, v1: i32):
+    v2 = stack_addr.i32 ss0
+    store v1, v2
+    v3 = load.i32 v2
+    v4 = insertlane v0, v3, 3
+    return v4
+}
+; run: %insertlane_i32x4_through_stack([1 1 1 1], 2) == [1 1 1 2]
+
+function %insertlane_i64x2_through_stack(i64x2, i64) -> i64x2 {
+    ss0 = explicit_slot 8
+block0(v0: i64x2, v1: i64):
+    v2 = stack_addr.i32 ss0
+    store v1, v2
+    v3 = load.i64 v2
+    v4 = insertlane v0, v3, 0
+    return v4
+}
+; run: %insertlane_i64x2_through_stack([1 1], 2) == [2 1]
+
+function %insertlane_f32x4_through_stack(f32x4, f32) -> f32x4 {
+    ss0 = explicit_slot 8
+block0(v0: f32x4, v1: f32):
+    v2 = stack_addr.i32 ss0
+    store v1, v2
+    v3 = load.f32 v2
+    v4 = insertlane v0, v3, 3
+    return v4
+}
+; run: %insertlane_f32x4_through_stack([0x1.0 0x1.0 0x1.0 0x1.0], 0x2.0) == [0x1.0 0x1.0 0x1.0 0x2.0]
+
+function %insertlane_f32x4_through_stack2(f32x4, f32) -> f32x4 {
+    ss0 = explicit_slot 8
+block0(v0: f32x4, v1: f32):
+    v2 = stack_addr.i32 ss0
+    store v1, v2
+    v3 = load.f32 v2
+    v4 = insertlane v0, v3, 0
+    return v4
+}
+; run: %insertlane_f32x4_through_stack2([0x1.0 0x1.0 0x1.0 0x1.0], 0x2.0) == [0x2.0 0x1.0 0x1.0 0x1.0]
+
+function %insertlane_f64x2_through_stack(f64x2, f64) -> f64x2 {
+    ss0 = explicit_slot 8
+block0(v0: f64x2, v1: f64):
+    v2 = stack_addr.i32 ss0
+    store v1, v2
+    v3 = load.f64 v2
+    v4 = insertlane v0, v3, 0
+    return v4
+}
+; run: %insertlane_f64x2_through_stack([0x1.0 0x1.0], 0x2.0) == [0x2.0 0x1.0]
+
+function %insertlane_f64x2_through_stack2(f64x2, f64) -> f64x2 {
+    ss0 = explicit_slot 8
+block0(v0: f64x2, v1: f64):
+    v2 = stack_addr.i32 ss0
+    store v1, v2
+    v3 = load.f64 v2
+    v4 = insertlane v0, v3, 1
+    return v4
+}
+; run: %insertlane_f64x2_through_stack2([0x1.0 0x1.0], 0x2.0) == [0x1.0 0x2.0]
+
diff --git a/cranelift/filetests/filetests/runtests/simd-insertlane_64.clif b/cranelift/filetests/filetests/runtests/simd-insertlane_64.clif
new file mode 100644
index 000000000000..f299a015b9db
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-insertlane_64.clif
@@ -0,0 +1,102 @@
+test interpret
+test run
+target aarch64
+target s390x
+target x86_64
+target x86_64 sse41
+target x86_64 sse42
+target x86_64 sse42 has_avx
+set enable_multi_ret_implicit_sret
+target riscv64 has_v
+target riscv64 has_v has_c has_zcb
+target pulley64
+target pulley64be
+
+function %insertlane_i8x16_through_stack(i8x16, i8) -> i8x16 {
+    ss0 = explicit_slot 8
+block0(v0: i8x16, v1: i8):
+    v2 = stack_addr.i64 ss0
+    store v1, v2
+    v3 = load.i8 v2
+    v4 = insertlane v0, v3, 1
+    return v4
+}
+; run: %insertlane_i8x16_through_stack([1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1], 2) == [1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+
+function %insertlane_i16x8_through_stack(i16x8, i16) -> i16x8 {
+    ss0 = explicit_slot 8
+block0(v0: i16x8, v1: i16):
+    v2 = stack_addr.i64 ss0
+    store v1, v2
+    v3 = load.i16 v2
+    v4 = insertlane v0, v3, 2
+    return v4
+}
+; run: %insertlane_i16x8_through_stack([1 1 1 1 1 1 1 1], 2) == [1 1 2 1 1 1 1 1]
+
+function %insertlane_i32x4_through_stack(i32x4, i32) -> i32x4 {
+    ss0 = explicit_slot 8
+block0(v0: i32x4, v1: i32):
+    v2 = stack_addr.i64 ss0
+    store v1, v2
+    v3 = load.i32 v2
+    v4 = insertlane v0, v3, 3
+    return v4
+}
+; run: %insertlane_i32x4_through_stack([1 1 1 1], 2) == [1 1 1 2]
+
+function %insertlane_i64x2_through_stack(i64x2, i64) -> i64x2 {
+    ss0 = explicit_slot 8
+block0(v0: i64x2, v1: i64):
+    v2 = stack_addr.i64 ss0
+    store v1, v2
+    v3 = load.i64 v2
+    v4 = insertlane v0, v3, 0
+    return v4
+}
+; run: %insertlane_i64x2_through_stack([1 1], 2) == [2 1]
+
+function %insertlane_f32x4_through_stack(f32x4, f32) -> f32x4 {
+    ss0 = explicit_slot 8
+block0(v0: f32x4, v1: f32):
+    v2 = stack_addr.i64 ss0
+    store v1, v2
+    v3 = load.f32 v2
+    v4 = insertlane v0, v3, 3
+    return v4
+}
+; run: %insertlane_f32x4_through_stack([0x1.0 0x1.0 0x1.0 0x1.0], 0x2.0) == [0x1.0 0x1.0 0x1.0 0x2.0]
+
+function %insertlane_f32x4_through_stack2(f32x4, f32) -> f32x4 {
+    ss0 = explicit_slot 8
+block0(v0: f32x4, v1: f32):
+    v2 = stack_addr.i64 ss0
+    store v1, v2
+    v3 = load.f32 v2
+    v4 = insertlane v0, v3, 0
+    return v4
+}
+; run: %insertlane_f32x4_through_stack2([0x1.0 0x1.0 0x1.0 0x1.0], 0x2.0) == [0x2.0 0x1.0 0x1.0 0x1.0]
+
+function %insertlane_f64x2_through_stack(f64x2, f64) -> f64x2 {
+    ss0 = explicit_slot 8
+block0(v0: f64x2, v1: f64):
+    v2 = stack_addr.i64 ss0
+    store v1, v2
+    v3 = load.f64 v2
+    v4 = insertlane v0, v3, 0
+    return v4
+}
+; run: %insertlane_f64x2_through_stack([0x1.0 0x1.0], 0x2.0) == [0x2.0 0x1.0]
+
+function %insertlane_f64x2_through_stack2(f64x2, f64) -> f64x2 {
+    ss0 = explicit_slot 8
+block0(v0: f64x2, v1: f64):
+    v2 = stack_addr.i64 ss0
+    store v1, v2
+    v3 = load.f64 v2
+    v4 = insertlane v0, v3, 1
+    return v4
+}
+; run: %insertlane_f64x2_through_stack2([0x1.0 0x1.0], 0x2.0) == [0x1.0 0x2.0]
+
diff --git a/cranelift/filetests/filetests/verifier/pointer_width_32.clif b/cranelift/filetests/filetests/verifier/pointer_width_32.clif
new file mode 100644
index 000000000000..251fda4bb503
--- /dev/null
+++ b/cranelift/filetests/filetests/verifier/pointer_width_32.clif
@@ -0,0 +1,61 @@
+test verifier
+target pulley32
+
+function %error_i8_load_store_i32(i64) -> i8 {
+block0(v0: i64):
+    v1 = load.i8 v0 ; error: invalid pointer width (got 64, expected 32) encountered v0
+    store.i8 v1, v0 ; error: invalid pointer width (got 64, expected 32) encountered v0
+    return v1
+}
+
+function %error_i8_load_offset_i32(i64) -> i8 {
+block0(v0: i64):
+    v1 = load.i8 v0+16 ; error: invalid pointer width (got 64, expected 32) encountered v0
+    store.i8 v1, v0+16 ; error: invalid pointer width (got 64, expected 32) encountered v0
+    return v1
+}
+
+function %error_i64_atomic_store_load(i64) -> i64 {
+    ss0 = explicit_slot 8
+
+block0(v0: i64):
+    v1 = stack_addr.i64 ss0
+    atomic_store.i64 v0, v1; error: invalid pointer width (got 64, expected 32) encountered v1
+    v2 = atomic_load.i64 v1; error: invalid pointer width (got 64, expected 32) encountered v1
+    return v2
+}
+
+function %error_atomic_cas(i128, i128, i128) -> i128, i128 {
+    ss0 = explicit_slot 16
+
+block0(v0: i128, v1: i128, v2: i128):
+    stack_store.i128 v0, ss0
+    v3 = stack_addr.i64 ss0
+    v4 = atomic_cas.i128 v3, v1, v2; error: invalid pointer width (got 64, expected 32) encountered v3
+    v5 = stack_load.i128 ss0
+    return v5, v4
+}
+
+function %error_atomic_rmw_add_i64(i64, i64) -> i64, i64 {
+    ss0 = explicit_slot 8
+
+block0(v0: i64, v1: i64):
+    v2 = stack_addr.i64 ss0
+    store.i64 little v0, v2 ; error: invalid pointer width (got 64, expected 32) encountered v2
+
+    v3 = atomic_rmw.i64 little add v2, v1; error: invalid pointer width (got 64, expected 32) encountered v2
+
+    v4 = load.i64 little v2 ; error: invalid pointer width (got 64, expected 32) encountered v2
+    return v3, v4
+}
+
+function %error_fmsub_f32x4(f32x4, f32x4, f32x4) -> f32x4 {
+    ss0 = explicit_slot 16
+block0(v0: f32x4, v1: f32x4, v2: f32x4):
+    v3 = stack_addr.i64 ss0
+    store.f32x4 v0, v3 ; error: invalid pointer width (got 64, expected 32) encountered v3
+    v4 = load.f32x4 v3 ; error: invalid pointer width (got 64, expected 32) encountered v3
+    v5 = fneg v2
+    v6 = fma v4, v1, v5
+    return v6
+}
diff --git a/cranelift/filetests/filetests/verifier/pointer_width_64.clif b/cranelift/filetests/filetests/verifier/pointer_width_64.clif
new file mode 100644
index 000000000000..e0a9587f4e4c
--- /dev/null
+++ b/cranelift/filetests/filetests/verifier/pointer_width_64.clif
@@ -0,0 +1,61 @@
+test verifier
+target pulley64
+
+function %error_i8_load_store_i32(i32) -> i8 {
+block0(v0: i32):
+    v1 = load.i8 v0 ; error: invalid pointer width (got 32, expected 64) encountered v0
+    store.i8 v1, v0 ; error: invalid pointer width (got 32, expected 64) encountered v0
+    return v1
+}
+
+function %error_i8_load_offset_i32(i32) -> i8 {
+block0(v0: i32):
+    v1 = load.i8 v0+16 ; error: invalid pointer width (got 32, expected 64) encountered v0
+    store.i8 v1, v0+16 ; error: invalid pointer width (got 32, expected 64) encountered v0
+    return v1
+}
+
+function %error_i64_atomic_store_load(i64) -> i64 {
+    ss0 = explicit_slot 8
+
+block0(v0: i64):
+    v1 = stack_addr.i32 ss0
+    atomic_store.i64 v0, v1; error: invalid pointer width (got 32, expected 64) encountered v1
+    v2 = atomic_load.i64 v1; error: invalid pointer width (got 32, expected 64) encountered v1
+    return v2
+}
+
+function %error_atomic_cas(i128, i128, i128) -> i128, i128 {
+    ss0 = explicit_slot 16
+
+block0(v0: i128, v1: i128, v2: i128):
+    stack_store.i128 v0, ss0
+    v3 = stack_addr.i32 ss0
+    v4 = atomic_cas.i128 v3, v1, v2; error: invalid pointer width (got 32, expected 64) encountered v3
+    v5 = stack_load.i128 ss0
+    return v5, v4
+}
+
+function %error_atomic_rmw_add_i64(i64, i64) -> i64, i64 {
+    ss0 = explicit_slot 8
+
+block0(v0: i64, v1: i64):
+    v2 = stack_addr.i32 ss0
+    store.i64 little v0, v2 ; error: invalid pointer width (got 32, expected 64) encountered v2
+
+    v3 = atomic_rmw.i64 little add v2, v1; error: invalid pointer width (got 32, expected 64) encountered v2
+
+    v4 = load.i64 little v2 ; error: invalid pointer width (got 32, expected 64) encountered v2
+    return v3, v4
+}
+
+function %error_fmsub_f32x4(f32x4, f32x4, f32x4) -> f32x4 {
+    ss0 = explicit_slot 16
+block0(v0: f32x4, v1: f32x4, v2: f32x4):
+    v3 = stack_addr.i32 ss0
+    store.f32x4 v0, v3 ; error: invalid pointer width (got 32, expected 64) encountered v3
+    v4 = load.f32x4 v3 ; error: invalid pointer width (got 32, expected 64) encountered v3
+    v5 = fneg v2
+    v6 = fma v4, v1, v5
+    return v6
+}
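For frontends, the practical consequence of this rule is that address values must be built with the target's pointer type rather than a hard-coded i64. Inside Cranelift that type is available as `TargetIsa::pointer_type()`; the sketch below derives the same answer directly from a target-lexicon `Triple`, the same way `verify_is_address` does via `isa.triple().pointer_width()`. The helper name `clif_address_type` is illustrative, not an API introduced by this patch:

```rust
use std::str::FromStr;

use target_lexicon::{PointerWidth, Triple};

/// Illustrative helper: the CLIF integer type a frontend should use for
/// address values (`stack_addr` results, load/store pointers) on a given
/// target, per the rule the verifier now enforces.
fn clif_address_type(triple: &Triple) -> Result<&'static str, ()> {
    Ok(match triple.pointer_width()? {
        PointerWidth::U16 => "i16",
        PointerWidth::U32 => "i32",
        PointerWidth::U64 => "i64",
    })
}

fn main() {
    let t64 = Triple::from_str("x86_64-unknown-linux-gnu").unwrap();
    let t32 = Triple::from_str("i686-unknown-linux-gnu").unwrap();
    // Matches the split encoded in the new tests: i64 addresses verify on
    // 64-bit targets (pointer_width_64.clif), i32 on 32-bit targets
    // (pointer_width_32.clif).
    assert_eq!(clif_address_type(&t64), Ok("i64"));
    assert_eq!(clif_address_type(&t32), Ok("i32"));
}
```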