Skip to content

Commit

Permalink
Winch: i32x4.dot_i16x8_s on x64 with AVX (#10220)
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffcharles authored Feb 12, 2025
1 parent 42ac8f3 commit ec9f885
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 2 deletions.
4 changes: 2 additions & 2 deletions crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,6 @@ impl WastTest {
"spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast",
"spec_testsuite/simd_i16x8_extmul_i8x16.wast",
"spec_testsuite/simd_i32x4_arith2.wast",
"spec_testsuite/simd_i32x4_dot_i16x8.wast",
"spec_testsuite/simd_i32x4_extadd_pairwise_i16x8.wast",
"spec_testsuite/simd_i32x4_extmul_i16x8.wast",
"spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast",
Expand Down Expand Up @@ -493,9 +492,10 @@ impl WastTest {
"spec_testsuite/simd_i8x16_sat_arith.wast",
"spec_testsuite/simd_i64x2_arith.wast",
"spec_testsuite/simd_i16x8_arith.wast",
"spec_testsuite/simd_i32x4_arith.wast",
"spec_testsuite/simd_i16x8_q15mulr_sat_s.wast",
"spec_testsuite/simd_i16x8_sat_arith.wast",
"spec_testsuite/simd_i32x4_arith.wast",
"spec_testsuite/simd_i32x4_dot_i16x8.wast",
"spec_testsuite/simd_i8x16_arith.wast",
"spec_testsuite/simd_bit_shift.wast",
"spec_testsuite/simd_lane.wast",
Expand Down
47 changes: 47 additions & 0 deletions tests/disas/winch/x64/i32x4_dot_i16x8_s/const_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx" ]

(module
;; Returns the `i32x4.dot_i16x8_s` of two constant `v128` operands.
(func (result v128)
(i32x4.dot_i16x8_s (v128.const i32x4 0 1 2 3) (v128.const i32x4 3 2 1 0))
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x4a
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movdqu 0x1c(%rip), %xmm0
;; movdqu 0x24(%rip), %xmm1
;; vpmaddwd %xmm0, %xmm1, %xmm1
;; movdqa %xmm1, %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 4a: ud2
;; 4c: addb %al, (%rax)
;; 4e: addb %al, (%rax)
;; 50: addl (%rax), %eax
;; 52: addb %al, (%rax)
;; 54: addb (%rax), %al
;; 56: addb %al, (%rax)
;; 58: addl %eax, (%rax)
;; 5a: addb %al, (%rax)
;; 5c: addb %al, (%rax)
;; 5e: addb %al, (%rax)
;; 60: addb %al, (%rax)
;; 62: addb %al, (%rax)
;; 64: addl %eax, (%rax)
;; 66: addb %al, (%rax)
;; 68: addb (%rax), %al
;; 6a: addb %al, (%rax)
;; 6c: addl (%rax), %eax
;; 6e: addb %al, (%rax)
4 changes: 4 additions & 0 deletions winch/codegen/src/isa/aarch64/masm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,10 @@ impl Masm for MacroAssembler {
/// Not implemented in this aarch64 backend: every argument is ignored and
/// the call always fails with
/// `CodeGenError::unimplemented_masm_instruction()`.
fn v128_bitmask(&mut self, _src: Reg, _dst: WritableReg, _size: OperandSize) -> Result<()> {
bail!(CodeGenError::unimplemented_masm_instruction())
}

/// Not implemented in this aarch64 backend: the operands and destination
/// are ignored and the call always fails with
/// `CodeGenError::unimplemented_masm_instruction()`.
fn v128_dot(&mut self, _lhs: Reg, _rhs: Reg, _dst: WritableReg) -> Result<()> {
bail!(CodeGenError::unimplemented_masm_instruction())
}
}

impl MacroAssembler {
Expand Down
6 changes: 6 additions & 0 deletions winch/codegen/src/isa/x64/masm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2542,6 +2542,12 @@ impl Masm for MacroAssembler {
}
Ok(())
}

/// Emits a single AVX `vpmaddwd` taking `lhs` and `rhs` as sources and
/// `dst` as the destination.
///
/// # Errors
///
/// Propagates the error from `ensure_has_avx` without emitting anything
/// when that check fails.
fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()> {
    // Only reach the assembler once the AVX check has succeeded.
    self.ensure_has_avx().map(|()| {
        self.asm.xmm_vex_rr(AvxOpcode::Vpmaddwd, lhs, rhs, dst);
    })
}
}

impl MacroAssembler {
Expand Down
4 changes: 4 additions & 0 deletions winch/codegen/src/masm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1881,4 +1881,8 @@ pub(crate) trait MacroAssembler {
/// Extracts the high bit of each lane in `src` and produces a scalar mask
/// with all bits concatenated in `dst`.
fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

/// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add
/// adjacent pairs of the 32-bit results, placing the outcome in `dst`.
///
/// # Errors
///
/// Returns an error when the backend cannot emit the operation, for
/// example when it is unimplemented for the target or a required ISA
/// extension is not enabled.
fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>;
}
9 changes: 9 additions & 0 deletions winch/codegen/src/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,7 @@ macro_rules! def_unsupported {
(emit I16x8Bitmask $($rest:tt)*) => {};
(emit I32x4Bitmask $($rest:tt)*) => {};
(emit I64x2Bitmask $($rest:tt)*) => {};
(emit I32x4DotI16x8S $($rest:tt)*) => {};

(emit $unsupported:tt $($rest:tt)*) => {$($rest)*};
}
Expand Down Expand Up @@ -4125,6 +4126,14 @@ where
})
}

/// Handles `i32x4.dot_i16x8_s` as a binary operation: the two registers
/// supplied by `binop` are passed to `MacroAssembler::v128_dot`, with the
/// first one reused as the destination, and the result is typed as `v128`.
fn visit_i32x4_dot_i16x8_s(&mut self) -> Self::Output {
    self.context
        .binop(self.masm, OperandSize::S32, |masm, lhs, rhs, _| {
            // `lhs` is both the first source and the destination register.
            masm.v128_dot(lhs, rhs, writable!(lhs))
                .map(|()| TypedReg::v128(lhs))
        })
}

wasmparser::for_each_visit_simd_operator!(def_unsupported);
}

Expand Down

0 comments on commit ec9f885

Please sign in to comment.