Skip to content

Commit

Permalink
Winch: Add trunc_sat instructions for x64 with AVX (#10226)
Browse files Browse the repository at this point in the history
* Winch: Add `trunc_sat` instructions for x64 with AVX

* Free temp register

* Move implementations into helper methods

* Remove duplicate Wast test entries
  • Loading branch information
jeffcharles authored Feb 13, 2025
1 parent b0b5d8f commit 7f93c1e
Show file tree
Hide file tree
Showing 12 changed files with 685 additions and 26 deletions.
4 changes: 2 additions & 2 deletions crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -430,8 +430,6 @@ impl WastTest {
"spec_testsuite/simd_f64x2_pmin_pmax.wast",
"spec_testsuite/simd_f64x2_rounding.wast",
"spec_testsuite/simd_i16x8_arith2.wast",
"spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast",
"spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast",
"spec_testsuite/simd_i8x16_arith2.wast",
"spec_testsuite/simd_load.wast",
"spec_testsuite/simd_load_zero.wast",
Expand Down Expand Up @@ -490,6 +488,8 @@ impl WastTest {
"spec_testsuite/simd_i16x8_sat_arith.wast",
"spec_testsuite/simd_i32x4_arith.wast",
"spec_testsuite/simd_i32x4_dot_i16x8.wast",
"spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast",
"spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast",
"spec_testsuite/simd_i8x16_arith.wast",
"spec_testsuite/simd_bit_shift.wast",
"spec_testsuite/simd_lane.wast",
Expand Down
39 changes: 39 additions & 0 deletions tests/disas/winch/x64/i32x4_trunc_sat_f32x4_s/const_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx" ]

(module
;; Single function returning a v128: applies i32x4.trunc_sat_f32x4_s to a
;; constant vector whose f32 lanes are 1, 2, 3 and 4.
(func (result v128)
(i32x4.trunc_sat_f32x4_s (v128.const f32x4 1 2 3 4))
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x59
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movdqu 0x2c(%rip), %xmm0
;; vcmpeqps %xmm0, %xmm0, %xmm15
;; vandps %xmm0, %xmm15, %xmm0
;; vpxor %xmm0, %xmm15, %xmm15
;; vcvttps2dq %xmm0, %xmm0
;; vpand %xmm0, %xmm15, %xmm15
;; vpsrad $0x1f, %xmm15, %xmm15
;; vpxor %xmm0, %xmm15, %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 59: ud2
;; 5b: addb %al, (%rax)
;; 5d: addb %al, (%rax)
;; 5f: addb %al, (%rax)
;; 61: addb %al, 0x3f(%rax)
;; 67: addb %al, (%rax)
;; 6a: addb %al, (%rax)
48 changes: 48 additions & 0 deletions tests/disas/winch/x64/i32x4_trunc_sat_f32x4_u/const_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx" ]

(module
;; Single function returning a v128: applies i32x4.trunc_sat_f32x4_u to a
;; constant vector whose f32 lanes are 1, 2, 3 and 4.
(func (result v128)
(i32x4.trunc_sat_f32x4_u (v128.const f32x4 1 2 3 4))
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x76
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movdqu 0x4c(%rip), %xmm0
;; vxorps %xmm0, %xmm0, %xmm15
;; vmaxps %xmm15, %xmm0, %xmm0
;; vpcmpeqd %xmm15, %xmm15, %xmm15
;; vpsrld $1, %xmm15, %xmm15
;; vcvtdq2ps %xmm15, %xmm15
;; vcvttps2dq %xmm0, %xmm1
;; vsubps %xmm15, %xmm0, %xmm0
;; vcmpleps %xmm0, %xmm15, %xmm15
;; vcvttps2dq %xmm0, %xmm0
;; vpxor %xmm0, %xmm15, %xmm15
;; vpxor %xmm0, %xmm0, %xmm0
;; vpmaxsd %xmm0, %xmm15, %xmm0
;; vpaddd %xmm1, %xmm0, %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 76: ud2
;; 78: addb %al, (%rax)
;; 7a: addb %al, (%rax)
;; 7c: addb %al, (%rax)
;; 7e: addb %al, (%rax)
;; 80: addb %al, (%rax)
;; 82: cmpb $0, (%rdi)
;; 85: addb %al, (%rax)
;; 87: addb %al, (%rax)
;; 8a: addb %al, (%rax)
44 changes: 44 additions & 0 deletions tests/disas/winch/x64/i32x4_trunc_sat_f64x2_s_zero/const_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx" ]

(module
;; Single function returning a v128: applies i32x4.trunc_sat_f64x2_s_zero to
;; a constant vector.
;; NOTE(review): the constant is written with f32x4 lanes while the
;; instruction name indicates an f64x2 operand -- confirm whether an f64x2
;; constant was intended (changing it would alter the expected output below).
(func (result v128)
(i32x4.trunc_sat_f64x2_s_zero (v128.const f32x4 1 2 3 4))
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x50
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movdqu 0x2c(%rip), %xmm0
;; vcmpeqpd %xmm0, %xmm0, %xmm15
;; vandpd 0x2f(%rip), %xmm15, %xmm15
;; vminpd %xmm15, %xmm0, %xmm0
;; vcvttpd2dq %xmm0, %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 50: ud2
;; 52: addb %al, (%rax)
;; 54: addb %al, (%rax)
;; 56: addb %al, (%rax)
;; 58: addb %al, (%rax)
;; 5a: addb %al, (%rax)
;; 5c: addb %al, (%rax)
;; 5e: addb %al, (%rax)
;; 60: addb %al, (%rax)
;; 62: cmpb $0, (%rdi)
;; 65: addb %al, (%rax)
;; 67: addb %al, (%rax)
;; 6a: addb %al, (%rax)
;; 6e: addb $0, (%rax)
;; 72: sarb $0xff, %bh
46 changes: 46 additions & 0 deletions tests/disas/winch/x64/i32x4_trunc_sat_f64x2_u_zero/const_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx" ]

(module
;; Single function returning a v128: applies i32x4.trunc_sat_f64x2_u_zero to
;; a constant vector.
;; NOTE(review): the constant is written with f32x4 lanes while the
;; instruction name indicates an f64x2 operand -- confirm whether an f64x2
;; constant was intended (changing it would alter the expected output below).
(func (result v128)
(i32x4.trunc_sat_f64x2_u_zero (v128.const f32x4 1 2 3 4))
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x60
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movdqu 0x3c(%rip), %xmm0
;; vxorpd %xmm15, %xmm15, %xmm15
;; vmaxpd %xmm15, %xmm0, %xmm0
;; vminpd 0x3a(%rip), %xmm0, %xmm0
;; vroundpd $3, %xmm0, %xmm0
;; vaddpd 0x3c(%rip), %xmm0, %xmm0
;; vshufps $0x88, %xmm15, %xmm0, %xmm0
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 60: ud2
;; 62: addb %al, (%rax)
;; 64: addb %al, (%rax)
;; 66: addb %al, (%rax)
;; 68: addb %al, (%rax)
;; 6a: addb %al, (%rax)
;; 6c: addb %al, (%rax)
;; 6e: addb %al, (%rax)
;; 70: addb %al, (%rax)
;; 72: cmpb $0, (%rdi)
;; 75: addb %al, (%rax)
;; 77: addb %al, (%rax)
;; 7a: addb %al, (%rax)
;; 7e: addb $0, (%rax)
;; 82: loopne 0x83
14 changes: 7 additions & 7 deletions tests/misc_testsuite/winch/_simd_load.wast
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@
;; )
;; (assert_return (invoke "as-f32x4.min-operand") (v128.const i32x4 0xaaaaaaaa 0xaaaaaaaa 0xaaaaaaaa 0xaaaaaaaa)) ;; signed 1010 < 0010

;; (module (memory 1)
;; (data (offset (i32.const 0)) "\00\00\00\43\00\00\80\3f\66\66\e6\3f\00\00\80\bf") ;; 128 1.0 1.8 -1
;; (func (export "as-i32x4.trunc_sat_f32x4_s-operand") (result v128)
;; (i32x4.trunc_sat_f32x4_s (v128.load (i32.const 0)))
;; )
;; )
;; (assert_return (invoke "as-i32x4.trunc_sat_f32x4_s-operand") (v128.const i32x4 128 1 1 -1)) ;; 128 1.0 1.8 -1 -> 128 1 1 -1
(module (memory 1)
;; The data segment places 16 bytes at offset 0; the exported function loads
;; them as a v128 and passes the result to i32x4.trunc_sat_f32x4_s.
(data (offset (i32.const 0)) "\00\00\00\43\00\00\80\3f\66\66\e6\3f\00\00\80\bf") ;; 128 1.0 1.8 -1
(func (export "as-i32x4.trunc_sat_f32x4_s-operand") (result v128)
(i32x4.trunc_sat_f32x4_s (v128.load (i32.const 0)))
)
)
(assert_return (invoke "as-i32x4.trunc_sat_f32x4_s-operand") (v128.const i32x4 128 1 1 -1)) ;; 128 1.0 1.8 -1 -> 128 1 1 -1

(module (memory 1)
(data (offset (i32.const 0)) "\02\00\00\00\02\00\00\00\02\00\00\00\02\00\00\00")
Expand Down
6 changes: 3 additions & 3 deletions tests/misc_testsuite/winch/_simd_splat.wast
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,8 @@
;; Conversions
(func (export "as-f32x4_convert_s_i32x4-operand") (param i32) (result v128)
(f32x4.convert_i32x4_s (i32x4.splat (local.get 0))))
;; (func (export "as-i32x4_trunc_s_f32x4_sat-operand") (param f32) (result v128)
;; (i32x4.trunc_sat_f32x4_s (f32x4.splat (local.get 0))))
;; Splats the f32 parameter into a vector with f32x4.splat, then applies
;; i32x4.trunc_sat_f32x4_s to that vector and returns the v128 result.
(func (export "as-i32x4_trunc_s_f32x4_sat-operand") (param f32) (result v128)
(i32x4.trunc_sat_f32x4_s (f32x4.splat (local.get 0))))
)

(assert_return (invoke "as-i8x16_extract_lane_s-operand-first" (i32.const 42)) (i32.const 42))
Expand Down Expand Up @@ -341,7 +341,7 @@
;; (assert_return (invoke "as-f32x4_div-operands" (f32.const 1.0) (f32.const 8.0)) (v128.const f32x4 0.125 0.125 0.125 0.125))

(assert_return (invoke "as-f32x4_convert_s_i32x4-operand" (i32.const 12345)) (v128.const f32x4 12345.0 12345.0 12345.0 12345.0))
;; (assert_return (invoke "as-i32x4_trunc_s_f32x4_sat-operand" (f32.const 1.1)) (v128.const i32x4 1 1 1 1))
(assert_return (invoke "as-i32x4_trunc_s_f32x4_sat-operand" (f32.const 1.1)) (v128.const i32x4 1 1 1 1))


;; As the argument of control constructs and WASM instructions
Expand Down
12 changes: 10 additions & 2 deletions winch/codegen/src/isa/aarch64/masm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ use crate::{
Imm as I, IntCmpKind, LoadKind, MacroAssembler as Masm, MaxKind, MinKind, MulWideKind,
OperandSize, RegImm, RemKind, ReplaceLaneKind, RmwOp, RoundingMode, SPOffset, ShiftKind,
SplatKind, StackSlot, StoreKind, TrapCode, TruncKind, V128AbsKind, V128ConvertKind,
V128ExtendKind, V128NarrowKind, VectorCompareKind, VectorEqualityKind, Zero, TRUSTED_FLAGS,
UNTRUSTED_FLAGS,
V128ExtendKind, V128NarrowKind, V128TruncSatKind, VectorCompareKind, VectorEqualityKind,
Zero, TRUSTED_FLAGS, UNTRUSTED_FLAGS,
},
stack::TypedReg,
};
Expand Down Expand Up @@ -1194,6 +1194,14 @@ impl Masm for MacroAssembler {
bail!(CodeGenError::unimplemented_masm_instruction())
}

/// Placeholder for the `v128_trunc_sat` masm operation in this (aarch64)
/// backend.
///
/// Both arguments are ignored (hence the underscore-prefixed names).
///
/// # Errors
///
/// Always bails with `CodeGenError::unimplemented_masm_instruction()`.
fn v128_trunc_sat(
&mut self,
_context: &mut CodeGenContext<Emission>,
_kind: V128TruncSatKind,
) -> Result<()> {
bail!(CodeGenError::unimplemented_masm_instruction())
}

fn v128_min(
&mut self,
_src1: Reg,
Expand Down
Loading

0 comments on commit 7f93c1e

Please sign in to comment.