Skip to content

Commit

Permalink
Winch: Add all_true and bitmask implementations for x64 with AVX (#10210)
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffcharles authored Feb 11, 2025
1 parent 73d6d6b commit 6eb3155
Show file tree
Hide file tree
Showing 18 changed files with 527 additions and 1,288 deletions.
9 changes: 4 additions & 5 deletions crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,12 +418,9 @@ impl WastTest {
"spec_testsuite/table_set.wast",
"spec_testsuite/table_size.wast",
// simd-related failures
"annotations/simd_lane.wast",
"memory64/simd.wast",
"misc_testsuite/simd/almost-extmul.wast",
"misc_testsuite/simd/canonicalize-nan.wast",
"misc_testsuite/simd/issue_3327_bnot_lowering.wast",
"spec_testsuite/simd_boolean.wast",
"spec_testsuite/simd_f32x4.wast",
"spec_testsuite/simd_f32x4_arith.wast",
"spec_testsuite/simd_f32x4_pmin_pmax.wast",
Expand All @@ -444,7 +441,6 @@ impl WastTest {
"spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast",
"spec_testsuite/simd_i64x2_extmul_i32x4.wast",
"spec_testsuite/simd_i8x16_arith2.wast",
"spec_testsuite/simd_lane.wast",
"spec_testsuite/simd_load.wast",
"spec_testsuite/simd_load_zero.wast",
"spec_testsuite/simd_splat.wast",
Expand All @@ -458,16 +454,18 @@ impl WastTest {
#[cfg(target_arch = "x86_64")]
if !(std::is_x86_feature_detected!("avx") && std::is_x86_feature_detected!("avx2")) {
let unsupported = [
"annotations/simd_lane.wast",
"misc_testsuite/int-to-float-splat.wast",
"misc_testsuite/issue6562.wast",
"misc_testsuite/simd/almost-extmul.wast",
"misc_testsuite/simd/cvt-from-uint.wast",
"misc_testsuite/simd/issue6725-no-egraph-panic.wast",
"misc_testsuite/simd/replace-lane-preserve.wast",
"misc_testsuite/simd/spillslot-size-fuzzbug.wast",
"misc_testsuite/winch/_simd_lane.wast",
"misc_testsuite/winch/_simd_load.wast",
"misc_testsuite/winch/_simd_splat.wast",
"spec_testsuite/simd_align.wast",
"spec_testsuite/simd_boolean.wast",
"spec_testsuite/simd_conversions.wast",
"spec_testsuite/simd_f32x4_cmp.wast",
"spec_testsuite/simd_f64x2_cmp.wast",
Expand Down Expand Up @@ -500,6 +498,7 @@ impl WastTest {
"spec_testsuite/simd_i16x8_sat_arith.wast",
"spec_testsuite/simd_i8x16_arith.wast",
"spec_testsuite/simd_bit_shift.wast",
"spec_testsuite/simd_lane.wast",
];

if unsupported.iter().any(|part| self.path.ends_with(part)) {
Expand Down
43 changes: 43 additions & 0 deletions tests/disas/winch/x64/i16x8_all_true/const_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx" ]

;; Disassembly fixture: a single function that applies `i16x8.all_true` to a
;; constant vector and returns the i32 result. The expected Winch x64 code
;; for this function is listed in the comment block after the module.
(module
(func (result i32)
;; Lane 0 of the constant is 0, so `all_true` should evaluate to 0 here.
(i16x8.all_true (v128.const i16x8 0 1 2 3 4 5 6 7))
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x51
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movdqu 0x2c(%rip), %xmm0
;; vpxor %xmm15, %xmm15, %xmm15
;; vpcmpeqw %xmm0, %xmm15, %xmm0
;; vptest %xmm0, %xmm0
;; movl $0, %eax
;; sete %al
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 51: ud2
;; 53: addb %al, (%rax)
;; 55: addb %al, (%rax)
;; 57: addb %al, (%rax)
;; 59: addb %al, (%rax)
;; 5b: addb %al, (%rax)
;; 5d: addb %al, (%rax)
;; 5f: addb %al, (%rax)
;; 61: addb %al, (%rcx)
;; 63: addb %al, (%rdx)
;; 65: addb %al, (%rbx)
;; 67: addb %al, (%rax, %rax)
;; 6a: addl $0x7000600, %eax
39 changes: 39 additions & 0 deletions tests/disas/winch/x64/i16x8_bitmask/const_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx" ]

;; Disassembly fixture: a single function that applies `i16x8.bitmask` to a
;; constant vector and returns the i32 result. The expected Winch x64 code
;; for this function is listed in the comment block after the module.
(module
(func (result i32)
;; Every lane of the constant is non-negative (top bit clear), so the
;; bitmask should evaluate to 0 here.
(i16x8.bitmask (v128.const i16x8 0 1 2 3 4 5 6 7))
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x45
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movdqu 0x1c(%rip), %xmm0
;; vpacksswb %xmm0, %xmm0, %xmm0
;; vpmovmskb %xmm0, %eax
;; shrl $8, %eax
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 45: ud2
;; 47: addb %al, (%rax)
;; 49: addb %al, (%rax)
;; 4b: addb %al, (%rax)
;; 4d: addb %al, (%rax)
;; 4f: addb %al, (%rax)
;; 51: addb %al, (%rcx)
;; 53: addb %al, (%rdx)
;; 55: addb %al, (%rbx)
;; 57: addb %al, (%rax, %rax)
;; 5a: addl $0x7000600, %eax
45 changes: 45 additions & 0 deletions tests/disas/winch/x64/i32x4_all_true/const_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx" ]

;; Disassembly fixture: a single function that applies `i32x4.all_true` to a
;; constant vector and returns the i32 result. The expected Winch x64 code
;; for this function is listed in the comment block after the module.
(module
(func (result i32)
;; Lane 0 of the constant is 0, so `all_true` should evaluate to 0 here.
(i32x4.all_true (v128.const i32x4 0 1 2 3))
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x51
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movdqu 0x2c(%rip), %xmm0
;; vpxor %xmm15, %xmm15, %xmm15
;; vpcmpeqd %xmm0, %xmm15, %xmm0
;; vptest %xmm0, %xmm0
;; movl $0, %eax
;; sete %al
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 51: ud2
;; 53: addb %al, (%rax)
;; 55: addb %al, (%rax)
;; 57: addb %al, (%rax)
;; 59: addb %al, (%rax)
;; 5b: addb %al, (%rax)
;; 5d: addb %al, (%rax)
;; 5f: addb %al, (%rax)
;; 61: addb %al, (%rax)
;; 63: addb %al, (%rcx)
;; 65: addb %al, (%rax)
;; 67: addb %al, (%rdx)
;; 69: addb %al, (%rax)
;; 6b: addb %al, (%rbx)
;; 6d: addb %al, (%rax)
35 changes: 35 additions & 0 deletions tests/disas/winch/x64/i32x4_bitmask/const_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx" ]

;; Disassembly fixture: a single function that applies `i32x4.bitmask` to a
;; constant vector and returns the i32 result. The expected Winch x64 code
;; for this function is listed in the comment block after the module.
(module
(func (result i32)
;; Every lane of the constant is non-negative (top bit clear), so the
;; bitmask should evaluate to 0 here.
(i32x4.bitmask (v128.const i32x4 0 1 2 3))
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x3e
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movdqu 0xc(%rip), %xmm0
;; vmovmskps %xmm0, %eax
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 3e: ud2
;; 40: addb %al, (%rax)
;; 42: addb %al, (%rax)
;; 44: addl %eax, (%rax)
;; 46: addb %al, (%rax)
;; 48: addb (%rax), %al
;; 4a: addb %al, (%rax)
;; 4c: addl (%rax), %eax
;; 4e: addb %al, (%rax)
45 changes: 45 additions & 0 deletions tests/disas/winch/x64/i64x2_all_true/const_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx" ]

;; Disassembly fixture: a single function that applies `i64x2.all_true` to a
;; constant vector and returns the i32 result. The expected Winch x64 code
;; for this function is listed in the comment block after the module.
(module
(func (result i32)
;; Lane 0 of the constant is 0, so `all_true` should evaluate to 0 here.
(i64x2.all_true (v128.const i64x2 0 1))
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x52
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movdqu 0x2c(%rip), %xmm0
;; vpxor %xmm15, %xmm15, %xmm15
;; vpcmpeqq %xmm0, %xmm15, %xmm0
;; vptest %xmm0, %xmm0
;; movl $0, %eax
;; sete %al
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 52: ud2
;; 54: addb %al, (%rax)
;; 56: addb %al, (%rax)
;; 58: addb %al, (%rax)
;; 5a: addb %al, (%rax)
;; 5c: addb %al, (%rax)
;; 5e: addb %al, (%rax)
;; 60: addb %al, (%rax)
;; 62: addb %al, (%rax)
;; 64: addb %al, (%rax)
;; 66: addb %al, (%rax)
;; 68: addl %eax, (%rax)
;; 6a: addb %al, (%rax)
;; 6c: addb %al, (%rax)
;; 6e: addb %al, (%rax)
42 changes: 42 additions & 0 deletions tests/disas/winch/x64/i64x2_bitmask/const_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx" ]

;; Disassembly fixture: a single function that applies `i64x2.bitmask` to a
;; constant vector and returns the i32 result. The expected Winch x64 code
;; for this function is listed in the comment block after the module.
(module
(func (result i32)
;; Both lanes of the constant are non-negative (top bit clear), so the
;; bitmask should evaluate to 0 here.
(i64x2.bitmask (v128.const i64x2 0 1))
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x3f
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movdqu 0x1c(%rip), %xmm0
;; vmovmskpd %xmm0, %eax
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 3f: ud2
;; 41: addb %al, (%rax)
;; 43: addb %al, (%rax)
;; 45: addb %al, (%rax)
;; 47: addb %al, (%rax)
;; 49: addb %al, (%rax)
;; 4b: addb %al, (%rax)
;; 4d: addb %al, (%rax)
;; 4f: addb %al, (%rax)
;; 51: addb %al, (%rax)
;; 53: addb %al, (%rax)
;; 55: addb %al, (%rax)
;; 57: addb %al, (%rcx)
;; 59: addb %al, (%rax)
;; 5b: addb %al, (%rax)
;; 5d: addb %al, (%rax)
42 changes: 42 additions & 0 deletions tests/disas/winch/x64/i8x16_all_true/const_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx" ]

;; Disassembly fixture: a single function that applies `i8x16.all_true` to a
;; constant vector and returns the i32 result. The expected Winch x64 code
;; for this function is listed in the comment block after the module.
(module
(func (result i32)
;; Lane 0 of the constant is 0, so `all_true` should evaluate to 0 here.
(i8x16.all_true (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15))
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x51
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movdqu 0x2c(%rip), %xmm0
;; vpxor %xmm15, %xmm15, %xmm15
;; vpcmpeqb %xmm0, %xmm15, %xmm0
;; vptest %xmm0, %xmm0
;; movl $0, %eax
;; sete %al
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 51: ud2
;; 53: addb %al, (%rax)
;; 55: addb %al, (%rax)
;; 57: addb %al, (%rax)
;; 59: addb %al, (%rax)
;; 5b: addb %al, (%rax)
;; 5d: addb %al, (%rax)
;; 5f: addb %al, (%rax)
;; 61: addl %eax, (%rdx)
;; 63: addl 0x9080706(, %rax), %eax
;; 6a: orb (%rbx), %cl
;; 6c: orb $0xd, %al
30 changes: 30 additions & 0 deletions tests/disas/winch/x64/i8x16_bitmask/const_avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
;;! target = "x86_64"
;;! test = "winch"
;;! flags = [ "-Ccranelift-has-avx" ]

;; Disassembly fixture: a single function that applies `i8x16.bitmask` to a
;; constant vector and returns the i32 result. The expected Winch x64 code
;; for this function is listed in the comment block after the module.
(module
(func (result i32)
;; Every lane of the constant is non-negative (top bit clear), so the
;; bitmask should evaluate to 0 here.
(i8x16.bitmask (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15))
)
)
;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r11
;; movq 0x10(%r11), %r11
;; addq $0x10, %r11
;; cmpq %rsp, %r11
;; ja 0x3e
;; 1c: movq %rdi, %r14
;; subq $0x10, %rsp
;; movq %rdi, 8(%rsp)
;; movq %rsi, (%rsp)
;; movdqu 0xc(%rip), %xmm0
;; vpmovmskb %xmm0, %eax
;; addq $0x10, %rsp
;; popq %rbp
;; retq
;; 3e: ud2
;; 40: addb %al, (%rcx)
;; 42: addb (%rbx), %al
;; 44: addb $5, %al
Loading

0 comments on commit 6eb3155

Please sign in to comment.