Forked from bytecodealliance/wasmtime

Commit
cranelift: always spill i32 with i64 stores

Fixes bytecodealliance#2839. See also the issue description and the comments in this commit for details of what the fix is about.
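
For context, the problematic shape looks roughly like the following CLIF (a distilled sketch with invented names, not the reproducer from the issue): an i32 result stays live across a call, gets spilled, and is later consumed through a uextend.i64. If register allocation coalesces the i32's vreg with the extended i64's, a 32-bit spill store followed by a 64-bit reload leaves stale data in the upper bits. The actual test below needs several more values and a second call to force the spill to happen at the right point.

    ;; Illustrative sketch only; the function names %repro and %ext are invented.
    function %repro(i32, i32) -> i64 {
        fn0 = %ext(i32) -> i64
    block0(v0: i32, v1: i32):
        v2 = iadd.i32 v0, v1   ;; i32 value that stays live across the call
        v3 = call fn0(v0)      ;; the call clobbers registers; v2 may be spilled here
        v4 = uextend.i64 v2    ;; v2's vreg can be coalesced with v4's
        v5 = iadd.i64 v3, v4   ;; the later reload reads a full 64-bit word
        return v5
    }
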
Showing 2 changed files with 140 additions and 0 deletions.

cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif (127 additions, 0 deletions)

@@ -0,0 +1,127 @@
test compile
target x86_64 machinst

;; The goal of this test is to ensure that stack spills of an integer value
;; whose width is less than the machine word's size cause the full word to be
;; stored, and not only the lower bits.

;; Because an unsigned extension can be transformed into a simple move, the
;; source vreg of the extension operation can be coalesced with its
;; destination vreg; if that vreg happens to be spilled, the reload may then
;; use a different, larger width than the store.

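;; Concretely: each of the iadd.i32 results below is later widened with a
;; uextend.i64 and folded into the final sum; whichever of them ends up being
;; spilled across the calls (at rsp(0 + virtual offset) in the checks) must be
;; stored with a full-width movq so the 64-bit reload does not pick up stale
;; upper bits.
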
function %f0(i32, i32, i32) -> i64 {
fn0 = %g(i32) -> i64

; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$64, %rsp

;; Stash all the callee-saved registers.

; nextln: movq %r12, 16(%rsp)
; nextln: movq %r13, 24(%rsp)
; nextln: movq %r14, 32(%rsp)
; nextln: movq %rbx, 40(%rsp)
; nextln: movq %r15, 48(%rsp)

block0(v0: i32, v1: i32, v2: i32):
;; First, create enough virtual registers so that the call instructions
;; cause at least one of them to be spilled onto the stack.

v3 = iadd.i32 v0, v1
v4 = iadd.i32 v1, v2
v5 = iadd.i32 v0, v2
v6 = iadd.i32 v3, v0
v7 = iadd.i32 v4, v0
v8 = iadd.i32 v5, v0

; nextln: movq %rdi, %r12
; nextln: addl %esi, %r12d
; nextln: movq %rsi, %r13
; nextln: addl %edx, %r13d
; nextln: movq %rdi, %r14
; nextln: addl %edx, %r14d
; nextln: movq %r12, %rbx
; nextln: addl %edi, %ebx
; nextln: movq %r13, %r15
; nextln: addl %edi, %r15d
; nextln: movq %r14, %rsi

;; This should be movq below, not movl.
; nextln: movq %rsi, rsp(0 + virtual offset)

; nextln: movslq rsp(0 + virtual offset), %rsi
; nextln: addl %edi, %esi

;; Put an effectful instruction here so that the live-ranges of the adds and
;; uextends are split, and to prevent the uextend from being emitted before
;; the call. This effectively causes the above i32 to be spilled as an i32,
;; and not as a full i64.

v300 = call fn0(v0)

;; This should be movq below, not movl.
; nextln: movq %rsi, rsp(0 + virtual offset)

; nextln: load_ext_name %g+0, %rsi
; nextln: call *%rsi

v31 = uextend.i64 v3
v41 = uextend.i64 v4
v51 = uextend.i64 v5
v61 = uextend.i64 v6
v71 = uextend.i64 v7
v81 = uextend.i64 v8

;; None of the uextends are generated here yet.

;; At this point, I'd expect this second call below not to be necessary, but
;; if it is removed, the uextend is applied before the call and the i64 is
;; spilled (then reloaded), so the bug does not appear. So an additional call
;; it is!

v100 = call fn0(v3)

; nextln: movq %r12, %rsi
; nextln: movq %rsi, rsp(8 + virtual offset)
; nextln: nop len=0
; nextln: movq %r12, %rdi
; nextln: load_ext_name %g+0, %rsi
; nextln: call *%rsi

;; Cause reloads of all the values. Most are in registers, but one of them
;; is on the stack. Make sure they're all used in the final computation.

v101 = iadd.i64 v100, v31
v102 = iadd.i64 v101, v41
v103 = iadd.i64 v102, v51
v104 = iadd.i64 v103, v61
v105 = iadd.i64 v104, v71
v200 = iadd.i64 v105, v81

; nextln: movq %rax, %rsi
; nextln: movq rsp(8 + virtual offset), %rdi
; nextln: addq %rdi, %rsi
; nextln: addq %r13, %rsi
; nextln: addq %r14, %rsi
; nextln: addq %rbx, %rsi
; nextln: addq %r15, %rsi

;; The reload operates on a full word, so it uses movq.
; nextln: movq rsp(0 + virtual offset), %rdi

; nextln: addq %rdi, %rsi
; nextln: movq %rsi, %rax
; nextln: movq 16(%rsp), %r12
; nextln: movq 24(%rsp), %r13
; nextln: movq 32(%rsp), %r14
; nextln: movq 40(%rsp), %rbx
; nextln: movq 48(%rsp), %r15
; nextln: addq $$64, %rsp

return v200
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
}