cranelift: always spill i32 with i64 stores;
Fixes bytecodealliance#2839. See also the issue description and the comments in this
commit for details of what the fix is about.
bnjbvr authored and mchesser committed May 24, 2021
1 parent d49f770 commit 50a4362
Showing 2 changed files with 140 additions and 0 deletions.
13 changes: 13 additions & 0 deletions cranelift/codegen/src/machinst/abi_impl.rs
@@ -1213,6 +1213,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
let spill_off = islot * M::word_bytes() as i64;
let sp_off = self.stackslots_size as i64 + spill_off;
trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);

// When reloading from a spill slot, we might have lost information about real integer
// types. For instance, on the x64 backend, a zero-extension can become spurious and
// optimized into a move, causing vregs of types I32 and I64 to share the same coalescing
// equivalency class. As a result, such a value can be spilled as an I32 and later
// reloaded as an I64; to make sure the high bits are always defined, always do a
// word-sized store in this case.
let ty = if ty.is_int() && ty.bytes() < M::word_bytes() {
M::word_type()
} else {
ty
};

gen_store_stack_multi::<M>(StackAMode::NominalSPOffset(sp_off, ty), from_regs, ty)
}
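
The hazard described in the new comment can also be illustrated outside of Cranelift with a minimal, standalone Rust sketch (illustrative only; the constants and bit manipulation below are not Cranelift code): if a spill writes only the low 32 bits of a word-sized slot, a later 64-bit reload observes whatever stale bits were left in the upper half.

// Minimal sketch, not Cranelift code: a 64-bit "spill slot" whose upper half
// holds stale data, and a 32-bit value being spilled into it.
fn main() {
    let mut slot: u64 = 0xdead_beef_0000_0000; // stale high bits in the slot
    let value: u32 = 0x1234_5678;

    // "movl"-style spill: only the low 32 bits of the slot are updated.
    slot = (slot & 0xffff_ffff_0000_0000) | u64::from(value);
    assert_ne!(slot, u64::from(value)); // a 64-bit reload sees stale high bits

    // "movq"-style spill after widening to the word type: high bits are defined.
    slot = u64::from(value);
    assert_eq!(slot, u64::from(value)); // a 64-bit reload is now correct
}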

127 changes: 127 additions & 0 deletions cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif
@@ -0,0 +1,127 @@
test compile
target x86_64 machinst

;; The goal of this test is to ensure that stack spills of an integer value
;; whose width is less than the machine word's size cause the full word to be
;; stored, and not only the lower bits.

;; Because unsigned extensions can be transformed into simple moves, the
;; source vreg of the extension operation can be coalesced with its
;; destination vreg; if that vreg happens to be spilled, the reload may then
;; use a different, larger size.

function %f0(i32, i32, i32) -> i64 {
fn0 = %g(i32) -> i64

; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$64, %rsp

;; Stash all the callee-saved registers.

; nextln: movq %r12, 16(%rsp)
; nextln: movq %r13, 24(%rsp)
; nextln: movq %r14, 32(%rsp)
; nextln: movq %rbx, 40(%rsp)
; nextln: movq %r15, 48(%rsp)

block0(v0: i32, v1: i32, v2: i32):
;; First, create enough virtual registers so that the call instructions
;; causes at least one of them to be spilled onto the stack.

v3 = iadd.i32 v0, v1
v4 = iadd.i32 v1, v2
v5 = iadd.i32 v0, v2
v6 = iadd.i32 v3, v0
v7 = iadd.i32 v4, v0
v8 = iadd.i32 v5, v0

; nextln: movq %rdi, %r12
; nextln: addl %esi, %r12d
; nextln: movq %rsi, %r13
; nextln: addl %edx, %r13d
; nextln: movq %rdi, %r14
; nextln: addl %edx, %r14d
; nextln: movq %r12, %rbx
; nextln: addl %edi, %ebx
; nextln: movq %r13, %r15
; nextln: addl %edi, %r15d
; nextln: movq %r14, %rsi

;; This should be movq below, not movl.
; nextln: movq %rsi, rsp(0 + virtual offset)

; nextln: movslq rsp(0 + virtual offset), %rsi
; nextln: addl %edi, %esi

;; Put an effectful instruction here so that the live ranges of the adds and
;; uextends are split, and to prevent the uextend from being emitted before
;; the call. This effectively causes the above i32 to be spilled as an i32,
;; and not as a full i64.

v300 = call fn0(v0)

;; This should be movq below, not movl.
; nextln: movq %rsi, rsp(0 + virtual offset)

; nextln: load_ext_name %g+0, %rsi
; nextln: call *%rsi

v31 = uextend.i64 v3
v41 = uextend.i64 v4
v51 = uextend.i64 v5
v61 = uextend.i64 v6
v71 = uextend.i64 v7
v81 = uextend.i64 v8

;; None of the uextends are generated here yet.

;; At this point, I'd expect that this second call below would not be
;; necessary, but if it is removed, the uextend is applied before the call,
;; and the i64 is spilled (then reloaded), causing the bug not to appear. So
;; an additional call it is!

v100 = call fn0(v3)

; nextln: movq %r12, %rsi
; nextln: movq %rsi, rsp(8 + virtual offset)
; nextln: nop len=0
; nextln: movq %r12, %rdi
; nextln: load_ext_name %g+0, %rsi
; nextln: call *%rsi

;; Cause reloads of all the values. Most are in registers, but one of them
;; is on the stack. Make sure they're all used in the final computation.

v101 = iadd.i64 v100, v31
v102 = iadd.i64 v101, v41
v103 = iadd.i64 v102, v51
v104 = iadd.i64 v103, v61
v105 = iadd.i64 v104, v71
v200 = iadd.i64 v105, v81

; nextln: movq %rax, %rsi
; nextln: movq rsp(8 + virtual offset), %rdi
; nextln: addq %rdi, %rsi
; nextln: addq %r13, %rsi
; nextln: addq %r14, %rsi
; nextln: addq %rbx, %rsi
; nextln: addq %r15, %rsi

;; The reload operates on a full word, so uses movq.
; nextln: movq rsp(0 + virtual offset), %rdi

; nextln: addq %rdi, %rsi
; nextln: movq %rsi, %rax
; nextln: movq 16(%rsp), %r12
; nextln: movq 24(%rsp), %r13
; nextln: movq 32(%rsp), %r14
; nextln: movq 40(%rsp), %rbx
; nextln: movq 48(%rsp), %r15
; nextln: addq $$64, %rsp

return v200
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
}
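
For reference, the shape the test above exercises can be distilled to a few instructions: an i32 that is live across a call and is only zero-extended to i64 afterwards. The fragment below is a sketch of that shape only (the function name %min is hypothetical, and on its own this fragment does not create enough register pressure to force a spill, which is why the real test needs the extra adds and the second call).

function %min(i32) -> i64 {
    fn0 = %g(i32) -> i64

block0(v0: i32):
    v1 = call fn0(v0)    ; v0 is live across the call, so it may be spilled as an i32
    v2 = uextend.i64 v0  ; after the call, the spilled value is reloaded as an i64
    v3 = iadd.i64 v1, v2
    return v3
}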
