-
Notifications
You must be signed in to change notification settings - Fork 12.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ARM] Tail-calls do not require caller and callee arguments to match
The ARM backend was checking that the outgoing values for a tail-call matched the incoming argument values of the caller. This isn't necessary, because the caller can change the values in both registers and the stack before doing the tail-call. The actual limitation is that the callee can't need more stack space for its arguments than the caller does. This is needed for code using the musttail attribute, as well as enabling tail calls as an optimisation in more cases.
- Loading branch information
Showing
4 changed files
with
134 additions
and
143 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mtriple=armv7a-none-eabi %s -o - | FileCheck %s | ||
|
||
; Callee used by the test functions below. It takes six i32 arguments; the | ||
; CHECK lines expect the first four in r0-r3 and the last two stored at | ||
; [sp] and [sp, #4]. | ||
declare i32 @many_args_callee(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) | ||
|
||
; The caller has the same signature as its tail-callee, but passes constants | ||
; instead of forwarding its own incoming arguments. The expected code still | ||
; ends in a branch (b) rather than a call, with the two stack-passed values | ||
; written to [sp] and [sp, #4] without any stack adjustment. | ||
define i32 @many_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) { | ||
; CHECK-LABEL: many_args_tail: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: mov r0, #5 | ||
; CHECK-NEXT: mov r1, #2 | ||
; CHECK-NEXT: str r0, [sp] | ||
; CHECK-NEXT: mov r0, #6 | ||
; CHECK-NEXT: str r0, [sp, #4] | ||
; CHECK-NEXT: mov r0, #1 | ||
; CHECK-NEXT: mov r2, #3 | ||
; CHECK-NEXT: mov r3, #4 | ||
; CHECK-NEXT: b many_args_callee | ||
%ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) | ||
ret i32 %ret | ||
} | ||
|
||
; Same situation as @many_args_tail, but the call is marked musttail instead | ||
; of tail. The caller and callee signatures match, and the expected code is | ||
; the same sequence ending in a branch (b) to the callee. | ||
define i32 @many_args_musttail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) { | ||
; CHECK-LABEL: many_args_musttail: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: mov r0, #5 | ||
; CHECK-NEXT: mov r1, #2 | ||
; CHECK-NEXT: str r0, [sp] | ||
; CHECK-NEXT: mov r0, #6 | ||
; CHECK-NEXT: str r0, [sp, #4] | ||
; CHECK-NEXT: mov r0, #1 | ||
; CHECK-NEXT: mov r2, #3 | ||
; CHECK-NEXT: mov r3, #4 | ||
; CHECK-NEXT: b many_args_callee | ||
%ret = musttail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) | ||
ret i32 %ret | ||
} | ||
|
||
; This function has more arguments than its tail-callee. This isn't valid for | ||
; the musttail attribute, but can still be tail-called as a non-guaranteed | ||
; optimisation, because the outgoing arguments to @many_args_callee fit in the | ||
; stack space allocated by the caller of @more_args_tail. | ||
define i32 @more_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) { | ||
; CHECK-LABEL: more_args_tail: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: mov r0, #5 | ||
; CHECK-NEXT: mov r1, #2 | ||
; CHECK-NEXT: str r0, [sp] | ||
; CHECK-NEXT: mov r0, #6 | ||
; CHECK-NEXT: str r0, [sp, #4] | ||
; CHECK-NEXT: mov r0, #1 | ||
; CHECK-NEXT: mov r2, #3 | ||
; CHECK-NEXT: mov r3, #4 | ||
; CHECK-NEXT: b many_args_callee | ||
%ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) | ||
ret i32 %ret | ||
} | ||
|
||
; Again, this isn't valid for musttail, but can be tail-called in practice | ||
; because the stack size is the same. | ||
; NOTE(review): presumably only the third i64 is passed on the stack (8 | ||
; bytes), matching the two stack-passed i32 values of the callee - confirm | ||
; against the AAPCS. | ||
define i32 @different_args_tail(i64 %0, i64 %1, i64 %2) { | ||
; CHECK-LABEL: different_args_tail: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: mov r0, #5 | ||
; CHECK-NEXT: mov r1, #2 | ||
; CHECK-NEXT: str r0, [sp] | ||
; CHECK-NEXT: mov r0, #6 | ||
; CHECK-NEXT: str r0, [sp, #4] | ||
; CHECK-NEXT: mov r0, #1 | ||
; CHECK-NEXT: mov r2, #3 | ||
; CHECK-NEXT: mov r3, #4 | ||
; CHECK-NEXT: b many_args_callee | ||
%ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) | ||
ret i32 %ret | ||
} | ||
|
||
; Here, the caller requires less stack space for its arguments than the | ||
; callee, so it would not be valid to do a tail-call. The expected code | ||
; therefore makes a normal call (bl) and allocates its own 8 bytes of stack | ||
; for the outgoing stack-passed arguments. | ||
define i32 @fewer_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) { | ||
; CHECK-LABEL: fewer_args_tail: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: .save {r11, lr} | ||
; CHECK-NEXT: push {r11, lr} | ||
; CHECK-NEXT: .pad #8 | ||
; CHECK-NEXT: sub sp, sp, #8 | ||
; CHECK-NEXT: mov r1, #6 | ||
; CHECK-NEXT: mov r0, #5 | ||
; CHECK-NEXT: strd r0, r1, [sp] | ||
; CHECK-NEXT: mov r0, #1 | ||
; CHECK-NEXT: mov r1, #2 | ||
; CHECK-NEXT: mov r2, #3 | ||
; CHECK-NEXT: mov r3, #4 | ||
; CHECK-NEXT: bl many_args_callee | ||
; CHECK-NEXT: add sp, sp, #8 | ||
; CHECK-NEXT: pop {r11, pc} | ||
%ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) | ||
ret i32 %ret | ||
} |