Skip to content

Commit

Permalink
[AVR] Expand shifts of all types except int8 and int16
Browse files Browse the repository at this point in the history
Currently our AVRShiftExpand pass expands only 32-bit shifts, with the
assumption that other kinds of shifts (e.g. 64-bit ones) are
automatically reduced to 8-bit ones by LLVM during ISel.

However this is not always true and causes problems in the rust-lang runtime.

This commit changes the logic a bit, so that instead of expanding only
32-bit shifts, we expand shifts of all types except 8-bit and 16-bit.

This is not the most optimal solution, because 64-bit shifts can be
expanded to 32-bit shifts which has been deeply optimized.

I've checked the generated code using rustc + simavr, and all shifts
seem to behave correctly.

Spotted in the wild in rustc:
rust-lang/compiler-builtins#523
rust-lang/rust#112140

Reviewed By: benshi001

Differential Revision: https://reviews.llvm.org/D154785
  • Loading branch information
Patryk27 authored and benshi001 committed Jul 19, 2023
1 parent eb33db4 commit 4e83175
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 48 deletions.
24 changes: 13 additions & 11 deletions llvm/lib/Target/AVR/AVRShiftExpand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
//===----------------------------------------------------------------------===//
//
/// \file
/// Expand 32-bit shift instructions (shl, lshr, ashr) to inline loops, just
/// like avr-gcc. This must be done in IR because otherwise the type legalizer
/// will turn 32-bit shifts into (non-existing) library calls such as __ashlsi3.
/// Expand non-8-bit and non-16-bit shift instructions (shl, lshr, ashr) to
/// inline loops, just like avr-gcc. This must be done in IR because otherwise
/// the type legalizer will turn 32-bit shifts into (non-existing) library calls
/// such as __ashlsi3.
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -51,8 +52,9 @@ bool AVRShiftExpand::runOnFunction(Function &F) {
if (!I.isShift())
// Only expand shift instructions (shl, lshr, ashr).
continue;
if (I.getType() != Type::getInt32Ty(Ctx))
// Only expand plain i32 types.
if (I.getType() == Type::getInt8Ty(Ctx) || I.getType() == Type::getInt16Ty(Ctx))
// Only expand non-8-bit and non-16-bit shifts, since those are expanded
// directly during isel.
continue;
if (isa<ConstantInt>(I.getOperand(1)))
// Only expand when the shift amount is not known.
Expand All @@ -75,7 +77,7 @@ bool AVRShiftExpand::runOnFunction(Function &F) {
void AVRShiftExpand::expand(BinaryOperator *BI) {
auto &Ctx = BI->getContext();
IRBuilder<> Builder(BI);
Type *Int32Ty = Type::getInt32Ty(Ctx);
Type *InputTy = cast<Instruction>(BI)->getType();
Type *Int8Ty = Type::getInt8Ty(Ctx);
Value *Int8Zero = ConstantInt::get(Int8Ty, 0);

Expand All @@ -101,7 +103,7 @@ void AVRShiftExpand::expand(BinaryOperator *BI) {
Builder.SetInsertPoint(LoopBB);
PHINode *ShiftAmountPHI = Builder.CreatePHI(Int8Ty, 2);
ShiftAmountPHI->addIncoming(ShiftAmount, BB);
PHINode *ValuePHI = Builder.CreatePHI(Int32Ty, 2);
PHINode *ValuePHI = Builder.CreatePHI(InputTy, 2);
ValuePHI->addIncoming(BI->getOperand(0), BB);

// Subtract the shift amount by one, as we're shifting one this loop
Expand All @@ -116,13 +118,13 @@ void AVRShiftExpand::expand(BinaryOperator *BI) {
Value *ValueShifted;
switch (BI->getOpcode()) {
case Instruction::Shl:
ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(Int32Ty, 1));
ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(InputTy, 1));
break;
case Instruction::LShr:
ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(InputTy, 1));
break;
case Instruction::AShr:
ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(InputTy, 1));
break;
default:
llvm_unreachable("asked to expand an instruction that is not a shift");
Expand All @@ -137,7 +139,7 @@ void AVRShiftExpand::expand(BinaryOperator *BI) {
// Collect the resulting value. This is necessary in the IR but won't produce
// any actual instructions.
Builder.SetInsertPoint(BI);
PHINode *Result = Builder.CreatePHI(Int32Ty, 2);
PHINode *Result = Builder.CreatePHI(InputTy, 2);
Result->addIncoming(BI->getOperand(0), BB);
Result->addIncoming(ValueShifted, LoopBB);

Expand Down
137 changes: 104 additions & 33 deletions llvm/test/CodeGen/AVR/shift-expand.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,17 @@
target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
target triple = "avr"

define i32 @shl(i32 %value, i32 %amount) addrspace(1) {
; CHECK-LABEL: @shl(
define i16 @shl16(i16 %value, i16 %amount) addrspace(1) {
; CHECK-LABEL: @shl16(
; CHECK-NEXT: [[RESULT:%.*]] = shl i16 [[VALUE:%.*]], [[AMOUNT:%.*]]
; CHECK-NEXT: ret i16 [[RESULT]]
;
%result = shl i16 %value, %amount
ret i16 %result
}

define i32 @shl32(i32 %value, i32 %amount) addrspace(1) {
; CHECK-LABEL: @shl32(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
Expand All @@ -28,8 +37,39 @@ define i32 @shl(i32 %value, i32 %amount) addrspace(1) {
ret i32 %result
}

define i32 @lshr(i32 %value, i32 %amount) addrspace(1) {
; CHECK-LABEL: @lshr(
define i40 @shl40(i40 %value, i40 %amount) addrspace(1) {
; CHECK-LABEL: @shl40(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i40 [[AMOUNT:%.*]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
; CHECK: shift.loop:
; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i40 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
; CHECK-NEXT: [[TMP6]] = shl i40 [[TMP4]], 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
; CHECK: shift.done:
; CHECK-NEXT: [[TMP8:%.*]] = phi i40 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: ret i40 [[TMP8]]
;
%result = shl i40 %value, %amount
ret i40 %result
}

; ------------------------------------------------------------------------------

define i16 @lshr16(i16 %value, i16 %amount) addrspace(1) {
; CHECK-LABEL: @lshr16(
; CHECK-NEXT: [[RESULT:%.*]] = lshr i16 [[VALUE:%.*]], [[AMOUNT:%.*]]
; CHECK-NEXT: ret i16 [[RESULT]]
;
%result = lshr i16 %value, %amount
ret i16 %result
}

define i32 @lshr32(i32 %value, i32 %amount) addrspace(1) {
; CHECK-LABEL: @lshr32(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
Expand All @@ -48,42 +88,73 @@ define i32 @lshr(i32 %value, i32 %amount) addrspace(1) {
ret i32 %result
}

define i32 @ashr(i32 %0, i32 %1) addrspace(1) {
; CHECK-LABEL: @ashr(
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1:%.*]] to i8
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i8 [[TMP3]], 0
; CHECK-NEXT: br i1 [[TMP4]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
define i40 @lshr40(i40 %value, i40 %amount) addrspace(1) {
; CHECK-LABEL: @lshr40(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i40 [[AMOUNT:%.*]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
; CHECK: shift.loop:
; CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[TMP2:%.*]] ], [ [[TMP7:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP0:%.*]], [[TMP2]] ], [ [[TMP8:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP7]] = sub i8 [[TMP5]], 1
; CHECK-NEXT: [[TMP8]] = ashr i32 [[TMP6]], 1
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP7]], 0
; CHECK-NEXT: br i1 [[TMP9]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i40 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
; CHECK-NEXT: [[TMP6]] = lshr i40 [[TMP4]], 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
; CHECK: shift.done:
; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP0]], [[TMP2]] ], [ [[TMP8]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: ret i32 [[TMP10]]
; CHECK-NEXT: [[TMP8:%.*]] = phi i40 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: ret i40 [[TMP8]]
;
%3 = ashr i32 %0, %1
ret i32 %3
%result = lshr i40 %value, %amount
ret i40 %result
}

; This function is not modified because it is not an i32.
define i40 @shl40(i40 %value, i40 %amount) addrspace(1) {
; CHECK-LABEL: @shl40(
; CHECK-NEXT: [[RESULT:%.*]] = shl i40 [[VALUE:%.*]], [[AMOUNT:%.*]]
; CHECK-NEXT: ret i40 [[RESULT]]
; ------------------------------------------------------------------------------

define i16 @ashr16(i16 %value, i16 %amount) addrspace(1) {
; CHECK-LABEL: @ashr16(
; CHECK-NEXT: [[RESULT:%.*]] = ashr i16 [[VALUE:%.*]], [[AMOUNT:%.*]]
; CHECK-NEXT: ret i16 [[RESULT]]
;
%result = shl i40 %value, %amount
ret i40 %result
%result = ashr i16 %value, %amount
ret i16 %result
}

; This function isn't either, although perhaps it should.
define i24 @shl24(i24 %value, i24 %amount) addrspace(1) {
; CHECK-LABEL: @shl24(
; CHECK-NEXT: [[RESULT:%.*]] = shl i24 [[VALUE:%.*]], [[AMOUNT:%.*]]
; CHECK-NEXT: ret i24 [[RESULT]]
define i32 @ashr32(i32 %value, i32 %amount) addrspace(1) {
; CHECK-LABEL: @ashr32(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
; CHECK: shift.loop:
; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
; CHECK-NEXT: [[TMP6]] = ashr i32 [[TMP4]], 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
; CHECK: shift.done:
; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: ret i32 [[TMP8]]
;
%result = ashr i32 %value, %amount
ret i32 %result
}

define i40 @ashr40(i40 %value, i40 %amount) addrspace(1) {
; CHECK-LABEL: @ashr40(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i40 [[AMOUNT:%.*]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
; CHECK: shift.loop:
; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i40 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
; CHECK-NEXT: [[TMP6]] = ashr i40 [[TMP4]], 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
; CHECK: shift.done:
; CHECK-NEXT: [[TMP8:%.*]] = phi i40 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: ret i40 [[TMP8]]
;
%result = shl i24 %value, %amount
ret i24 %result
%result = ashr i40 %value, %amount
ret i40 %result
}
34 changes: 30 additions & 4 deletions llvm/test/CodeGen/AVR/shift.ll
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,36 @@ define i64 @shift_i64_i64(i64 %a, i64 %b) {
; CHECK: ; %bb.0:
; CHECK-NEXT: push r16
; CHECK-NEXT: push r17
; CHECK-NEXT: mov r16, r10
; CHECK-NEXT: mov r17, r11
; CHECK-NEXT: andi r17, 0
; CHECK-NEXT: rcall __ashldi3
; CHECK-NEXT: mov r30, r10
; CHECK-NEXT: mov r31, r11
; CHECK-NEXT: cpi r30, 0
; CHECK-NEXT: breq .LBB3_3
; CHECK-NEXT: ; %bb.1: ; %shift.loop.preheader
; CHECK-NEXT: mov r27, r1
; CHECK-NEXT: mov r16, r1
; CHECK-NEXT: mov r17, r1
; CHECK-NEXT: .LBB3_2: ; %shift.loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: mov r31, r21
; CHECK-NEXT: lsl r31
; CHECK-NEXT: mov r26, r1
; CHECK-NEXT: rol r26
; CHECK-NEXT: lsl r22
; CHECK-NEXT: rol r23
; CHECK-NEXT: rol r24
; CHECK-NEXT: rol r25
; CHECK-NEXT: or r24, r16
; CHECK-NEXT: or r25, r17
; CHECK-NEXT: or r22, r26
; CHECK-NEXT: or r23, r27
; CHECK-NEXT: lsl r18
; CHECK-NEXT: rol r19
; CHECK-NEXT: rol r20
; CHECK-NEXT: rol r21
; CHECK-NEXT: dec r30
; CHECK-NEXT: cpi r30, 0
; CHECK-NEXT: brne .LBB3_2
; CHECK-NEXT: .LBB3_3: ; %shift.done
; CHECK-NEXT: pop r17
; CHECK-NEXT: pop r16
; CHECK-NEXT: ret
Expand Down

0 comments on commit 4e83175

Please sign in to comment.