Skip to content

Commit

Permalink
[x86] use zero-extending load of a byte outside of loops too (2nd try)
Browse files Browse the repository at this point in the history
The first attempt missed regenerating the test files that are maintained by a
tool (update_llc_test_checks.py).

Original commit message:

This implements the main suggested change from issue #56498.
Using the shorter (non-extending) instruction with only
-Oz ("minsize") rather than -Os ("optsize") is left as a
possible follow-up.

As noted in the bug report, the zero-extending load may have
shorter latency/better throughput across a wide range of x86
micro-arches, and it avoids a potential false dependency.
The cost is an extra instruction byte.

This could cause perf ups and downs from secondary effects,
but I don't think it is possible to account for those in
advance, and that will likely also depend on exact micro-arch.
This does bring LLVM x86 codegen more in line with existing
gcc codegen, so if problems are exposed they are more likely
to occur for both compilers.

Differential Revision: https://reviews.llvm.org/D129775
  • Loading branch information
rotateright committed Jul 20, 2022
1 parent 2d889a8 commit f0dd12e
Show file tree
Hide file tree
Showing 211 changed files with 3,834 additions and 3,292 deletions.
12 changes: 6 additions & 6 deletions llvm/lib/Target/X86/X86FixupBWInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,12 +393,12 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI,
switch (MI->getOpcode()) {

case X86::MOV8rm:
// Only replace 8 bit loads with the zero extending versions if
// in an inner most loop and not optimizing for size. This takes
// an extra byte to encode, and provides limited performance upside.
if (MachineLoop *ML = MLI->getLoopFor(&MBB))
if (ML->begin() == ML->end() && !OptForSize)
return tryReplaceLoad(X86::MOVZX32rm8, MI);
// Replace 8-bit loads with the zero-extending version if not optimizing
// for size. The extending op is cheaper across a wide range of uarch and
// it avoids a potentially expensive partial register stall. It takes an
// extra byte to encode, however, so don't do this when optimizing for size.
if (!OptForSize)
return tryReplaceLoad(X86::MOVZX32rm8, MI);
break;

case X86::MOV16rm:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ target triple = "i686-unknown-unknown"
define i32 @test5(i32 %B, i8 %C) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl A, %eax
; CHECK-NEXT: shldl %cl, %edx, %eax
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ define void @test() {
; CHECK: # %bb.0:
; CHECK-NEXT: movl A, %eax
; CHECK-NEXT: movzwl 2(%eax), %eax
; CHECK-NEXT: movb B, %cl
; CHECK-NEXT: movzbl B, %ecx
; CHECK-NEXT: movl C, %edx
; CHECK-NEXT: andb $16, %cl
; CHECK-NEXT: shll %cl, %edx
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ define void @handle_vector_size_attribute() nounwind {
; CHECK-NEXT: cmpl $1, %eax
; CHECK-NEXT: ja .LBB0_2
; CHECK-NEXT: # %bb.1: # %bb77
; CHECK-NEXT: movb 0, %al
; CHECK-NEXT: movb 0, %al
; CHECK-NEXT: movzbl 0, %eax
; CHECK-NEXT: movzbl 0, %eax
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: .LBB0_2: # %bb84
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,11 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %rbp
; CHECK-NEXT: movq (%rbp), %rax
; CHECK-NEXT: callq *216(%rax)
; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %dl
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: je LBB0_11
; CHECK-NEXT: ## %bb.7: ## %cond_false.i
; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %bl
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-NEXT: movzbl %bl, %ecx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: divb %dl
Expand All @@ -98,8 +98,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: LBB0_11: ## %cond_true.i
; CHECK-NEXT: movl $4, %edi
; CHECK-NEXT: callq _feraiseexcept
; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %dl
; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %bl
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-NEXT: xorl %r14d, %r14d
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: je LBB0_14
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
; CHECK-NEXT: .cfi_offset %ebx, -12
; CHECK-NEXT: .cfi_offset %ebp, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl
; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: testb $1, %bl
; CHECK-NEXT: je LBB0_25
; CHECK-NEXT: ## %bb.1: ## %bb116.i
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ define void @testit63_entry_2E_ce() nounwind {
; CHECK-NEXT: movl %esp, %edi
; CHECK-NEXT: movl $g1s63, %esi
; CHECK-NEXT: rep;movsl (%esi), %es:(%edi)
; CHECK-NEXT: movb g1s63+62, %al
; CHECK-NEXT: movzbl g1s63+62, %eax
; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl g1s63+60, %eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ define i32 @func_44(i16 signext %p_46) nounwind {
; SOURCE-SCHED-NEXT: xorl %ecx, %ecx
; SOURCE-SCHED-NEXT: cmpl $2, %eax
; SOURCE-SCHED-NEXT: setge %cl
; SOURCE-SCHED-NEXT: movb g_73, %dl
; SOURCE-SCHED-NEXT: movzbl g_73, %edx
; SOURCE-SCHED-NEXT: xorl %eax, %eax
; SOURCE-SCHED-NEXT: subb {{[0-9]+}}(%esp), %al
; SOURCE-SCHED-NEXT: testb %dl, %dl
Expand Down
45 changes: 36 additions & 9 deletions llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,44 @@
; RUN: llc < %s -mcpu=core2 | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mcpu=core2 | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.4"
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind

define fastcc i32 @cli_magic_scandesc(ptr %in) nounwind ssp {
; CHECK-LABEL: cli_magic_scandesc:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $72, %rsp
; CHECK-NEXT: movq __stack_chk_guard(%rip), %rax
; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movzbl (%rsp), %eax
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-NEXT: movq (%rdi), %rdx
; CHECK-NEXT: movq 8(%rdi), %rsi
; CHECK-NEXT: movq %rdx, (%rsp)
; CHECK-NEXT: movq 24(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq 16(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq 32(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq 40(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq 48(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq 56(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %al, (%rsp)
; CHECK-NEXT: movb %cl, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq __stack_chk_guard(%rip), %rax
; CHECK-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_2: # %entry
; CHECK-NEXT: callq __stack_chk_fail@PLT
entry:
%a = alloca [64 x i8]
%c = getelementptr inbounds [64 x i8], ptr %a, i64 0, i32 30
Expand All @@ -15,10 +49,3 @@ entry:
store i8 %e, ptr %c, align 8
ret i32 0
}

; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
; CHECK: movb (%rsp), [[R1:%.+]]
; CHECK: movb 30(%rsp), [[R0:%.+]]
; CHECK: movb [[R1]], (%rsp)
; CHECK: movb [[R0]], 30(%rsp)
; CHECK: callq ___stack_chk_fail
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ define i8 @neg_type_mismatch(i32 %a1_wide_orig, i16 %a2_wide_orig, i32 %inc) nou
define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounwind {
; I386-NOCMOV-LABEL: negative_CopyFromReg:
; I386-NOCMOV: # %bb.0:
; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I386-NOCMOV-NEXT: addl {{[0-9]+}}(%esp), %ecx
; I386-NOCMOV-NEXT: cmpb %cl, %al
Expand All @@ -255,7 +255,7 @@ define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounw
;
; I686-NOCMOV-LABEL: negative_CopyFromReg:
; I686-NOCMOV: # %bb.0:
; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
; I686-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I686-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I686-NOCMOV-NEXT: addl {{[0-9]+}}(%esp), %ecx
; I686-NOCMOV-NEXT: cmpb %cl, %al
Expand Down Expand Up @@ -297,8 +297,8 @@ define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounw
define i8 @negative_CopyFromRegs(i32 %a1_wide, i32 %a2_wide) nounwind {
; I386-NOCMOV-LABEL: negative_CopyFromRegs:
; I386-NOCMOV: # %bb.0:
; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl
; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I386-NOCMOV-NEXT: cmpb %cl, %al
; I386-NOCMOV-NEXT: jg .LBB4_2
; I386-NOCMOV-NEXT: # %bb.1:
Expand All @@ -317,8 +317,8 @@ define i8 @negative_CopyFromRegs(i32 %a1_wide, i32 %a2_wide) nounwind {
;
; I686-NOCMOV-LABEL: negative_CopyFromRegs:
; I686-NOCMOV: # %bb.0:
; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl
; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
; I686-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; I686-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I686-NOCMOV-NEXT: cmpb %cl, %al
; I686-NOCMOV-NEXT: jg .LBB4_2
; I686-NOCMOV-NEXT: # %bb.1:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ define void @test_abi_exts_call(ptr %addr) {
; X32-NEXT: .cfi_offset %esi, -12
; X32-NEXT: .cfi_offset %ebx, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movb (%eax), %bl
; X32-NEXT: movzbl (%eax), %ebx
; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: movl %esi, (%esp)
; X32-NEXT: calll take_char
Expand All @@ -346,7 +346,7 @@ define void @test_abi_exts_call(ptr %addr) {
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movb (%rdi), %al
; X64-NEXT: movzbl (%rdi), %eax
; X64-NEXT: movzbl %al, %ebx
; X64-NEXT: movl %ebx, %edi
; X64-NEXT: callq take_char
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/GlobalISel/memop-scalar-x32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ define i1 @test_load_i1(ptr %p1) {
; CHECK-LABEL: test_load_i1:
; CHECK: # %bb.0:
; CHECK-NEXT: movl 4(%esp), %eax
; CHECK-NEXT: movb (%eax), %al
; CHECK-NEXT: movzbl (%eax), %eax
; CHECK-NEXT: retl
%r = load i1, ptr %p1
ret i1 %r
Expand All @@ -18,7 +18,7 @@ define i8 @test_load_i8(ptr %p1) {
; CHECK-LABEL: test_load_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl 4(%esp), %eax
; CHECK-NEXT: movb (%eax), %al
; CHECK-NEXT: movzbl (%eax), %eax
; CHECK-NEXT: retl
%r = load i8, ptr %p1
ret i8 %r
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
define i1 @test_load_i1(ptr %p1) {
; ALL-LABEL: test_load_i1:
; ALL: # %bb.0:
; ALL-NEXT: movb (%rdi), %al
; ALL-NEXT: movzbl (%rdi), %eax
; ALL-NEXT: retq
%r = load i1, ptr %p1
ret i1 %r
Expand All @@ -14,7 +14,7 @@ define i1 @test_load_i1(ptr %p1) {
define i8 @test_load_i8(ptr %p1) {
; ALL-LABEL: test_load_i8:
; ALL: # %bb.0:
; ALL-NEXT: movb (%rdi), %al
; ALL-NEXT: movzbl (%rdi), %eax
; ALL-NEXT: retq
%r = load i8, ptr %p1
ret i8 %r
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/PR40322.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ define void @_Z2ami(i32) #0 personality ptr @__gxx_personality_v0 {
; CHECK-MINGW-X86-NEXT: .cfi_def_cfa_offset 12
; CHECK-MINGW-X86-NEXT: .cfi_offset %esi, -12
; CHECK-MINGW-X86-NEXT: .cfi_offset %edi, -8
; CHECK-MINGW-X86-NEXT: movb __ZGVZ2amiE2au, %al
; CHECK-MINGW-X86-NEXT: movzbl __ZGVZ2amiE2au, %eax
; CHECK-MINGW-X86-NEXT: testb %al, %al
; CHECK-MINGW-X86-NEXT: jne LBB0_4
; CHECK-MINGW-X86-NEXT: # %bb.1: # %init.check
Expand Down
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/X86/abs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ define i8 @test_i8(i8 %a) nounwind {
;
; X86-LABEL: test_i8:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: sarb $7, %cl
; X86-NEXT: xorb %cl, %al
Expand Down Expand Up @@ -530,13 +530,13 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
; X86-NEXT: xorb %al, %bh
; X86-NEXT: subb %al, %bh
; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarb $7, %al
; X86-NEXT: xorb %al, %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarb $7, %al
; X86-NEXT: xorb %al, %cl
Expand Down Expand Up @@ -572,7 +572,7 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
; X86-NEXT: sarb $7, %al
; X86-NEXT: xorb %al, %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb %al, %ah
; X86-NEXT: sarb $7, %ah
; X86-NEXT: xorb %ah, %al
Expand All @@ -585,23 +585,23 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
; X86-NEXT: movb %dh, 11(%esi)
; X86-NEXT: movb %bl, 10(%esi)
; X86-NEXT: movb %bh, 9(%esi)
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 8(%esi)
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 7(%esi)
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 6(%esi)
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 5(%esi)
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 4(%esi)
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 3(%esi)
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 2(%esi)
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 1(%esi)
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, (%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $12, %esp
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/add-sub-bool.ll
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
Expand Down Expand Up @@ -529,7 +529,7 @@ define i32 @test_i32_sub_add_sext_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
; X86-LABEL: test_i32_sub_add_sext_var:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: sarl $31, %edx
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/and-load-fold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
define i8 @foo(ptr %V) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: movb 2(%rdi), %al
; CHECK-NEXT: movzbl 2(%rdi), %eax
; CHECK-NEXT: andb $95, %al
; CHECK-NEXT: retq
%V3i8 = load <3 x i8>, ptr %V, align 4
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/and-sink.ll
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: je .LBB1_5
; CHECK-NEXT: # %bb.1: # %bb0.preheader
; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al
; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_2: # %bb0
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/and-with-overflow.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
define i8 @and_i8_ri(i8 zeroext %0, i8 zeroext %1) {
; X86-LABEL: and_i8_ri:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andb $-17, %cl
; X86-NEXT: je .LBB0_2
Expand All @@ -35,8 +35,8 @@ define i8 @and_i8_ri(i8 zeroext %0, i8 zeroext %1) {
define i8 @and_i8_rr(i8 zeroext %0, i8 zeroext %1) {
; X86-LABEL: and_i8_rr:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb %al, %cl
; X86-NEXT: je .LBB1_2
; X86-NEXT: # %bb.1:
Expand Down
Loading

0 comments on commit f0dd12e

Please sign in to comment.