From d5e47421a8a78a07431940fd52a15261dad0069b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 23 Jul 2024 14:17:10 +0100
Subject: [PATCH] [X86] Add MMX nontemporal store pattern

As noticed on #98505 - try to reduce codegen diffs until we're ready to drop MMX entirely
---
 llvm/lib/Target/X86/X86InstrMMX.td | 10 ++++++++++
 llvm/test/CodeGen/X86/nontemporal.ll | 4 ++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index 8d6bc8d0ee2cfc3..60dfe66960507dd 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -576,3 +576,13 @@ def : Pat<(x86mmx (MMX_X86movdq2q
                (bc_v2i64 (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
           (MMX_CVTTPD2PIrr VR128:$src)>;
 }
+
+let AddedComplexity = 400 in { // Prefer the non-temporal (MOVNTQ) store pattern
+
+// Non-temporal store of an x86mmx value (no alignment required). NOTE(review): MOVNTQ arrived with SSE; confirm HasMMX alone suffices.
+let Predicates = [HasMMX] in {
+  def : Pat<(nontemporalstore (x86mmx VR64:$src), addr:$dst),
+            (MMX_MOVNTQmr addr:$dst, VR64:$src)>;
+}
+
+} // AddedComplexity
diff --git a/llvm/test/CodeGen/X86/nontemporal.ll b/llvm/test/CodeGen/X86/nontemporal.ll
index f5a4d8f3434d7f0..1f273eb43c6a60a 100644
--- a/llvm/test/CodeGen/X86/nontemporal.ll
+++ b/llvm/test/CodeGen/X86/nontemporal.ll
@@ -183,14 +183,14 @@ define void @test_mmx(ptr nocapture %a0, ptr nocapture %a1) {
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movq (%ecx), %mm0
 ; X86-NEXT: psrlq $3, %mm0
-; X86-NEXT: movq %mm0, (%eax)
+; X86-NEXT: movntq %mm0, (%eax)
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mmx:
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: movq (%rdi), %mm0
 ; X64-NEXT: psrlq $3, %mm0
-; X64-NEXT: movq %mm0, (%rsi)
+; X64-NEXT: movntq %mm0, (%rsi)
 ; X64-NEXT: retq
 entry:
   %0 = load x86_mmx, ptr %a0