From 8c4b383134bbcb5185fea359d47bbe1e25db2a59 Mon Sep 17 00:00:00 2001 From: Daniel Paoliello Date: Fri, 4 Oct 2024 10:29:03 -0700 Subject: [PATCH] [llvm][aarch64] Add support for the MS qualifiers __ptr32, __ptr64, __sptr, __uptr --- .../Target/AArch64/AArch64ISelLowering.cpp | 83 +++++++- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 22 ++- .../lib/Target/AArch64/AArch64TargetMachine.h | 3 +- .../AArch64/aarch64-mixed-ptr-sizes.ll | 180 ++++++++++++++++++ 4 files changed, 277 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0814380b188485..2c7304243f5010 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -533,6 +533,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::XOR, MVT::i32, Custom); setOperationAction(ISD::XOR, MVT::i64, Custom); + setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); + setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); + // Virtually no operation on f128 is legal, but LLVM can't expand them when // there's a valid register class, so we need custom operations in most cases. setOperationAction(ISD::FABS, MVT::f128, Expand); @@ -6722,6 +6725,37 @@ static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, ST->getBasePtr(), ST->getMemOperand()); } +static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) { + SDLoc dl(Op); + SDValue Src = Op.getOperand(0); + MVT DestVT = Op.getSimpleValueType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + AddrSpaceCastSDNode *N = cast(Op.getNode()); + + unsigned SrcAS = N->getSrcAddressSpace(); + unsigned DestAS = N->getDestAddressSpace(); + assert(SrcAS != DestAS && + "addrspacecast must be between different address spaces"); + assert(TLI.getTargetMachine().getPointerSize(SrcAS) != + TLI.getTargetMachine().getPointerSize(DestAS) && + "addrspacecast must be between different ptr sizes"); + + if (SrcAS == ARM64AS::PTR32_SPTR) { + return DAG.getNode(ISD::SIGN_EXTEND, dl, DestVT, Src, + DAG.getTargetConstant(0, dl, DestVT)); + } else if (SrcAS == ARM64AS::PTR32_UPTR) { + return DAG.getNode(ISD::ZERO_EXTEND, dl, DestVT, Src, + DAG.getTargetConstant(0, dl, DestVT)); + } else if ((DestAS == ARM64AS::PTR32_SPTR) || + (DestAS == ARM64AS::PTR32_UPTR)) { + SDValue Ext = DAG.getAnyExtOrTrunc(Src, dl, DestVT); + SDValue Trunc = DAG.getZeroExtendInReg(Ext, dl, DestVT); + return Trunc; + } else { + return Src; + } +} + // Custom lowering for any store, vector or scalar and/or default or with // a truncate operations. Currently only custom lower truncate operation // from vector v4i16 to v4i8 or volatile stores of i128. @@ -7375,6 +7409,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: return LowerFixedLengthVectorIntExtendToSVE(Op, DAG); + case ISD::ADDRSPACECAST: + return LowerADDRSPACECAST(Op, DAG); case ISD::SIGN_EXTEND_INREG: { // Only custom lower when ExtraVT has a legal byte based element type. EVT ExtraVT = cast(Op.getOperand(1))->getVT(); @@ -23366,6 +23402,26 @@ static SDValue performLOADCombine(SDNode *N, performTBISimplification(N->getOperand(1), DCI, DAG); LoadSDNode *LD = cast(N); + EVT RegVT = LD->getValueType(0); + EVT MemVT = LD->getMemoryVT(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc DL(LD); + + // Cast ptr32 and ptr64 pointers to the default address space before a load. + unsigned AddrSpace = LD->getAddressSpace(); + if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR || + AddrSpace == ARM64AS::PTR32_UPTR) { + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + if (PtrVT != LD->getBasePtr().getSimpleValueType()) { + SDValue Cast = + DAG.getAddrSpaceCast(DL, PtrVT, LD->getBasePtr(), AddrSpace, 0); + return DAG.getExtLoad(LD->getExtensionType(), DL, RegVT, LD->getChain(), + Cast, LD->getPointerInfo(), MemVT, + LD->getOriginalAlign(), + LD->getMemOperand()->getFlags()); + } + } + if (LD->isVolatile() || !Subtarget->isLittleEndian()) return SDValue(N, 0); @@ -23375,13 +23431,11 @@ static SDValue performLOADCombine(SDNode *N, if (!LD->isNonTemporal()) return SDValue(N, 0); - EVT MemVT = LD->getMemoryVT(); if (MemVT.isScalableVector() || MemVT.getSizeInBits() <= 256 || MemVT.getSizeInBits() % 256 == 0 || 256 % MemVT.getScalarSizeInBits() != 0) return SDValue(N, 0); - SDLoc DL(LD); SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); SDNodeFlags Flags = LD->getFlags(); @@ -23641,12 +23695,28 @@ static SDValue performSTORECombine(SDNode *N, SDValue Value = ST->getValue(); SDValue Ptr = ST->getBasePtr(); EVT ValueVT = Value.getValueType(); + EVT MemVT = ST->getMemoryVT(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc DL(ST); auto hasValidElementTypeForFPTruncStore = [](EVT VT) { EVT EltVT = VT.getVectorElementType(); return EltVT == MVT::f32 || EltVT == MVT::f64; }; + // Cast ptr32 and ptr64 pointers to the default address space before a store. + unsigned AddrSpace = ST->getAddressSpace(); + if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR || + AddrSpace == ARM64AS::PTR32_UPTR) { + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + if (PtrVT != Ptr.getSimpleValueType()) { + SDValue Cast = DAG.getAddrSpaceCast(DL, PtrVT, Ptr, AddrSpace, 0); + return DAG.getStore(Chain, DL, Value, Cast, ST->getPointerInfo(), + ST->getOriginalAlign(), + ST->getMemOperand()->getFlags(), ST->getAAInfo()); + } + } + if (SDValue Res = combineI8TruncStore(ST, DAG, Subtarget)) return Res; @@ -23660,8 +23730,8 @@ static SDValue performSTORECombine(SDNode *N, ValueVT.isFixedLengthVector() && ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() && hasValidElementTypeForFPTruncStore(Value.getOperand(0).getValueType())) - return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, - ST->getMemoryVT(), ST->getMemOperand()); + return DAG.getTruncStore(Chain, DL, Value.getOperand(0), Ptr, MemVT, + ST->getMemOperand()); if (SDValue Split = splitStores(N, DCI, DAG, Subtarget)) return Split; @@ -26988,6 +27058,11 @@ void AArch64TargetLowering::ReplaceNodeResults( ReplaceATOMIC_LOAD_128Results(N, Results, DAG, Subtarget); return; } + case ISD::ADDRSPACECAST: { + SDValue V = LowerADDRSPACECAST(SDValue(N, 0), DAG); + Results.push_back(V); + return; + } case ISD::ATOMIC_LOAD: case ISD::LOAD: { MemSDNode *LoadNode = cast(N); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index d696355bb062a8..b4ac52790612d2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -554,6 +554,10 @@ const unsigned StackProbeMaxLoopUnroll = 4; } // namespace AArch64 +namespace ARM64AS { +enum : unsigned { PTR32_SPTR = 270, PTR32_UPTR = 271, PTR64 = 272 }; +} + class AArch64Subtarget; class AArch64TargetLowering : public TargetLowering { @@ -585,11 +589,19 @@ class AArch64TargetLowering : public TargetLowering { unsigned Depth) const override; MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override { - // Returning i64 unconditionally here (i.e. even for ILP32) means that the - // *DAG* representation of pointers will always be 64-bits. They will be - // truncated and extended when transferred to memory, but the 64-bit DAG - // allows us to use AArch64's addressing modes much more easily. - return MVT::getIntegerVT(64); + if ((AS == ARM64AS::PTR32_SPTR) || (AS == ARM64AS::PTR32_UPTR)) { + // These are 32-bit pointers created using the `__ptr32` extension or + // similar. They are handled by marking them as being in a different + // address space, and will be extended to 64-bits when used as the target + // of a load or store operation, or cast to a 64-bit pointer type. + return MVT::i32; + } else { + // Returning i64 unconditionally here (i.e. even for ILP32) means that the + // *DAG* representation of pointers will always be 64-bits. They will be + // truncated and extended when transferred to memory, but the 64-bit DAG + // allows us to use AArch64's addressing modes much more easily. + return MVT::i64; + } } bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h index 1a470ca87127ce..f57ba308de1e81 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h @@ -65,8 +65,7 @@ class AArch64TargetMachine : public LLVMTargetMachine { /// Returns true if a cast between SrcAS and DestAS is a noop. bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Addrspacecasts are always noops. - return true; + return (getPointerSize(SrcAS) == getPointerSize(DestAS)); } private: diff --git a/llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll b/llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll new file mode 100644 index 00000000000000..fc19325925feef --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll @@ -0,0 +1,180 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + +; Source to regenerate: +; struct Foo { +; int * __ptr32 p32; +; int * __ptr64 p64; +; __attribute__((address_space(9))) int *p_other; +; }; +; extern "C" void use_foo(Foo *f); +; extern "C" int use_int(int i); +; extern "C" void test_sign_ext(Foo *f, int * __ptr32 __sptr i) { +; f->p64 = i; +; use_foo(f); +; } +; extern "C" void test_sign_ext_store_load(int * __ptr32 __sptr i) { +; *i = use_int(*i); +; } +; extern "C" void test_zero_ext(Foo *f, int * __ptr32 __uptr i) { +; f->p64 = i; +; use_foo(f); +; } +; extern "C" void test_zero_ext_store_load(int * __ptr32 __uptr i) { +; *i = use_int(*i); +; } +; extern "C" void test_trunc(Foo *f, int * __ptr64 i) { +; f->p32 = i; +; use_foo(f); +; } +; extern "C" void test_noop1(Foo *f, int * __ptr32 i) { +; f->p32 = i; +; use_foo(f); +; } +; extern "C" void test_noop2(Foo *f, int * __ptr64 i) { +; f->p64 = i; +; use_foo(f); +; } +; extern "C" void test_null_arg(Foo *f, int * __ptr32 i) { +; test_noop1(f, 0); +; } +; extern "C" void test_unrecognized(Foo *f, __attribute__((address_space(14))) int *i) { +; f->p32 = (int * __ptr32)i; +; use_foo(f); +; } +; +; $ clang --target=aarch64-windows-msvc -fms-extensions -O2 -S -emit-llvm t.cpp + +target datalayout = "e-m:w-p:64:64-i32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "aarch64-unknown-windows-msvc" + +; Function Attrs: mustprogress uwtable +define dso_local void @test_sign_ext(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_sign_ext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: str x8, [x0, #8] +; CHECK-NEXT: b use_foo +entry: + %0 = addrspacecast ptr addrspace(270) %i to ptr + %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8 + store ptr %0, ptr %p64, align 8 + tail call void @use_foo(ptr noundef %f) + ret void +} + +declare dso_local void @use_foo(ptr noundef) local_unnamed_addr #1 + +; Function Attrs: mustprogress uwtable +define dso_local void @test_sign_ext_store_load(ptr addrspace(270) nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_sign_ext_store_load: +; CHECK: // %bb.0: // %entry +; CHECK: sxtw x19, w0 +; CHECK-NEXT: ldr w0, [x19] +; CHECK-NEXT: bl use_int +; CHECK-NEXT: str w0, [x19] +entry: + %0 = load i32, ptr addrspace(270) %i, align 4 + %call = tail call i32 @use_int(i32 noundef %0) + store i32 %call, ptr addrspace(270) %i, align 4 + ret void +} + +declare dso_local i32 @use_int(i32 noundef) local_unnamed_addr #1 + +; Function Attrs: mustprogress uwtable +define dso_local void @test_zero_ext(ptr noundef %f, ptr addrspace(271) noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_zero_ext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w1 +; CHECK-NEXT: str x8, [x0, #8] +; CHECK-NEXT: b use_foo +entry: + %0 = addrspacecast ptr addrspace(271) %i to ptr + %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8 + store ptr %0, ptr %p64, align 8 + tail call void @use_foo(ptr noundef %f) + ret void +} + +; Function Attrs: mustprogress uwtable +define dso_local void @test_zero_ext_store_load(ptr addrspace(271) nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_zero_ext_store_load: +; CHECK: // %bb.0: // %entry +; CHECK: mov w19, w0 +; CHECK-NEXT: ldr w0, [x19] +; CHECK-NEXT: bl use_int +; CHECK-NEXT: str w0, [x19] +entry: + %0 = load i32, ptr addrspace(271) %i, align 4 + %call = tail call i32 @use_int(i32 noundef %0) + store i32 %call, ptr addrspace(271) %i, align 4 + ret void +} + +; Function Attrs: mustprogress uwtable +define dso_local void @test_trunc(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_trunc: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str w1, [x0] +; CHECK-NEXT: b use_foo +entry: + %0 = addrspacecast ptr %i to ptr addrspace(270) + store ptr addrspace(270) %0, ptr %f, align 8 + tail call void @use_foo(ptr noundef nonnull %f) + ret void +} + +; Function Attrs: mustprogress uwtable +define dso_local void @test_noop1(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_noop1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str w1, [x0] +; CHECK-NEXT: b use_foo +entry: + store ptr addrspace(270) %i, ptr %f, align 8 + tail call void @use_foo(ptr noundef nonnull %f) + ret void +} + +; Function Attrs: mustprogress uwtable +define dso_local void @test_noop2(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_noop2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x1, [x0, #8] +; CHECK-NEXT: b use_foo +entry: + %p64 = getelementptr inbounds nuw i8, ptr %f, i64 8 + store ptr %i, ptr %p64, align 8 + tail call void @use_foo(ptr noundef %f) + ret void +} + +; Function Attrs: mustprogress uwtable +define dso_local void @test_null_arg(ptr noundef %f, ptr addrspace(270) nocapture noundef readnone %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_null_arg: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str wzr, [x0] +; CHECK-NEXT: b use_foo +entry: + store ptr addrspace(270) null, ptr %f, align 8 + tail call void @use_foo(ptr noundef nonnull %f) + ret void +} + +; Function Attrs: mustprogress uwtable +define dso_local void @test_unrecognized(ptr noundef %f, ptr addrspace(14) noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_unrecognized: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str w1, [x0] +; CHECK-NEXT: b use_foo +entry: + %0 = addrspacecast ptr addrspace(14) %i to ptr addrspace(270) + store ptr addrspace(270) %0, ptr %f, align 8 + tail call void @use_foo(ptr noundef nonnull %f) + ret void +} + +attributes #0 = { mustprogress uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" } +attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }