[llvm][aarch64] Add support for the MS qualifiers __ptr32, __ptr64, __sptr, __uptr
dpaoliello committed Nov 5, 2024
1 parent c695a32 commit 8c4b383
Showing 4 changed files with 277 additions and 11 deletions.
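
Editorial context (not part of the commit message): __ptr32 and __ptr64 are Microsoft-specific qualifiers that fix a pointer's width regardless of the target, while __sptr and __uptr choose sign- versus zero-extension when a 32-bit pointer is widened to a 64-bit address. A minimal usage sketch, assuming a 64-bit MSVC-style target compiled with -fms-extensions (illustrative only):

  // C++ with MS extensions; names are illustrative.
  int *__ptr32 __sptr ps; // 32-bit pointer, widened by sign-extension
  int *__ptr32 __uptr pu; // 32-bit pointer, widened by zero-extension
  int *__ptr64 widen_s(int *__ptr32 __sptr p) { return p; } // sext to 64 bits
  int *__ptr64 widen_u(int *__ptr32 __uptr p) { return p; } // zext to 64 bits

The X86 backend already supports these qualifiers; this commit adds the equivalent lowering for AArch64.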
83 changes: 79 additions & 4 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -533,6 +533,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::XOR, MVT::i32, Custom);
  setOperationAction(ISD::XOR, MVT::i64, Custom);

  setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
  setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);

  // Virtually no operation on f128 is legal, but LLVM can't expand them when
  // there's a valid register class, so we need custom operations in most cases.
  setOperationAction(ISD::FABS, MVT::f128, Expand);
@@ -6722,6 +6725,37 @@ static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
                             ST->getBasePtr(), ST->getMemOperand());
}

static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  SDValue Src = Op.getOperand(0);
  MVT DestVT = Op.getSimpleValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());

  unsigned SrcAS = N->getSrcAddressSpace();
  unsigned DestAS = N->getDestAddressSpace();
  assert(SrcAS != DestAS &&
         "addrspacecast must be between different address spaces");
  assert(TLI.getTargetMachine().getPointerSize(SrcAS) !=
             TLI.getTargetMachine().getPointerSize(DestAS) &&
         "addrspacecast must be between different ptr sizes");

  if (SrcAS == ARM64AS::PTR32_SPTR) {
    // Widening a signed 32-bit pointer: sign-extend to 64 bits.
    return DAG.getNode(ISD::SIGN_EXTEND, dl, DestVT, Src,
                       DAG.getTargetConstant(0, dl, DestVT));
  } else if (SrcAS == ARM64AS::PTR32_UPTR) {
    // Widening an unsigned 32-bit pointer: zero-extend to 64 bits.
    return DAG.getNode(ISD::ZERO_EXTEND, dl, DestVT, Src,
                       DAG.getTargetConstant(0, dl, DestVT));
  } else if ((DestAS == ARM64AS::PTR32_SPTR) ||
             (DestAS == ARM64AS::PTR32_UPTR)) {
    // Narrowing a 64-bit pointer: keep only the low 32 bits.
    SDValue Ext = DAG.getAnyExtOrTrunc(Src, dl, DestVT);
    SDValue Trunc = DAG.getZeroExtendInReg(Ext, dl, DestVT);
    return Trunc;
  } else {
    return Src;
  }
}
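
Editorial note: a host-side model of the three interesting cases in LowerADDRSPACECAST, useful for sanity-checking the extension rules (a sketch only; these helpers are hypothetical, not LLVM code):

  #include <cstdint>
  // __ptr32 __sptr -> 64-bit: sign-extend (the ISD::SIGN_EXTEND branch).
  uint64_t widenSptr(uint32_t P) { return (uint64_t)(int64_t)(int32_t)P; }
  // __ptr32 __uptr -> 64-bit: zero-extend (the ISD::ZERO_EXTEND branch).
  uint64_t widenUptr(uint32_t P) { return (uint64_t)P; }
  // 64-bit -> __ptr32: keep the low 32 bits (the truncating branch).
  uint32_t narrowPtr(uint64_t P) { return (uint32_t)P; }

For example, widenSptr(0x80000000u) yields 0xFFFFFFFF80000000, while widenUptr(0x80000000u) yields 0x0000000080000000; this is exactly the sxtw-versus-mov difference visible in the tests below.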

// Custom lowering for any store, vector or scalar and/or default or with
// a truncate operation. Currently we only custom-lower the truncate
// operation from vector v4i16 to v4i8 and volatile stores of i128.
@@ -7375,6 +7409,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
  case ISD::ADDRSPACECAST:
    return LowerADDRSPACECAST(Op, DAG);
  case ISD::SIGN_EXTEND_INREG: {
    // Only custom lower when ExtraVT has a legal byte based element type.
    EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -23366,6 +23402,26 @@ static SDValue performLOADCombine(SDNode *N,
  performTBISimplification(N->getOperand(1), DCI, DAG);

  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT RegVT = LD->getValueType(0);
  EVT MemVT = LD->getMemoryVT();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDLoc DL(LD);

  // Cast ptr32 and ptr64 pointers to the default address space before a load.
  unsigned AddrSpace = LD->getAddressSpace();
  if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
      AddrSpace == ARM64AS::PTR32_UPTR) {
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    if (PtrVT != LD->getBasePtr().getSimpleValueType()) {
      SDValue Cast =
          DAG.getAddrSpaceCast(DL, PtrVT, LD->getBasePtr(), AddrSpace, 0);
      return DAG.getExtLoad(LD->getExtensionType(), DL, RegVT, LD->getChain(),
                            Cast, LD->getPointerInfo(), MemVT,
                            LD->getOriginalAlign(),
                            LD->getMemOperand()->getFlags());
    }
  }

  if (LD->isVolatile() || !Subtarget->isLittleEndian())
    return SDValue(N, 0);

@@ -23375,13 +23431,11 @@ static SDValue performLOADCombine(SDNode *N,
  if (!LD->isNonTemporal())
    return SDValue(N, 0);

  EVT MemVT = LD->getMemoryVT();
  if (MemVT.isScalableVector() || MemVT.getSizeInBits() <= 256 ||
      MemVT.getSizeInBits() % 256 == 0 ||
      256 % MemVT.getScalarSizeInBits() != 0)
    return SDValue(N, 0);

  SDLoc DL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  SDNodeFlags Flags = LD->getFlags();
@@ -23641,12 +23695,28 @@ static SDValue performSTORECombine(SDNode *N,
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();
  EVT ValueVT = Value.getValueType();
  EVT MemVT = ST->getMemoryVT();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDLoc DL(ST);

  auto hasValidElementTypeForFPTruncStore = [](EVT VT) {
    EVT EltVT = VT.getVectorElementType();
    return EltVT == MVT::f32 || EltVT == MVT::f64;
  };

  // Cast ptr32 and ptr64 pointers to the default address space before a store.
  unsigned AddrSpace = ST->getAddressSpace();
  if (AddrSpace == ARM64AS::PTR64 || AddrSpace == ARM64AS::PTR32_SPTR ||
      AddrSpace == ARM64AS::PTR32_UPTR) {
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    if (PtrVT != Ptr.getSimpleValueType()) {
      SDValue Cast = DAG.getAddrSpaceCast(DL, PtrVT, Ptr, AddrSpace, 0);
      return DAG.getStore(Chain, DL, Value, Cast, ST->getPointerInfo(),
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo());
    }
  }

  if (SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
    return Res;

@@ -23660,8 +23730,8 @@ static SDValue performSTORECombine(SDNode *N,
      ValueVT.isFixedLengthVector() &&
      ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() &&
      hasValidElementTypeForFPTruncStore(Value.getOperand(0).getValueType()))
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                             ST->getMemoryVT(), ST->getMemOperand());
    return DAG.getTruncStore(Chain, DL, Value.getOperand(0), Ptr, MemVT,
                             ST->getMemOperand());

  if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
    return Split;
@@ -26988,6 +27058,11 @@ void AArch64TargetLowering::ReplaceNodeResults(
    ReplaceATOMIC_LOAD_128Results(N, Results, DAG, Subtarget);
    return;
  }
  case ISD::ADDRSPACECAST: {
    SDValue V = LowerADDRSPACECAST(SDValue(N, 0), DAG);
    Results.push_back(V);
    return;
  }
  case ISD::ATOMIC_LOAD:
  case ISD::LOAD: {
    MemSDNode *LoadNode = cast<MemSDNode>(N);
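
Editorial note on the two DAGCombine changes above: a load or store whose pointer lives in address space 270/271 carries an i32 base pointer in the DAG, so the combines first emit an addrspacecast to the default 64-bit address space and then an ordinary load or store, letting the usual AArch64 addressing-mode selection apply. Roughly the source-level equivalent, as a sketch (function and parameter names are hypothetical):

  // Illustrative only: what a load through 'int * __ptr32 __uptr p' becomes.
  int load_via_ptr32(unsigned p32) {
    int *p64 = (int *)(unsigned long long)p32; // zero-extend the address first
    return *p64;                               // then an ordinary 64-bit load
  }
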
22 changes: 17 additions & 5 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -554,6 +554,10 @@ const unsigned StackProbeMaxLoopUnroll = 4;

} // namespace AArch64

namespace ARM64AS {
enum : unsigned { PTR32_SPTR = 270, PTR32_UPTR = 271, PTR64 = 272 };
}

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
@@ -585,11 +589,19 @@ class AArch64TargetLowering : public TargetLowering {
                                     unsigned Depth) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
    if ((AS == ARM64AS::PTR32_SPTR) || (AS == ARM64AS::PTR32_UPTR)) {
      // These are 32-bit pointers created using the `__ptr32` extension or
      // similar. They are handled by marking them as being in a different
      // address space, and will be extended to 64-bits when used as the target
      // of a load or store operation, or cast to a 64-bit pointer type.
      return MVT::i32;
    } else {
      // Returning i64 unconditionally here (i.e. even for ILP32) means that the
      // *DAG* representation of pointers will always be 64-bits. They will be
      // truncated and extended when transferred to memory, but the 64-bit DAG
      // allows us to use AArch64's addressing modes much more easily.
      return MVT::i64;
    }
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
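
Editorial note: the ARM64AS values 270-272 mirror the p270:32:32-p271:32:32-p272:64:64 entries in the MSVC datalayout (the same numbers the X86 backend uses for these qualifiers), so getPointerTy returns MVT::i32 exactly for the two 32-bit spaces. A compile-time sketch of that mapping, under those assumptions (helper name is hypothetical):

  // Mirrors getPointerTy's decision; not the LLVM API.
  enum : unsigned { PTR32_SPTR = 270, PTR32_UPTR = 271, PTR64 = 272 };
  constexpr unsigned dagPointerBits(unsigned AS) {
    return (AS == PTR32_SPTR || AS == PTR32_UPTR) ? 32 : 64;
  }
  static_assert(dagPointerBits(PTR32_UPTR) == 32, "ptr32 stays i32 in the DAG");
  static_assert(dagPointerBits(0) == 64, "default AS is i64, even for ILP32");
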
3 changes: 1 addition & 2 deletions llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -65,8 +65,7 @@ class AArch64TargetMachine : public LLVMTargetMachine {

  /// Returns true if a cast between SrcAS and DestAS is a noop.
  bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
    // Addrspacecasts are always noops.
    return true;
    return (getPointerSize(SrcAS) == getPointerSize(DestAS));
  }

private:
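
Editorial note: with isNoopAddrSpaceCast now comparing pointer sizes, casts between same-width address spaces (for example the default space and PTR64) remain free, while 32<->64-bit casts are lowered through LowerADDRSPACECAST. A runnable model, assuming the pointer sizes from the test's datalayout (helper names are hypothetical):

  #include <cassert>
  static unsigned pointerSize(unsigned AS) { // bytes, per the datalayout below
    return (AS == 270 || AS == 271) ? 4 : 8;
  }
  static bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) {
    return pointerSize(SrcAS) == pointerSize(DestAS);
  }
  int main() {
    assert(isNoopAddrSpaceCast(0, 272));   // 64-bit -> 64-bit: no-op
    assert(isNoopAddrSpaceCast(270, 271)); // 32-bit -> 32-bit: no-op
    assert(!isNoopAddrSpaceCast(270, 0));  // 32-bit -> 64-bit: needs an extend
  }
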
180 changes: 180 additions & 0 deletions llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll
@@ -0,0 +1,180 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

; Source to regenerate:
; struct Foo {
; int * __ptr32 p32;
; int * __ptr64 p64;
; __attribute__((address_space(9))) int *p_other;
; };
; extern "C" void use_foo(Foo *f);
; extern "C" int use_int(int i);
; extern "C" void test_sign_ext(Foo *f, int * __ptr32 __sptr i) {
; f->p64 = i;
; use_foo(f);
; }
; extern "C" void test_sign_ext_store_load(int * __ptr32 __sptr i) {
; *i = use_int(*i);
; }
; extern "C" void test_zero_ext(Foo *f, int * __ptr32 __uptr i) {
; f->p64 = i;
; use_foo(f);
; }
; extern "C" void test_zero_ext_store_load(int * __ptr32 __uptr i) {
; *i = use_int(*i);
; }
; extern "C" void test_trunc(Foo *f, int * __ptr64 i) {
; f->p32 = i;
; use_foo(f);
; }
; extern "C" void test_noop1(Foo *f, int * __ptr32 i) {
; f->p32 = i;
; use_foo(f);
; }
; extern "C" void test_noop2(Foo *f, int * __ptr64 i) {
; f->p64 = i;
; use_foo(f);
; }
; extern "C" void test_null_arg(Foo *f, int * __ptr32 i) {
; test_noop1(f, 0);
; }
; extern "C" void test_unrecognized(Foo *f, __attribute__((address_space(14))) int *i) {
; f->p32 = (int * __ptr32)i;
; use_foo(f);
; }
;
; $ clang --target=aarch64-windows-msvc -fms-extensions -O2 -S -emit-llvm t.cpp

target datalayout = "e-m:w-p:64:64-i32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-windows-msvc"
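
; Editorial note: the p270:32:32, p271:32:32, and p272:64:64 entries make
; address spaces 270/271 32-bit and 272 64-bit, matching the ARM64AS numbering
; introduced in AArch64ISelLowering.h above.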

; Function Attrs: mustprogress uwtable
define dso_local void @test_sign_ext(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_sign_ext:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sxtw x8, w1
; CHECK-NEXT: str x8, [x0, #8]
; CHECK-NEXT: b use_foo
entry:
%0 = addrspacecast ptr addrspace(270) %i to ptr
%p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
store ptr %0, ptr %p64, align 8
tail call void @use_foo(ptr noundef %f)
ret void
}

declare dso_local void @use_foo(ptr noundef) local_unnamed_addr #1

; Function Attrs: mustprogress uwtable
define dso_local void @test_sign_ext_store_load(ptr addrspace(270) nocapture noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_sign_ext_store_load:
; CHECK: // %bb.0: // %entry
; CHECK: sxtw x19, w0
; CHECK-NEXT: ldr w0, [x19]
; CHECK-NEXT: bl use_int
; CHECK-NEXT: str w0, [x19]
entry:
%0 = load i32, ptr addrspace(270) %i, align 4
%call = tail call i32 @use_int(i32 noundef %0)
store i32 %call, ptr addrspace(270) %i, align 4
ret void
}

declare dso_local i32 @use_int(i32 noundef) local_unnamed_addr #1

; Function Attrs: mustprogress uwtable
define dso_local void @test_zero_ext(ptr noundef %f, ptr addrspace(271) noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_zero_ext:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: str x8, [x0, #8]
; CHECK-NEXT: b use_foo
entry:
%0 = addrspacecast ptr addrspace(271) %i to ptr
%p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
store ptr %0, ptr %p64, align 8
tail call void @use_foo(ptr noundef %f)
ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_zero_ext_store_load(ptr addrspace(271) nocapture noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_zero_ext_store_load:
; CHECK: // %bb.0: // %entry
; CHECK: mov w19, w0
; CHECK-NEXT: ldr w0, [x19]
; CHECK-NEXT: bl use_int
; CHECK-NEXT: str w0, [x19]
entry:
%0 = load i32, ptr addrspace(271) %i, align 4
%call = tail call i32 @use_int(i32 noundef %0)
store i32 %call, ptr addrspace(271) %i, align 4
ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_trunc(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_trunc:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str w1, [x0]
; CHECK-NEXT: b use_foo
entry:
%0 = addrspacecast ptr %i to ptr addrspace(270)
store ptr addrspace(270) %0, ptr %f, align 8
tail call void @use_foo(ptr noundef nonnull %f)
ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_noop1(ptr noundef %f, ptr addrspace(270) noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_noop1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str w1, [x0]
; CHECK-NEXT: b use_foo
entry:
store ptr addrspace(270) %i, ptr %f, align 8
tail call void @use_foo(ptr noundef nonnull %f)
ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_noop2(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_noop2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x1, [x0, #8]
; CHECK-NEXT: b use_foo
entry:
%p64 = getelementptr inbounds nuw i8, ptr %f, i64 8
store ptr %i, ptr %p64, align 8
tail call void @use_foo(ptr noundef %f)
ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_null_arg(ptr noundef %f, ptr addrspace(270) nocapture noundef readnone %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_null_arg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str wzr, [x0]
; CHECK-NEXT: b use_foo
entry:
store ptr addrspace(270) null, ptr %f, align 8
tail call void @use_foo(ptr noundef nonnull %f)
ret void
}

; Function Attrs: mustprogress uwtable
define dso_local void @test_unrecognized(ptr noundef %f, ptr addrspace(14) noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: test_unrecognized:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str w1, [x0]
; CHECK-NEXT: b use_foo
entry:
%0 = addrspacecast ptr addrspace(14) %i to ptr addrspace(270)
store ptr addrspace(270) %0, ptr %f, align 8
tail call void @use_foo(ptr noundef nonnull %f)
ret void
}

attributes #0 = { mustprogress uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }
attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }
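
Editorial note: the CHECK lines are autogenerated; after a lowering change they can be regenerated with the script named in the NOTE line at the top of this file, for example (paths are illustrative):

  llvm/utils/update_llc_test_checks.py --llc-binary <build>/bin/llc llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll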
