Skip to content

Commit

Permalink
ARM64: Enable End-To-End ReadyToRun (R2R) Crossgen
Browse files Browse the repository at this point in the history
Fixes https://github.com/dotnet/coreclr/issues/4649
The immediate issues was NYI on genEmitHelperCalls. The initial
implementation for the missing part was enough to just crossgen System.dll.
But running tests revealed various issues in crossgened binaries (R2R).
Most common user/helper calls in R2R are represented as indirect calls
similar to interface call using virtual stub dispatch cell --
thunk/helper needs a indirect cell address to update the final target
address on the data location. `IsDelayLoadHelper` and `IsLazyHelper` belong to
this case.
Instead of passing such parameter, x64/x86 uses an encoding trick -- it
assumes the call is dispatched like `call [addr]`. So from the return
address, runtime could extract indirect cell address. Unfortunately this is not
an option for arm64 (actually arm as well but I haven't fixed it in this
change) where indirect call on memory is not encodable.
So, I made the following changes:

1. For the call requiring that needs to pass indirect cell address, I
tagged the call tree via `setR2RRelativeIndir`. Tried to be comprehensive,
but I may miss something. Currently, it includes a regular call and
various helpers for (virtual) load function pointer/static data access, etc.
Hopely we change JIT/EE interface somehow that gives us such explicit information.

2. Use the X11 to record indirect cell address for such call tree in
lower similar to VSD.

3. Fixed encodings `ZapIndirectHelperThunk`. In particular the immediate
value/offset for `ldr` should be scaled down 4 times since HW will scale
it 4 times.

4. Implement `genEmitHelperCalls` for indirect case. This is not the case requiring indirect
cell address. This is the case we inlined the indirect helper thunk for
the speed. I'm seeing the case for size opt helper call, we invoke a
direct call to such thunk which actually uses x12 to dispatch the final
target. Likewise, I used x12 for this expansion which seems a trash register that is not
overlapped with arugments with jit helpers like writer barriers.

With this change, I've tested various cases/scenraios locally.
Also I've verified all tests are passed against mscorlib.ni.dll and System.ni.dll.


Commit migrated from dotnet/coreclr@3165d8e
  • Loading branch information
kyulee1 committed May 18, 2016
1 parent 286c1be commit f7ff124
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 47 deletions.
48 changes: 20 additions & 28 deletions src/coreclr/src/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6730,39 +6730,31 @@ void CodeGen::genEmitHelperCall(unsigned helper,

if (addr == nullptr)
{
NYI("genEmitHelperCall indirect");
#if 0
assert(pAddr != nullptr);
if (genAddrCanBeEncodedAsPCRelOffset((size_t)pAddr))
// This is call to a runtime helper.
// adrp x, [reloc:rel page addr]
// add x, x, [reloc:page offset]
// ldr x, [x]
// br x

if (callTargetReg == REG_NA)
{
// generate call whose target is specified by PC-relative 32-bit offset.
callType = emitter::EC_FUNC_TOKEN_INDIR;
addr = pAddr;
// If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
// this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
}
else
{
// If this address cannot be encoded as PC-relative 32-bit offset, load it into REG_HELPER_CALL_TARGET
// and use register indirect addressing mode to make the call.
// mov reg, addr
// call [reg]
if (callTargetReg == REG_NA)
{
// If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
// this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
}

regMaskTP callTargetMask = genRegMask(callTargetReg);
regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
regMaskTP callTargetMask = genRegMask(callTargetReg);
regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);

// assert that all registers in callTargetMask are in the callKillSet
noway_assert((callTargetMask & callKillSet) == callTargetMask);
// assert that all registers in callTargetMask are in the callKillSet
noway_assert((callTargetMask & callKillSet) == callTargetMask);

callTarget = callTargetReg;
CodeGen::genSetRegToIcon(callTarget, (ssize_t) pAddr, TYP_I_IMPL);
callType = emitter::EC_INDIR_ARD;
}
#endif // 0
callTarget = callTargetReg;

// adrp + add with relocations will be emitted
getEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTarget, callTarget);
callType = emitter::EC_INDIR_R;
}

getEmitter()->emitIns_Call(callType,
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/src/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7047,6 +7047,8 @@ GenTreePtr Compiler::fgOptimizeDelegateConstructor(GenTreePtr call, CORINFO_C
info.compCompHnd->getReadyToRunHelper(targetMethod->gtFptrVal.gtLdftnResolvedToken,
CORINFO_HELP_READYTORUN_DELEGATE_CTOR, &call->gtCall.gtEntryPoint);
#endif
// This is the case from GetDynamicHelperCell.
call->gtCall.setR2RRelativeIndir();
}
}
else
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/src/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4464,6 +4464,15 @@ unsigned Compiler::gtSetEvalOrder(GenTree * tree)
ftreg |= RBM_VIRTUAL_STUB_PARAM;
}

#ifdef FEATURE_READYTORUN_COMPILER
#ifdef _TARGET_ARM64_
if (tree->gtCall.IsR2RRelativeIndir())
{
ftreg |= RBM_R2R_INDIRECT_PARAM;
}
#endif
#endif

// Normally function calls don't preserve caller save registers
// and thus are much more expensive.
// However a few function calls do preserve these registers
Expand Down Expand Up @@ -7403,6 +7412,10 @@ Compiler::gtDispNodeName(GenTree *tree)
gtfType = " ind";
else if (tree->gtFlags & GTF_CALL_VIRT_STUB)
gtfType = " stub";
#ifdef FEATURE_READYTORUN_COMPILER
else if (tree->gtCall.IsR2RRelativeIndir())
gtfType = " r2r_ind";
#endif // FEATURE_READYTORUN_COMPILER
else if (tree->gtFlags & GTF_CALL_UNMANAGED)
{
char * gtfTypeBufWalk = gtfTypeBuf;
Expand Down
11 changes: 11 additions & 0 deletions src/coreclr/src/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2762,6 +2762,8 @@ struct GenTreeCall final : public GenTree
// a Pinvoke but not as an unmanaged call. See impCheckForPInvokeCall() to
// know when these flags are set.

#define GTF_CALL_M_R2R_REL_INDIRECT 0x2000 // GT_CALL -- ready to run call is indirected through a relative address

bool IsUnmanaged() { return (gtFlags & GTF_CALL_UNMANAGED) != 0; }
bool NeedsNullCheck() { return (gtFlags & GTF_CALL_NULLCHECK) != 0; }
bool CallerPop() { return (gtFlags & GTF_CALL_POP_ARGS) != 0; }
Expand Down Expand Up @@ -2870,6 +2872,15 @@ struct GenTreeCall final : public GenTree
bool IsSameThis() { return (gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS) != 0; }
bool IsDelegateInvoke(){ return (gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) != 0; }
bool IsVirtualStubRelativeIndir() { return (gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0; }
#ifdef FEATURE_READYTORUN_COMPILER
bool IsR2RRelativeIndir() { return (gtCallMoreFlags & GTF_CALL_M_R2R_REL_INDIRECT) != 0; }
void setR2RRelativeIndir() {
if (gtEntryPoint.accessType == IAT_PVALUE)
{
gtCallMoreFlags |= GTF_CALL_M_R2R_REL_INDIRECT;
}
}
#endif // FEATURE_READYTORUN_COMPILER
bool IsVarargs() { return (gtCallMoreFlags & GTF_CALL_M_VARARGS) != 0; }

unsigned short gtCallMoreFlags; // in addition to gtFlags
Expand Down
11 changes: 11 additions & 0 deletions src/coreclr/src/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1647,6 +1647,9 @@ GenTreePtr Compiler::impReadyToRunHelperToTree(

op1->gtCall.gtEntryPoint = lookup;

// This is the case from GetDynamicHelperCell.
op1->gtCall.setR2RRelativeIndir();

return op1;
}
#endif
Expand Down Expand Up @@ -4519,6 +4522,8 @@ GenTreePtr Compiler::impImportLdvirtftn (GenTreePtr thisPtr,

call->gtEntryPoint = pCallInfo->codePointerLookup.constLookup;

// This is the case from GetDynamicHelperCell.
call->setR2RRelativeIndir();
return call;
}
#endif
Expand Down Expand Up @@ -5192,6 +5197,9 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN * pResolv
op1 = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF, callFlags);

op1->gtCall.gtEntryPoint = pFieldInfo->fieldLookup;

// This is the case from GetDynamicHelperCell.
op1->gtCall.setR2RRelativeIndir();
}
else
#endif
Expand Down Expand Up @@ -6028,6 +6036,9 @@ var_types Compiler::impImportCall (OPCODE opcode,
if (opts.IsReadyToRun())
{
call->gtCall.gtEntryPoint = callInfo->codePointerLookup.constLookup;

// This is the case from GetExternalMethodCell.
call->gtCall.setR2RRelativeIndir();
}
#endif
break;
Expand Down
18 changes: 17 additions & 1 deletion src/coreclr/src/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2399,10 +2399,26 @@ GenTree* Lowering::LowerDirectCall(GenTreeCall* call)
break;

case IAT_PVALUE:
{
// Non-virtual direct calls to addresses accessed by
// a single indirection.
result = Ind(AddrGen(addr));
GenTree* cellAddr = AddrGen(addr);
GenTree* indir = Ind(cellAddr);

#ifdef FEATURE_READYTORUN_COMPILER
#ifdef _TARGET_ARM64_
// For arm64, we dispatch code same as VSD using X11 for indirection cell address,
// which ZapIndirectHelperThunk expects.
if (call->IsR2RRelativeIndir())
{
cellAddr->gtRegNum = REG_R2R_INDIRECT_PARAM;
indir->gtRegNum = REG_JUMP_THUNK_PARAM;
}
#endif
#endif
result = indir;
break;
}

case IAT_PPVALUE:
// Non-virtual direct calls to addresses accessed by
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/src/jit/target.h
Original file line number Diff line number Diff line change
Expand Up @@ -1514,6 +1514,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED)
#define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH)
#define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R13|RBM_R14|RBM_R15)
#define REG_DEFAULT_HELPER_CALL_TARGET REG_R12

#define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
#define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH)
Expand Down Expand Up @@ -1606,6 +1607,11 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_VIRTUAL_STUB_PARAM RBM_R11
#define PREDICT_REG_VIRTUAL_STUB_PARAM PREDICT_REG_R11

// R2R indirect call. Use the same registers as VSD
#define REG_R2R_INDIRECT_PARAM REG_R11
#define RBM_R2R_INDIRECT_PARAM RBM_R11
#define PREDICT_REG_RER_INDIRECT_PARAM PREDICT_REG_R11

// Registers used by PInvoke frame setup
#define REG_PINVOKE_FRAME REG_R8
#define RBM_PINVOKE_FRAME RBM_R8
Expand Down
26 changes: 8 additions & 18 deletions src/coreclr/src/zap/zapimport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2146,17 +2146,8 @@ DWORD ZapIndirectHelperThunk::SaveWorker(ZapWriter * pZapWriter)
#elif defined(_TARGET_ARM64_)
if (IsDelayLoadHelper())
{
if (IsVSD())
{
// x11 contains indirection cell
// Do nothing x11 contains our first param
}
else
{
// mov x11, x12
*(DWORD*)p = 0xaa0c03eb;
p += 4;
}
// x11 contains indirection cell
// Do nothing x11 contains our first param

// movz x8, #index
DWORD index = GetSectionIndex();
Expand All @@ -2166,9 +2157,9 @@ DWORD ZapIndirectHelperThunk::SaveWorker(ZapWriter * pZapWriter)

// move Module* -> x9
// ldr x9, [PC+0x14]
*(DWORD*)p = 0x58000289;
*(DWORD*)p = 0x580000A9;
p += 4;

//ldr x9, [x9]
*(DWORD*)p = 0xf9400129;
p += 4;
Expand All @@ -2178,7 +2169,7 @@ DWORD ZapIndirectHelperThunk::SaveWorker(ZapWriter * pZapWriter)
{
// Move Module* -> x1
// ldr x1, [PC+0x14]
*(DWORD*)p = 0x58000289;
*(DWORD*)p = 0x580000A1;
p += 4;

// ldr x1, [x1]
Expand All @@ -2187,10 +2178,8 @@ DWORD ZapIndirectHelperThunk::SaveWorker(ZapWriter * pZapWriter)
}

// branch to helper

// mov x12, [helper]
// ldr x12, [PC+0x14]
*(DWORD*)p = 0x58000289;
*(DWORD*)p = 0x580000AC;
p += 4;

// ldr x12, [x12]
Expand All @@ -2199,12 +2188,13 @@ DWORD ZapIndirectHelperThunk::SaveWorker(ZapWriter * pZapWriter)

// br x12
*(DWORD *)p = 0xd61f0180;
p += 4;
p += 4;

// [Module*]
if (pImage != NULL)
pImage->WriteReloc(buffer, (int)(p - buffer), pImage->GetImportTable()->GetHelperImport(READYTORUN_HELPER_Module), 0, IMAGE_REL_BASED_PTR);
p += 8;

// [helper]
if (pImage != NULL)
pImage->WriteReloc(buffer, (int)(p - buffer), pImage->GetImportTable()->GetHelperImport(GetReadyToRunHelper()), 0, IMAGE_REL_BASED_PTR);
Expand Down

0 comments on commit f7ff124

Please sign in to comment.