Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make linux-riscv nativeaot port robust #112736

Open
wants to merge 46 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 37 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
1048e2e
Fix AOT exception handling on linux-riscv64
am11 Feb 13, 2025
88ce95b
Apply more suggestions
am11 Feb 13, 2025
59705b8
Fix unwind information for allocation helpers
filipnavara Feb 13, 2025
d116a11
Restore correct FP registers in StackFrameIterator::UnwindFuncletInvo…
filipnavara Feb 14, 2025
b179a7f
Fix register addressing in GcInfoDecoder::GetRegisterSlot
filipnavara Feb 14, 2025
1cb6ced
Fix and enable software write watch
filipnavara Feb 15, 2025
13953be
Fix COOP frames layout
filipnavara Feb 15, 2025
53a4ef0
Fix indirections, comparison and other assembly for write barrier checks
filipnavara Feb 15, 2025
c11dd62
Fix cross-build on Risc-V host
filipnavara Feb 15, 2025
37680d7
Disable R2R in stage2 build
am11 Feb 16, 2025
f861b44
Fix uninitialized value in RhpPInvoke
filipnavara Feb 16, 2025
acab644
Fix register in RhpPInvokeReturn
am11 Feb 16, 2025
de03a3e
Initialize FP/RA pointers when creating StackFrameIterator from nativ…
filipnavara Feb 16, 2025
eb4449b
Fix sign on PROLOG_SAVE_REG_PAIR_INDEXED
filipnavara Feb 16, 2025
541ddb1
Fix more of the logic in write barriers. Needs further audit.
filipnavara Feb 16, 2025
a0e1a83
Fix incorrect unwinding information generated for methods with frame …
filipnavara Feb 16, 2025
f48eae5
Fix emitting jump to bad slot helper
filipnavara Feb 16, 2025
7734924
Fix unwind info for universal transitions
filipnavara Feb 16, 2025
19c97fb
Fix comment
filipnavara Feb 17, 2025
c62c075
Fix flipped RA/FP in universal transition unwinding
filipnavara Feb 17, 2025
ea12bf4
WIP: Rewrite thunk code generation
filipnavara Feb 17, 2025
35a22b9
WIP: Fix RhCommonStub
filipnavara Feb 17, 2025
a719d7d
Fix layout of universal translation for unwinding
filipnavara Feb 18, 2025
212157a
WIP: Add memory barriers to native AOT asm helpers (https://github.co…
filipnavara Feb 18, 2025
9743390
WIP: Add fences to RhpCheckedLockCmpXchg/RhpCheckedXchg and fix atomi…
filipnavara Feb 18, 2025
20101ac
Preserve registers in INLINE_GET_TLS_VAR
am11 Feb 19, 2025
310e8f8
Preserve a7 as well
am11 Feb 19, 2025
5b058e4
WIP: Attempt to fix masks in IsInProlog/TrailingEpilogueInstructionsC…
filipnavara Feb 20, 2025
e5e96bb
WIP: Fix GC hijacking flags and possible return value trashing
filipnavara Feb 20, 2025
a5770dd
Fix return value trashing
filipnavara Feb 20, 2025
215e5f2
Actually fix the PTFF_THREAD_HIJACK_HI flag in correct file
filipnavara Feb 20, 2025
6515769
Merge dotnet/main into feature/nativeaot/riscv64-port
am11 Feb 20, 2025
1fd1716
Apply suggestions from code review
am11 Feb 20, 2025
a310c69
Update src/coreclr/nativeaot/Runtime/ThunksMapping.cpp
am11 Feb 20, 2025
a2f8422
Make style consistent
am11 Feb 20, 2025
0590ea5
Merge branch 'main' into feature/nativeaot/riscv64-port
am11 Feb 20, 2025
a9a6a47
Flip the sign of PROLOG_SAVE_REG_PAIR_INDEXED to match CoreCLR defini…
filipnavara Feb 21, 2025
7be7b76
Merge branch 'main' into feature/nativeaot/riscv64-port
am11 Feb 21, 2025
de257a3
Fix the last change to sign
filipnavara Feb 21, 2025
d5adb94
Save one mv instruction
filipnavara Feb 21, 2025
e34c90c
Relax FENCE in R2R helpers
filipnavara Feb 21, 2025
939c0a2
Apply suggestions from CR review
am11 Feb 21, 2025
2fd845a
Update managed definition of TransitionBlock too
filipnavara Feb 21, 2025
21fc86a
Merge branch 'main' into feature/nativeaot/riscv64-port
am11 Feb 21, 2025
84504b5
Update TLSDESC comments
am11 Feb 21, 2025
0acb29d
Merge branch 'main' into feature/nativeaot/riscv64-port
am11 Feb 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions eng/native/configureplatform.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ if(CLR_CMAKE_HOST_OS STREQUAL linux)
set(CLR_CMAKE_HOST_UNIX_X86 1)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL aarch64)
set(CLR_CMAKE_HOST_UNIX_ARM64 1)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL riscv64)
set(CLR_CMAKE_HOST_UNIX_RISCV64 1)
else()
clr_unknown_arch()
endif()
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7223,7 +7223,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe

if (leftFrameSize != 0)
{
genStackPointerAdjustment(-leftFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true);
genStackPointerAdjustment(-leftFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ false);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/nativeaot/Runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ endif()

add_definitions(-DFEATURE_BASICFREEZE)
add_definitions(-DFEATURE_CONSERVATIVE_GC)
if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)
if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_RISCV64)
add_definitions(-DFEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP)
add_definitions(-DFEATURE_MANUALLY_MANAGED_CARD_BUNDLES)
endif()
Expand Down
21 changes: 12 additions & 9 deletions src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO
// preserved floating-point registers
//
int32_t preservedFpIndices[] = {8, 9, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27};
for (int i = 0; i < sizeof(preservedFpIndices) / sizeof(preservedFpIndices[0]); i++)
for (int i = 0; i < ARRAY_SIZE(preservedFpIndices); i++)
{
m_RegDisplay.F[preservedFpIndices[i]] = pCtx->F[preservedFpIndices[i]];
}
Expand Down Expand Up @@ -809,6 +809,8 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC
m_RegDisplay.pS9 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S9);
m_RegDisplay.pS10 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S10);
m_RegDisplay.pS11 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S11);
m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_REG(pCtx, Fp);
m_RegDisplay.pRA = (PTR_uintptr_t)PTR_TO_REG(pCtx, Ra);

//
// scratch regs
Expand Down Expand Up @@ -1285,9 +1287,10 @@ void StackFrameIterator::UnwindFuncletInvokeThunk()
#elif defined(TARGET_RISCV64)
PTR_uint64_t f = (PTR_uint64_t)(m_RegDisplay.SP);

for (int i = 0; i < 32; i++)
int32_t preservedFpIndices[] = {8, 9, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27};
for (int i = 0; i < ARRAY_SIZE(preservedFpIndices); i++)
{
m_RegDisplay.F[i] = *f++;
m_RegDisplay.F[preservedFpIndices[i]] = *f++;
}

SP = (PTR_uintptr_t)f;
Expand Down Expand Up @@ -1496,12 +1499,12 @@ struct UniversalTransitionStackFrame
// Conservative GC reporting must be applied to everything between the base of the
// ReturnBlock and the top of the StackPassedArgs.
private:
uintptr_t m_pushedRA; // ChildSP+000 CallerSP-0F0 (0x08 bytes) (ra)
uintptr_t m_pushedFP; // ChildSP+008 CallerSP-0E8 (0x08 bytes) (fp)
Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7)
uintptr_t m_returnBlock[4]; // ChildSP+090 CallerSP-060 (0x20 bytes)
uintptr_t m_intArgRegs[8]; // ChildSP+0B0 CallerSP-040 (0x40 bytes) (a0-a7)
uintptr_t m_stackPassedArgs[1]; // ChildSP+0F0 CallerSP+000 (unknown size)
uintptr_t m_pushedFP; // ChildSP+000 CallerSP-0B0 (0x08 bytes) (fp)
uintptr_t m_pushedRA; // ChildSP+008 CallerSP-0A8 (0x08 bytes) (ra)
uint64_t m_fpArgRegs[8]; // ChildSP+010 CallerSP-0A0 (0x40 bytes) (fa0-fa7)
uintptr_t m_returnBlock[4]; // ChildSP+050 CallerSP-060 (0x20 bytes)
uintptr_t m_intArgRegs[8]; // ChildSP+070 CallerSP-040 (0x40 bytes) (a0-a7)
uintptr_t m_stackPassedArgs[1]; // ChildSP+0B0 CallerSP+000 (unknown size)

public:
PTR_uintptr_t get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); }
Expand Down
26 changes: 16 additions & 10 deletions src/coreclr/nativeaot/Runtime/ThunksMapping.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#elif TARGET_LOONGARCH64
#define THUNK_SIZE 16
#elif TARGET_RISCV64
#define THUNK_SIZE 12
#define THUNK_SIZE 20
#else
#define THUNK_SIZE (2 * OS_PAGE_SIZE) // This will cause RhpGetNumThunksPerBlock to return 0
#endif
Expand Down Expand Up @@ -259,21 +259,27 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping()

#elif defined(TARGET_RISCV64)

// auipc t0, %hi(delta) // Load upper immediate with address high bits
// ld t1, %lo(delta)(t0) // Load data from address in (t0 + lower immediate)
// jr t1 // Jump and don't link register
//auipc t1, hi(<delta PC to thunk data address>)
//addi t1, t1, lo(<delta PC to thunk data address>)
//auipc t0, hi(<delta to get to last word in data page>)
//ld t0, (t0)
//jalr zero, t0, 0

int delta = (int)(pCurrentDataAddress - pCurrentThunkAddress);
uint32_t deltaHi = (delta + 0x800) & 0xfffff000;
uint32_t deltaLo = delta << (32 - 12);

*((uint32_t*)pCurrentThunkAddress) = 0x00000297 | deltaHi; // auipc
*((uint32_t*)pCurrentThunkAddress) = 0x00000317 | ((((delta + 0x800) & 0xFFFFF000) >> 12) << 12); // auipc t1, delta[31:12]
pCurrentThunkAddress += 4;

*((uint32_t*)pCurrentThunkAddress) = 0x00030313 | ((delta & 0xFFF) << 20); // addi t1, t1, delta[11:0]
pCurrentThunkAddress += 4;

delta += OS_PAGE_SIZE - POINTER_SIZE - (i * POINTER_SIZE * 2) - 8;
*((uint32_t*)pCurrentThunkAddress) = 0x00000297 | ((((delta + 0x800) & 0xFFFFF000) >> 12) << 12); // auipc t0, delta[31:12]
pCurrentThunkAddress += 4;

*((uint32_t*)pCurrentThunkAddress) = 0x0002B303 | deltaLo; // addi
*((uint32_t*)pCurrentThunkAddress) = 0x0002b283 | ((delta & 0xFFF) << 20); // ld t0, (delta[11:0])(t0)
pCurrentThunkAddress += 4;

*((uint32_t*)pCurrentThunkAddress) = 0x00030067; // jr
*((uint32_t*)pCurrentThunkAddress) = 0x00008282; // c.jr t0 (compressed encoding of jalr zero, t0, 0); upper halfword is zero padding
pCurrentThunkAddress += 4;

#else
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ LOCAL_LABEL(RhpNewFast_RarePath):

// a3: transition frame

// Preserve the MethodTable in s0
mv s0, a0
// Preserve the MethodTable in s2
mv s2, a0

li a2, 0 // numElements

Expand All @@ -96,7 +96,7 @@ LOCAL_LABEL(NewOutOfMemory):
// This is the OOM failure path. We are going to tail-call to a managed helper that will throw
// an out of memory exception that the caller of this allocator understands.

mv a0, s0 // MethodTable pointer
mv a0, s2 // MethodTable pointer
li a1, 0 // Indicate that we should throw OOM.

POP_COOP_PINVOKE_FRAME
Expand Down Expand Up @@ -243,7 +243,7 @@ LOCAL_LABEL(RhpNewArray_Rare):
PUSH_COOP_PINVOKE_FRAME a3

// Preserve data we will need later into the callee saved registers
mv s0, a0 // Preserve MethodTable
mv s2, a0 // Preserve MethodTable

mv a2, a1 // numElements
li a1, 0 // uFlags
Expand All @@ -264,7 +264,7 @@ LOCAL_LABEL(ArrayOutOfMemory):
// This is the OOM failure path. We are going to tail-call to a managed helper that will throw
// an out of memory exception that the caller of this allocator understands.

mv a0, s0 // MethodTable Pointer
mv a0, s2 // MethodTable Pointer
li a1, 0 // Indicate that we should throw OOM.

POP_COOP_PINVOKE_FRAME
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <unixasmmacros.inc>
#include "AsmOffsets.inc"

#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 15)&(~15))
#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 7) & ~7)

#define HARDWARE_EXCEPTION 1
#define SOFTWARE_EXCEPTION 0
Expand Down
11 changes: 6 additions & 5 deletions src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@

# Perform the rest of the PInvokeTransitionFrame initialization.
sd \threadReg, OFFSETOF__PInvokeTransitionFrame__m_pThread(sp) # Thread * (unused by stackwalker)
sd \BITMASK, (OFFSETOF__PInvokeTransitionFrame__m_pThread + 8)(sp) # Save the register bitmask passed in by caller
sd \BITMASK, OFFSETOF__PInvokeTransitionFrame__m_Flags(sp) # Save the register bitmask passed in by caller

addi \trashReg, sp, PROBE_FRAME_SIZE # Recover value of caller's SP
sd \trashReg, 0x78(sp) # Save caller's SP
Expand Down Expand Up @@ -84,7 +84,9 @@
.macro FixupHijackedCallstack

// a2 <- GetThread()
mv t1, a0
INLINE_GETTHREAD a2
mv a0, t1

// Fix the stack by restoring the original return address
ld ra, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2)
Expand All @@ -100,14 +102,13 @@
NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler
FixupHijackedCallstack

PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a3
andi t3, a3, 1 << TrapThreadsFlags_TrapThreads_Bit
PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, t3
andi t3, t3, 1 << TrapThreadsFlags_TrapThreads_Bit
bnez t3, LOCAL_LABEL(WaitForGC)
jr ra

LOCAL_LABEL(WaitForGC):
li t6, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A0 + PTFF_SAVE_A1 + PTFF_THREAD_HIJACK_HI)
or t3, t3, t6
li t3, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A0 + PTFF_SAVE_A1 + (PTFF_THREAD_HIJACK_HI << 32))
tail C_FUNC(RhpWaitForGC)
NESTED_END RhpGcProbeHijack

Expand Down
16 changes: 9 additions & 7 deletions src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,29 @@
//
// RhCommonStub
//
// INPUT: tp: thunk's data block
// INPUT: t1: thunk's data block
//
// TRASHES: t0, t1, tp
// TRASHES: t0, t1, t2
//
LEAF_ENTRY RhCommonStub, _TEXT
// There are arbitrary callers passing arguments with arbitrary signatures.
// Custom calling convention:
// t1 pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers)

mv t2, a0
INLINE_GET_TLS_VAR t0, C_FUNC(tls_thunkData)
mv a0, t2

// t0 = base address of TLS data
// tp = address of context cell in thunk's data
// t1 = address of context cell in thunk's data

// Load the thunk address from the data block and store it in the thread's static storage
ld t1, 0(t0) // Load thunk address into t1 from the TLS base address
sd t1, 0(t0) // Store the thunk address in thread static storage
ld t2, 0(t1) // Load thunk data into t2
sd t2, 0(t0) // Store the thunk address in thread static storage

// Load the target address from the data block and jump to it
ld t1, POINTER_SIZE(t0) // Load target address into t1 from the data block
jalr t1 // Jump to the target address in t1
ld t1, POINTER_SIZE(t1) // Load target address into t1 from the data block
jr t1 // Jump to the target address in t1

LEAF_END RhCommonStub, _TEXT

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler
sd fp, OFFSETOF__PInvokeTransitionFrame__m_FramePointer(a0)
sd ra, OFFSETOF__PInvokeTransitionFrame__m_RIP(a0)
sd t0, OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs(a0)
sd sp, OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs(a0)
li t0, PTFF_SAVE_SP
sd t0, OFFSETOF__PInvokeTransitionFrame__m_Flags(a0)

Expand All @@ -40,7 +40,7 @@ LEAF_ENTRY RhpPInvokeReturn, _TEXT

PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, t0

bnez t1, 0f // If TrapThreadsFlags_None is non-zero, branch
bnez t0, 0f // Branch if the RhpTrapThreads value loaded into t0 is non-zero
ret

0:
Expand Down
12 changes: 3 additions & 9 deletions src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@
.global RhpFpTrashValues
#endif // TRASH_SAVED_ARGUMENT_REGISTERS

// Padding to account for the odd number of saved integer registers
#define ALIGNMENT_PADDING_SIZE (8)

#define COUNT_ARG_REGISTERS (8)
#define INTEGER_REGISTER_SIZE (8)
#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE)
Expand All @@ -31,7 +28,6 @@

// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions:
//
// ALIGNMENT_PADDING_SIZE
// ARGUMENT_REGISTERS_SIZE
// RETURN_BLOCK_SIZE
// FLOAT_ARG_REGISTERS_SIZE
Expand All @@ -41,7 +37,7 @@

#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_RA_SIZE + FLOAT_ARG_REGISTERS_SIZE)

#define STACK_SIZE (ALIGNMENT_PADDING_SIZE + ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_RA_SIZE + PUSHED_FP_SIZE)
#define STACK_SIZE (ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_RA_SIZE + PUSHED_FP_SIZE)

#define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_RA_SIZE)
#define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE)
Expand Down Expand Up @@ -91,9 +87,7 @@
NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler

# FP and RA registers
addi sp, sp, -STACK_SIZE
sd s0, 0x0(sp) # Save frame pointer
sd ra, 0x08(sp) # Save return address
PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, STACK_SIZE

# Floating point registers
fsd fa0, FLOAT_ARG_OFFSET(sp)
Expand All @@ -105,7 +99,7 @@
fsd fa6, FLOAT_ARG_OFFSET + 0x30(sp)
fsd fa7, FLOAT_ARG_OFFSET + 0x38(sp)

# Space for return buffer data (0x40 bytes)
# Space for return buffer data (0x20 bytes)

# Save argument registers
sd a0, ARGUMENT_REGISTERS_OFFSET(sp)
Expand Down
Loading
Loading