8338383: Implement JEP 491: Synchronize Virtual Threads without Pinning
Co-authored-by: Patricio Chilano Mateo <pchilanomate@openjdk.org>
Co-authored-by: Alan Bateman <alanb@openjdk.org>
Co-authored-by: Andrew Haley <aph@openjdk.org>
Co-authored-by: Fei Yang <fyang@openjdk.org>
Co-authored-by: Coleen Phillimore <coleenp@openjdk.org>
Co-authored-by: Richard Reingruber <rrich@openjdk.org>
Co-authored-by: Martin Doerr <mdoerr@openjdk.org>
Reviewed-by: aboldtch, dholmes, coleenp, fbredberg, dlong, sspitsyn
7 people committed Nov 12, 2024
1 parent 8a2a75e commit 78b8015
Showing 246 changed files with 8,283 additions and 2,743 deletions.
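
Background, not part of the commit: JEP 491 lets a virtual thread that blocks on a monitor (at a contended synchronized entry, or in Object.wait()) unmount and release its carrier thread instead of pinning it. A minimal Java sketch of the scenario this commit addresses; the thread count and class name are illustrative only:

    import java.util.concurrent.CountDownLatch;

    // Thousands of virtual threads block in Object.wait(). Before JEP 491
    // each waiter pinned its carrier thread; with this commit the waiters
    // are frozen and their carriers are released while they wait.
    public class MonitorWaitDemo {
        static final Object lock = new Object();

        public static void main(String[] args) throws InterruptedException {
            int n = 10_000;
            CountDownLatch allWaiting = new CountDownLatch(n);
            for (int i = 0; i < n; i++) {
                Thread.startVirtualThread(() -> {
                    synchronized (lock) {
                        allWaiting.countDown();
                        try {
                            lock.wait(); // no longer pins the carrier
                        } catch (InterruptedException e) {
                            Thread.currentThread().interrupt();
                        }
                    }
                });
            }
            allWaiting.await(); // every thread has entered the monitor
            synchronized (lock) {
                lock.notifyAll();
            }
        }
    }
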
src/hotspot/cpu/aarch64/aarch64.ad (7 changes: 3 additions & 4 deletions)
@@ -1648,8 +1648,8 @@ int MachCallRuntimeNode::ret_addr_offset() {
   // for real runtime callouts it will be six instructions
   // see aarch64_enc_java_to_runtime
   //   adr(rscratch2, retaddr)
-  //   str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
   //   lea(rscratch1, RuntimeAddress(addr)
+  //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
   //   blr(rscratch1)
   CodeBlob *cb = CodeCache::find_blob(_entry_point);
   if (cb) {
@@ -3696,14 +3696,13 @@ encode %{
       __ post_call_nop();
     } else {
       Label retaddr;
-      // Make the anchor frame walkable
       __ adr(rscratch2, retaddr);
-      __ str(rscratch2, Address(rthread, JavaThread::last_Java_pc_offset()));
       __ lea(rscratch1, RuntimeAddress(entry));
+      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
+      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
       __ blr(rscratch1);
       __ bind(retaddr);
       __ post_call_nop();
+      __ add(sp, sp, 2 * wordSize);
     }
     if (Compile::current()->max_vector_size() > 0) {
       __ reinitialize_ptrue();
src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp (4 changes: 2 additions & 2 deletions)
@@ -119,8 +119,8 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr
     cbnz(hdr, slow_case);
     // done
     bind(done);
+    inc_held_monitor_count(rscratch1);
   }
-  increment(Address(rthread, JavaThread::held_monitor_count_offset()));
   return null_check_offset;
 }

@@ -159,8 +159,8 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_
     }
     // done
     bind(done);
+    dec_held_monitor_count(rscratch1);
   }
-  decrement(Address(rthread, JavaThread::held_monitor_count_offset()));
 }


src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp (37 changes: 27 additions & 10 deletions)
@@ -160,16 +160,15 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
 }
 
 enum return_state_t {
-  does_not_return, requires_return
+  does_not_return, requires_return, requires_pop_epilogue_return
 };
 
-
 // Implementation of StubFrame
 
 class StubFrame: public StackObj {
  private:
   StubAssembler* _sasm;
-  bool _return_state;
+  return_state_t _return_state;
 
  public:
   StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return);
@@ -183,8 +182,17 @@ void StubAssembler::prologue(const char* name, bool must_gc_arguments) {
   enter();
 }
 
-void StubAssembler::epilogue() {
-  leave();
+void StubAssembler::epilogue(bool use_pop) {
+  // Avoid using a leave instruction when this frame may
+  // have been frozen, since the current value of rfp
+  // restored from the stub would be invalid. We still
+  // must restore the rfp value saved on enter though.
+  if (use_pop) {
+    ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
+    authenticate_return_address();
+  } else {
+    leave();
+  }
   ret(lr);
 }

@@ -203,10 +211,10 @@ void StubFrame::load_argument(int offset_in_words, Register reg) {
 }
 
 StubFrame::~StubFrame() {
-  if (_return_state == requires_return) {
-    __ epilogue();
-  } else {
+  if (_return_state == does_not_return) {
     __ should_not_reach_here();
+  } else {
+    __ epilogue(_return_state == requires_pop_epilogue_return);
   }
 }

@@ -252,7 +260,7 @@ static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) {

   for (int i = 0; i < FrameMap::nof_cpu_regs; i++) {
     Register r = as_Register(i);
-    if (i <= 18 && i != rscratch1->encoding() && i != rscratch2->encoding()) {
+    if (r == rthread || (i <= 18 && i != rscratch1->encoding() && i != rscratch2->encoding())) {
       int sp_offset = cpu_reg_save_offsets[i];
       oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
                                 r->as_VMReg());
@@ -337,6 +345,15 @@ void Runtime1::initialize_pd() {
   }
 }
 
+// return: offset in 64-bit words.
+uint Runtime1::runtime_blob_current_thread_offset(frame f) {
+  CodeBlob* cb = f.cb();
+  assert(cb == Runtime1::blob_for(C1StubId::monitorenter_id) ||
+         cb == Runtime1::blob_for(C1StubId::monitorenter_nofpu_id), "must be");
+  assert(cb != nullptr && cb->is_runtime_stub(), "invalid frame");
+  int offset = cpu_reg_save_offsets[rthread->encoding()];
+  return offset / 2; // SP offsets are in halfwords
+}
 
 // target: the entry point of the method that creates and posts the exception oop
 // has_argument: true if the exception needs arguments (passed in rscratch1 and rscratch2)
@@ -868,7 +885,7 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
       // fall through
     case C1StubId::monitorenter_id:
       {
-        StubFrame f(sasm, "monitorenter", dont_gc_arguments);
+        StubFrame f(sasm, "monitorenter", dont_gc_arguments, requires_pop_epilogue_return);
         OopMap* map = save_live_registers(sasm, save_fpu_registers);
 
         // Called with store_parameter and not C abi
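
Taken together, the c1_Runtime1_aarch64.cpp changes make the monitorenter stub frame safe to freeze: the stub returns through an explicit pop of the saved rfp/lr pair rather than a leave (after a freeze and thaw the rfp register may no longer point at this frame), rthread joins the saved registers in the oop map, and runtime_blob_current_thread_offset() tells the runtime where that saved JavaThread pointer lives, presumably so it can be refreshed when the continuation resumes on a different carrier. The Java-level trigger is plain monitor contention on virtual threads; a hedged sketch, with names and counts invented for illustration:

    // Losers of the race at a contended synchronized entry now block inside
    // the C1 monitorenter stub, where they can be frozen and later thawed,
    // possibly on a different carrier thread.
    public class ContendedEnterDemo {
        static final Object lock = new Object();
        static long counter;

        public static void main(String[] args) throws InterruptedException {
            Thread[] threads = new Thread[1_000];
            for (int i = 0; i < threads.length; i++) {
                threads[i] = Thread.startVirtualThread(() -> {
                    synchronized (lock) { // contended: may block and unmount here
                        counter++;
                    }
                });
            }
            for (Thread t : threads) {
                t.join();
            }
            System.out.println(counter); // 1000
        }
    }
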
src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp (31 changes: 16 additions & 15 deletions)
@@ -153,7 +153,7 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register
   Label count, no_count;
 
   assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight");
-  assert_different_registers(oop, box, tmp, disp_hdr);
+  assert_different_registers(oop, box, tmp, disp_hdr, rscratch2);
 
   // Load markWord from object into displaced_header.
   ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
@@ -206,12 +206,10 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register
   // Handle existing monitor.
   bind(object_has_monitor);
 
-  // The object's monitor m is unlocked iff m->owner == nullptr,
-  // otherwise m->owner may contain a thread or a stack address.
-  //
-  // Try to CAS m->owner from null to current thread.
+  // Try to CAS owner (no owner => current thread's _lock_id).
+  ldr(rscratch2, Address(rthread, JavaThread::lock_id_offset()));
   add(tmp, disp_hdr, (in_bytes(ObjectMonitor::owner_offset())-markWord::monitor_value));
-  cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
+  cmpxchg(tmp, zr, rscratch2, Assembler::xword, /*acquire*/ true,
           /*release*/ true, /*weak*/ false, tmp3Reg); // Sets flags for result
 
   // Store a non-null value into the box to avoid looking like a re-entrant
@@ -223,7 +221,7 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register

   br(Assembler::EQ, cont); // CAS success means locking succeeded
 
-  cmp(tmp3Reg, rthread);
+  cmp(tmp3Reg, rscratch2);
   br(Assembler::NE, cont); // Check for recursive locking
 
   // Recursive lock case
@@ -236,7 +234,9 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register
   br(Assembler::NE, no_count);
 
   bind(count);
-  increment(Address(rthread, JavaThread::held_monitor_count_offset()));
+  if (LockingMode == LM_LEGACY) {
+    inc_held_monitor_count(rscratch1);
+  }
 
   bind(no_count);
 }
@@ -343,15 +343,17 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe
   br(Assembler::NE, no_count);
 
   bind(count);
-  decrement(Address(rthread, JavaThread::held_monitor_count_offset()));
+  if (LockingMode == LM_LEGACY) {
+    dec_held_monitor_count(rscratch1);
+  }
 
   bind(no_count);
 }

 void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register t1,
                                               Register t2, Register t3) {
   assert(LockingMode == LM_LIGHTWEIGHT, "must be");
-  assert_different_registers(obj, box, t1, t2, t3);
+  assert_different_registers(obj, box, t1, t2, t3, rscratch2);
 
   // Handle inflated monitor.
   Label inflated;
@@ -467,13 +469,14 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
   // Compute owner address.
   lea(t2_owner_addr, owner_address);
 
-  // CAS owner (null => current thread).
-  cmpxchg(t2_owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true,
+  // Try to CAS owner (no owner => current thread's _lock_id).
+  ldr(rscratch2, Address(rthread, JavaThread::lock_id_offset()));
+  cmpxchg(t2_owner_addr, zr, rscratch2, Assembler::xword, /*acquire*/ true,
           /*release*/ false, /*weak*/ false, t3_owner);
   br(Assembler::EQ, monitor_locked);
 
   // Check if recursive.
-  cmp(t3_owner, rthread);
+  cmp(t3_owner, rscratch2);
   br(Assembler::NE, slow_path);
 
   // Recursive.
@@ -486,7 +489,6 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist
   }
 
   bind(locked);
-  increment(Address(rthread, JavaThread::held_monitor_count_offset()));
 
 #ifdef ASSERT
   // Check that locked label is reached with Flags == EQ.
@@ -655,7 +657,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Regi
   }
 
   bind(unlocked);
-  decrement(Address(rthread, JavaThread::held_monitor_count_offset()));
   cmp(zr, zr); // Set Flags to EQ => fast path
 
 #ifdef ASSERT
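
The owner changes above are the core of the new C2 locking fast paths: the ObjectMonitor owner is CASed against the thread's _lock_id, a stable 64-bit identity that a virtual thread keeps as it migrates across carriers, instead of the JavaThread pointer of whichever carrier it happens to be mounted on. A rough Java analogy of id-owned, reentrant locking (a sketch only, not VM code; the class and its fields are invented):

    import java.util.concurrent.atomic.AtomicLong;

    // Toy reentrant lock owned by a stable thread id rather than a Thread
    // reference, mirroring the CAS of ObjectMonitor._owner from 0 to _lock_id.
    final class IdOwnedMonitor {
        private static final long NO_OWNER = 0L;
        private final AtomicLong owner = new AtomicLong(NO_OWNER);
        private int recursions; // only touched by the owning thread

        void lock() {
            long self = Thread.currentThread().threadId(); // stable, carrier-independent
            if (owner.compareAndSet(NO_OWNER, self)) {
                return; // fast path: no owner => us
            }
            if (owner.get() == self) {
                recursions++; // recursive acquire
                return;
            }
            while (!owner.compareAndSet(NO_OWNER, self)) {
                Thread.onSpinWait(); // real VM parks/freezes instead of spinning
            }
        }

        void unlock() {
            if (owner.get() != Thread.currentThread().threadId()) {
                throw new IllegalMonitorStateException();
            }
            if (recursions > 0) {
                recursions--;
            } else {
                owner.set(NO_OWNER);
            }
        }
    }
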
src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp (42 changes: 35 additions & 7 deletions)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -129,6 +129,11 @@ void FreezeBase::adjust_interpreted_frame_unextended_sp(frame& f) {
   }
 }
 
+inline void FreezeBase::prepare_freeze_interpreted_top_frame(frame& f) {
+  assert(f.interpreter_frame_last_sp() == nullptr, "should be null for top frame");
+  f.interpreter_frame_set_last_sp(f.unextended_sp());
+}
+
 inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, const frame& hf) {
   assert(hf.fp() == hf.unextended_sp() + (f.fp() - f.unextended_sp()), "");
   assert((f.at(frame::interpreter_frame_last_sp_offset) != 0)
@@ -149,10 +154,16 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co
   // extended_sp is already relativized by TemplateInterpreterGenerator::generate_normal_entry or
   // AbstractInterpreter::layout_activation
 
+  // The interpreter native wrapper code adds space in the stack equal to size_of_parameters()
+  // after the fixed part of the frame. For wait0 this is equal to 3 words (this + long parameter).
+  // We adjust by this size since otherwise the saved last sp will be less than the extended_sp.
+  DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
+  DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;)
+
   assert((hf.fp() - hf.unextended_sp()) == (f.fp() - f.unextended_sp()), "");
   assert(hf.unextended_sp() == (intptr_t*)hf.at(frame::interpreter_frame_last_sp_offset), "");
   assert(hf.unextended_sp() <= (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
-  assert(hf.unextended_sp() > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
+  assert(hf.unextended_sp() + extra_space > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
   assert(hf.fp() > (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
   assert(hf.fp() <= (intptr_t*)hf.at(frame::interpreter_frame_locals_offset), "");
 }
@@ -213,7 +224,6 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
   // If caller is interpreted it already made room for the callee arguments
   int overlap = caller.is_interpreted_frame() ? ContinuationHelper::InterpretedFrame::stack_argsize(hf) : 0;
   const int fsize = (int)(ContinuationHelper::InterpretedFrame::frame_bottom(hf) - hf.unextended_sp() - overlap);
-  const int locals = hf.interpreter_frame_method()->max_locals();
   intptr_t* frame_sp = caller.unextended_sp() - fsize;
   intptr_t* fp = frame_sp + (hf.fp() - heap_sp);
   if ((intptr_t)fp % frame::frame_alignment != 0) {
@@ -235,7 +245,7 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
   int fsize = FKind::size(hf);
   intptr_t* frame_sp = caller.unextended_sp() - fsize;
   if (bottom || caller.is_interpreted_frame()) {
-    int argsize = hf.compiled_frame_stack_argsize();
+    int argsize = FKind::stack_argsize(hf);
 
     fsize += argsize;
     frame_sp -= argsize;
@@ -252,8 +262,8 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
     // we need to recreate a "real" frame pointer, pointing into the stack
     fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset;
   } else {
-    fp = FKind::stub
-      ? frame_sp + fsize - frame::sender_sp_offset // on AArch64, this value is used for the safepoint stub
+    fp = FKind::stub || FKind::native
+      ? frame_sp + fsize - frame::sender_sp_offset // fp always points to the address below the pushed return pc. We need correct address.
       : *(intptr_t**)(hf.sp() - frame::sender_sp_offset); // we need to re-read fp because it may be an oop and we might have fixed the frame.
   }
   return frame(frame_sp, frame_sp, fp, hf.pc(), hf.cb(), hf.oop_map(), false); // TODO PERF : this computes deopt state; is it necessary?
@@ -277,6 +287,22 @@ inline void ThawBase::patch_pd(frame& f, const frame& caller) {
   patch_callee_link(caller, caller.fp());
 }
 
+inline void ThawBase::patch_pd(frame& f, intptr_t* caller_sp) {
+  intptr_t* fp = caller_sp - frame::sender_sp_offset;
+  patch_callee_link(f, fp);
+}
+
+inline intptr_t* ThawBase::push_cleanup_continuation() {
+  frame enterSpecial = new_entry_frame();
+  intptr_t* sp = enterSpecial.sp();
+
+  sp[-1] = (intptr_t)ContinuationEntry::cleanup_pc();
+  sp[-2] = (intptr_t)enterSpecial.fp();
+
+  log_develop_trace(continuations, preempt)("push_cleanup_continuation initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT, p2i(sp + 2 * frame::metadata_words), p2i(sp));
+  return sp;
+}
+
 inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, const frame& f) {
   // Make sure that last_sp is kept relativized.
   assert((intptr_t*)f.at_relative(frame::interpreter_frame_last_sp_offset) == f.unextended_sp(), "");
@@ -285,7 +311,9 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c
   assert(f.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, "");
 
   // Make sure that extended_sp is kept relativized.
-  assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp(), "");
+  DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
+  DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;) // see comment in relativize_interpreted_frame_metadata()
+  assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp() + extra_space, "");
 }

#endif // CPU_AARCH64_CONTINUATIONFREEZETHAW_AARCH64_INLINE_HPP
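
The wait0 special-casing above reflects how Object.wait(long) reaches the VM: it calls the native method wait0(long), whose interpreted frame reserves size_of_parameters() extra stack words beyond the fixed layout, three for wait0 (the receiver plus a two-word long), and the freeze/thaw asserts must allow for that gap. The frozen stack shape comes from nothing more exotic than a timed wait on a virtual thread (illustrative sketch):

    // A timed Object.wait() freezes the virtual thread's stack while the
    // frame for wait0(long) - receiver plus a long, 3 words of parameters -
    // is still on it.
    public class TimedWaitDemo {
        public static void main(String[] args) throws Exception {
            Object lock = new Object();
            Thread vt = Thread.startVirtualThread(() -> {
                synchronized (lock) {
                    try {
                        lock.wait(50); // enters wait0(J), then unmounts
                    } catch (InterruptedException ignored) {
                    }
                }
            });
            vt.join();
        }
    }
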
src/hotspot/cpu/aarch64/continuationHelper_aarch64.inline.hpp (20 changes: 18 additions & 2 deletions)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,6 +40,22 @@ static inline intptr_t** link_address(const frame& f) {
     : (intptr_t**)(f.unextended_sp() + f.cb()->frame_size() - frame::sender_sp_offset);
 }
 
+static inline void patch_return_pc_with_preempt_stub(frame& f) {
+  if (f.is_runtime_frame()) {
+    // Unlike x86 we don't know where in the callee frame the return pc is
+    // saved so we can't patch the return from the VM call back to Java.
+    // Instead, we will patch the return from the runtime stub back to the
+    // compiled method so that the target returns to the preempt cleanup stub.
+    intptr_t* caller_sp = f.sp() + f.cb()->frame_size();
+    caller_sp[-1] = (intptr_t)StubRoutines::cont_preempt_stub();
+  } else {
+    // The target will check for preemption once it returns to the interpreter
+    // or the native wrapper code and will manually jump to the preempt stub.
+    JavaThread *thread = JavaThread::current();
+    thread->set_preempt_alternate_return(StubRoutines::cont_preempt_stub());
+  }
+}
+
 inline int ContinuationHelper::frame_align_words(int size) {
 #ifdef _LP64
   return size & 1;
@@ -83,12 +99,12 @@ inline void ContinuationHelper::set_anchor_to_entry_pd(JavaFrameAnchor* anchor,
   anchor->set_last_Java_fp(entry->entry_fp());
 }
 
-#ifdef ASSERT
 inline void ContinuationHelper::set_anchor_pd(JavaFrameAnchor* anchor, intptr_t* sp) {
   intptr_t* fp = *(intptr_t**)(sp - frame::sender_sp_offset);
   anchor->set_last_Java_fp(fp);
 }
 
+#ifdef ASSERT
 inline bool ContinuationHelper::Frame::assert_frame_laid_out(frame f) {
   intptr_t* sp = f.sp();
   address pc = ContinuationHelper::return_address_at(
[Diff truncated: the remaining 240 of the 246 changed files are not shown.]
