release carrier on Object.wait
pchilano committed May 23, 2024
1 parent 3ead0d8 commit 756743d
Showing 71 changed files with 1,580 additions and 548 deletions.
65 changes: 46 additions & 19 deletions src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp
@@ -155,10 +155,16 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co
// extended_sp is already relativized by TemplateInterpreterGenerator::generate_normal_entry or
// AbstractInterpreter::layout_activation

// The interpreter native wrapper code adds space in the stack equal to size_of_parameters()
// after the fixed part of the frame. For wait0 this is equal to 3 words (this + long parameter).
// We adjust by this size since otherwise the saved last sp will be less than the extended_sp.
DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;)

assert((hf.fp() - hf.unextended_sp()) == (f.fp() - f.unextended_sp()), "");
assert(hf.unextended_sp() == (intptr_t*)hf.at(frame::interpreter_frame_last_sp_offset), "");
assert(hf.unextended_sp() <= (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
assert(hf.unextended_sp() > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
assert(hf.unextended_sp() + extra_space > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
assert(hf.fp() > (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
assert(hf.fp() <= (intptr_t*)hf.at(frame::interpreter_frame_locals_offset), "");
}
@@ -219,7 +225,6 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
// If caller is interpreted it already made room for the callee arguments
int overlap = caller.is_interpreted_frame() ? ContinuationHelper::InterpretedFrame::stack_argsize(hf) : 0;
const int fsize = (int)(ContinuationHelper::InterpretedFrame::frame_bottom(hf) - hf.unextended_sp() - overlap);
const int locals = hf.interpreter_frame_method()->max_locals();
intptr_t* frame_sp = caller.unextended_sp() - fsize;
intptr_t* fp = frame_sp + (hf.fp() - heap_sp);
if ((intptr_t)fp % frame::frame_alignment != 0) {
@@ -258,7 +263,7 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
// we need to recreate a "real" frame pointer, pointing into the stack
fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset;
} else {
fp = FKind::stub
fp = FKind::stub || FKind::native
? frame_sp + fsize - frame::sender_sp_offset // fp always points to the address below the pushed return pc. We need correct address.
: *(intptr_t**)(hf.sp() - frame::sender_sp_offset); // we need to re-read fp because it may be an oop and we might have fixed the frame.
}
@@ -287,10 +292,32 @@ inline void ThawBase::patch_pd(frame& f, intptr_t* caller_sp) {
Unimplemented();
}

inline intptr_t* ThawBase::push_preempt_rerun_adapter(frame top, bool is_interpreted_frame) {
inline void ThawBase::fix_native_return_pc_pd(frame& top) {
// Nothing to do since the last pc saved before making the call to
// JVM_MonitorWait() was already set to the correct resume pc. Just
// do some sanity check.
#ifdef ASSERT
Method* method = top.is_interpreted_frame() ? top.interpreter_frame_method() : CodeCache::find_blob(top.pc())->as_nmethod()->method();
assert(method->is_object_wait0(), "");
#endif
}

inline intptr_t* ThawBase::push_resume_adapter(frame& top) {
intptr_t* sp = top.sp();
CodeBlob* cb = top.cb();
if (!is_interpreted_frame && cb->frame_size() == 2) {

#ifdef ASSERT
RegisterMap map(JavaThread::current(),
RegisterMap::UpdateMap::skip,
RegisterMap::ProcessFrames::skip,
RegisterMap::WalkContinuation::skip);
frame caller = top.sender(&map);
intptr_t link_addr = (intptr_t)ContinuationHelper::Frame::callee_link_address(caller);
assert(sp[-2] == link_addr, "wrong link address: " INTPTR_FORMAT " != " INTPTR_FORMAT, sp[-2], link_addr);
#endif

bool interpreted = top.is_interpreted_frame();
if (!interpreted && cb->frame_size() == 2) {
// C2 runtime stub case. For aarch64 the real size of the c2 runtime stub is 2 words bigger
// than what we think, i.e. size is 4. This is because the _last_Java_sp is not set to the
// sp right before making the call to the VM, but rather it is artificially set 2 words above
@@ -306,21 +333,19 @@ inline intptr_t* ThawBase::push_preempt_rerun_adapter(frame top, bool is_interpr
}

intptr_t* fp = sp - frame::sender_sp_offset;
address pc = is_interpreted_frame ? Interpreter::cont_preempt_rerun_interpreter_adapter()
: StubRoutines::cont_preempt_rerun_compiler_adapter();
address pc = interpreted ? Interpreter::cont_resume_interpreter_adapter()
: StubRoutines::cont_resume_compiler_adapter();

sp -= frame::metadata_words;
*(address*)(sp - frame::sender_sp_ret_address_offset()) = pc;
*(intptr_t**)(sp - frame::sender_sp_offset) = fp;

log_develop_trace(continuations, preempt)("push_preempt_rerun_%s_adapter() initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT " fp: " INTPTR_FORMAT,
is_interpreted_frame ? "interpreter" : "safepointblob", p2i(sp + frame::metadata_words), p2i(sp), p2i(fp));
log_develop_trace(continuations, preempt)("push_resume_%s_adapter() initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT " fp: " INTPTR_FORMAT,
interpreted ? "interpreter" : "compiler", p2i(sp + frame::metadata_words), p2i(sp), p2i(fp));
return sp;
}

inline intptr_t* ThawBase::push_preempt_monitorenter_redo(stackChunkOop chunk) {

// fprintf(stderr, "push_preempt_monitorenter_redo\n");
inline intptr_t* ThawBase::push_resume_monitor_operation(stackChunkOop chunk) {
frame enterSpecial = new_entry_frame();
intptr_t* sp = enterSpecial.sp();

@@ -329,15 +354,15 @@ inline intptr_t* ThawBase::push_preempt_monitorenter_redo(stackChunkOop chunk) {
sp[1] = (intptr_t)StubRoutines::cont_returnBarrier();
sp[0] = (intptr_t)enterSpecial.fp();

// Now push the ObjectMonitor*
// Now push the ObjectWaiter*
sp -= frame::metadata_words;
sp[1] = (intptr_t)chunk->objectMonitor(); // alignment
sp[0] = (intptr_t)chunk->objectMonitor();
sp[1] = (intptr_t)chunk->object_waiter(); // alignment
sp[0] = (intptr_t)chunk->object_waiter();

// Finally arrange to return to the monitorenter_redo stub
sp[-1] = (intptr_t)StubRoutines::cont_preempt_monitorenter_redo();
// Finally arrange to return to the resume_monitor_operation stub
sp[-1] = (intptr_t)StubRoutines::cont_resume_monitor_operation();
sp[-2] = (intptr_t)enterSpecial.fp();
log_develop_trace(continuations, preempt)("push_preempt_monitorenter_redo initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT, p2i(sp + 2 * frame::metadata_words), p2i(sp));
log_develop_trace(continuations, preempt)("push_resume_monitor_operation initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT, p2i(sp + 2 * frame::metadata_words), p2i(sp));
return sp;
}

@@ -349,7 +374,9 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c
assert(f.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, "");

// Make sure that extended_sp is kept relativized.
assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp(), "");
DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;) // see comment in relativize_interpreted_frame_metadata()
assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp() + extra_space, "");
}

#endif // CPU_AARCH64_CONTINUATIONFREEZETHAW_AARCH64_INLINE_HPP
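The relaxed asserts in the relativize/derelativize hunks above follow from the comment at the top of the file's first hunk: the native wrapper for wait0 keeps size_of_parameters() words (receiver plus the long timeout, three words) on the stack past the fixed frame, so the recorded unextended_sp can end up below extended_sp by less than that many words. The snippet below is only a toy sketch of that bound; the ToyInterpFrame type is invented for illustration and is not the real frame class.

```cpp
// Sketch of the relaxed bound: for ordinary interpreted frames extended_sp must lie
// strictly below unextended_sp; for the wait0 native wrapper it may exceed it by up to
// size_of_parameters() words (the space the wrapper keeps after the fixed frame).
#include <cassert>
#include <cstdint>

struct ToyInterpFrame {                 // invented stand-in for the real frame class
    intptr_t* unextended_sp;
    intptr_t* extended_sp;
    bool      is_object_wait0;
    int       size_of_parameters;       // 3 for wait0(long): this + long timeout
};

static void verify_extended_sp(const ToyInterpFrame& f) {
    int extra_space = f.is_object_wait0 ? f.size_of_parameters : 0;
    assert(f.unextended_sp + extra_space > f.extended_sp && "extended_sp out of bounds");
}

int main() {
    intptr_t stack[32] = {};
    verify_extended_sp({stack + 16, stack + 12, false, 0});  // normal interpreted frame
    verify_extended_sp({stack + 16, stack + 18, true, 3});   // wait0 native wrapper frame
    return 0;
}
```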
17 changes: 7 additions & 10 deletions src/hotspot/cpu/aarch64/continuationHelper_aarch64.inline.hpp
@@ -41,21 +41,18 @@ static inline intptr_t** link_address(const frame& f) {
}

static inline void patch_return_pc_with_preempt_stub(frame& f) {
// Unlike x86 we don't know where in the callee frame the return pc is
// saved so we can't patch the return from the VM call back to Java. If
// the target is coming from compiled code we will patch the return from
// the safepoint handler blob back to the compiled method instead. If
// it's coming from the interpreter, the target will check for preemption
// once it returns to the interpreter and will manually jump to the
// preempt stub.
if (!f.is_interpreted_frame()) {
assert(f.is_runtime_frame(), "invariant");
if (f.is_runtime_frame()) {
// Unlike x86 we don't know where in the callee frame the return pc is
// saved so we can't patch the return from the VM call back to Java.
// Instead, we will patch the return from the runtime stub back to the
// compiled method so that the target returns to the preempt cleanup stub.
intptr_t* caller_sp = f.sp() + f.cb()->frame_size();
caller_sp[-1] = (intptr_t)StubRoutines::cont_preempt_stub();
} else {
// The target will check for preemption once it returns to the interpreter
// or the native wrapper code and will manually jump to the preempt stub.
JavaThread *thread = JavaThread::current();
thread->set_preempt_alternate_return(StubRoutines::cont_preempt_stub());
thread->set_preempt_alternate_return_sp((address)f.sp());
}
}

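As a rough, self-contained illustration of the alternate-return mechanism described in the comments of patch_return_pc_with_preempt_stub() above (everything below is invented for illustration; the real state lives in JavaThread and the targets are generated stubs): for interpreted and native frames the VM arms a per-thread slot while the thread is blocked, and the code that runs after the blocking call checks the slot, clears it, and branches to it instead of falling through into the normal epilogue.

```cpp
// Minimal model of the "alternate return" redirection: instead of patching a return
// address on the stack (not possible here because the callee's save slot is unknown),
// the VM stores the redirect target in a per-thread field, and the code that resumes
// after the blocking call checks that field and jumps to it manually.
#include <cstdio>

struct ToyThread {
    void (*preempt_alternate_return)() = nullptr;    // analogue of the JavaThread field
};

static void preempt_cleanup_stub() {                 // stand-in for cont_preempt_stub()
    std::puts("redirected: freeze the continuation and release the carrier");
}

// Stand-in for a blocking VM call such as Object.wait(); if the thread was preempted
// while blocked, the VM arms the alternate return before letting the call return.
static void blocking_call(ToyThread* t, bool preempted) {
    if (preempted) t->preempt_alternate_return = preempt_cleanup_stub;
}

static void native_wrapper_epilogue(ToyThread* t, bool preempted) {
    blocking_call(t, preempted);
    if (auto target = t->preempt_alternate_return) {  // ldr rscratch1, ...; cbz ...
        t->preempt_alternate_return = nullptr;        // str zr, ...
        target();                                     // br rscratch1
        return;
    }
    std::puts("not preempted: run the normal epilogue");
}

int main() {
    ToyThread t;
    native_wrapper_epilogue(&t, /*preempted=*/false);
    native_wrapper_epilogue(&t, /*preempted=*/true);
}
```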
3 changes: 2 additions & 1 deletion src/hotspot/cpu/aarch64/frame_aarch64.hpp
@@ -73,7 +73,8 @@
sender_sp_offset = 2,

// Interpreter frames
interpreter_frame_oop_temp_offset = 3, // for native calls only
interpreter_frame_result_handler_offset = 3, // for native calls only
interpreter_frame_oop_temp_offset = 2, // for native calls only

interpreter_frame_sender_sp_offset = -1,
// outgoing sp before a call to an invoked method
5 changes: 0 additions & 5 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -1965,13 +1965,8 @@ void MacroAssembler::call_VM_leaf_base(address entry_point,
if (entry_point == CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter) ||
entry_point == CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj)) {
ldr(rscratch1, Address(rthread, JavaThread::preempt_alternate_return_offset()));
ldr(rscratch2, Address(rthread, in_bytes(JavaThread::preempt_alternate_return_offset()) + wordSize));
cbz(rscratch1, not_preempted);
mov(r4, sp); // r4 is clobbered by VM calls, so free here
cmp(rscratch2, r4);
br(LT, not_preempted);
str(zr, Address(rthread, JavaThread::preempt_alternate_return_offset()));
str(zr, Address(rthread, in_bytes(JavaThread::preempt_alternate_return_offset()) + wordSize));
br(rscratch1);
}

24 changes: 16 additions & 8 deletions src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -1754,9 +1754,8 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Change state to native (we save the return address in the thread, since it might not
// be pushed on the stack when we do a stack traversal).
// We use the same pc/oopMap repeatedly when we call out

Label native_return;
__ set_last_Java_frame(sp, noreg, native_return, rscratch1);
Label resume_pc;
__ set_last_Java_frame(sp, noreg, resume_pc, rscratch1);

Label dtrace_method_entry, dtrace_method_entry_done;
{
@@ -1863,11 +1862,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,

__ rt_call(native_func);

__ bind(native_return);

intptr_t return_pc = (intptr_t) __ pc();
oop_maps->add_gc_map(return_pc - start, map);

// Verify or restore cpu control state after JNI call
__ restore_cpu_control_state_after_jni(rscratch1, rscratch2);

@@ -1934,6 +1928,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ stlrw(rscratch1, rscratch2);
__ bind(after_transition);

// Check preemption for Object.wait()
if (method->is_object_wait0()) {
Label not_preempted;
__ ldr(rscratch1, Address(rthread, JavaThread::preempt_alternate_return_offset()));
__ cbz(rscratch1, not_preempted);
__ str(zr, Address(rthread, JavaThread::preempt_alternate_return_offset()));
__ br(rscratch1);
__ bind(not_preempted);
}
__ bind(resume_pc);

intptr_t the_pc = (intptr_t) __ pc();
oop_maps->add_gc_map(the_pc - start, map);

Label reguard;
Label reguard_done;
__ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
@@ -116,6 +116,7 @@ inline int StackChunkFrameStream<frame_kind>::interpreter_frame_num_oops() const
f.interpreted_frame_oop_map(&mask);
return mask.num_oops()
+ 1 // for the mirror oop
+ (f.interpreter_frame_method()->is_native() ? 1 : 0) // temp oop slot
+ pointer_delta_as_int((intptr_t*)f.interpreter_frame_monitor_begin(),
(intptr_t*)f.interpreter_frame_monitor_end())/BasicObjectLock::size();
}
24 changes: 11 additions & 13 deletions src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -7158,18 +7158,16 @@ class StubGenerator: public StubCodeGenerator {
__ mov(sp, rscratch2);

Label preemption_cancelled;
// FIXME: Whose responsibility is it to clear this flag?
__ ldrb(rscratch1, Address(rthread, JavaThread::preemption_cancelled_offset()));
__ cbnz(rscratch1, preemption_cancelled);

//__ trace("Remove enterSpecial frame from the stack and return to Continuation.run()");
// Remove enterSpecial frame from the stack and return to Continuation.run()
SharedRuntime::continuation_enter_cleanup(_masm);
__ leave();
__ ret(lr);

__ bind(preemption_cancelled);
//__ trace("preemption_cancelled");
__ strb(zr, Address(rthread, JavaThread::preemption_cancelled_offset()));
__ lea(rfp, Address(sp, checked_cast<int32_t>(ContinuationEntry::size())));
__ lea(rscratch1, ExternalAddress((address)&ContinuationEntry::_thaw_call_pc));
__ ldr(rscratch1, Address(rscratch1));
@@ -7178,9 +7176,9 @@ class StubGenerator: public StubCodeGenerator {
return start;
}

address generate_cont_preempt_rerun_compiler_adapter() {
address generate_cont_resume_compiler_adapter() {
if (!Continuations::enabled()) return nullptr;
StubCodeMark mark(this, "StubRoutines", "Continuation preempt safepoint blob adapter");
StubCodeMark mark(this, "StubRoutines", "Continuation resume compiler adapter");
address start = __ pc();

// The safepoint blob handler expects that r20, being a callee saved register, will be preserved
Expand All @@ -7196,25 +7194,25 @@ class StubGenerator: public StubCodeGenerator {
return start;
}

address generate_cont_preempt_monitorenter_redo() {
address generate_cont_resume_monitor_operation() {
if (!Continuations::enabled()) return nullptr;
StubCodeMark mark(this, "StubRoutines","Continuation monitorenter redo stub");
StubCodeMark mark(this, "StubRoutines","Continuation resume monitor operation");
address start = __ pc();

const Register mon_reg = c_rarg1;
__ ldr(mon_reg, __ post(sp, 2 * wordSize));
const Register waiter_reg = c_rarg1;
__ ldr(waiter_reg, __ post(sp, 2 * wordSize));

#ifdef ASSERT
{ Label L;
__ cbnz(mon_reg, L);
__ cbnz(waiter_reg, L);
__ stop("ObjectMonitor to use is null");
__ bind(L);
}
#endif // ASSERT

__ set_last_Java_frame(sp, rfp, lr, rscratch1);
__ mov(c_rarg0, rthread);
__ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::redo_monitorenter));
__ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::resume_monitor_operation));
__ reset_last_Java_frame(true);

Label failAcquire;
@@ -8510,8 +8508,8 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_cont_returnBarrier = generate_cont_returnBarrier();
StubRoutines::_cont_returnBarrierExc = generate_cont_returnBarrier_exception();
StubRoutines::_cont_preempt_stub = generate_cont_preempt_stub();
StubRoutines::_cont_preempt_monitorenter_redo = generate_cont_preempt_monitorenter_redo();
StubRoutines::_cont_preempt_rerun_compiler_adapter = generate_cont_preempt_rerun_compiler_adapter();
StubRoutines::_cont_resume_monitor_operation = generate_cont_resume_monitor_operation();
StubRoutines::_cont_resume_compiler_adapter = generate_cont_resume_compiler_adapter();

JFR_ONLY(generate_jfr_stubs();)
}
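The resume path hands the pending ObjectWaiter* from the thaw code to this stub through the stack: push_resume_monitor_operation() (in continuationFreezeThaw_aarch64.inline.hpp above) pushes the pointer twice to keep alignment on top of the enterSpecial frame, and generate_cont_resume_monitor_operation() pops it into c_rarg1 before calling SharedRuntime::resume_monitor_operation(). Below is a toy C++ model of that handoff; the ToyStack type and the single-word "frames" are simplifications for illustration, not the real layout.

```cpp
// Toy model of the handoff: the thaw code leaves the ObjectWaiter* on top of the stack
// (duplicated to keep 16-byte alignment), and the resume stub pops both words and uses
// the value as the argument to the runtime call.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct ToyObjectWaiter { int id; };

struct ToyStack {
    intptr_t slots[16];
    int      sp = 16;                                // grows downward, like a real stack
    void push(intptr_t v) { slots[--sp] = v; }
    intptr_t pop()        { return slots[sp++]; }
};

// Thaw side: analogue of push_resume_monitor_operation().
static void push_resume_monitor_operation(ToyStack& st, ToyObjectWaiter* waiter) {
    st.push((intptr_t)waiter);                       // sp[1] -- alignment copy
    st.push((intptr_t)waiter);                       // sp[0] -- the actual argument
}

// Stub side: analogue of generate_cont_resume_monitor_operation().
static void resume_monitor_operation_stub(ToyStack& st) {
    ToyObjectWaiter* waiter = (ToyObjectWaiter*)st.pop();   // ldr waiter_reg, [sp], #16
    st.pop();                                               // discard the alignment copy
    assert(waiter != nullptr && "ObjectWaiter to use is null");
    std::printf("resuming monitor operation for waiter %d\n", waiter->id);
}

int main() {
    ToyStack st;
    ToyObjectWaiter w{42};
    push_resume_monitor_operation(st, &w);
    resume_monitor_operation_stub(st);
}
```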
23 changes: 19 additions & 4 deletions src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
@@ -612,7 +612,7 @@ address TemplateInterpreterGenerator::generate_safept_entry_for(
return entry;
}

address TemplateInterpreterGenerator::generate_cont_preempt_rerun_interpreter_adapter() {
address TemplateInterpreterGenerator::generate_cont_resume_interpreter_adapter() {
if (!Continuations::enabled()) return nullptr;
address start = __ pc();

@@ -1354,6 +1354,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// result handler is in r0
// set result handler
__ mov(result_handler, r0);
__ str(r0, Address(rfp, frame::interpreter_frame_result_handler_offset * wordSize));

// pass mirror handle if static call
{
Label L;
@@ -1392,8 +1394,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// Set the last Java PC in the frame anchor to be the return address from
// the call to the native method: this will allow the debugger to
// generate an accurate stack trace.
Label native_return;
__ set_last_Java_frame(esp, rfp, native_return, rscratch1);
Label resume_pc;
__ set_last_Java_frame(esp, rfp, resume_pc, rscratch1);

// change thread state
#ifdef ASSERT
@@ -1414,7 +1416,7 @@

// Call the native method.
__ blr(r10);
__ bind(native_return);

__ get_method(rmethod);
// result potentially in r0 or v0

@@ -1479,6 +1481,18 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);

// Check preemption for Object.wait()
Label not_preempted;
__ ldr(rscratch1, Address(rthread, JavaThread::preempt_alternate_return_offset()));
__ cbz(rscratch1, not_preempted);
__ str(zr, Address(rthread, JavaThread::preempt_alternate_return_offset()));
__ br(rscratch1);
__ bind(resume_pc);
// On resume we need to set up stack as expected
__ push(dtos);
__ push(ltos);
__ bind(not_preempted);

// reset_last_Java_frame
__ reset_last_Java_frame(true);

@@ -1497,6 +1511,7 @@
{
Label no_oop;
__ adr(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT)));
__ ldr(result_handler, Address(rfp, frame::interpreter_frame_result_handler_offset*wordSize));
__ cmp(t, result_handler);
__ br(Assembler::NE, no_oop);
// Unbox oop result, e.g. JNIHandles::resolve result.
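The new interpreter_frame_result_handler_offset slot (see the frame_aarch64.hpp change above) appears to be needed because this native entry can now be preempted inside Object.wait(): when the frame is thawed and execution resumes at resume_pc, register contents from before the freeze are not reconstructed, so the result handler is spilled into the frame right after it is computed and reloaded from there before the T_OBJECT unboxing check. A loose, self-contained analogy, with all names invented:

```cpp
// Analogy: state that must survive a call during which the "register file" may be
// discarded (the frame is frozen and later thawed, possibly on another carrier) is
// stored in the frame itself, and reloaded from there after the call returns.
#include <cstdint>
#include <cstdio>

struct ToyFrame {
    intptr_t result_handler_slot = 0;   // analogue of interpreter_frame_result_handler_offset
    intptr_t oop_temp_slot       = 0;   // analogue of interpreter_frame_oop_temp_offset
};

using ResultHandler = const char* (*)();
static const char* object_result_handler() { return "T_OBJECT handler"; }

// The blocking call: register values are conceptually lost across it when the frame is
// frozen, so only what was written into the frame is still available afterwards.
static void blocking_wait(ToyFrame& f) { (void)f; }

int main() {
    ToyFrame frame;
    ResultHandler handler = object_result_handler;       // computed before the call
    frame.result_handler_slot = (intptr_t)handler;        // spill into the frame
    blocking_wait(frame);                                 // may unmount/remount here
    handler = (ResultHandler)frame.result_handler_slot;   // reload from the frame
    std::printf("reloaded result handler: %s\n", handler());
    return 0;
}
```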
4 changes: 2 additions & 2 deletions src/hotspot/cpu/arm/continuationFreezeThaw_arm.inline.hpp
@@ -91,12 +91,12 @@ inline void ThawBase::patch_pd(frame& f, intptr_t* caller_sp) {
Unimplemented();
}

inline intptr_t* ThawBase::push_preempt_rerun_adapter(frame top, bool is_interpreted_frame) {
inline intptr_t* ThawBase::push_resume_adapter(frame& top, bool is_interpreted_frame) {
Unimplemented();
return nullptr;
}

inline intptr_t* ThawBase::push_preempt_monitorenter_redo(stackChunkOop chunk) {
inline intptr_t* ThawBase::push_resume_monitor_operation(stackChunkOop chunk) {
Unimplemented();
return nullptr;
}
2 changes: 1 addition & 1 deletion src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp
@@ -458,7 +458,7 @@ address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state,
return entry;
}

address TemplateInterpreterGenerator::generate_cont_preempt_rerun_interpreter_adapter() {
address TemplateInterpreterGenerator::generate_cont_resume_interpreter_adapter() {
return nullptr;
}
