Skip to content

Commit

Permalink
[GC] Add option G1BarrierSimple to use a simple G1 post barrier
Browse files Browse the repository at this point in the history
Summary: Provide option G1BarrierSimple to use simple G1 post barrier for better mutator performance

Testing: CI pipeline

Reviewers: yude, yifeng

Issue: dragonwell-project#56
  • Loading branch information
mmyxym committed Jun 19, 2024
1 parent 976b298 commit 693ec2e
Show file tree
Hide file tree
Showing 13 changed files with 190 additions and 7 deletions.
40 changes: 40 additions & 0 deletions src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,29 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm

void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register start, Register count, Register scratch, RegSet saved_regs) {
// G1BarrierSimple path: dirty every card covered by the oop array that begins
// at 'start' and holds 'count' elements, using plain byte stores, then return
// before the register-saving code that follows this block is emitted.
// The emitted code overwrites start, count and scratch.
if (G1BarrierSimple) {
Label L_loop, L_done;
// 'end' is an alias for the 'count' register, not a separate register.
const Register end = count;

__ cbz(count, L_done); // zero count - nothing to do

__ lea(end, Address(start, count, Address::lsl(LogBytesPerHeapOop))); // end = start + count << LogBytesPerHeapOop
__ sub(end, end, BytesPerHeapOop); // last element address to make inclusive
// Convert both addresses to card indices.
__ lsr(start, start, CardTable::card_shift());
__ lsr(end, end, CardTable::card_shift());
__ sub(count, end, start); // index of the last card relative to the first (number of cards - 1)

// Turn the first card index into the address of its card table entry.
__ load_byte_map_base(scratch);
__ add(start, start, scratch);
// Walk from the last card down to the first; count runs from (cards - 1) to 0 inclusive.
__ bind(L_loop);
// The store below writes zr, so the dirty value has to be zero.
assert((int)CardTable::dirty_card_val() == 0, "must be 0");
__ strb(zr, Address(start, count));
__ subs(count, count, 1);
__ br(Assembler::GE, L_loop); // loop while count has not gone negative
__ bind(L_done);
return;
}

__ push(saved_regs, sp);
assert_different_registers(start, count, scratch);
assert_different_registers(c_rarg0, count);
Expand Down Expand Up @@ -204,6 +227,16 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Label done;
Label runtime;

if (G1BarrierSimple) {
const Register card_addr = tmp1;
__ lsr(card_addr, store_addr, CardTable::card_shift());
__ load_byte_map_base(tmp2);
__ add(card_addr, card_addr, tmp2);
__ strb(zr, Address(card_addr));
__ bind(done);
return;
}

// Does store cross heap regions?

__ eor(tmp1, store_addr, new_val);
Expand Down Expand Up @@ -444,6 +477,13 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler*
__ load_parameter(0, card_offset);
__ lsr(card_offset, card_offset, CardTable::card_shift());
__ load_byte_map_base(byte_map_base);

if (G1BarrierSimple) {
__ strb(zr, Address(byte_map_base, card_offset));
__ bind(done);
__ epilogue();
return;
}
__ ldrb(rscratch1, Address(byte_map_base, card_offset));
__ cmpw(rscratch1, (int)G1CardTable::g1_young_card_val());
__ br(Assembler::EQ, done);
Expand Down
47 changes: 47 additions & 0 deletions src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@

#define __ masm->

#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)

void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count) {
bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
Expand Down Expand Up @@ -98,6 +100,36 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm

void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, Register tmp) {
#ifdef _LP64
// G1BarrierSimple path: dirty every card covered by the oop array that begins
// at 'addr' and holds 'count' elements, using plain byte stores, then return
// before the register-saving code that follows this block is emitted.
// The emitted code overwrites addr, count and tmp.
if (G1BarrierSimple) {
CardTableBarrierSet* ct =
barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
// Card table base, materialised below as a 64-bit immediate.
intptr_t disp = (intptr_t) ct->card_table()->byte_map_base();

Label L_loop, L_done;
// 'end' is an alias for the 'count' register, not a separate register.
const Register end = count;
assert_different_registers(addr, end);

__ testl(count, count);
__ jcc(Assembler::zero, L_done); // zero count - nothing to do

__ leaq(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size
__ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive
// Convert both addresses to card indices.
__ shrptr(addr, CardTable::card_shift());
__ shrptr(end, CardTable::card_shift());
__ subptr(end, addr); // end --> index of the last card relative to the first (cards - 1)

// Turn the first card index into the address of its card table entry.
__ mov64(tmp, disp);
__ addptr(addr, tmp);
// Walk from the last card down to the first; count runs from (cards - 1) to 0 inclusive.
__ bind(L_loop);
__ movb(Address(addr, count, Address::times_1), G1CardTable::dirty_card_val());
__ decrement(count);
__ jcc(Assembler::greaterEqual, L_loop); // loop while count has not gone negative
__ bind(L_done);
return;
}
#endif

__ push_call_clobbered_registers(false /* save_fpu */);
#ifdef _LP64
if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
Expand Down Expand Up @@ -308,6 +340,12 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
__ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
__ addptr(card_addr, cardtable);

if (G1BarrierSimple) {
__ movb(Address(card_addr, 0), G1CardTable::dirty_card_val());
__ bind(done);
return;
}

__ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val());
__ jcc(Assembler::equal, done);

Expand Down Expand Up @@ -542,6 +580,15 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler*

NOT_LP64(__ get_thread(thread);)

if (G1BarrierSimple) {
__ movb(Address(card_addr, 0), G1CardTable::dirty_card_val());
__ bind(done);
__ pop(rcx);
__ pop(rax);
__ epilogue();
return;
}

__ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val());
__ jcc(Assembler::equal, done);

Expand Down
11 changes: 11 additions & 0 deletions src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,13 @@ void G1BarrierSetC2::post_barrier(GraphKit* kit,
// Combine card table base and card offset
Node* card_adr = __ AddP(no_base, byte_map_base_node(kit), card_offset );

if (G1BarrierSimple) {
__ store(__ ctrl(), card_adr, dirty_card, T_BYTE, Compile::AliasIdxRaw, MemNode::unordered);
// Final sync IdealKit and GraphKit.
kit->final_sync(ideal);
return;
}

// If we know the value being stored does it cross regions?

if (val != nullptr) {
Expand Down Expand Up @@ -715,6 +722,10 @@ void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) c
macro->replace_node(node, macro->zerocon(node->as_Load()->bottom_type()->basic_type()));
} else {
assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required");
if (G1BarrierSimple) {
CardTableBarrierSetC2::eliminate_gc_barrier(macro, node);
return;
}
assert(node->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes");
// It could be only one user, URShift node, in Object.clone() intrinsic
// but the new allocation is passed to arraycopy stub and it could not
Expand Down
19 changes: 19 additions & 0 deletions src/hotspot/share/gc/g1/g1Arguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,25 @@ void G1Arguments::initialize() {
FLAG_SET_ERGO(ParallelGCThreads, 1);
}

// Validate -XX:+G1BarrierSimple. The flag is dropped (with a warning) on builds
// that are not 64-bit x86/AArch64 and when JVMCI is enabled; otherwise
// concurrent refinement is switched off.
if (G1BarrierSimple) {
#if !defined(_LP64) || !(defined(X86) || defined(AARCH64))
// Unsupported build: warn and reset the flag.
warning("G1BarrierSimple is not supported with current platform"
"; ignoring G1BarrierSimple flag.");
FLAG_SET_DEFAULT(G1BarrierSimple, false);
#else
#if INCLUDE_JVMCI
if (EnableJVMCI) {
warning("G1BarrierSimple is incompatible with JVMCI"
"; ignoring G1BarrierSimple flag.");
FLAG_SET_DEFAULT(G1BarrierSimple, false);
} else
#endif
// When INCLUDE_JVMCI is set, the block below is the 'else' branch of the
// EnableJVMCI test above; without it, the block runs unconditionally.
{
// NOTE(review): presumably refinement is disabled because the simple barrier
// only dirties cards and does not log them for refinement - confirm.
FLAG_SET_DEFAULT(G1UseConcRefinement, false);
}
#endif
}

if (!G1UseConcRefinement) {
if (!FLAG_IS_DEFAULT(G1ConcRefinementThreads)) {
log_warning(gc, ergo)("Ignoring -XX:G1ConcRefinementThreads "
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/share/gc/g1/g1BarrierSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ void G1BarrierSet::invalidate(JavaThread* thread, MemRegion mr) {
}
volatile CardValue* byte = _card_table->byte_for(mr.start());
CardValue* last_byte = _card_table->byte_for(mr.last());
if (G1BarrierSimple) {
memset((void*)byte, G1CardTable::dirty_card_val(), last_byte - byte + 1);
return;
}

// skip young gen cards
if (*byte == G1CardTable::g1_young_card_val()) {
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/share/gc/g1/g1BarrierSet.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ inline void G1BarrierSet::write_ref_array_work(MemRegion mr) {
template <DecoratorSet decorators, typename T>
inline void G1BarrierSet::write_ref_field_post(T* field) {
volatile CardValue* byte = _card_table->byte_for(field);
if (G1BarrierSimple) {
*byte = G1CardTable::dirty_card_val();
return;
}
if (*byte != G1CardTable::g1_young_card_val()) {
// Take a slow path for cards in old
write_ref_field_post_slow(byte);
Expand Down
10 changes: 9 additions & 1 deletion src/hotspot/share/gc/g1/g1CardTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
#include "logging/log.hpp"

void G1CardTable::g1_mark_as_young(const MemRegion& mr) {
if (G1BarrierSimple) {
return;
}
CardValue *const first = byte_for(mr.start());
CardValue *const last = byte_after(mr.last());

Expand All @@ -37,7 +40,9 @@ void G1CardTable::g1_mark_as_young(const MemRegion& mr) {

#ifndef PRODUCT
// Non-product check of the cards covering mr against the g1_young_gen value.
//
// The check is skipped when G1BarrierSimple is enabled: in that mode
// g1_mark_as_young() returns without touching the card table, so the cards of
// a young region are not expected to hold the young value. The check must not
// also run unconditionally ahead of this guard, otherwise it still executes in
// simple mode.
void G1CardTable::verify_g1_young_region(MemRegion mr) {
  if (G1BarrierSimple) {
    return;
  }
  verify_region(mr, g1_young_gen, true);
}
#endif

Expand Down Expand Up @@ -69,6 +74,9 @@ void G1CardTable::initialize(G1RegionToSpaceMapper* mapper) {
}

bool G1CardTable::is_in_young(const void* p) const {
if (G1BarrierSimple) {
return G1CollectedHeap::heap()->heap_region_containing(p)->is_young();
}
volatile CardValue* card = byte_for(p);
return *card == G1CardTable::g1_young_card_val();
}
11 changes: 11 additions & 0 deletions src/hotspot/share/gc/g1/g1RedirtyCardsQueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,17 @@ BufferNodeList G1RedirtyCardsQueueSet::take_all_completed_buffers() {
return result;
}

void G1RedirtyCardsQueueSet::abandon_completed_buffers() {
BufferNodeList list = take_all_completed_buffers();
BufferNode* buffers_to_delete = list._head;
while (buffers_to_delete != nullptr) {
BufferNode* bn = buffers_to_delete;
buffers_to_delete = bn->next();
bn->set_next(nullptr);
deallocate_buffer(bn);
}
}

void G1RedirtyCardsQueueSet::update_tail(BufferNode* node) {
// Node is the tail of a (possibly single element) list just prepended to
// _list. If, after that prepend, node's follower is null, then node is
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/gc/g1/g1RedirtyCardsQueue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ class G1RedirtyCardsQueueSet : public PtrQueueSet {
// precondition: Must not be concurrent with buffer collection.
BufferNode* all_completed_buffers() const;
BufferNodeList take_all_completed_buffers();

void abandon_completed_buffers();
};

#endif // SHARE_GC_G1_G1REDIRTYCARDSQUEUE_HPP
39 changes: 34 additions & 5 deletions src/hotspot/share/gc/g1/g1RemSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1253,6 +1253,27 @@ class G1MergeHeapRootsTask : public WorkerTask {
size_t cards_skipped() const { return _cards_skipped; }
};

class G1MergeSimpleDirtyCardsRegionClosure : public HeapRegionClosure {
private:
G1RemSetScanState* _scan_state;
G1CardTable* _ct;

public:
G1MergeSimpleDirtyCardsRegionClosure(G1CollectedHeap* g1h, G1RemSetScanState* scan_state) :
_scan_state(scan_state),
_ct(g1h->card_table())
{}

bool do_heap_region(HeapRegion* r) {
if (r->is_old_or_humongous() && !r->in_collection_set()) {
uint region_index = r->hrm_index();
_scan_state->add_dirty_region(region_index);
_scan_state->set_chunk_range_dirty(region_index << HeapRegion::LogCardsPerRegion, HeapRegion::CardsPerRegion);
}
return false;
}
};

HeapRegionClaimer _hr_claimer;
G1RemSetScanState* _scan_state;
BufferNode::Stack _dirty_card_buffers;
Expand All @@ -1278,7 +1299,7 @@ class G1MergeHeapRootsTask : public WorkerTask {
_initial_evacuation(initial_evacuation),
_fast_reclaim_handled(false)
{
if (initial_evacuation) {
if (initial_evacuation && !G1BarrierSimple) {
G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
BufferNodeList buffers = dcqs.take_all_completed_buffers();
if (buffers._entry_count != 0) {
Expand Down Expand Up @@ -1338,11 +1359,19 @@ class G1MergeHeapRootsTask : public WorkerTask {
assert(merge_remset_phase == G1GCPhaseTimes::MergeRS, "Wrong merge phase");
G1GCParPhaseTimesTracker x(p, G1GCPhaseTimes::MergeLB, worker_id);

G1MergeLogBufferCardsClosure cl(g1h, _scan_state);
apply_closure_to_dirty_card_buffers(&cl, worker_id);
if (G1BarrierSimple) {
G1MergeSimpleDirtyCardsRegionClosure cl(g1h, _scan_state);
G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&cl, &_hr_claimer, worker_id);
p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, 0, G1GCPhaseTimes::MergeLBDirtyCards);
p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, 0, G1GCPhaseTimes::MergeLBSkippedCards);
} else {
G1MergeLogBufferCardsClosure cl(g1h, _scan_state);
apply_closure_to_dirty_card_buffers(&cl, worker_id);

p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeLBDirtyCards);
p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_skipped(), G1GCPhaseTimes::MergeLBSkippedCards);
}

p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeLBDirtyCards);
p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_skipped(), G1GCPhaseTimes::MergeLBSkippedCards);
}
}
};
Expand Down
6 changes: 5 additions & 1 deletion src/hotspot/share/gc/g1/g1YoungGCPostEvacuateTasks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,11 @@ class G1PostEvacuateCollectionSetCleanupTask2::RedirtyLoggedCardsTask : public G

// Disposes of the buffers collected in _rdcqs when the task ends.
//
// With G1BarrierSimple the buffers are abandoned; otherwise they are merged
// into the global dirty card queue set. Exactly one of the two happens: a
// merge performed ahead of this choice would hand the buffers to the dirty
// card queue set even in simple mode and leave nothing to abandon.
// Either way _rdcqs must be empty afterwards.
virtual ~RedirtyLoggedCardsTask() {
  G1DirtyCardQueueSet& dcq = G1BarrierSet::dirty_card_queue_set();
  if (G1BarrierSimple) {
    _rdcqs->abandon_completed_buffers();
  } else {
    dcq.merge_bufferlists(_rdcqs);
  }
  _rdcqs->verify_empty();
}

Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/share/gc/g1/g1_globals.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@
"related prediction sample. That sample must involve the same or "\
"more than that number of cards to be used.") \
\
product(bool, G1BarrierSimple, false, \
"Use simple G1 post barrier") \
\
GC_G1_EVACUATION_FAILURE_FLAGS(develop, \
develop_pd, \
product, \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
* @test TestG1ConcRefinementThreads
* @bug 8047976
* @requires vm.gc.G1 & vm.opt.G1ConcRefinementThreads == null
* @requires vm.opt.G1BarrierSimple == null
* @summary Tests argument processing for G1ConcRefinementThreads
* @library /test/lib
* @library /
Expand Down

0 comments on commit 693ec2e

Please sign in to comment.