Commit 890b23de authored by Ben Gamari

Merge branches 'wip/gc/optimize' and 'wip/gc/test' into wip/gc/everything

......@@ -950,19 +950,23 @@
return (dst);
//
// Nonmoving write barrier helpers
//
// See Note [Update remembered set] in NonMovingMark.c.
#if defined(THREADED_RTS)
#define IF_WRITE_BARRIER_ENABLED \
#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
if (W_[nonmoving_write_barrier_enabled] != 0) (likely: False)
#else
// A similar measure is also taken in rts/NonMoving.h, but that isn't visible from C--
#define IF_WRITE_BARRIER_ENABLED \
#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
if (0)
#define nonmoving_write_barrier_enabled 0
#endif
// A useful helper for pushing a pointer to the update remembered set.
// See Note [Update remembered set] in NonMovingMark.c.
#define updateRemembSetPushPtr(p) \
IF_WRITE_BARRIER_ENABLED { \
IF_NONMOVING_WRITE_BARRIER_ENABLED { \
ccall updateRemembSetPushClosure_(BaseReg "ptr", p "ptr"); \
}
......@@ -74,6 +74,10 @@ extern "C" {
#define RTS_UNREACHABLE abort()
#endif
/* Prefetch primitives */
#define prefetchForRead(ptr) __builtin_prefetch(ptr, 0)
#define prefetchForWrite(ptr) __builtin_prefetch(ptr, 1)
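For orientation (not part of the patch): prefetchForRead and prefetchForWrite simply wrap GCC/Clang's __builtin_prefetch, where the second argument selects a read (0) or write (1) hint. A minimal, self-contained sketch of how such a helper is typically used follows; the summation loop and the prefetch distance of 8 are illustrative, not taken from the RTS.

/* Sketch only: the two macros are repeated here so the example stands alone. */
#include <stddef.h>

#define prefetchForRead(ptr)  __builtin_prefetch(ptr, 0)
#define prefetchForWrite(ptr) __builtin_prefetch(ptr, 1)

static long sum_with_prefetch(const long *xs, size_t n)
{
    long acc = 0;
    for (size_t i = 0; i < n; i++) {
        // Start loading a cache line we will need a few iterations from now;
        // the distance of 8 is arbitrary and purely illustrative.
        if (i + 8 < n) prefetchForRead(&xs[i + 8]);
        acc += xs[i];
    }
    return acc;
}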
/* Fix for mingw stat problem (done here so it's early enough) */
#if defined(mingw32_HOST_OS)
#define __MSVCRT__ 1
......
......@@ -21,4 +21,7 @@ void updateRemembSetPushClosure(Capability *cap, StgClosure *p);
void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p);
// Note that RTS code should not condition on this directly, but rather
// use the IF_NONMOVING_WRITE_BARRIER_ENABLED macro to ensure that
// the barrier is eliminated in the non-threaded RTS.
extern StgWord DLL_IMPORT_DATA_VAR(nonmoving_write_barrier_enabled);
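For orientation (not part of the patch): a self-contained C sketch of the pattern the comment above asks for, using stub types in place of the real RTS headers. Going through IF_NONMOVING_WRITE_BARRIER_ENABLED (defined later in this patch in NonMovingMark.h, and in Cmm.h for C--) lets the non-threaded build reduce the condition to `if (0)` so the compiler drops the barrier code entirely, which conditioning on the variable directly would not. The overwrite_field function and its parameters are hypothetical.

#include <stdbool.h>

typedef struct Capability Capability;      /* stand-ins for the RTS types */
typedef struct { int tag; } StgClosure;

static bool nonmoving_write_barrier_enabled = false;   /* stub for the real global */

static void updateRemembSetPushClosure(Capability *cap, StgClosure *p)
{
    (void)cap; (void)p;   /* stub: the real one records p in the update remembered set */
}

#if defined(THREADED_RTS)
#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
    if (__builtin_expect(nonmoving_write_barrier_enabled, 0))
#else
/* Non-threaded RTS: the condition is a constant, so the block disappears. */
#define IF_NONMOVING_WRITE_BARRIER_ENABLED if (0)
#endif

/* Hypothetical mutator update: push the overwritten pointer before storing. */
static void overwrite_field(Capability *cap, StgClosure **field, StgClosure *new_val)
{
    IF_NONMOVING_WRITE_BARRIER_ENABLED {
        updateRemembSetPushClosure(cap, *field);
    }
    *field = new_val;
}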
......@@ -74,7 +74,7 @@ test('length001',
# excessive amounts of stack space. So we specifically set a low
# stack limit and mark it as failing under a few conditions.
[extra_run_opts('+RTS -K8m -RTS'),
expect_fail_for(['normal', 'threaded1', 'llvm'])],
expect_fail_for(['normal', 'threaded1', 'llvm', 'nonmoving', 'nonmoving_thr', 'nonmoving_thr_ghc'])],
compile_and_run, [''])
test('ratio001', normal, compile_and_run, [''])
......
......@@ -2,7 +2,11 @@ test('heap_all',
[when(have_profiling(), extra_ways(['prof'])),
# These ways produce slightly different heap representations.
# Currently we don't test them.
omit_ways(['ghci', 'hpc'])
omit_ways(['ghci', 'hpc',
'nonmoving', 'nonmoving_thr', 'nonmoving_thr_ghc']),
# The debug RTS initializes some fields with 0xaa and so
# this test spuriously fails.
when(compiler_debugged(), skip)
],
compile_and_run, [''])
......
......@@ -262,7 +262,7 @@ loop:
// point to the BLOCKING_QUEUE from the BLACKHOLE
write_barrier(); // make the BQ visible, see Note [Heap memory barriers].
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
IF_NONMOVING_WRITE_BARRIER_ENABLED {
updateRemembSetPushClosure(cap, (StgClosure*)p);
}
((StgInd*)bh)->indirectee = (StgClosure *)bq;
......@@ -293,7 +293,7 @@ loop:
}
#endif
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
IF_NONMOVING_WRITE_BARRIER_ENABLED {
// We are about to overwrite bq->queue; make sure its current value
// makes it into the update remembered set
updateRemembSetPushClosure(cap, (StgClosure*)bq->queue);
......
......@@ -478,7 +478,7 @@ stg_copyArray_barrier ( W_ hdr_size, gcptr dst, W_ dst_off, W_ n)
end = p + WDS(n);
again:
IF_WRITE_BARRIER_ENABLED {
IF_NONMOVING_WRITE_BARRIER_ENABLED {
ccall updateRemembSetPushClosure_(BaseReg "ptr", W_[p] "ptr");
}
p = p + WDS(1);
......@@ -494,7 +494,7 @@ stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
W_ dst_p, src_p, bytes;
if (n > 0) {
IF_WRITE_BARRIER_ENABLED {
IF_NONMOVING_WRITE_BARRIER_ENABLED {
call stg_copyArray_barrier(SIZEOF_StgSmallMutArrPtrs,
dst, dst_off, n);
}
......@@ -515,7 +515,7 @@ stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n
W_ dst_p, src_p, bytes;
if (n > 0) {
IF_WRITE_BARRIER_ENABLED {
IF_NONMOVING_WRITE_BARRIER_ENABLED {
call stg_copyArray_barrier(SIZEOF_StgSmallMutArrPtrs,
dst, dst_off, n);
}
......
......@@ -297,8 +297,10 @@ static StgClosure *lock_tvar(Capability *cap,
} while (cas((void *)&(s -> current_value),
(StgWord)result, (StgWord)trec) != (StgWord)result);
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled && result)) {
updateRemembSetPushClosure(cap, result);
IF_NONMOVING_WRITE_BARRIER_ENABLED {
if (result)
updateRemembSetPushClosure(cap, result);
}
return result;
}
......@@ -323,8 +325,9 @@ static StgBool cond_lock_tvar(Capability *cap,
TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected);
w = cas((void *)&(s -> current_value), (StgWord)expected, (StgWord)trec);
result = (StgClosure *)w;
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled && result)) {
updateRemembSetPushClosure(cap, expected);
IF_NONMOVING_WRITE_BARRIER_ENABLED {
if (result)
updateRemembSetPushClosure(cap, expected);
}
TRACE("%p : %s", trec, result ? "success" : "failure");
return (result == expected);
......
......@@ -164,7 +164,8 @@ static void scheduleHandleThreadBlocked( StgTSO *t );
static bool scheduleHandleThreadFinished( Capability *cap, Task *task,
StgTSO *t );
static bool scheduleNeedHeapProfile(bool ready_to_gc);
static void scheduleDoGC(Capability **pcap, Task *task, bool force_major);
static void scheduleDoGC( Capability **pcap, Task *task,
bool force_major, bool deadlock_detect );
static void deleteThread (StgTSO *tso);
static void deleteAllThreads (void);
......@@ -264,7 +265,7 @@ schedule (Capability *initialCapability, Task *task)
case SCHED_INTERRUPTING:
debugTrace(DEBUG_sched, "SCHED_INTERRUPTING");
/* scheduleDoGC() deletes all the threads */
scheduleDoGC(&cap,task,true);
scheduleDoGC(&cap,task,true,false);
// after scheduleDoGC(), we must be shutting down. Either some
// other Capability did the final GC, or we did it above,
......@@ -561,7 +562,7 @@ run_thread:
}
if (ready_to_gc || scheduleNeedHeapProfile(ready_to_gc)) {
scheduleDoGC(&cap,task,false);
scheduleDoGC(&cap,task,false,false);
}
} /* end of while() */
}
......@@ -935,7 +936,7 @@ scheduleDetectDeadlock (Capability **pcap, Task *task)
// they are unreachable and will therefore be sent an
// exception. Any threads thus released will be immediately
// runnable.
scheduleDoGC (pcap, task, true/*force major GC*/);
scheduleDoGC (pcap, task, true/*force major GC*/, true/*deadlock detection*/);
cap = *pcap;
// when force_major == true. scheduleDoGC sets
// recent_activity to ACTIVITY_DONE_GC and turns off the timer
......@@ -1005,7 +1006,7 @@ scheduleProcessInbox (Capability **pcap USED_IF_THREADS)
while (!emptyInbox(cap)) {
// Executing messages might use heap, so we should check for GC.
if (doYouWantToGC(cap)) {
scheduleDoGC(pcap, cap->running_task, false);
scheduleDoGC(pcap, cap->running_task, false, false);
cap = *pcap;
}
......@@ -1552,9 +1553,11 @@ void releaseAllCapabilities(uint32_t n, Capability *keep_cap, Task *task)
* Perform a garbage collection if necessary
* -------------------------------------------------------------------------- */
// N.B. See Note [Deadlock detection under nonmoving collector] for rationale
// behind deadlock_detect argument.
static void
scheduleDoGC (Capability **pcap, Task *task USED_IF_THREADS,
bool force_major)
bool force_major, bool deadlock_detect)
{
Capability *cap = *pcap;
bool heap_census;
......@@ -1847,9 +1850,9 @@ delete_threads_and_gc:
// emerge they don't immediately re-enter the GC.
pending_sync = 0;
signalCondition(&sync_finished_cond);
GarbageCollect(collect_gen, heap_census, gc_type, cap, idle_cap);
GarbageCollect(collect_gen, heap_census, deadlock_detect, gc_type, cap, idle_cap);
#else
GarbageCollect(collect_gen, heap_census, 0, cap, NULL);
GarbageCollect(collect_gen, heap_census, deadlock_detect, 0, cap, NULL);
#endif
// If we're shutting down, don't leave any idle GC work to do.
......@@ -2506,7 +2509,7 @@ resumeThread (void *task_)
incall->suspended_tso = NULL;
incall->suspended_cap = NULL;
// we will modify tso->_link
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
IF_NONMOVING_WRITE_BARRIER_ENABLED {
updateRemembSetPushClosure(cap, (StgClosure *)tso->_link);
}
tso->_link = END_TSO_QUEUE;
......@@ -2723,7 +2726,7 @@ exitScheduler (bool wait_foreign USED_IF_THREADS)
nonmovingStop();
Capability *cap = task->cap;
waitForCapability(&cap,task);
scheduleDoGC(&cap,task,true);
scheduleDoGC(&cap,task,true,false);
ASSERT(task->incall->tso == NULL);
releaseCapability(cap);
}
......@@ -2791,7 +2794,7 @@ performGC_(bool force_major)
// TODO: do we need to traceTask*() here?
waitForCapability(&cap,task);
scheduleDoGC(&cap,task,force_major);
scheduleDoGC(&cap,task,force_major,false);
releaseCapability(cap);
boundTaskExiting(task);
}
......
......@@ -334,15 +334,16 @@ threadPaused(Capability *cap, StgTSO *tso)
}
#endif
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled
&& ip_THUNK(INFO_PTR_TO_STRUCT(bh_info)))) {
// We are about to replace a thunk with a blackhole.
// Add the free variables of the closure we are about to
// overwrite to the update remembered set.
// N.B. We caught the WHITEHOLE case above.
updateRemembSetPushThunkEager(cap,
THUNK_INFO_PTR_TO_STRUCT(bh_info),
(StgThunk *) bh);
IF_NONMOVING_WRITE_BARRIER_ENABLED {
if (ip_THUNK(INFO_PTR_TO_STRUCT(bh_info))) {
// We are about to replace a thunk with a blackhole.
// Add the free variables of the closure we are about to
// overwrite to the update remembered set.
// N.B. We caught the WHITEHOLE case above.
updateRemembSetPushThunkEager(cap,
THUNK_INFO_PTR_TO_STRUCT(bh_info),
(StgThunk *) bh);
}
}
// The payload of the BLACKHOLE points to the TSO
......
......@@ -723,7 +723,7 @@ threadStackUnderflow (Capability *cap, StgTSO *tso)
barf("threadStackUnderflow: not enough space for return values");
}
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
IF_NONMOVING_WRITE_BARRIER_ENABLED {
// ensure that values that we copy into the new stack are marked
// for the nonmoving collector. Note that these values won't
// necessarily form a full closure so we need to handle them
......
......@@ -50,7 +50,7 @@
\
prim_write_barrier; \
OVERWRITING_CLOSURE(p1); \
IF_WRITE_BARRIER_ENABLED { \
IF_NONMOVING_WRITE_BARRIER_ENABLED { \
ccall updateRemembSetPushThunk_(BaseReg, p1 "ptr"); \
} \
StgInd_indirectee(p1) = p2; \
......@@ -81,7 +81,7 @@ INLINE_HEADER void updateWithIndirection (Capability *cap,
/* See Note [Heap memory barriers] in SMP.h */
write_barrier();
OVERWRITING_CLOSURE(p1);
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) {
IF_NONMOVING_WRITE_BARRIER_ENABLED {
updateRemembSetPushThunk(cap, (StgThunk*)p1);
}
((StgInd *)p1)->indirectee = p2;
......
......@@ -69,12 +69,6 @@ alloc_for_copy (uint32_t size, uint32_t gen_no)
{
ASSERT(gen_no < RtsFlags.GcFlags.generations);
if (RtsFlags.GcFlags.useNonmoving && major_gc) {
// unconditionally promote to non-moving heap in major gc
gct->copied += size;
return nonmovingAllocate(gct->cap, size);
}
StgPtr to;
gen_workspace *ws;
......@@ -91,9 +85,34 @@ alloc_for_copy (uint32_t size, uint32_t gen_no)
}
}
if (RtsFlags.GcFlags.useNonmoving && gen_no == oldest_gen->no) {
gct->copied += size;
return nonmovingAllocate(gct->cap, size);
if (RtsFlags.GcFlags.useNonmoving) {
/* See Note [Deadlock detection under nonmoving collector]. */
if (deadlock_detect_gc)
gen_no = oldest_gen->no;
if (gen_no == oldest_gen->no) {
gct->copied += size;
to = nonmovingAllocate(gct->cap, size);
// Add segment to the todo list unless it's already there
// seg->todo_link == NULL means it is not yet in the todo list
struct NonmovingSegment *seg = nonmovingGetSegment(to);
if (!seg->todo_link) {
gen_workspace *ws = &gct->gens[oldest_gen->no];
seg->todo_link = ws->todo_seg;
ws->todo_seg = seg;
}
// The object which refers to this closure may have been aged (i.e.
// retained in a younger generation). Consequently, we must add the
// closure to the mark queue to ensure that it will be marked.
//
// However, if we are in a deadlock detection GC then we disable aging
// so there is no need.
if (major_gc && !deadlock_detect_gc)
markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure *) to);
return to;
}
}
ws = &gct->gens[gen_no]; // zero memory references here
......@@ -312,9 +331,10 @@ evacuate_large(StgPtr p)
*/
new_gen_no = bd->dest_no;
if (RtsFlags.GcFlags.useNonmoving && major_gc) {
if (deadlock_detect_gc) {
/* See Note [Deadlock detection under nonmoving collector]. */
new_gen_no = oldest_gen->no;
} else if (new_gen_no < gct->evac_gen_no) {
} else if (new_gen_no < gct->evac_gen_no) {
if (gct->eager_promotion) {
new_gen_no = gct->evac_gen_no;
} else {
......@@ -363,6 +383,13 @@ evacuate_large(StgPtr p)
STATIC_INLINE void
evacuate_static_object (StgClosure **link_field, StgClosure *q)
{
if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving)) {
// See Note [Static objects under the nonmoving collector] in Storage.c.
if (major_gc && !deadlock_detect_gc)
markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
return;
}
StgWord link = (StgWord)*link_field;
// See Note [STATIC_LINK fields] for how the link field bits work
......@@ -603,6 +630,8 @@ loop:
// NOTE: large objects in nonmoving heap are also marked with
// BF_NONMOVING. Those are moved to scavenged_large_objects list in
// mark phase.
if (major_gc && !deadlock_detect_gc)
markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
return;
}
......@@ -629,6 +658,13 @@ loop:
// they are not)
if (bd->flags & BF_COMPACT) {
evacuate_compact((P_)q);
// We may have evacuated the block to the nonmoving generation. If so
// we need to make sure it is added to the mark queue since the only
// reference to it may be from the moving heap.
if (major_gc && bd->flags & BF_NONMOVING && !deadlock_detect_gc) {
markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
}
return;
}
......@@ -636,6 +672,13 @@ loop:
*/
if (bd->flags & BF_LARGE) {
evacuate_large((P_)q);
// We may have evacuated the block to the nonmoving generation. If so
// we need to make sure it is added to the mark queue since the only
// reference to it may be from the moving heap.
if (major_gc && bd->flags & BF_NONMOVING && !deadlock_detect_gc) {
markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
}
return;
}
......@@ -937,6 +980,8 @@ evacuate_BLACKHOLE(StgClosure **p)
ASSERT((bd->flags & BF_COMPACT) == 0);
if (bd->flags & BF_NONMOVING) {
if (major_gc && !deadlock_detect_gc)
markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
return;
}
......
......@@ -104,6 +104,7 @@
*/
uint32_t N;
bool major_gc;
bool deadlock_detect_gc;
/* Data used for allocation area sizing.
*/
......@@ -194,6 +195,7 @@ StgPtr mark_sp; // pointer to the next unallocated mark stack entry
void
GarbageCollect (uint32_t collect_gen,
const bool do_heap_census,
const bool deadlock_detect,
uint32_t gc_type USED_IF_THREADS,
Capability *cap,
bool idle_cap[])
......@@ -271,7 +273,25 @@ GarbageCollect (uint32_t collect_gen,
N = collect_gen;
major_gc = (N == RtsFlags.GcFlags.generations-1);
if (major_gc) {
/* See Note [Deadlock detection under nonmoving collector]. */
deadlock_detect_gc = deadlock_detect;
#if defined(THREADED_RTS)
if (major_gc && RtsFlags.GcFlags.useNonmoving && concurrent_coll_running) {
/* If there is already a concurrent major collection running then
* there is no benefit to starting another.
* TODO: Catch heap-size runaway.
*/
N--;
collect_gen--;
major_gc = false;
}
#endif
/* N.B. The nonmoving collector works a bit differently. See
* Note [Static objects under the nonmoving collector].
*/
if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
prev_static_flag = static_flag;
static_flag =
static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
......@@ -718,6 +738,14 @@ GarbageCollect (uint32_t collect_gen,
}
} // for all generations
// Flush the update remembered set. See Note [Eager update remembered set
// flushing] in NonMovingMark.c
if (RtsFlags.GcFlags.useNonmoving) {
RELEASE_SM_LOCK;
nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
ACQUIRE_SM_LOCK;
}
// Mark and sweep the oldest generation.
// N.B. This can only happen after we've moved
// oldest_gen->scavenged_large_objects back to oldest_gen->large_objects.
......@@ -744,6 +772,11 @@ GarbageCollect (uint32_t collect_gen,
// so we need to mark those too.
// Note that in sequential case these lists will be appended with more
// weaks and threads found to be dead in mark.
#if !defined(THREADED_RTS)
// In the non-threaded runtime this is the only time we push to the
// upd_rem_set
nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
#endif
nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads);
ACQUIRE_SM_LOCK;
}
......
......@@ -17,9 +17,12 @@
#include "HeapAlloc.h"
void GarbageCollect (uint32_t force_major_gc,
void GarbageCollect (uint32_t collect_gen,
bool do_heap_census,
uint32_t gc_type, Capability *cap, bool idle_cap[]);
bool deadlock_detect,
uint32_t gc_type,
Capability *cap,
bool idle_cap[]);
typedef void (*evac_fn)(void *user, StgClosure **root);
......@@ -30,6 +33,8 @@ bool doIdleGCWork(Capability *cap, bool all);
extern uint32_t N;
extern bool major_gc;
/* See Note [Deadlock detection under nonmoving collector]. */
extern bool deadlock_detect_gc;
extern bdescr *mark_stack_bd;
extern bdescr *mark_stack_top_bd;
......
......@@ -148,14 +148,14 @@ markCAFs (evac_fn evac, void *user)
StgIndStatic *c;
for (c = dyn_caf_list;
c != (StgIndStatic*)END_OF_CAF_LIST;
((StgWord) c | STATIC_FLAG_LIST) != (StgWord)END_OF_CAF_LIST;
c = (StgIndStatic *)c->static_link)
{
c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c);
evac(user, &c->indirectee);
}
for (c = revertible_caf_list;
c != (StgIndStatic*)END_OF_CAF_LIST;
((StgWord) c | STATIC_FLAG_LIST) != (StgWord)END_OF_CAF_LIST;
c = (StgIndStatic *)c->static_link)
{
c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c);
......
......@@ -93,6 +93,10 @@ extern struct NonmovingHeap nonmovingHeap;
extern memcount nonmoving_live_words;
#if defined(THREADED_RTS)
extern bool concurrent_coll_running;
#endif
void nonmovingInit(void);
void nonmovingStop(void);
void nonmovingExit(void);
......@@ -161,22 +165,34 @@ INLINE_HEADER unsigned int nonmovingSegmentBlockSize(struct NonmovingSegment *se
return 1 << seg->block_size;
}
// How many blocks does the given segment contain? Also the size of the bitmap.
INLINE_HEADER unsigned int nonmovingSegmentBlockCount(struct NonmovingSegment *seg)
// How many blocks does a segment with the given block size have?
INLINE_HEADER unsigned int nonmovingBlockCount(uint8_t log_block_size)
{
unsigned int sz = nonmovingSegmentBlockSize(seg);
unsigned int segment_data_size = NONMOVING_SEGMENT_SIZE - sizeof(struct NonmovingSegment);
segment_data_size -= segment_data_size % SIZEOF_VOID_P;
return segment_data_size / (sz + 1);
unsigned int blk_size = 1 << log_block_size;
// N.B. +1 accounts for the byte in the mark bitmap.
return segment_data_size / (blk_size + 1);
}
// Get a pointer to the given block index
INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmoving_block_idx i)
unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size);
// How many blocks does the given segment contain? Also the size of the bitmap.
INLINE_HEADER unsigned int nonmovingSegmentBlockCount(struct NonmovingSegment *seg)
{
return nonmovingBlockCountFromSize(seg->block_size);
}
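For orientation (not part of the patch): a worked example of the block-count formula, with an assumed 32 KiB segment size and an assumed 64-byte segment header; the real values come from NONMOVING_SEGMENT_SIZE and sizeof(struct NonmovingSegment) in NonMoving.h.

#include <stdio.h>

#define NONMOVING_SEGMENT_SIZE (32 * 1024)   /* assumed: 32 KiB segments                */
#define SEGMENT_HEADER_SIZE    64            /* assumed: sizeof(struct NonmovingSegment) */
#define SIZEOF_VOID_P          8

static unsigned int block_count(unsigned char log_block_size)
{
    unsigned int data = NONMOVING_SEGMENT_SIZE - SEGMENT_HEADER_SIZE;
    data -= data % SIZEOF_VOID_P;             /* keep the data area word-aligned */
    unsigned int blk = 1u << log_block_size;
    /* Each block costs blk bytes of payload plus one byte of mark bitmap. */
    return data / (blk + 1);
}

int main(void)
{
    /* e.g. 16-byte blocks: (32768 - 64) / 17 = 1923 blocks per segment */
    printf("16-byte blocks per segment:  %u\n", block_count(4));
    printf("256-byte blocks per segment: %u\n", block_count(8));
    return 0;
}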
// Get a pointer to the given block index assuming that the block size is as
// given (avoiding a potential cache miss when this information is already
// available). The log_block_size argument must be equal to seg->block_size.
INLINE_HEADER void *nonmovingSegmentGetBlock_(struct NonmovingSegment *seg, uint8_t log_block_size, nonmoving_block_idx i)
{
ASSERT(log_block_size == seg->block_size);
// Block size in bytes
unsigned int blk_size = nonmovingSegmentBlockSize(seg);
unsigned int blk_size = 1 << log_block_size;
// Bitmap size in bytes
W_ bitmap_size = nonmovingSegmentBlockCount(seg) * sizeof(uint8_t);
W_ bitmap_size = nonmovingBlockCountFromSize(log_block_size) * sizeof(uint8_t);
// Where the actual data starts (address of the first block).
// Use ROUNDUP_BYTES_TO_WDS to align to word size. Note that
// ROUNDUP_BYTES_TO_WDS returns in _words_, not in _bytes_, so convert it back
......@@ -185,15 +201,26 @@ INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmo
return (void*)(data + i*blk_size);
}
// Get a pointer to the given block index.
INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmoving_block_idx i)
{
return nonmovingSegmentGetBlock_(seg, seg->block_size, i);
}
// Get the segment which a closure resides in. Assumes that pointer points into
// non-moving heap.
INLINE_HEADER struct NonmovingSegment *nonmovingGetSegment(StgPtr p)
INLINE_HEADER struct NonmovingSegment *nonmovingGetSegment_unchecked(StgPtr p)
{
ASSERT(HEAP_ALLOCED_GC(p) && (Bdescr(p)->flags & BF_NONMOVING));
const uintptr_t mask = ~NONMOVING_SEGMENT_MASK;
return (struct NonmovingSegment *) (((uintptr_t) p) & mask);
}
INLINE_HEADER struct NonmovingSegment *nonmovingGetSegment(StgPtr p)
{
ASSERT(HEAP_ALLOCED_GC(p) && (Bdescr(p)->flags & BF_NONMOVING));
return nonmovingGetSegment_unchecked(p);
}
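For orientation (not part of the patch): a standalone sketch of the address arithmetic used by nonmovingSegmentGetBlock_ and nonmovingGetSegment above. It assumes segments have a power-of-two size and are aligned to that size, so the segment of an interior pointer is recovered by masking off the low bits; the segment and header sizes are illustrative.

#include <stdint.h>
#include <stdio.h>

#define NONMOVING_SEGMENT_SIZE  ((uintptr_t)(32 * 1024))   /* assumed */
#define NONMOVING_SEGMENT_MASK  (NONMOVING_SEGMENT_SIZE - 1)
#define SEGMENT_HEADER_SIZE     ((uintptr_t)64)             /* assumed */

/* Offset of block i inside its segment: header, then one bitmap byte per
 * block (rounded up to a word boundary), then the blocks themselves. */
static uintptr_t block_offset(unsigned int block_count, unsigned int blk_size, unsigned int i)
{
    uintptr_t bitmap   = block_count;                       /* 1 byte per block */
    uintptr_t data_off = SEGMENT_HEADER_SIZE + ((bitmap + 7) & ~(uintptr_t)7);
    return data_off + (uintptr_t)i * blk_size;
}

/* Recover the segment base from an interior pointer, as nonmovingGetSegment
 * does, by clearing the low bits. */
static uintptr_t segment_of(uintptr_t p)
{
    return p & ~NONMOVING_SEGMENT_MASK;
}

int main(void)
{
    uintptr_t seg = 0x40008000;                /* a 32 KiB-aligned address */
    uintptr_t blk = seg + block_offset(1923, 16, 5);
    printf("segment_of(block 5) == segment base? %d\n", segment_of(blk) == seg);
    return 0;
}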
INLINE_HEADER nonmoving_block_idx nonmovingGetBlockIdx(StgPtr p)
{
ASSERT(HEAP_ALLOCED_GC(p) && (Bdescr(p)->flags & BF_NONMOVING));
......
......@@ -43,9 +43,16 @@ enum EntryType {
*/
typedef struct {
enum EntryType type;
// All pointers should be untagged
// Which kind of mark queue entry we have is determined by the low bits of
// the second word: they must be zero in the case of a mark_closure entry
// (since the second word of a mark_closure entry points to a pointer and
// pointers must be word-aligned). In the case of a mark_array we set them
// to 0x3 (the value of start_index is shifted to the left to accommodate
// this). null_entry where p==NULL is used to indicate the end of the queue.
union {
struct {
void *p; // must be NULL
} null_entry;
struct {
StgClosure *p; // the object to be marked
StgClosure **origin; // field where this reference was found.
......@@ -53,11 +60,23 @@ typedef struct {
} mark_closure;
struct {
const StgMutArrPtrs *array;
StgWord start_index;
StgWord start_index; // start index is shifted to the left by 16 bits
} mark_array;
};
} MarkQueueEnt;
INLINE_HEADER enum EntryType nonmovingMarkQueueEntryType(MarkQueueEnt *ent)
{
if (ent->null_entry.p == NULL) {
return NULL_ENTRY;
} else if (((uintptr_t) ent->mark_closure.origin & TAG_BITS) == 0) {
return MARK_CLOSURE;
} else {
ASSERT((ent->mark_array.start_index & TAG_BITS) == 0x3);
return MARK_ARRAY;
}
}
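For orientation (not part of the patch): a simplified model of the tagged-union encoding the comment above describes. An array entry stores (start_index << 16) | 0x3 in its second word; a closure entry stores the untagged origin pointer there, so its low bits are zero; a NULL first word marks the end of the queue. Field names, the low-bit mask value, and the encode/decode helpers are illustrative; the real ones live in NonMovingMark.c, which is not shown in this excerpt.

#include <stdint.h>
#include <assert.h>

#define TAG_MASK 0x7u   /* assumed low-bit mask; pointers are word-aligned */

enum EntryType { NULL_ENTRY, MARK_CLOSURE, MARK_ARRAY };

typedef struct {
    uintptr_t word0;    /* closure / array pointer, or NULL for end-of-queue        */
    uintptr_t word1;    /* origin pointer (untagged) or (start_index << 16) | 0x3   */
} Ent;

static Ent mk_closure_ent(void *p, void **origin)
{
    return (Ent){ (uintptr_t)p, (uintptr_t)origin };   /* low bits of word1 are 0 */
}

static Ent mk_array_ent(void *arr, uintptr_t start_index)
{
    return (Ent){ (uintptr_t)arr, (start_index << 16) | 0x3 };
}

static enum EntryType entry_type(const Ent *e)
{
    if (e->word0 == 0)               return NULL_ENTRY;
    if ((e->word1 & TAG_MASK) == 0)  return MARK_CLOSURE;
    assert((e->word1 & TAG_MASK) == 0x3);
    return MARK_ARRAY;
}

int main(void)
{
    static void *arr, *obj, *field;   /* stand-in objects */
    Ent a = mk_array_ent(&arr, 42);
    Ent c = mk_closure_ent(&obj, &field);
    assert(entry_type(&a) == MARK_ARRAY && (a.word1 >> 16) == 42);
    assert(entry_type(&c) == MARK_CLOSURE);
    return 0;
}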
typedef struct {
// index of first *unused* queue entry
uint32_t head;
......@@ -65,6 +84,9 @@ typedef struct {
MarkQueueEnt entries[];
} MarkQueueBlock;
// How far ahead in the mark queue to prefetch?
#define MARK_PREFETCH_QUEUE_DEPTH 5
/* The mark queue is not capable of concurrent read or write.
*
* invariants:
......@@ -83,9 +105,12 @@ typedef struct MarkQueue_ {
// Is this a mark queue or a capability-local update remembered set?
bool is_upd_rem_set;
// Marked objects outside of nonmoving heap, namely large and static
// objects.
HashTable *marked_objects;
#if MARK_PREFETCH_QUEUE_DEPTH > 0
// A ring-buffer of entries which we will mark next
MarkQueueEnt prefetch_queue[MARK_PREFETCH_QUEUE_DEPTH];
// The first free slot in prefetch_queue.
uint8_t prefetch_head;
#endif
} MarkQueue;
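For orientation (not part of the patch): a hedged sketch of the idea behind prefetch_queue and prefetch_head. Entries taken off the mark queue are parked for MARK_PREFETCH_QUEUE_DEPTH steps while their target objects are prefetched, and only the oldest parked entry is actually processed. The real logic lives in NonMovingMark.c, not shown in this excerpt; everything below is a standalone model.

#include <stddef.h>

#define MARK_PREFETCH_QUEUE_DEPTH 5

typedef struct { void *p; } Entry;

typedef struct {
    Entry    ring[MARK_PREFETCH_QUEUE_DEPTH];
    unsigned head;                            /* next slot to overwrite */
} PrefetchRing;

/* Push the entry we just took off the mark queue; get back the entry pushed
 * MARK_PREFETCH_QUEUE_DEPTH steps ago, whose object should now be in cache. */
static Entry swap_in(PrefetchRing *r, Entry incoming)
{
    Entry outgoing = r->ring[r->head];
    if (incoming.p)
        __builtin_prefetch(incoming.p, 0);    /* start the load early */
    r->ring[r->head] = incoming;
    r->head = (r->head + 1) % MARK_PREFETCH_QUEUE_DEPTH;
    return outgoing;                          /* may be empty while the ring warms up */
}

A mark loop would call swap_in on each dequeued entry, skip the empty entries returned while the ring warms up, and drain the ring once the queue itself is exhausted.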
/* While it shares its representation with MarkQueue, UpdRemSet differs in
......@@ -97,8 +122,11 @@ typedef struct {
MarkQueue queue;
} UpdRemSet;
// Number of blocks to allocate for a mark queue
#define MARK_QUEUE_BLOCKS 16
// The length of MarkQueueBlock.entries
#define MARK_QUEUE_BLOCK_ENTRIES ((BLOCK_SIZE - sizeof(MarkQueueBlock)) / sizeof(MarkQueueEnt))
#define MARK_QUEUE_BLOCK_ENTRIES ((MARK_QUEUE_BLOCKS * BLOCK_SIZE - sizeof(MarkQueueBlock)) / sizeof(MarkQueueEnt))
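For orientation (not part of the patch): a back-of-envelope capacity check for the enlarged mark queue chunks, assuming 4 KiB blocks, an 8-byte MarkQueueBlock header, and 16-byte entries; none of these values are taken from the headers.

#include <stdio.h>

int main(void)
{
    unsigned long BLOCK_SIZE = 4096;   /* assumed */
    unsigned long header     = 8;      /* assumed sizeof(MarkQueueBlock) */
    unsigned long entry      = 16;     /* assumed sizeof(MarkQueueEnt)   */
    unsigned long old_cap = ( 1 * BLOCK_SIZE - header) / entry;
    unsigned long new_cap = (16 * BLOCK_SIZE - header) / entry;
    printf("entries per queue chunk: %lu -> %lu\n", old_cap, new_cap);   /* 255 -> 4095 */
    return 0;
}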
extern bdescr *nonmoving_large_objects, *nonmoving_marked_large_objects;
extern memcount n_nonmoving_large_blocks, n_nonmoving_marked_large_blocks;
......@@ -115,6 +143,15 @@ extern StgIndStatic *debug_caf_list_snapshot;
extern MarkQueue *current_mark_queue;
extern bdescr *upd_rem_set_block_list;
// A similar macro is defined in includes/Cmm.h for C-- code.
#if defined(THREADED_RTS)
#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled))
#else
#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
if (0)
#endif
void nonmovingMarkInitUpdRemSet(void);
void init_upd_rem_set(UpdRemSet *rset);
......@@ -143,8 +180,10 @@ void nonmovingResurrectThreads(struct MarkQueue_ *queue, StgTSO **resurrected_th
bool nonmovingIsAlive(StgClosure *p);
void nonmovingMarkDeadWeak(struct MarkQueue_ *queue, StgWeak *w);
void nonmovingMarkLiveWeak(struct MarkQueue_ *queue, StgWeak *w);
void nonmovingAddUpdRemSetBlocks(struct MarkQueue_ *rset);
void markQueuePush(MarkQueue *q, const MarkQueueEnt *ent);
void markQueuePushClosureGC(MarkQueue *q, StgClosure *p);
void markQueuePushClosure(MarkQueue *q,
StgClosure *p,
StgClosure **origin);
......
......@@ -16,6 +16,7 @@ nonmovingScavengeOne (StgClosure *q)
ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
StgPtr p = (StgPtr)q;
const StgInfoTable *info = get_itbl(q);
const bool saved_eager_promotion = gct->eager_promotion;
switch (info->type) {
......@@ -23,9 +24,11 @@ nonmovingScavengeOne (StgClosure *q)
case MVAR_DIRTY:
{
StgMVar *mvar = ((StgMVar *)p);
gct->eager_promotion = false;
evacuate((StgClosure **)&mvar->head);
evacuate((StgClosure **)&mvar->tail);
evacuate((StgClosure **)&mvar->value);
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
mvar->header.info = &stg_MVAR_DIRTY_info;
} else {
......@@ -37,8 +40,10 @@ nonmovingScavengeOne (StgClosure *q)
case TVAR:
{
StgTVar *tvar = ((StgTVar *)p);
gct->eager_promotion = false;
evacuate((StgClosure **)&tvar->current_value);
evacuate((StgClosure **)&tvar->first_watch_queue_entry);
gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {