Commit 18896fa2 authored by Simon Marlow

GC refactoring and cleanup

Now we keep any partially-full blocks in the gc_thread[] structs after
each GC, rather than moving them to the generation.  This should give
us slightly better locality (though I wasn't able to measure any
difference).

Also in this patch: better sanity checking in the THREADED_RTS build.
parent 8d809e0a
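
To make the new block flow concrete, here is a minimal standalone sketch (simplified stand-in types, not the actual RTS bdescr/gen_workspace structures; the real logic is in collect_gct_blocks() and prepare_collected_gen() in the diff below). The idea: after a GC only full scavenged blocks are attached to the generation, while partially-full blocks stay in the thread-local workspace and are reclaimed as from-space at the start of the next collection.

    #include <stddef.h>

    typedef struct block_ {
        struct block_ *link;
    } block;

    typedef struct {
        block *part_list;    /* partially-full blocks, kept across GCs */
        block *scavd_list;   /* full, scavenged blocks */
    } workspace;             /* conceptually one per (gc_thread, generation) */

    typedef struct {
        block *blocks;       /* blocks owned by the generation */
        block *old_blocks;   /* from-space for the next collection */
    } gen;

    /* End of a GC (cf. collect_gct_blocks): push only the full scavenged
     * blocks onto the generation; partial blocks stay with the thread. */
    static void attach_full_blocks(workspace *ws, gen *g)
    {
        block *bd, *prev = NULL;
        for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
            prev = bd;
        }
        if (prev != NULL) {
            prev->link = g->blocks;
            g->blocks = ws->scavd_list;
        }
        ws->scavd_list = NULL;
        /* ws->part_list is deliberately left untouched */
    }

    /* Start of the next GC (cf. prepare_collected_gen): the stashed partial
     * blocks become from-space, so the thread that filled them tends to
     * refill the same blocks, which is where the locality win would come from. */
    static void reclaim_partial_blocks(workspace *ws, gen *g)
    {
        block *bd, *next;
        for (bd = ws->part_list; bd != NULL; bd = next) {
            next = bd->link;
            bd->link = g->old_blocks;
            g->old_blocks = bd;
        }
        ws->part_list = NULL;
    }
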
@@ -87,7 +87,7 @@ typedef struct generation_ {
#if defined(THREADED_RTS)
char pad[128]; // make sure the following is
// on a separate cache line.
SpinLock sync_large_objects; // lock for large_objects
SpinLock sync; // lock for large_objects
// and scavenged_large_objects
#endif
@@ -101,9 +101,6 @@ typedef struct generation_ {
unsigned int n_old_blocks; // number of blocks in from-space
unsigned int live_estimate; // for sweeping: estimate of live data
bdescr * part_blocks; // partially-full scanned blocks
unsigned int n_part_blocks; // count of above
bdescr * scavenged_large_objects; // live large objs after GC (d-link)
unsigned int n_scavenged_large_blocks; // size (not count) of above
......
@@ -592,7 +592,7 @@ removeFromMVarBlockedQueue (StgTSO *tso)
if (mvar->head == q) {
mvar->head = q->link;
q->header.info = &stg_IND_info;
OVERWRITE_INFO(q, &stg_IND_info);
if (mvar->tail == q) {
mvar->tail = (StgMVarTSOQueue*)END_TSO_QUEUE;
}
@@ -602,10 +602,10 @@ removeFromMVarBlockedQueue (StgTSO *tso)
// we lose the tail pointer when the GC shorts out the IND.
// So we use MSG_NULL as a kind of non-dupable indirection;
// these are ignored by takeMVar/putMVar.
q->header.info = &stg_MSG_NULL_info;
OVERWRITE_INFO(q, &stg_MSG_NULL_info);
}
else {
q->header.info = &stg_IND_info;
OVERWRITE_INFO(q, &stg_IND_info);
}
// revoke the MVar operation
......
@@ -710,7 +710,7 @@ stat_exit(int alloc)
statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
statsPrintf("gen[%d].sync_large_objects: %"FMT_Word64"\n", g, generations[g].sync_large_objects.spin);
statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
}
}
#endif
@@ -772,8 +772,9 @@ void
statDescribeGens(void)
{
nat g, mut, lge, i;
lnat live, slop;
lnat gen_slop;
lnat tot_live, tot_slop;
lnat gen_live, gen_blocks;
bdescr *bd;
generation *gen;
@@ -785,25 +786,32 @@ statDescribeGens(void)
tot_live = 0;
tot_slop = 0;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
gen = &generations[g];
for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
lge++;
}
gen_live = genLiveWords(gen);
gen_blocks = genLiveBlocks(gen);
mut = 0;
for (i = 0; i < n_capabilities; i++) {
mut += countOccupied(capabilities[i].mut_lists[g]);
gen_live += gcThreadLiveWords(i,g);
gen_blocks += gcThreadLiveBlocks(i,g);
}
gen = &generations[g];
debugBelch("%5d %7d %9d", g, gen->max_blocks, mut);
for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
lge++;
}
live = gen->n_words + countOccupied(gen->large_objects);
slop = (gen->n_blocks + gen->n_large_blocks) * BLOCK_SIZE_W - live;
debugBelch("%8d %8d %8ld %8ld\n", gen->n_blocks, lge,
live*sizeof(W_), slop*sizeof(W_));
tot_live += live;
tot_slop += slop;
gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;
debugBelch("%8ld %8d %8ld %8ld\n", gen_blocks, lge,
gen_live*sizeof(W_), gen_slop*sizeof(W_));
tot_live += gen_live;
tot_slop += gen_slop;
}
debugBelch("----------------------------------------------------------\n");
debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));
......
@@ -628,7 +628,7 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
// will be discarded after the first overflow, being replaced by a
// non-moving 32k chunk.
if (old_stack->sp == old_stack->stack + old_stack->stack_size) {
frame->next_chunk = new_stack;
frame->next_chunk = (StgStack*)END_TSO_QUEUE; // dummy
}
tso->stackobj = new_stack;
......
@@ -248,7 +248,7 @@ evacuate_large(StgPtr p)
bd = Bdescr(p);
gen = bd->gen;
gen_no = bd->gen_no;
ACQUIRE_SPIN_LOCK(&gen->sync_large_objects);
ACQUIRE_SPIN_LOCK(&gen->sync);
// already evacuated?
if (bd->flags & BF_EVACUATED) {
@@ -259,7 +259,7 @@ evacuate_large(StgPtr p)
gct->failed_to_evac = rtsTrue;
TICK_GC_FAILED_PROMOTION();
}
RELEASE_SPIN_LOCK(&gen->sync_large_objects);
RELEASE_SPIN_LOCK(&gen->sync);
return;
}
@@ -297,16 +297,16 @@ evacuate_large(StgPtr p)
// them straight on the scavenged_large_objects list.
if (bd->flags & BF_PINNED) {
ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS);
if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync_large_objects); }
if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync); }
dbl_link_onto(bd, &new_gen->scavenged_large_objects);
new_gen->n_scavenged_large_blocks += bd->blocks;
if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync_large_objects); }
if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync); }
} else {
bd->link = ws->todo_large_objects;
ws->todo_large_objects = bd;
}
RELEASE_SPIN_LOCK(&gen->sync_large_objects);
RELEASE_SPIN_LOCK(&gen->sync);
}
/* ----------------------------------------------------------------------------
......
@@ -137,8 +137,8 @@ DECLARE_GCT
static void mark_root (void *user, StgClosure **root);
static void zero_static_object_list (StgClosure* first_static);
static nat initialise_N (rtsBool force_major_gc);
static void init_collected_gen (nat g, nat threads);
static void init_uncollected_gen (nat g, nat threads);
static void prepare_collected_gen (generation *gen);
static void prepare_uncollected_gen (generation *gen);
static void init_gc_thread (gc_thread *t);
static void resize_generations (void);
static void resize_nursery (void);
@@ -148,6 +148,7 @@ static StgWord inc_running (void);
static StgWord dec_running (void);
static void wakeup_gc_threads (nat n_threads, nat me);
static void shutdown_gc_threads (nat n_threads, nat me);
static void collect_gct_blocks (void);
#if 0 && defined(DEBUG)
static void gcCAFs (void);
@@ -174,7 +175,7 @@ GarbageCollect (rtsBool force_major_gc,
{
bdescr *bd;
generation *gen;
lnat live, allocated, max_copied, avg_copied, slop;
lnat live_blocks, live_words, allocated, max_copied, avg_copied;
gc_thread *saved_gct;
nat g, t, n;
@@ -274,7 +275,7 @@ GarbageCollect (rtsBool force_major_gc,
#endif
// check sanity *before* GC
IF_DEBUG(sanity, checkSanity(rtsTrue));
IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
// Initialise all our gc_thread structures
for (t = 0; t < n_gc_threads; t++) {
@@ -283,12 +284,11 @@ GarbageCollect (rtsBool force_major_gc,
// Initialise all the generations/steps that we're collecting.
for (g = 0; g <= N; g++) {
init_collected_gen(g,n_gc_threads);
prepare_collected_gen(&generations[g]);
}
// Initialise all the generations/steps that we're *not* collecting.
for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
init_uncollected_gen(g,n_gc_threads);
prepare_uncollected_gen(&generations[g]);
}
/* Allocate a mark stack if we're doing a major collection.
@@ -420,76 +420,6 @@ SET_GCT(gc_threads[0]);
}
}
// For each workspace, in each thread, move the copied blocks to the step
{
gc_thread *thr;
gen_workspace *ws;
bdescr *prev, *next;
for (t = 0; t < n_gc_threads; t++) {
thr = gc_threads[t];
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
ws = &thr->gens[g];
// Push the final block
if (ws->todo_bd) {
push_scanned_block(ws->todo_bd, ws);
}
ASSERT(gct->scan_bd == NULL);
ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
prev = NULL;
for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
ws->gen->n_words += bd->free - bd->start;
prev = bd;
}
if (prev != NULL) {
prev->link = ws->gen->blocks;
ws->gen->blocks = ws->scavd_list;
}
ws->gen->n_blocks += ws->n_scavd_blocks;
}
}
// Add all the partial blocks *after* we've added all the full
// blocks. This is so that we can grab the partial blocks back
// again and try to fill them up in the next GC.
for (t = 0; t < n_gc_threads; t++) {
thr = gc_threads[t];
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
ws = &thr->gens[g];
prev = NULL;
for (bd = ws->part_list; bd != NULL; bd = next) {
next = bd->link;
if (bd->free == bd->start) {
if (prev == NULL) {
ws->part_list = next;
} else {
prev->link = next;
}
freeGroup(bd);
ws->n_part_blocks--;
} else {
ws->gen->n_words += bd->free - bd->start;
prev = bd;
}
}
if (prev != NULL) {
prev->link = ws->gen->blocks;
ws->gen->blocks = ws->part_list;
}
ws->gen->n_blocks += ws->n_part_blocks;
ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks);
ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words);
}
}
}
// Finally: compact or sweep the oldest generation.
if (major_gc && oldest_gen->mark) {
if (oldest_gen->compact)
@@ -498,8 +428,6 @@ SET_GCT(gc_threads[0]);
sweep(oldest_gen);
}
/* run through all the generations/steps and tidy up
*/
copied = 0;
max_copied = 0;
avg_copied = 0;
@@ -525,6 +453,16 @@ SET_GCT(gc_threads[0]);
}
}
// Run through all the generations/steps and tidy up.
// We're going to:
// - count the amount of "live" data (live_words, live_blocks)
// - count the amount of "copied" data in this GC (copied)
// - free from-space
// - make to-space the new from-space (set BF_EVACUATED on all blocks)
//
live_words = 0;
live_blocks = 0;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
if (g == N) {
@@ -622,7 +560,6 @@ SET_GCT(gc_threads[0]);
gen->large_objects = gen->scavenged_large_objects;
gen->n_large_blocks = gen->n_scavenged_large_blocks;
gen->n_new_large_words = 0;
ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
}
else // for generations > N
{
@@ -637,16 +574,31 @@ SET_GCT(gc_threads[0]);
// add the new blocks we promoted during this GC
gen->n_large_blocks += gen->n_scavenged_large_blocks;
ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
}
ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
gen->scavenged_large_objects = NULL;
gen->n_scavenged_large_blocks = 0;
// Count "live" data
live_words += genLiveWords(gen);
live_blocks += genLiveBlocks(gen);
// add in the partial blocks in the gen_workspaces, but ignore gen 0
// if this is a local GC (we can't count another capability's part_list)
{
nat i;
for (i = 0; i < n_capabilities; i++) {
live_words += gcThreadLiveWords(i, gen->no);
live_blocks += gcThreadLiveBlocks(i, gen->no);
}
}
} // for all generations
// update the max size of older generations after a major GC
resize_generations();
// Calculate the amount of live data for stats.
live = calcLiveWords();
// Start a new pinned_object_block
for (n = 0; n < n_capabilities; n++) {
capabilities[n].pinned_object_block = NULL;
@@ -698,11 +650,6 @@ SET_GCT(gc_threads[0]);
}
}
// send exceptions to any threads which were about to die
RELEASE_SM_LOCK;
resurrectThreads(resurrected_threads);
ACQUIRE_SM_LOCK;
// Update the stable pointer hash table.
updateStablePtrTable(major_gc);
@@ -717,6 +664,17 @@ SET_GCT(gc_threads[0]);
scheduleFinalizers(cap, old_weak_ptr_list);
ACQUIRE_SM_LOCK;
// check sanity after GC
// before resurrectThreads(), because that might overwrite some
// closures, which will cause problems with THREADED where we don't
// fill slop.
IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
// send exceptions to any threads which were about to die
RELEASE_SM_LOCK;
resurrectThreads(resurrected_threads);
ACQUIRE_SM_LOCK;
if (major_gc) {
nat need, got;
need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
@@ -730,10 +688,7 @@ SET_GCT(gc_threads[0]);
}
}
// check sanity after GC
IF_DEBUG(sanity, checkSanity(rtsTrue));
// extra GC trace info
// extra GC trace info
IF_DEBUG(gc, statDescribeGens());
#ifdef DEBUG
@@ -758,8 +713,8 @@ SET_GCT(gc_threads[0]);
#endif
// ok, GC over: tell the stats department what happened.
slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
stat_endGC(allocated, live_words, copied, N, max_copied, avg_copied,
live_blocks * BLOCK_SIZE_W - live_words /* slop */);
// Guess which generation we'll collect *next* time
initialise_N(force_major_gc);
@@ -858,7 +813,21 @@ new_gc_thread (nat n, gc_thread *t)
ASSERT(g == ws->gen->no);
ws->my_gct = t;
ws->todo_bd = NULL;
// We want to call
// alloc_todo_block(ws,0);
// but can't, because it uses gct which isn't set up at this point.
// Hence, allocate a block for todo_bd manually:
{
bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
initBdescr(bd, ws->gen, ws->gen->to);
bd->flags = BF_EVACUATED;
bd->u.scan = bd->free = bd->start;
ws->todo_bd = bd;
ws->todo_free = bd->free;
ws->todo_lim = bd->start + BLOCK_SIZE_W;
}
ws->todo_q = newWSDeque(128);
ws->todo_overflow = NULL;
ws->n_todo_overflow = 0;
@@ -1012,6 +981,8 @@ loop:
scavenge_loop();
#endif
collect_gct_blocks();
// scavenge_loop() only exits when there's no work to do
r = dec_running();
@@ -1197,16 +1168,16 @@ releaseGCThreads (Capability *cap USED_IF_THREADS)
------------------------------------------------------------------------- */
static void
init_collected_gen (nat g, nat n_threads)
prepare_collected_gen (generation *gen)
{
nat t, i;
nat i, g, n;
gen_workspace *ws;
generation *gen;
bdescr *bd;
bdescr *bd, *next;
// Throw away the current mutable list. Invariant: the mutable
// list always has at least one block; this means we can avoid a
// check for NULL in recordMutable().
g = gen->no;
if (g != 0) {
for (i = 0; i < n_capabilities; i++) {
freeChain(capabilities[i].mut_lists[g]);
@@ -1231,9 +1202,35 @@ init_collected_gen (nat g, nat n_threads)
gen->live_estimate = 0;
// initialise the large object queues.
gen->scavenged_large_objects = NULL;
gen->n_scavenged_large_blocks = 0;
ASSERT(gen->scavenged_large_objects == NULL);
ASSERT(gen->n_scavenged_large_blocks == 0);
// grab all the partial blocks stashed in the gc_thread workspaces and
// move them to the old_blocks list of this gen.
for (n = 0; n < n_capabilities; n++) {
ws = &gc_threads[n]->gens[gen->no];
for (bd = ws->part_list; bd != NULL; bd = next) {
next = bd->link;
bd->link = gen->old_blocks;
gen->old_blocks = bd;
gen->n_old_blocks += bd->blocks;
}
ws->part_list = NULL;
ws->n_part_blocks = 0;
ASSERT(ws->scavd_list == NULL);
ASSERT(ws->n_scavd_blocks == 0);
if (ws->todo_free != ws->todo_bd->start) {
ws->todo_bd->free = ws->todo_free;
ws->todo_bd->link = gen->old_blocks;
gen->old_blocks = ws->todo_bd;
gen->n_old_blocks += ws->todo_bd->blocks;
alloc_todo_block(ws,0); // always has one block.
}
}
// mark the small objects as from-space
for (bd = gen->old_blocks; bd; bd = bd->link) {
bd->flags &= ~BF_EVACUATED;
@@ -1285,106 +1282,83 @@ init_collected_gen (nat g, nat n_threads)
}
}
}
// For each GC thread, for each step, allocate a "todo" block to
// store evacuated objects to be scavenged, and a block to store
// evacuated objects that do not need to be scavenged.
for (t = 0; t < n_threads; t++) {
ws = &gc_threads[t]->gens[g];
ws->todo_large_objects = NULL;
ws->part_list = NULL;
ws->n_part_blocks = 0;
// allocate the first to-space block; extra blocks will be
// chained on as necessary.
ws->todo_bd = NULL;
ASSERT(looksEmptyWSDeque(ws->todo_q));
alloc_todo_block(ws,0);
ws->todo_overflow = NULL;
ws->n_todo_overflow = 0;
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
}
}
/* ----------------------------------------------------------------------------
Save the mutable lists in saved_mut_lists
------------------------------------------------------------------------- */
static void
stash_mut_list (Capability *cap, nat gen_no)
{
cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
cap->mut_lists[gen_no] = allocBlock_sync();
}
/* ----------------------------------------------------------------------------
Initialise a generation that is *not* to be collected
------------------------------------------------------------------------- */
static void
init_uncollected_gen (nat g, nat threads)
prepare_uncollected_gen (generation *gen)
{
nat t, n;
gen_workspace *ws;
generation *gen;
bdescr *bd;
nat i;
ASSERT(gen->no > 0);
// save the current mutable lists for this generation, and
// allocate a fresh block for each one. We'll traverse these
// mutable lists as roots early on in the GC.
for (n = 0; n < n_capabilities; n++) {
capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
capabilities[n].mut_lists[g] = allocBlock();
for (i = 0; i < n_capabilities; i++) {
stash_mut_list(&capabilities[i], gen->no);
}
gen = &generations[g];
ASSERT(gen->scavenged_large_objects == NULL);
ASSERT(gen->n_scavenged_large_blocks == 0);
}
gen->scavenged_large_objects = NULL;
gen->n_scavenged_large_blocks = 0;
/* -----------------------------------------------------------------------------
Collect the completed blocks from a GC thread and attach them to
the generation.
-------------------------------------------------------------------------- */
for (t = 0; t < threads; t++) {
ws = &gc_threads[t]->gens[g];
ASSERT(looksEmptyWSDeque(ws->todo_q));
ws->todo_large_objects = NULL;
ws->part_list = NULL;
ws->n_part_blocks = 0;
static void
collect_gct_blocks (void)
{
nat g;
gen_workspace *ws;
bdescr *bd, *prev;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
ws = &gct->gens[g];
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
// there may still be a block attached to ws->todo_bd;
// leave it there to use next time.
if (ws->scavd_list != NULL) {
ACQUIRE_SPIN_LOCK(&ws->gen->sync);
ASSERT(gct->scan_bd == NULL);
ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
// If the block at the head of the list in this generation
// is less than 3/4 full, then use it as a todo block.
if (gen->blocks && isPartiallyFull(gen->blocks))
{
ws->todo_bd = gen->blocks;
ws->todo_free = ws->todo_bd->free;
ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
gen->blocks = gen->blocks->link;
gen->n_blocks -= 1;
gen->n_words -= ws->todo_bd->free - ws->todo_bd->start;
ws->todo_bd->link = NULL;
// we must scan from the current end point.
ws->todo_bd->u.scan = ws->todo_bd->free;
}
else
{
ws->todo_bd = NULL;
alloc_todo_block(ws,0);
}
}
prev = NULL;
for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
ws->gen->n_words += bd->free - bd->start;
prev = bd;
}