Commit 214b3663 authored by Simon Marlow

GC refactoring, remove "steps"

The GC had a two-level structure, G generations each of T steps.
Steps are for aging within a generation, mostly to avoid premature
promotion.  

Measurements show that more than 2 steps is almost never worthwhile,
and 1 step is usually worse than 2.  In theory fractional steps are
possible, so the ideal number of steps is somewhere between 1 and 3.
GHC's default has always been 2.

We can implement 2 steps quite straightforwardly by having each block
point to the generation to which objects in that block should be
promoted, so blocks in the nursery point to generation 0, and blocks
in gen 0 point to gen 1, and so on.

This commit removes the explicit step structures, merging generations
with steps, thus simplifying a lot of code.  Performance is
unaffected.  The tunable number of steps is now gone, although it may
be replaced in the future by a way to tune the aging in generation 0.
parent 7cb1d927
......@@ -385,7 +385,7 @@
// allocate() - this includes many of the primops.
#define MAYBE_GC(liveness,reentry) \
if (bdescr_link(CurrentNursery) == NULL || \
step_n_large_blocks(StgRegTable_rNursery(BaseReg)) >= CInt[alloc_blocks_lim]) { \
generation_n_large_blocks(W_[g0]) >= CInt[alloc_blocks_lim]) { \
R9 = liveness; \
R10 = reentry; \
HpAlloc = 0; \
......
......@@ -249,8 +249,7 @@ main(int argc, char *argv[])
struct_size(generation);
struct_field(generation, mut_list);
struct_field(step, n_large_blocks);
struct_field(generation, n_large_blocks);
struct_size(CostCentreStack);
struct_field(CostCentreStack, ccsID);
......
......@@ -57,8 +57,8 @@ typedef struct bdescr_ {
StgPtr scan; /* scan pointer for copying GC */
} u;
struct step_ *step; /* step */
struct step_ *dest; /* destination step */
struct generation_ *gen; /* generation */
struct generation_ *dest; /* destination gen */
StgWord32 blocks; /* no. of blocks (if grp head, 0 otherwise) */
......
......@@ -53,24 +53,32 @@
*
* ------------------------------------------------------------------------- */
typedef struct step_ {
unsigned int no; // step number in this generation
unsigned int abs_no; // absolute step number
// Descriptor for a nursery; StgRegTable's rNursery field points at one of
// these.
typedef struct nursery_ {
bdescr * blocks; // block descriptor(s) belonging to this nursery -- presumably the head of a linked chain; TODO confirm
unsigned int n_blocks; // presumably the number of blocks reachable from `blocks`; TODO confirm
} nursery;
struct generation_ * gen; // generation this step belongs to
unsigned int gen_no; // generation number (cached)
bdescr * blocks; // blocks in this step
unsigned int n_blocks; // number of blocks
unsigned int n_words; // number of words
typedef struct generation_ {
unsigned int no; // generation number
struct step_ * to; // destination step for live objects
bdescr * blocks; // blocks in this gen
unsigned int n_blocks; // number of blocks
unsigned int n_words; // number of words
bdescr * large_objects; // large objects (doubly linked)
unsigned int n_large_blocks; // no. of blocks used by large objs
bdescr * large_objects; // large objects (doubly linked)
unsigned int n_large_blocks; // no. of blocks used by large objs
StgTSO * threads; // threads in this step
unsigned int max_blocks; // max blocks
bdescr *mut_list; // mut objects in this gen (not G0)
StgTSO * threads; // threads in this gen
// linked via global_link
struct generation_ *to; // destination gen for live objects
// stats information
unsigned int collections;
unsigned int par_collections;
unsigned int failed_promotions;
// ------------------------------------
// Fields below are used during GC only
......@@ -85,13 +93,15 @@ typedef struct step_ {
int mark; // mark (not copy)? (old gen only)
int compact; // compact (not sweep)? (old gen only)
// During GC, if we are collecting this step, blocks and n_blocks
// During GC, if we are collecting this gen, blocks and n_blocks
// are copied into the following two fields. After GC, these blocks
// are freed.
bdescr * old_blocks; // bdescr of first from-space block
unsigned int n_old_blocks; // number of blocks in from-space
unsigned int live_estimate; // for sweeping: estimate of live data
bdescr * saved_mut_list;
bdescr * part_blocks; // partially-full scanned blocks
unsigned int n_part_blocks; // count of above
......@@ -101,32 +111,11 @@ typedef struct step_ {
bdescr * bitmap; // bitmap for compacting collection
StgTSO * old_threads;
} step;
typedef struct generation_ {
unsigned int no; // generation number
step * steps; // steps
unsigned int n_steps; // number of steps
unsigned int max_blocks; // max blocks in step 0
bdescr *mut_list; // mut objects in this gen (not G0)
// stats information
unsigned int collections;
unsigned int par_collections;
unsigned int failed_promotions;
// temporary use during GC:
bdescr *saved_mut_list;
} generation;
extern generation * generations;
extern generation * g0;
extern generation * oldest_gen;
extern step * all_steps;
extern nat total_steps;
/* -----------------------------------------------------------------------------
Generic allocation
......@@ -194,11 +183,11 @@ void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p);
/* (needed when dynamic libraries are used). */
extern rtsBool keepCAFs;
INLINE_HEADER void initBdescr(bdescr *bd, step *step)
INLINE_HEADER void initBdescr(bdescr *bd, generation *gen, generation *dest)
{
bd->step = step;
bd->gen_no = step->gen_no;
bd->dest = step->to;
bd->gen = gen;
bd->gen_no = gen->no;
bd->dest = dest;
}
#endif /* RTS_STORAGE_GC_H */
......@@ -80,10 +80,10 @@ typedef struct StgRegTable_ {
StgPtr rSpLim;
StgPtr rHp;
StgPtr rHpLim;
struct StgTSO_ *rCurrentTSO;
struct step_ *rNursery;
struct bdescr_ *rCurrentNursery; /* Hp/HpLim point into this block */
struct bdescr_ *rCurrentAlloc; /* for allocation using allocate() */
struct StgTSO_ * rCurrentTSO;
struct nursery_ * rNursery;
struct bdescr_ * rCurrentNursery; /* Hp/HpLim point into this block */
struct bdescr_ * rCurrentAlloc; /* for allocation using allocate() */
StgWord rHpAlloc; /* number of *bytes* being allocated in heap */
StgWord rRet; // holds the return code of the thread
} StgRegTable;
......
......@@ -85,7 +85,7 @@ arenaAlloc( Arena *arena, size_t size )
arena_blocks += req_blocks;
bd->gen_no = 0;
bd->step = NULL;
bd->gen = NULL;
bd->dest = NULL;
bd->flags = 0;
bd->free = bd->start;
......
......@@ -943,7 +943,7 @@ findPtrBlocks (StgPtr p, bdescr *bd, StgPtr arr[], int arr_size, int i)
void
findPtr(P_ p, int follow)
{
nat s, g;
nat g;
bdescr *bd;
#if defined(__GNUC__)
const int arr_size = 1024;
......@@ -955,13 +955,11 @@ findPtr(P_ p, int follow)
searched = 0;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (s = 0; s < generations[g].n_steps; s++) {
bd = generations[g].steps[s].blocks;
i = findPtrBlocks(p,bd,arr,arr_size,i);
bd = generations[g].steps[s].large_objects;
i = findPtrBlocks(p,bd,arr,arr_size,i);
if (i >= arr_size) return;
}
bd = generations[g].blocks;
i = findPtrBlocks(p,bd,arr,arr_size,i);
bd = generations[g].large_objects;
i = findPtrBlocks(p,bd,arr,arr_size,i);
if (i >= arr_size) return;
}
if (follow && i == 1) {
debugBelch("-->\n");
......
......@@ -1067,7 +1067,7 @@ heapCensusChain( Census *census, bdescr *bd )
void
heapCensus( void )
{
nat g, s;
nat g;
Census *census;
census = &censuses[era];
......@@ -1085,17 +1085,11 @@ heapCensus( void )
#endif
// Traverse the heap, collecting the census info
if (RtsFlags.GcFlags.generations == 1) {
heapCensusChain( census, g0->steps[0].blocks );
} else {
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (s = 0; s < generations[g].n_steps; s++) {
heapCensusChain( census, generations[g].steps[s].blocks );
// Are we interested in large objects? might be
// confusing to include the stack in a heap profile.
heapCensusChain( census, generations[g].steps[s].large_objects );
}
}
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
heapCensusChain( census, generations[g].blocks );
// Are we interested in large objects? might be
// confusing to include the stack in a heap profile.
heapCensusChain( census, generations[g].large_objects );
}
// dump out the census info
......
......@@ -1118,7 +1118,7 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
{
bdescr *x;
for (x = bd; x < bd + blocks; x++) {
initBdescr(x,cap->r.rNursery);
initBdescr(x,g0,g0);
x->free = x->start;
x->flags = 0;
}
......@@ -1378,7 +1378,7 @@ scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major)
if (sched_state < SCHED_INTERRUPTING
&& RtsFlags.ParFlags.parGcEnabled
&& N >= RtsFlags.ParFlags.parGcGen
&& ! oldest_gen->steps[0].mark)
&& ! oldest_gen->mark)
{
gc_type = PENDING_GC_PAR;
} else {
......@@ -1580,7 +1580,7 @@ forkProcess(HsStablePtr *entry
pid_t pid;
StgTSO* t,*next;
Capability *cap;
nat s;
nat g;
#if defined(THREADED_RTS)
if (RtsFlags.ParFlags.nNodes > 1) {
......@@ -1628,8 +1628,8 @@ forkProcess(HsStablePtr *entry
// all Tasks, because they correspond to OS threads that are
// now gone.
for (s = 0; s < total_steps; s++) {
for (t = all_steps[s].threads; t != END_TSO_QUEUE; t = next) {
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) {
if (t->what_next == ThreadRelocated) {
next = t->_link;
} else {
......@@ -1655,8 +1655,8 @@ forkProcess(HsStablePtr *entry
// Empty the threads lists. Otherwise, the garbage
// collector may attempt to resurrect some of these threads.
for (s = 0; s < total_steps; s++) {
all_steps[s].threads = END_TSO_QUEUE;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
generations[g].threads = END_TSO_QUEUE;
}
// Wipe the task list, except the current Task.
......@@ -1710,19 +1710,19 @@ deleteAllThreads ( Capability *cap )
// NOTE: only safe to call if we own all capabilities.
StgTSO* t, *next;
nat s;
nat g;
debugTrace(DEBUG_sched,"deleting all threads");
for (s = 0; s < total_steps; s++) {
for (t = all_steps[s].threads; t != END_TSO_QUEUE; t = next) {
if (t->what_next == ThreadRelocated) {
next = t->_link;
} else {
next = t->global_link;
deleteThread(cap,t);
}
}
}
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) {
if (t->what_next == ThreadRelocated) {
next = t->_link;
} else {
next = t->global_link;
deleteThread(cap,t);
}
}
}
// The run queue now contains a bunch of ThreadKilled threads. We
// must not throw these away: the main thread(s) will be in there
......@@ -2655,14 +2655,14 @@ resurrectThreads (StgTSO *threads)
{
StgTSO *tso, *next;
Capability *cap;
step *step;
generation *gen;
for (tso = threads; tso != END_TSO_QUEUE; tso = next) {
next = tso->global_link;
step = Bdescr((P_)tso)->step;
tso->global_link = step->threads;
step->threads = tso;
gen = Bdescr((P_)tso)->gen;
tso->global_link = gen->threads;
gen->threads = tso;
debugTrace(DEBUG_sched, "resurrecting thread %lu", (unsigned long)tso->id);
......@@ -2719,7 +2719,7 @@ performPendingThrowTos (StgTSO *threads)
StgTSO *tso, *next;
Capability *cap;
Task *task, *saved_task;;
step *step;
generation *gen;
task = myTask();
cap = task->cap;
......@@ -2727,9 +2727,9 @@ performPendingThrowTos (StgTSO *threads)
for (tso = threads; tso != END_TSO_QUEUE; tso = next) {
next = tso->global_link;
step = Bdescr((P_)tso)->step;
tso->global_link = step->threads;
step->threads = tso;
gen = Bdescr((P_)tso)->gen;
tso->global_link = gen->threads;
gen->threads = tso;
debugTrace(DEBUG_sched, "performing blocked throwTo to thread %lu", (unsigned long)tso->id);
......
......@@ -701,14 +701,12 @@ stat_exit(int alloc)
#endif
#if defined(THREADED_RTS) && defined(PROF_SPIN)
{
nat g, s;
nat g;
statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (s = 0; s < generations[g].n_steps; s++) {
statsPrintf("gen[%d].steps[%d].sync_large_objects: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_large_objects.spin);
}
statsPrintf("gen[%d].sync_large_objects: %"FMT_Word64"\n", g, generations[g].sync_large_objects.spin);
}
}
#endif
......@@ -769,17 +767,17 @@ stat_exit(int alloc)
void
statDescribeGens(void)
{
nat g, s, mut, lge;
nat g, mut, lge;
lnat live, slop;
lnat tot_live, tot_slop;
bdescr *bd;
step *step;
generation *gen;
debugBelch(
"-----------------------------------------------------------------\n"
" Gen Max Mut-list Step Blocks Large Live Slop\n"
" Blocks Bytes Objects \n"
"-----------------------------------------------------------------\n");
"----------------------------------------------------------\n"
" Gen Max Mut-list Blocks Large Live Slop\n"
" Blocks Bytes Objects \n"
"----------------------------------------------------------\n");
tot_live = 0;
tot_slop = 0;
......@@ -789,27 +787,23 @@ statDescribeGens(void)
mut += (bd->free - bd->start) * sizeof(W_);
}
debugBelch("%5d %7d %9d", g, generations[g].max_blocks, mut);
gen = &generations[g];
for (s = 0; s < generations[g].n_steps; s++) {
step = &generations[g].steps[s];
for (bd = step->large_objects, lge = 0; bd; bd = bd->link) {
lge++;
}
live = step->n_words + countOccupied(step->large_objects);
if (s != 0) {
debugBelch("%23s","");
debugBelch("%5d %7d %9d", g, gen->max_blocks, mut);
for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
lge++;
}
slop = (step->n_blocks + step->n_large_blocks) * BLOCK_SIZE_W - live;
debugBelch("%6d %8d %8d %8ld %8ld\n", s, step->n_blocks, lge,
live = gen->n_words + countOccupied(gen->large_objects);
slop = (gen->n_blocks + gen->n_large_blocks) * BLOCK_SIZE_W - live;
debugBelch("%8d %8d %8ld %8ld\n", gen->n_blocks, lge,
live*sizeof(W_), slop*sizeof(W_));
tot_live += live;
tot_slop += slop;
}
}
debugBelch("-----------------------------------------------------------------\n");
debugBelch("%48s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));
debugBelch("-----------------------------------------------------------------\n");
debugBelch("----------------------------------------------------------\n");
debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));
debugBelch("----------------------------------------------------------\n");
debugBelch("\n");
}
......
......@@ -102,8 +102,8 @@ createThread(Capability *cap, nat size)
*/
ACQUIRE_LOCK(&sched_mutex);
tso->id = next_thread_id++; // while we have the mutex
tso->global_link = cap->r.rNursery->threads;
cap->r.rNursery->threads = tso;
tso->global_link = g0->threads;
g0->threads = tso;
RELEASE_LOCK(&sched_mutex);
// ToDo: report the stack size in the event?
......@@ -387,7 +387,7 @@ void
printAllThreads(void)
{
StgTSO *t, *next;
nat i, s;
nat i, g;
Capability *cap;
# if defined(GRAN)
......@@ -415,8 +415,8 @@ printAllThreads(void)
}
debugBelch("other threads:\n");
for (s = 0; s < total_steps; s++) {
for (t = all_steps[s].threads; t != END_TSO_QUEUE; t = next) {
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) {
if (t->why_blocked != NotBlocked) {
printThreadStatus(t);
}
......
......@@ -58,7 +58,7 @@ static void initMBlock(void *mblock);
The following fields are not used by the allocator:
bd->flags
bd->gen_no
bd->step
bd->gen
bd->dest
Exceptions: we don't maintain invariants for all the blocks within a
......@@ -470,7 +470,7 @@ freeGroup(bdescr *p)
ASSERT(p->free != (P_)-1);
p->free = (void *)-1; /* indicates that this block is free */
p->step = NULL;
p->gen = NULL;
p->gen_no = 0;
/* fill the block group with garbage if sanity checking is on */
IF_DEBUG(sanity,memset(p->start, 0xaa, p->blocks * BLOCK_SIZE));
......
......@@ -875,7 +875,7 @@ update_fwd_compact( bdescr *blocks )
}
static nat
update_bkwd_compact( step *stp )
update_bkwd_compact( generation *gen )
{
StgPtr p, free;
#if 0
......@@ -886,7 +886,7 @@ update_bkwd_compact( step *stp )
nat size, free_blocks;
StgWord iptr;
bd = free_bd = stp->old_blocks;
bd = free_bd = gen->old_blocks;
free = free_bd->start;
free_blocks = 1;
......@@ -965,8 +965,8 @@ update_bkwd_compact( step *stp )
void
compact(StgClosure *static_objects)
{
nat g, s, blocks;
step *stp;
nat g, blocks;
generation *gen;
// 1. thread the roots
markCapabilities((evac_fn)thread_root, NULL);
......@@ -1000,8 +1000,8 @@ compact(StgClosure *static_objects)
}
// the global thread list
for (s = 0; s < total_steps; s++) {
thread((void *)&all_steps[s].threads);
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
thread((void *)&generations[g].threads);
}
// any threads resurrected during this GC
......@@ -1031,30 +1031,24 @@ compact(StgClosure *static_objects)
// 2. update forward ptrs
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (s = 0; s < generations[g].n_steps; s++) {
if (g==0 && s ==0) continue;
stp = &generations[g].steps[s];
debugTrace(DEBUG_gc, "update_fwd: %d.%d",
stp->gen->no, stp->no);
update_fwd(stp->blocks);
update_fwd_large(stp->scavenged_large_objects);
if (g == RtsFlags.GcFlags.generations-1 && stp->old_blocks != NULL) {
debugTrace(DEBUG_gc, "update_fwd: %d.%d (compact)",
stp->gen->no, stp->no);
update_fwd_compact(stp->old_blocks);
}
gen = &generations[g];
debugTrace(DEBUG_gc, "update_fwd: %d", g);
update_fwd(gen->blocks);
update_fwd_large(gen->scavenged_large_objects);
if (g == RtsFlags.GcFlags.generations-1 && gen->old_blocks != NULL) {
debugTrace(DEBUG_gc, "update_fwd: %d (compact)", g);
update_fwd_compact(gen->old_blocks);
}
}
// 3. update backward ptrs
stp = &oldest_gen->steps[0];
if (stp->old_blocks != NULL) {
blocks = update_bkwd_compact(stp);
gen = oldest_gen;
if (gen->old_blocks != NULL) {
blocks = update_bkwd_compact(gen);
debugTrace(DEBUG_gc,
"update_bkwd: %d.%d (compact, old: %d blocks, now %d blocks)",
stp->gen->no, stp->no,
stp->n_old_blocks, blocks);
stp->n_old_blocks = blocks;
"update_bkwd: %d (compact, old: %d blocks, now %d blocks)",
gen->no, gen->n_old_blocks, blocks);
gen->n_old_blocks = blocks;
}
}
......@@ -51,28 +51,28 @@ STATIC_INLINE void evacuate_large(StgPtr p);
-------------------------------------------------------------------------- */
STATIC_INLINE StgPtr
alloc_for_copy (nat size, step *stp)
alloc_for_copy (nat size, generation *gen)
{
StgPtr to;
step_workspace *ws;
gen_workspace *ws;
/* Find out where we're going, using the handy "to" pointer in
* the step of the source object. If it turns out we need to
* the gen of the source object. If it turns out we need to
* evacuate to an older generation, adjust it here (see comment
* by evacuate()).
*/
if (stp < gct->evac_step) {
if (gen < gct->evac_gen) {
if (gct->eager_promotion) {
stp = gct->evac_step;
gen = gct->evac_gen;
} else {
gct->failed_to_evac = rtsTrue;
}
}
ws = &gct->steps[stp->abs_no];
// this compiles to a single mem access to stp->abs_no only
ws = &gct->gens[gen->no];
// this compiles to a single mem access to gen->no only
/* chain a new block onto the to-space for the destination step if
/* chain a new block onto the to-space for the destination gen if
* necessary.
*/
to = ws->todo_free;
......@@ -91,12 +91,12 @@ alloc_for_copy (nat size, step *stp)
STATIC_INLINE GNUC_ATTR_HOT void
copy_tag(StgClosure **p, const StgInfoTable *info,
StgClosure *src, nat size, step *stp, StgWord tag)
StgClosure *src, nat size, generation *gen, StgWord tag)
{
StgPtr to, from;
nat i;
to = alloc_for_copy(size,stp);