Commit dbbf15c0 authored by simonmarhaskell@gmail.com

Allow work units smaller than a block to improve load balancing

parent 4b123ceb
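
In outline, this commit replaces "one block = one unit of GC work" with a smaller granule: each GC thread's allocation limit (todo_lim) is placed WORK_UNIT_WORDS past the current fill point rather than at the end of the block, so a filled work unit can be handed to idle threads long before the whole block is full. Below is a minimal standalone sketch of that movable-limit mechanism; Workspace, set_limit, bump_alloc and the constants are invented for illustration and are not the RTS code.

```c
#include <assert.h>
#include <stddef.h>

#define BLOCK_WORDS     512   /* words per 4 KB block (assumed, 64-bit) */
#define WORK_UNIT_WORDS 128   /* the granule introduced by this patch   */

typedef unsigned long W_;

typedef struct {
    W_ *block_start;   /* start of the current to-space block          */
    W_ *free;          /* next free word (ws->todo_free)               */
    W_ *lim;           /* allocation limit (ws->todo_lim), now the end
                          of the current work unit, not of the block   */
} Workspace;

/* Place the limit one work unit ahead, clipped to the block end;
 * mirrors the stg_min/stg_max expression in alloc_todo_block(). */
static void set_limit(Workspace *ws, size_t size)
{
    size_t step = size > WORK_UNIT_WORDS ? size : WORK_UNIT_WORDS;
    W_ *unit_end  = ws->free + step;
    W_ *block_end = ws->block_start + BLOCK_WORDS;
    ws->lim = unit_end < block_end ? unit_end : block_end;
}

/* Bump-allocate within the unit; on overflow the real code calls
 * todo_block_full(), which extends the limit or pushes the unit out. */
static W_ *bump_alloc(Workspace *ws, size_t size)
{
    if (ws->free + size > ws->lim) return NULL;   /* unit is full */
    W_ *p = ws->free;
    ws->free += size;
    return p;
}

int main(void)
{
    static W_ heap[BLOCK_WORDS];
    Workspace ws = { heap, heap, NULL };
    set_limit(&ws, 0);
    assert(ws.lim == heap + WORK_UNIT_WORDS);     /* first 128-word unit */
    while (bump_alloc(&ws, 16) != NULL) { }       /* fill the unit       */
    assert(ws.free == heap + WORK_UNIT_WORDS);    /* stopped mid-block   */
    return 0;
}
```

When the overflow fires, todo_block_full() (in the GCUtils.c hunk below) decides whether the just-filled span is worth pushing to the step's global todos list or whether the limit should simply slide forward.
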
......@@ -54,6 +54,7 @@ typedef struct bdescr_ {
union {
struct bdescr_ *back; /* used (occasionally) for doubly-linked lists*/
StgWord *bitmap;
StgPtr scan; /* scan pointer for copying GC */
} u;
unsigned int gen_no; /* generation */
struct step_ *step; /* step */
......
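
The new u.scan field is the load-bearing part of this hunk: the scan pointer used by the copying collector moves out of the per-thread workspace and into the block descriptor itself, so a half-scanned block can be pushed to the global list and resumed by any thread. Reusing the union is safe because back, bitmap and scan are live in disjoint phases. A simplified sketch of the resulting descriptor (field types assumed, most fields elided):

```c
typedef unsigned long W_;

typedef struct bdescr_ {
    W_ *start;                  /* first word of the block          */
    W_ *free;                   /* first unallocated word           */
    struct bdescr_ *link;
    union {
        struct bdescr_ *back;   /* doubly-linked lists (occasional) */
        W_ *bitmap;             /* mark bitmap                      */
        W_ *scan;               /* copying-GC scan pointer (new)    */
    } u;
} bdescr;

/* The invariant the new asserts in GarbageCollect() check: a block
 * is fully scavenged exactly when its scan pointer has caught up
 * with its free pointer. */
static int fully_scavenged(const bdescr *bd)
{
    return bd->u.scan == bd->free;
}
```
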
......@@ -91,6 +91,9 @@ typedef struct step_ {
bdescr * todos_last;
unsigned int n_todos; // count of above
bdescr * part_blocks; // partially-full scanned blocks
unsigned int n_part_blocks; // count of above
bdescr * scavenged_large_objects; // live large objs after GC (d-link)
unsigned int n_scavenged_large_blocks; // size (not count) of above
......
......@@ -63,7 +63,7 @@ alloc_for_copy (nat size, step *stp)
*/
to = ws->todo_free;
if (to + size > ws->todo_lim) {
to = gc_alloc_todo_block(ws);
to = todo_block_full(size, ws);
}
ws->todo_free = to + size;
ASSERT(ws->todo_free >= ws->todo_bd->free && ws->todo_free <= ws->todo_lim);
......
......@@ -422,22 +422,34 @@ GarbageCollect ( rtsBool force_major_gc )
ws = &thr->steps[s];
// Not true?
// ASSERT( ws->scan_bd == ws->todo_bd );
ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 );
ASSERT( ws->scan_bd ? ws->scan_bd->u.scan == ws->scan_bd->free : 1 );
// Push the final block
if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); }
if (ws->scan_bd) { push_scanned_block(ws->scan_bd, ws); }
ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
prev = ws->scavd_list;
prev = ws->part_list;
for (bd = ws->part_list; bd != NULL; bd = bd->link) {
bd->flags &= ~BF_EVACUATED; // now from-space
prev = bd;
}
if (prev != NULL) {
prev->link = ws->scavd_list;
}
for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
bd->flags &= ~BF_EVACUATED; // now from-space
prev = bd;
}
prev->link = ws->stp->blocks;
ws->stp->blocks = ws->scavd_list;
ws->stp->n_blocks += ws->n_scavd_blocks;
ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks);
prev->link = ws->step->blocks;
if (ws->part_list != NULL) {
ws->step->blocks = ws->part_list;
} else {
ws->step->blocks = ws->scavd_list;
}
ws->step->n_blocks += ws->n_part_blocks;
ws->step->n_blocks += ws->n_scavd_blocks;
ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
}
}
}
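
The splice above strings three lists together in place: the partially-full blocks, then the fully-scavenged blocks, then whatever the step already held, folding both counters into n_blocks. A standalone model of the same pointer surgery follows; Node and Step are invented stand-ins for bdescr and step, and note that the patched code dereferences prev unconditionally, so it relies on scavd_list being non-empty at this point (guarded in the sketch).

```c
#include <assert.h>
#include <stddef.h>

typedef struct Node { struct Node *link; } Node;

typedef struct {
    Node *blocks;          /* the step's block list */
    unsigned n_blocks;     /* count of the above    */
} Step;

static unsigned count(Node *p)
{
    unsigned n = 0;
    for (; p != NULL; p = p->link) n++;
    return n;
}

/* part_list ++ scavd_list ++ step->blocks, done by relinking */
static void splice(Step *step,
                   Node *part_list,  unsigned n_part,
                   Node *scavd_list, unsigned n_scavd)
{
    if (part_list == NULL && scavd_list == NULL) return;
    Node *prev = NULL;
    for (Node *bd = part_list; bd != NULL; bd = bd->link)  prev = bd;
    if (prev != NULL) prev->link = scavd_list;
    for (Node *bd = scavd_list; bd != NULL; bd = bd->link) prev = bd;
    if (prev != NULL) prev->link = step->blocks;   /* guarded here;
                                      unconditional in the patch */
    step->blocks    = (part_list != NULL) ? part_list : scavd_list;
    step->n_blocks += n_part + n_scavd;
    assert(count(step->blocks) == step->n_blocks);
}

int main(void)
{
    Node a = { NULL }, b = { NULL }, c = { NULL };
    Step s = { &c, 1 };
    splice(&s, &a, 1, &b, 1);          /* a -> b -> c */
    assert(s.blocks == &a && a.link == &b && b.link == &c);
    return 0;
}
```
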
......@@ -981,16 +993,18 @@ alloc_gc_thread (int n)
for (s = 0; s < total_steps; s++)
{
ws = &t->steps[s];
ws->stp = &all_steps[s];
ASSERT(s == ws->stp->abs_no);
ws->step = &all_steps[s];
ASSERT(s == ws->step->abs_no);
ws->gct = t;
ws->scan_bd = NULL;
ws->scan = NULL;
ws->todo_bd = NULL;
ws->buffer_todo_bd = NULL;
ws->part_list = NULL;
ws->n_part_blocks = 0;
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
}
......@@ -1304,15 +1318,17 @@ init_collected_gen (nat g, nat n_threads)
ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
ws->scan_bd = NULL;
ws->scan = NULL;
ws->todo_large_objects = NULL;
ws->part_list = NULL;
ws->n_part_blocks = 0;
// allocate the first to-space block; extra blocks will be
// chained on as necessary.
ws->todo_bd = NULL;
ws->buffer_todo_bd = NULL;
gc_alloc_todo_block(ws);
alloc_todo_block(ws,0);
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
......@@ -1343,11 +1359,14 @@ init_uncollected_gen (nat g, nat threads)
for (s = 0; s < generations[g].n_steps; s++) {
ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
stp = ws->stp;
stp = ws->step;
ws->buffer_todo_bd = NULL;
ws->todo_large_objects = NULL;
ws->part_list = NULL;
ws->n_part_blocks = 0;
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
......@@ -1365,7 +1384,7 @@ init_uncollected_gen (nat g, nat threads)
// this block is also the scan block; we must scan
// from the current end point.
ws->scan_bd = ws->todo_bd;
ws->scan = ws->scan_bd->free;
ws->scan_bd->u.scan = ws->scan_bd->free;
// subtract the contents of this block from the stats,
// because we'll count the whole block later.
......@@ -1374,9 +1393,8 @@ init_uncollected_gen (nat g, nat threads)
else
{
ws->scan_bd = NULL;
ws->scan = NULL;
ws->todo_bd = NULL;
gc_alloc_todo_block(ws);
alloc_todo_block(ws,0);
}
}
}
......
......@@ -73,12 +73,11 @@
------------------------------------------------------------------------- */
typedef struct step_workspace_ {
step * stp; // the step for this workspace
step * step; // the step for this workspace
struct gc_thread_ * gct; // the gc_thread that contains this workspace
// block that is currently being scanned
bdescr * scan_bd;
StgPtr scan; // the scan pointer
// where objects to be scavenged go
bdescr * todo_bd;
......@@ -91,10 +90,14 @@ typedef struct step_workspace_ {
// where large objects to be scavenged go
bdescr * todo_large_objects;
// Objects that need not be, or have already been, scavenged.
// Objects that have already been scavenged.
bdescr * scavd_list;
lnat n_scavd_blocks; // count of blocks in this list
// Partially-full, scavenged, blocks
bdescr * part_list;
unsigned int n_part_blocks; // count of above
} step_workspace;
/* ----------------------------------------------------------------------------
......@@ -137,7 +140,7 @@ typedef struct gc_thread_ {
// variable).
rtsBool failed_to_evac; // failure to evacuate an object typically
// causes it to be recorded in the mutable
// object list
rtsBool eager_promotion; // forces promotion to the evac gen
......@@ -194,4 +197,6 @@ extern nat mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS;
StgClosure * isAlive(StgClosure *p);
#define WORK_UNIT_WORDS 128
#endif /* GC_H */
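
WORK_UNIT_WORDS is the new load-balancing granule. Back-of-envelope, under the hedged assumption of the usual 4 KB GHC blocks (both block size and word size are configuration-dependent), it carves each block into a handful of independently distributable units:

```c
#include <assert.h>

#define BLOCK_SIZE      4096u                     /* bytes (assumed)  */
#define WORD_BYTES      8u                        /* 64-bit platform  */
#define BLOCK_SIZE_W    (BLOCK_SIZE / WORD_BYTES) /* 512 words        */
#define WORK_UNIT_WORDS 128u                      /* as defined above */

int main(void)
{
    assert(BLOCK_SIZE_W / WORK_UNIT_WORDS == 4u);  /* 4 units/block */
    assert(WORK_UNIT_WORDS * WORD_BYTES == 1024u); /* 1 KB per unit */
    return 0;
}
```

On a 32-bit platform (4-byte words, 1024-word blocks) the same constant yields 8 units per block.
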
......@@ -51,7 +51,7 @@ grab_todo_block (step_workspace *ws)
bdescr *bd;
step *stp;
stp = ws->stp;
stp = ws->step;
bd = NULL;
if (ws->buffer_todo_bd)
......@@ -76,90 +76,144 @@ grab_todo_block (step_workspace *ws)
return bd;
}
static void
push_todo_block (bdescr *bd, step *stp)
void
push_scanned_block (bdescr *bd, step_workspace *ws)
{
ASSERT(bd != NULL);
ASSERT(bd->link == NULL);
ACQUIRE_SPIN_LOCK(&stp->sync_todo);
if (stp->todos_last == NULL) {
stp->todos_last = bd;
stp->todos = bd;
} else {
stp->todos_last->link = bd;
stp->todos_last = bd;
ASSERT(bd->step == ws->step);
ASSERT(bd->u.scan == bd->free);
if (bd->start + BLOCK_SIZE_W - bd->free > WORK_UNIT_WORDS)
{
// a partially full block: put it on the part_list.
bd->link = ws->part_list;
ws->part_list = bd;
ws->n_part_blocks++;
IF_DEBUG(sanity,
ASSERT(countBlocks(ws->part_list) == ws->n_part_blocks));
}
else
{
// put the scan block on the ws->scavd_list.
bd->link = ws->scavd_list;
ws->scavd_list = bd;
ws->n_scavd_blocks ++;
IF_DEBUG(sanity,
ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks));
}
stp->n_todos++;
trace(TRACE_gc|DEBUG_gc, "step %d, n_todos: %d", stp->abs_no, stp->n_todos);
RELEASE_SPIN_LOCK(&stp->sync_todo);
}
void
push_scan_block (bdescr *bd, step_workspace *ws)
StgPtr
todo_block_full (nat size, step_workspace *ws)
{
bdescr *bd;
bd = ws->todo_bd;
ASSERT(bd != NULL);
ASSERT(bd->link == NULL);
ASSERT(bd->step == ws->step);
bd->free = ws->todo_free;
// If the global list is not empty, or there's not much work in
// this block to push, and there's enough room in
// this block to evacuate the current object, then just increase
// the limit.
if (ws->step->todos != NULL ||
(bd->free - bd->u.scan < WORK_UNIT_WORDS / 2)) {
if (bd->free + size < bd->start + BLOCK_SIZE_W) {
debugTrace(DEBUG_gc, "increasing limit for %p", bd->start);
ws->todo_lim = stg_min(bd->start + BLOCK_SIZE_W,
ws->todo_lim + stg_max(WORK_UNIT_WORDS,size));
return ws->todo_free;
}
}
ASSERT(bd->u.scan >= bd->start && bd->u.scan <= bd->free);
// If this block is not the scan block, we want to push it out and
// make room for a new todo block.
if (bd != ws->scan_bd)
{
// If this block does not have enough space to allocate the
// current object, but it also doesn't have any work to push, then
// push it on to the scanned list. It cannot be empty, because
// then there would be enough room to copy the current object.
if (bd->u.scan == bd->free)
{
ASSERT(bd->free != bd->start);
push_scanned_block(bd, ws);
}
// Otherwise, push this block out to the global list.
else
{
step *stp;
stp = ws->step;
trace(TRACE_gc|DEBUG_gc, "push todo block %p (%d words), step %d, n_todos: %d",
bd->start, bd->free - bd->u.scan, stp->abs_no, stp->n_todos);
// ToDo: use buffer_todo
ACQUIRE_SPIN_LOCK(&stp->sync_todo);
if (stp->todos_last == NULL) {
stp->todos_last = bd;
stp->todos = bd;
} else {
stp->todos_last->link = bd;
stp->todos_last = bd;
}
stp->n_todos++;
RELEASE_SPIN_LOCK(&stp->sync_todo);
}
}
// update stats: this is a block that has been copied & scavenged
gct->copied += bd->free - bd->start;
ws->todo_bd = NULL;
ws->todo_free = NULL;
ws->todo_lim = NULL;
// put the scan block on the ws->scavd_list.
bd->link = ws->scavd_list;
ws->scavd_list = bd;
ws->n_scavd_blocks ++;
alloc_todo_block(ws, size);
IF_DEBUG(sanity,
ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks));
return ws->todo_free;
}
StgPtr
gc_alloc_todo_block (step_workspace *ws)
alloc_todo_block (step_workspace *ws, nat size)
{
bdescr *bd;
if (ws->todo_bd != NULL) {
ws->todo_bd->free = ws->todo_free;
// Grab a part block if we have one, and it has enough room
if (ws->part_list != NULL &&
ws->part_list->start + BLOCK_SIZE_W - ws->part_list->free > (int)size)
{
bd = ws->part_list;
ws->part_list = bd->link;
ws->n_part_blocks--;
}
// If we already have a todo block, it must be full, so we push it
// out: first to the buffer_todo_bd, then to the step. BUT, don't
// push the block out if it is already the scan block.
if (ws->todo_bd != NULL && ws->scan_bd != ws->todo_bd) {
ASSERT(ws->todo_bd->link == NULL);
if (ws->buffer_todo_bd == NULL) {
// If the global todo list is empty, push this block
// out immediately rather than caching it in
// buffer_todo_bd, because there might be other threads
// waiting for work.
if (ws->stp->todos == NULL) {
push_todo_block(ws->todo_bd, ws->stp);
} else {
ws->buffer_todo_bd = ws->todo_bd;
}
} else {
ASSERT(ws->buffer_todo_bd->link == NULL);
push_todo_block(ws->buffer_todo_bd, ws->stp);
ws->buffer_todo_bd = ws->todo_bd;
else
{
bd = allocBlock_sync();
bd->gen_no = ws->step->gen_no;
bd->step = ws->step;
bd->u.scan = bd->start;
// blocks in to-space in generations up to and including N
// get the BF_EVACUATED flag.
if (ws->step->gen_no <= N) {
bd->flags = BF_EVACUATED;
} else {
bd->flags = 0;
}
ws->todo_bd = NULL;
}
bd = allocBlock_sync();
}
bd->gen_no = ws->stp->gen_no;
bd->step = ws->stp;
bd->link = NULL;
// blocks in to-space in generations up to and including N
// get the BF_EVACUATED flag.
if (ws->stp->gen_no <= N) {
bd->flags = BF_EVACUATED;
} else {
bd->flags = 0;
}
ws->todo_bd = bd;
ws->todo_free = bd->start;
ws->todo_lim = bd->start + BLOCK_SIZE_W;
ws->todo_free = bd->free;
ws->todo_lim = stg_min(bd->start + BLOCK_SIZE_W,
bd->free + stg_max(WORK_UNIT_WORDS,size));
debugTrace(DEBUG_gc, "alloc new todo block %p for step %d",
bd->start, ws->step->abs_no);
return ws->todo_free;
}
......
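
The heart of the patch is the three-way decision in todo_block_full() plus the part_list/scavd_list split in push_scanned_block(). The sketch below compresses both into one pure function over word offsets so the policy is visible at a glance; it paraphrases the hunks above, and the real code's extra guard against pushing the current scan block (ws->scan_bd) is elided.

```c
#include <assert.h>
#include <stddef.h>

#define BLOCK_SIZE_W    512u   /* words per block (assumed, 64-bit) */
#define WORK_UNIT_WORDS 128u

typedef enum {
    EXTEND_LIMIT,      /* slide todo_lim forward, keep allocating   */
    PUSH_GLOBAL_TODO,  /* unscanned work: offer it to other threads */
    KEEP_PART,         /* scanned, still roomy: local part_list     */
    KEEP_SCAVD         /* scanned and nearly full: local scavd_list */
} Outcome;

/* free and scan are word offsets into the block; global_todos says
 * whether the step's shared todo list is non-empty. */
static Outcome todo_block_full_sketch(size_t size, size_t free,
                                      size_t scan, int global_todos)
{
    /* Peers are already busy, or this block holds too little
     * unscanned work to be worth sharing -- and the object still
     * fits -- so just extend the limit. */
    if ((global_todos || free - scan < WORK_UNIT_WORDS / 2u)
        && free + size < BLOCK_SIZE_W)
        return EXTEND_LIMIT;

    if (scan == free) {
        /* Fully scanned: keep it locally, on part_list if another
         * work unit still fits (push_scanned_block's test). */
        return (BLOCK_SIZE_W - free > WORK_UNIT_WORDS) ? KEEP_PART
                                                       : KEEP_SCAVD;
    }
    return PUSH_GLOBAL_TODO;
}

int main(void)
{
    /* 20 unscanned words, idle peers: too little to share, extend */
    assert(todo_block_full_sketch(8, 60, 40, 0) == EXTEND_LIMIT);
    /* 200 unscanned words, idle peers: push to the global list */
    assert(todo_block_full_sketch(8, 260, 60, 0) == PUSH_GLOBAL_TODO);
    /* fully scanned, but a 300-word object no longer fits: the
     * block still has room for a work unit, so recycle it */
    assert(todo_block_full_sketch(300, 300, 300, 0) == KEEP_PART);
    return 0;
}
```
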
......@@ -20,18 +20,18 @@ extern SpinLock gc_alloc_block_sync;
bdescr *allocBlock_sync(void);
void freeChain_sync(bdescr *bd);
void push_scan_block (bdescr *bd, step_workspace *ws);
void push_scanned_block (bdescr *bd, step_workspace *ws);
bdescr *grab_todo_block (step_workspace *ws);
StgPtr gc_alloc_todo_block (step_workspace *ws);
bdescr *gc_alloc_scavd_block (step_workspace *ws);
StgPtr todo_block_full (nat size, step_workspace *ws);
StgPtr alloc_todo_block (step_workspace *ws, nat size);
// Returns true if a block is 3/4 full. This predicate is used to try
// Returns true if a block is partially full. This predicate is used to try
// to re-use partial blocks wherever possible, and to reduce wastage.
// We might need to tweak the actual value.
INLINE_HEADER rtsBool
isPartiallyFull(bdescr *bd)
{
return (bd->free + BLOCK_SIZE_W/4 < bd->start + BLOCK_SIZE_W);
return (bd->free + WORK_UNIT_WORDS < bd->start + BLOCK_SIZE_W);
}
......
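
The predicate's threshold changes from a fraction of the block (a quarter) to the work-unit size: a block is now worth reusing exactly when one more full work unit fits into it. A hedged comparison, assuming 4 KB blocks and a 32-bit word size (on 64-bit, the two thresholds happen to coincide at 128 words):

```c
#include <assert.h>

#define BLOCK_SIZE_W    1024u  /* 4 KB / 4-byte words (32-bit assumed) */
#define WORK_UNIT_WORDS 128u

/* 'used' counts words already allocated in the block */
static int old_pred(unsigned used)  /* >25% of the block must be free */
{ return used + BLOCK_SIZE_W / 4u < BLOCK_SIZE_W; }

static int new_pred(unsigned used)  /* one work unit must still fit   */
{ return used + WORK_UNIT_WORDS < BLOCK_SIZE_W; }

int main(void)
{
    /* 200 free words: too full to reuse under the old test, but a
     * whole 128-word work unit still fits, so the new test accepts */
    unsigned used = BLOCK_SIZE_W - 200u;
    assert(!old_pred(used) && new_pred(used));
    return 0;
}
```
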
......@@ -1355,7 +1355,7 @@ scavenge_large (step_workspace *ws)
bdescr *bd;
StgPtr p;
gct->evac_step = ws->stp;
gct->evac_step = ws->step;
bd = ws->todo_large_objects;
......@@ -1367,15 +1367,15 @@ scavenge_large (step_workspace *ws)
// the front when evacuating.
ws->todo_large_objects = bd->link;
ACQUIRE_SPIN_LOCK(&ws->stp->sync_large_objects);
dbl_link_onto(bd, &ws->stp->scavenged_large_objects);
ws->stp->n_scavenged_large_blocks += bd->blocks;
RELEASE_SPIN_LOCK(&ws->stp->sync_large_objects);
ACQUIRE_SPIN_LOCK(&ws->step->sync_large_objects);
dbl_link_onto(bd, &ws->step->scavenged_large_objects);
ws->step->n_scavenged_large_blocks += bd->blocks;
RELEASE_SPIN_LOCK(&ws->step->sync_large_objects);
p = bd->start;
if (scavenge_one(p)) {
if (ws->stp->gen_no > 0) {
recordMutableGen_GC((StgClosure *)p, ws->stp->gen);
if (ws->step->gen_no > 0) {
recordMutableGen_GC((StgClosure *)p, ws->step->gen);
}
}
}
......@@ -1439,30 +1439,27 @@ loop:
if (ws->scan_bd == NULL && ws->todo_bd != NULL)
{
ws->scan_bd = ws->todo_bd;
ws->scan = ws->scan_bd->start;
}
// If we have a scan block with some work to do,
// scavenge everything up to the free pointer.
if (ws->scan != NULL && ws->scan < ws->scan_bd->free)
if (ws->scan_bd != NULL && ws->scan_bd->u.scan < ws->scan_bd->free)
{
if (n_gc_threads == 1) {
scavenge_block1(ws->scan_bd, ws->scan);
scavenge_block1(ws->scan_bd);
} else {
scavenge_block(ws->scan_bd, ws->scan);
scavenge_block(ws->scan_bd);
}
ws->scan = ws->scan_bd->free;
did_something = rtsTrue;
}
if (ws->scan_bd != NULL && ws->scan == ws->scan_bd->free
&& ws->scan_bd != ws->todo_bd)
if (ws->scan_bd != NULL && ws->scan_bd != ws->todo_bd)
{
ASSERT(ws->scan_bd->u.scan == ws->scan_bd->free);
// we're not going to evac any more objects into
// this block, so push it now.
push_scan_block(ws->scan_bd, ws);
push_scanned_block(ws->scan_bd, ws);
ws->scan_bd = NULL;
ws->scan = NULL;
// we might be able to scan the todo block now.
did_something = rtsTrue;
}
......@@ -1482,11 +1479,11 @@ loop:
// our scavd list. This saves pushing out the
// scan_bd block, which might be partial.
if (n_gc_threads == 1) {
scavenge_block1(bd, bd->start);
scavenge_block1(bd);
} else {
scavenge_block(bd, bd->start);
scavenge_block(bd);
}
push_scan_block(bd, ws);
push_scanned_block(bd, ws);
did_something = rtsTrue;
break;
}
......@@ -1561,7 +1558,7 @@ any_work (void)
}
ws = &gct->steps[s];
if (ws->todo_large_objects) return rtsTrue;
if (ws->stp->todos) return rtsTrue;
if (ws->step->todos) return rtsTrue;
}
gct->no_work++;
......
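
The reworked loop above drops the separate ws->scan pointer: a thread scavenges its own block via bd->u.scan, pushes the block once scan catches up with free, and otherwise pulls a block from the shared pool. A single-threaded model of that control flow (Block and the pool are invented stand-ins; locking elided), whose point is that a block pushed half-scanned by one thread is resumed exactly where it stopped:

```c
#include <assert.h>
#include <stddef.h>

typedef struct Block {
    struct Block *link;
    int scan, free;             /* word offsets, scan <= free */
} Block;

static Block *global_todos;     /* shared work pool (lock elided) */

static void scavenge_block(Block *bd)
{
    /* stand-in: scavenge everything from bd->scan up to bd->free */
    bd->scan = bd->free;
}

static int do_some_work(Block **scan_bd)
{
    /* 1. our scan block still has unscanned objects: scan them */
    if (*scan_bd != NULL && (*scan_bd)->scan < (*scan_bd)->free) {
        scavenge_block(*scan_bd);
        return 1;
    }
    /* 2. grab a (possibly half-scanned) block from the pool */
    if (global_todos != NULL) {
        *scan_bd = global_todos;
        global_todos = global_todos->link;
        return 1;
    }
    return 0;                   /* no work anywhere */
}

int main(void)
{
    Block half = { NULL, 40, 200 };   /* pushed mid-scan by a peer */
    global_todos = &half;
    Block *scan_bd = NULL;
    while (do_some_work(&scan_bd)) { }
    assert(half.scan == half.free);   /* resumed at word 40, done */
    return 0;
}
```
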
......@@ -15,7 +15,7 @@
// defined, the second time without.
#ifndef PARALLEL_GC
#define scavenge_block(a,b) scavenge_block1(a,b)
#define scavenge_block(a) scavenge_block1(a)
#define evacuate(a) evacuate1(a)
#define recordMutableGen_GC(a,b) recordMutableGen(a,b)
#else
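
For context on the scavenge_block(a,b) -> scavenge_block(a) macro above: Scav.c-inc is compiled twice, once with PARALLEL_GC defined and once without, and in the second pass the function-like macro renames every definition and internal call site to its *1 variant. A standalone demo of that renaming trick (names invented):

```c
#include <stdio.h>

/* first "inclusion": parallel flavour, names kept as-is */
static void scan(int x) { printf("scan(%d) [parallel]\n", x); }

/* second "inclusion": the function-like macro renames both the
 * definition below and any internal call sites to scan1, exactly as
 * #define scavenge_block(a) scavenge_block1(a) does in Scav.c-inc */
#define scan(a) scan1(a)
static void scan(int x) { printf("scan(%d) [single-threaded]\n", x); }
#undef scan

int main(void)
{
    scan(1);     /* parallel variant        */
    scan1(2);    /* single-threaded variant */
    return 0;
}
```
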
......@@ -24,7 +24,7 @@
#undef recordMutableGen_GC
#endif
static void scavenge_block (bdescr *bd, StgPtr scan);
static void scavenge_block (bdescr *bd);
/* -----------------------------------------------------------------------------
Scavenge a block from the given scan pointer up to bd->free.
......@@ -39,7 +39,7 @@ static void scavenge_block (bdescr *bd, StgPtr scan);
-------------------------------------------------------------------------- */
static void
scavenge_block (bdescr *bd, StgPtr scan)
scavenge_block (bdescr *bd)
{
StgPtr p, q;
StgInfoTable *info;
......@@ -47,10 +47,10 @@ scavenge_block (bdescr *bd, StgPtr scan)
rtsBool saved_eager_promotion;
step_workspace *ws;
p = scan;
p = bd->u.scan;
debugTrace(DEBUG_gc, "scavenging block %p (gen %d, step %d) @ %p",
bd->start, bd->gen_no, bd->step->no, scan);
bd->start, bd->gen_no, bd->step->no, p);
gct->evac_step = bd->step;
saved_evac_step = gct->evac_step;
......@@ -448,7 +448,12 @@ scavenge_block (bdescr *bd, StgPtr scan)
}
debugTrace(DEBUG_gc, " scavenged %ld bytes",
(unsigned long)((bd->free - scan) * sizeof(W_)));
(unsigned long)((bd->free - bd->u.scan) * sizeof(W_)));
// update stats: this is a block that has been copied & scavenged
gct->copied += bd->free - bd->u.scan;
bd->u.scan = bd->free;
}
#undef scavenge_block
......
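
One more consequence of moving the scan pointer into the descriptor, visible at the end of the hunk above: gct->copied is now charged per scavenged span rather than when a whole block is pushed, so a block scavenged in several installments is still counted exactly once. A tiny model (Bdescr and copied are stand-ins, not the RTS types):

```c
#include <assert.h>

typedef struct { long scan, free; } Bdescr;  /* stand-in for bdescr   */
static long copied;                          /* stand-in: gct->copied */

/* charge exactly the words scanned this pass, then catch up */
static void scavenge_block_stats(Bdescr *bd)
{
    copied += bd->free - bd->scan;
    bd->scan = bd->free;
}

int main(void)
{
    Bdescr bd = { 0, 0 };
    bd.free = 100; scavenge_block_stats(&bd);   /* first installment */
    bd.free = 250; scavenge_block_stats(&bd);   /* more evacuated    */
    bd.free = 250; scavenge_block_stats(&bd);   /* nothing new: +0   */
    assert(copied == 250);
    return 0;
}
```
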