Commit 713cf473 authored by Simon Marlow

Parallelise clearNurseries() in the parallel GC

The clearNurseries() operation resets the free pointer in each nursery
block to the start of the block, emptying the nursery.  In the
parallel GC this was done on the main GC thread, but that's bad
because it accesses the bdescr of every nursery block, and moves all
those cache lines onto the CPU of the main GC thread.  With large
nurseries, this can be especially bad.  So instead we want to clear
each nursery in its local GC thread.

Thanks to Andreas Voellmy <andreas.voellmy@gmail.com> for identifying
the issue.

After this change and the previous patch to make the last GC a major
one, I see these results for nofib/parallel on 8 cores:

   blackscholes          +0.0%     +0.0%     -3.7%     -3.3%     +0.3%
          coins          +0.0%     +0.0%     -5.1%     -5.0%     +0.4%
           gray          +0.0%     +0.0%     -4.5%     -2.1%     +0.8%
         mandel          +0.0%     -0.0%     -7.6%     -5.1%     -2.3%
        matmult          +0.0%     +5.5%     -2.8%     -1.9%     -5.8%
        minimax          +0.0%     +0.0%    -10.6%    -10.5%     +0.0%
          nbody          +0.0%     -4.4%     +0.0%      0.07     +0.0%
         parfib          +0.0%     +1.0%     +0.5%     +0.9%     +0.0%
        partree          +0.0%     +0.0%     -2.4%     -2.5%     +1.7%
           prsa          +0.0%     -0.2%     +1.8%     +4.2%     +0.0%
         queens          +0.0%     -0.0%     -1.8%     -1.4%     -4.8%
            ray          +0.0%     -0.6%    -18.5%    -17.8%     +0.0%
       sumeuler          +0.0%     -0.0%     -3.7%     -3.7%     +0.0%
      transclos          +0.0%     -0.0%    -25.7%    -26.6%     +0.0%
--------------------------------------------------------------------------------
            Min          +0.0%     -4.4%    -25.7%    -26.6%     -5.8%
            Max          +0.0%     +5.5%     +1.8%     +4.2%     +1.7%
 Geometric Mean          +0.0%     +0.1%     -6.3%     -6.1%     -0.7%
parent 2f3a41d9
......@@ -629,7 +629,16 @@ GarbageCollect (rtsBool force_major_gc,
}
// Reset the nursery: make the blocks empty
allocated += clearNurseries();
if (n_gc_threads == 1) {
for (n = 0; n < n_capabilities; n++) {
allocated += clearNursery(&capabilities[n]);
}
} else {
gct->allocated = clearNursery(cap);
for (n = 0; n < n_capabilities; n++) {
allocated += gc_threads[n]->allocated;
}
}
resize_nursery();
......@@ -1094,6 +1103,8 @@ gcWorkerThread (Capability *cap)
scavenge_until_all_done();
gct->allocated = clearNursery(cap);
#ifdef THREADED_RTS
// Now that the whole heap is marked, we discard any sparks that
// were found to be unreachable. The main GC thread is currently
......@@ -1477,6 +1488,7 @@ init_gc_thread (gc_thread *t)
t->failed_to_evac = rtsFalse;
t->eager_promotion = rtsTrue;
t->thunk_selector_depth = 0;
t->allocated = 0;
t->copied = 0;
t->scanned = 0;
t->any_work = 0;
......
......@@ -176,6 +176,7 @@ typedef struct gc_thread_ {
// -------------------
// stats
lnat allocated; // result of clearNursery()
lnat copied;
lnat scanned;
lnat any_work;
......
......@@ -496,22 +496,19 @@ allocNurseries (nat from, nat to)
assignNurseriesToCapabilities(from, to);
}
lnat // words allocated
clearNurseries (void)
lnat
clearNursery (Capability *cap)
{
lnat allocated = 0;
nat i;
bdescr *bd;
lnat allocated = 0;
for (i = 0; i < n_capabilities; i++) {
for (bd = nurseries[i].blocks; bd; bd = bd->link) {
allocated += (lnat)(bd->free - bd->start);
capabilities[i].total_allocated += (lnat)(bd->free - bd->start);
bd->free = bd->start;
ASSERT(bd->gen_no == 0);
ASSERT(bd->gen == g0);
IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
}
for (bd = nurseries[cap->no].blocks; bd; bd = bd->link) {
allocated += (lnat)(bd->free - bd->start);
cap->total_allocated += (lnat)(bd->free - bd->start);
bd->free = bd->start;
ASSERT(bd->gen_no == 0);
ASSERT(bd->gen == g0);
IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
}
return allocated;
......
......@@ -81,7 +81,7 @@ void dirty_MVAR(StgRegTable *reg, StgClosure *p);
extern nursery *nurseries;
void resetNurseries ( void );
lnat clearNurseries ( void );
lnat clearNursery ( Capability *cap );
void resizeNurseries ( nat blocks );
void resizeNurseriesFixed ( nat blocks );
lnat countNurseryBlocks ( void );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment