Commit 67f4ab7e authored by Simon Marlow

Allocate pinned object blocks from the nursery, not the global allocator.

Prompted by a benchmark posted to parallel-haskell@haskell.org by
Andreas Voellmy <andreas.voellmy@gmail.com>.  This program exhibits
contention for the block allocator when run with -N2 and greater
without the fix:

{-# LANGUAGE MagicHash, UnboxedTuples, BangPatterns #-}
module Main where

import Control.Monad
import Control.Concurrent
import System.Environment
import GHC.IO
import GHC.Exts
import GHC.Conc

main :: IO ()
main = do
  [m] <- fmap (fmap read) getArgs
  n <- getNumCapabilities
  ms <- replicateM n newEmptyMVar
  sequence_ [ forkIO $ busyWorkerB (m `quot` n) >> putMVar mv () | mv <- ms ]
  mapM_ takeMVar ms

busyWorkerB :: Int -> IO ()
busyWorkerB n_loops = go 0
  where go !n | n >= n_loops = return ()
              | otherwise    =
          do p <- (IO $ \s ->
                    case newPinnedByteArray# 1024# s      of
                      { (# s', mbarr# #) ->
                           (# s', () #)
                      }
                  )
             go (n+1)
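(To reproduce: the program needs the threaded runtime, e.g. ghc -O2 -threaded Main.hs, then ./Main 10000000 +RTS -N2; the iteration count here is only an example, the essential part is running with -N2 or greater.)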
parent 86ebfef9
@@ -273,6 +273,7 @@ initCapability( Capability *cap, nat i )
     cap->transaction_tokens = 0;
     cap->context_switch = 0;
     cap->pinned_object_block = NULL;
+    cap->pinned_object_blocks = NULL;

 #ifdef PROFILING
     cap->r.rCCCS = CCS_SYSTEM;
@@ -76,6 +76,8 @@ struct Capability_ {
     // block for allocating pinned objects into
     bdescr *pinned_object_block;
+    // full pinned object blocks allocated since the last GC
+    bdescr *pinned_object_blocks;

     // Context switch flag. When non-zero, this means: stop running
     // Haskell code, and switch threads.
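The new list pairs with the existing pinned_object_block: a capability fills one block at a time on its own, and full blocks wait on the capability-local list until the GC drains them. Below is a minimal standalone C sketch of that ownership pattern, not GHC code; the names Block, Worker, retire_block and drain_all are invented for illustration:

/* Sketch only: per-worker block lists drained by a stop-the-world
   collector.  Invented names; not GHC RTS code. */
#include <stddef.h>
#include <stdio.h>

typedef struct Block_ {
    struct Block_ *link;   /* singly-linked, like bdescr->link */
    size_t words_used;
} Block;

typedef struct {
    Block *current;        /* partly-filled block: cf. pinned_object_block */
    Block *full;           /* full blocks since last GC: cf. pinned_object_blocks */
} Worker;

/* Hot path: runs on every worker, touches only worker-local state,
   so no lock is taken. */
static void retire_block(Worker *w)
{
    Block *b = w->current;
    if (b != NULL) {
        b->link = w->full;    /* push onto the worker-local list */
        w->full = b;
        w->current = NULL;
    }
}

/* Cold path: the collector drains every worker's list while the
   workers are stopped, so it needs no lock either. */
static size_t drain_all(Worker *ws, size_t n_workers)
{
    size_t words = 0;
    for (size_t i = 0; i < n_workers; i++) {
        for (Block *b = ws[i].full; b != NULL; b = b->link) {
            words += b->words_used;
        }
        ws[i].full = NULL;
    }
    return words;
}

int main(void)
{
    Block b0 = { NULL, 128 }, b1 = { NULL, 256 };
    Worker ws[2] = { { &b0, NULL }, { &b1, NULL } };
    retire_block(&ws[0]);
    retire_block(&ws[1]);
    printf("collected %zu words\n", drain_all(ws, 2)); /* prints 384 */
    return 0;
}

The point is that the hot path and the drain never run concurrently on the same list, which is exactly why the commit can drop the global lock from the common allocation case.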
@@ -150,6 +150,7 @@ static StgWord dec_running (void);
 static void wakeup_gc_threads (nat me);
 static void shutdown_gc_threads (nat me);
 static void collect_gct_blocks (void);
+static lnat collect_pinned_object_blocks (void);

 #if 0 && defined(DEBUG)
 static void gcCAFs (void);
@@ -285,6 +286,10 @@ GarbageCollect (rtsBool force_major_gc,
     // check sanity *before* GC
     IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));

+    // gather blocks allocated using allocatePinned() from each capability
+    // and put them on the g0->large_object list.
+    collect_pinned_object_blocks();
+
     // Initialise all the generations/steps that we're collecting.
     for (g = 0; g <= N; g++) {
         prepare_collected_gen(&generations[g]);
@@ -1421,6 +1426,43 @@ collect_gct_blocks (void)
     }
 }

+/* -----------------------------------------------------------------------------
+   During mutation, any blocks that are filled by allocatePinned() are
+   stashed on the local pinned_object_blocks list, to avoid needing to
+   take a global lock.  Here we collect those blocks from the
+   cap->pinned_object_blocks lists and put them on the
+   main g0->large_object list.
+
+   Returns: the number of words allocated this way, for stats
+   purposes.
+   -------------------------------------------------------------------------- */
+
+static lnat
+collect_pinned_object_blocks (void)
+{
+    nat n;
+    bdescr *bd, *prev;
+    lnat allocated = 0;
+
+    for (n = 0; n < n_capabilities; n++) {
+        prev = NULL;
+        for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
+            allocated += bd->free - bd->start;
+            prev = bd;
+        }
+        if (prev != NULL) {
+            prev->link = g0->large_objects;
+            if (g0->large_objects != NULL) {
+                g0->large_objects->u.back = prev;
+            }
+            g0->large_objects = capabilities[n].pinned_object_blocks;
+            capabilities[n].pinned_object_blocks = 0;
+        }
+    }
+
+    return allocated;
+}
+
 /* -----------------------------------------------------------------------------
    Initialise a gc_thread before GC
    -------------------------------------------------------------------------- */
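A note on collect_pinned_object_blocks above: it makes a single pass over each capability's list, both to sum bd->free - bd->start per block (free and start are word pointers, so the difference is a word count) and to find the tail, so the whole list can then be spliced onto the front of g0->large_objects without a second traversal.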
@@ -869,7 +869,7 @@ memInventory (rtsBool show)
           gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].part_list);
           gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].scavd_list);
           gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].todo_bd);
-      }
+      }
       gen_blocks[g] += genBlocks(&generations[g]);
   }
@@ -880,6 +880,7 @@ memInventory (rtsBool show)
       if (capabilities[i].pinned_object_block != NULL) {
           nursery_blocks += capabilities[i].pinned_object_block->blocks;
       }
+      nursery_blocks += countBlocks(capabilities[i].pinned_object_blocks);
   }

   retainer_blocks = 0;
@@ -744,8 +744,54 @@ allocatePinned (Capability *cap, lnat n)
     bd = cap->pinned_object_block;

     // If we don't have a block of pinned objects yet, or the current
-    // one isn't large enough to hold the new object, allocate a new one.
+    // one isn't large enough to hold the new object, get a new one.
     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
+
+        // stash the old block on cap->pinned_object_blocks.  On the
+        // next GC cycle these objects will be moved to
+        // g0->large_objects.
+        if (bd != NULL) {
+            dbl_link_onto(bd, &cap->pinned_object_blocks);
+        }
+
+        // We need to find another block.  We could just allocate one,
+        // but that means taking a global lock and we really want to
+        // avoid that (benchmarks that allocate a lot of pinned
+        // objects scale really badly if we do this).
+        //
+        // So first, we try taking the next block from the nursery, in
+        // the same way as allocate(), but note that we can only take
+        // an *empty* block, because we're about to mark it as
+        // BF_PINNED | BF_LARGE.
+        bd = cap->r.rCurrentNursery->link;
+        if (bd == NULL || bd->free != bd->start) { // must be empty!
+            // The nursery is empty, or the next block is non-empty:
+            // allocate a fresh block (we can't fail here).
+            // XXX in the case when the next nursery block is
+            // non-empty we aren't exerting any pressure to GC soon,
+            // so if this case ever happens then we could in theory
+            // keep allocating for ever without calling the GC.  We
+            // can't bump g0->n_new_large_words because that will be
+            // counted towards allocation, and we're already counting
+            // our pinned objects as allocation in
+            // collect_pinned_object_blocks in the GC.
+            ACQUIRE_SM_LOCK;
+            bd = allocBlock();
+            RELEASE_SM_LOCK;
+            initBdescr(bd, g0, g0);
+        } else {
+            // we have a block in the nursery: steal it
+            cap->r.rCurrentNursery->link = bd->link;
+            if (bd->link != NULL) {
+                bd->link->u.back = cap->r.rCurrentNursery;
+            }
+            cap->r.rNursery->n_blocks--;
+        }
+
+        cap->pinned_object_block = bd;
+        bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;

         // The pinned_object_block remains attached to the capability
         // until it is full, even if a GC occurs.  We want this
         // behaviour because otherwise the unallocated portion of the
@@ -759,17 +805,6 @@ allocatePinned (Capability *cap, lnat n)
         // the next GC the BF_EVACUATED flag will be cleared, and the
         // block will be promoted as usual (if anything in it is
         // live).
-        ACQUIRE_SM_LOCK;
-        if (bd != NULL) {
-            dbl_link_onto(bd, &g0->large_objects);
-            g0->n_large_blocks++;
-            g0->n_new_large_words += bd->free - bd->start;
-        }
-        cap->pinned_object_block = bd = allocBlock();
-        RELEASE_SM_LOCK;
-        initBdescr(bd, g0, g0);
-        bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
-        bd->free = bd->start;
     }

     p = bd->free;
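(Tying this back to the benchmark: newPinnedByteArray# allocates through allocatePinned(), so previously every capability took the storage-manager lock each time its pinned block filled up, which is the contention the benchmark exhibits; after this change the common case takes an empty block from the capability's own nursery without any lock, and the lock remains only in the fallback path.)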