Commit 693550d9 authored by simonmar's avatar simonmar
Browse files

[project @ 2005-04-12 09:04:23 by simonmar]

Per-task nurseries for SMP.  This was kind-of implemented before, but
it's much cleaner now.  There is now one *step* per capability, so we
have somewhere to hang the block count.  So for SMP, there are simply
multiple instances of generation 0 step 0.  The rNursery entry in the
register table now points to the step rather than the head block of
the nursery.
parent 11d45138
......@@ -56,7 +56,7 @@ typedef struct bdescr_ {
StgWord *bitmap;
} u;
unsigned int gen_no; /* generation */
struct _step *step; /* step */
struct step_ *step; /* step */
StgWord32 blocks; /* no. of blocks (if grp head, 0 otherwise) */
StgWord32 flags; /* block is in to-space */
#if SIZEOF_VOID_P == 8
......
......@@ -86,7 +86,7 @@ typedef struct StgRegTable_ {
StgPtr rHp;
StgPtr rHpLim;
struct StgTSO_ *rCurrentTSO;
struct bdescr_ *rNursery;
struct step_ *rNursery;
struct bdescr_ *rCurrentNursery;
StgWord rHpAlloc; /* number of *bytes* being allocated in heap */
#if defined(SMP) || defined(PAR)
......
......@@ -38,12 +38,8 @@
/* TICKY_TICKY needs EAGER_BLACKHOLING to verify no double-entries of
* single-entry thunks.
*
* SMP needs EAGER_BLACKHOLING because it has to lock thunks
* synchronously, in case another thread is trying to evaluate the
* same thunk simultaneously.
*/
#if defined(SMP) || defined(TICKY_TICKY)
#if defined(TICKY_TICKY)
# define EAGER_BLACKHOLING
#else
# define LAZY_BLACKHOLING
......
......@@ -50,12 +50,12 @@
*
* ------------------------------------------------------------------------- */
typedef struct _step {
typedef struct step_ {
unsigned int no; /* step number */
bdescr * blocks; /* blocks in this step */
unsigned int n_blocks; /* number of blocks */
struct _step * to; /* destination step for live objects */
struct _generation * gen; /* generation this step belongs to */
struct step_ * to; /* destination step for live objects */
struct generation_ * gen; /* generation this step belongs to */
unsigned int gen_no; /* generation number (cached) */
bdescr * large_objects; /* large objects (doubly linked) */
unsigned int n_large_blocks; /* no. of blocks used by large objs */
......@@ -75,7 +75,7 @@ typedef struct _step {
bdescr * bitmap; /* bitmap for compacting collection */
} step;
typedef struct _generation {
typedef struct generation_ {
unsigned int no; /* generation number */
step * steps; /* steps */
unsigned int n_steps; /* number of steps */
......@@ -332,11 +332,11 @@ INLINE_HEADER StgWord stack_frame_sizeW( StgClosure *frame )
Nursery manipulation
-------------------------------------------------------------------------- */
extern void allocNurseries ( void );
extern void resetNurseries ( void );
extern bdescr * allocNursery ( bdescr *last_bd, nat blocks );
extern void resizeNursery ( nat blocks );
extern void tidyAllocateLists ( void );
extern void allocNurseries ( void );
extern void resetNurseries ( void );
extern void resizeNurseries ( nat blocks );
extern void tidyAllocateLists ( void );
extern lnat countNurseryBlocks ( void );
/* -----------------------------------------------------------------------------
Functions from GC.c
......
......@@ -27,6 +27,7 @@
Capability MainCapability; /* for non-SMP, we have one global capability */
#endif
Capability *capabilities = NULL;
nat rts_n_free_capabilities;
#if defined(RTS_SUPPORTS_THREADS)
......@@ -105,35 +106,6 @@ initCapability( Capability *cap )
cap->f.stgGCFun = (F_)__stg_gc_fun;
}
/* -----------------------------------------------------------------------------
* Function: initCapabilities_(nat)
*
* Purpose: upon startup, allocate and fill in table
* holding 'n' Capabilities. Only for SMP, since
* it is the only build that supports multiple
* capabilities within the RTS.
* -------------------------------------------------------------------------- */
#if defined(SMP)
static void
initCapabilities_(nat n)
{
nat i;
Capability *cap, *prev;
cap = NULL;
prev = NULL;
for (i = 0; i < n; i++) {
cap = stgMallocBytes(sizeof(Capability), "initCapabilities");
initCapability(cap);
cap->link = prev;
prev = cap;
}
free_capabilities = cap;
rts_n_free_capabilities = n;
IF_DEBUG(scheduler,
sched_belch("allocated %d capabilities", rts_n_free_capabilities));
}
#endif /* SMP */
/* ---------------------------------------------------------------------------
* Function: initCapabilities()
*
......@@ -146,17 +118,31 @@ void
initCapabilities( void )
{
#if defined(SMP)
initCapabilities_(RtsFlags.ParFlags.nNodes);
nat i,n;
n = RtsFlags.ParFlags.nNodes;
capabilities = stgMallocBytes(n * sizeof(Capability), "initCapabilities");
for (i = 0; i < n; i++) {
initCapability(&capabilities[i]);
capabilities[i].link = &capabilities[i+1];
}
capabilities[n-1].link = NULL;
free_capabilities = &capabilities[0];
rts_n_free_capabilities = n;
IF_DEBUG(scheduler, sched_belch("allocated %d capabilities", n));
#else
initCapability(&MainCapability);
capabilities = &MainCapability;
initCapability(&MainCapability);
rts_n_free_capabilities = 1;
#endif
#if defined(RTS_SUPPORTS_THREADS)
initCondition(&returning_worker_cond);
initCondition(&thread_ready_cond);
initCondition(&returning_worker_cond);
initCondition(&thread_ready_cond);
#endif
rts_n_free_capabilities = 1;
}
/* ----------------------------------------------------------------------------
......
......@@ -23,6 +23,9 @@
#ifndef __CAPABILITY_H__
#define __CAPABILITY_H__
// All the capabilities
extern Capability *capabilities;
// Initialise the available capabilities.
//
extern void initCapabilities( void );
......
......@@ -47,6 +47,12 @@
#include <string.h>
// Turn off inlining when debugging - it obfuscates things
#ifdef DEBUG
# undef STATIC_INLINE
# define STATIC_INLINE static
#endif
/* STATIC OBJECT LIST.
*
* During GC:
......@@ -978,7 +984,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
blocks = RtsFlags.GcFlags.minAllocAreaSize;
}
}
resizeNursery(blocks);
resizeNurseries(blocks);
} else {
/* Generational collector:
......@@ -995,7 +1001,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
* percentage of g0s0 that was live at the last minor GC.
*/
if (N == 0) {
g0s0_pcnt_kept = (new_blocks * 100) / g0s0->n_blocks;
g0s0_pcnt_kept = (new_blocks * 100) / countNurseryBlocks();
}
/* Estimate a size for the allocation area based on the
......@@ -1018,12 +1024,12 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
blocks = RtsFlags.GcFlags.minAllocAreaSize;
}
resizeNursery((nat)blocks);
resizeNurseries((nat)blocks);
} else {
// we might have added extra large blocks to the nursery, so
// resize back to minAllocAreaSize again.
resizeNursery(RtsFlags.GcFlags.minAllocAreaSize);
resizeNurseries(RtsFlags.GcFlags.minAllocAreaSize);
}
}
......@@ -3938,7 +3944,7 @@ threadLazyBlackHole(StgTSO *tso)
if (bh->header.info != &stg_CAF_BLACKHOLE_info) {
#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
debugBelch("Unexpected lazy BHing required at 0x%04x",(int)bh);
debugBelch("Unexpected lazy BHing required at 0x%04x\n",(int)bh);
#endif
#ifdef PROFILING
// @LDV profiling
......
......@@ -1466,13 +1466,12 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
if (cap->r.rCurrentNursery->u.back != NULL) {
cap->r.rCurrentNursery->u.back->link = bd;
} else {
#ifdef SMP
cap->r.rNursery = g0s0->blocks = bd;
#else
#if !defined(SMP)
ASSERT(g0s0->blocks == cap->r.rCurrentNursery &&
g0s0->blocks == cap->r.rNursery);
cap->r.rNursery = g0s0->blocks = bd;
g0s0->blocks = bd;
#endif
cap->r.rNursery = bd;
}
cap->r.rCurrentNursery->u.back = bd;
......
......@@ -45,6 +45,9 @@ step *g0s0 = NULL; /* generation 0, step 0, for convenience */
ullong total_allocated = 0; /* total memory allocated during run */
nat n_nurseries = 0; /* == RtsFlags.ParFlags.nNodes, convenience */
step *nurseries = NULL; /* array of nurseries, >1 only if SMP */
/*
* Storage manager mutex: protects all the above state from
* simultaneous access by two STG threads.
......@@ -72,11 +75,33 @@ extern Mutex sm_mutex;
#define RELEASE_SM_LOCK
#endif
/*
 * Put a step structure into its initial, empty state.
 *
 *   stp  the step to initialise
 *   g    generation number; also used to index generations[] for the
 *        back-pointer stored in stp->gen
 *   s    step number, recorded in stp->no
 *
 * The 'to' (destination step) field is not written here.
 */
static void
initStep (step *stp, int g, int s)
{
    /* which step this is, and which generation it belongs to */
    stp->no     = s;
    stp->gen_no = g;
    stp->gen    = &generations[g];

    /* no ordinary blocks yet */
    stp->blocks      = NULL;
    stp->n_blocks    = 0;
    stp->n_to_blocks = 0;

    /* hp / scan pointers all start out cleared */
    stp->hp      = NULL;
    stp->hpLim   = NULL;
    stp->hp_bd   = NULL;
    stp->scan    = NULL;
    stp->scan_bd = NULL;

    /* no large objects of any kind yet */
    stp->large_objects            = NULL;
    stp->n_large_blocks           = 0;
    stp->new_large_objects        = NULL;
    stp->scavenged_large_objects  = NULL;
    stp->n_scavenged_large_blocks = 0;

    /* not compacted, and therefore no compaction bitmap */
    stp->is_compacted = 0;
    stp->bitmap       = NULL;
}
void
initStorage( void )
{
nat g, s;
step *stp;
generation *gen;
if (generations != NULL) {
......@@ -112,7 +137,7 @@ initStorage( void )
/* allocate generation info array */
generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
* sizeof(struct _generation),
* sizeof(struct generation_),
"initStorage: gens");
/* Initialise all generations */
......@@ -136,47 +161,44 @@ initStorage( void )
/* Oldest generation: one step */
oldest_gen->n_steps = 1;
oldest_gen->steps =
stgMallocBytes(1 * sizeof(struct _step), "initStorage: last step");
stgMallocBytes(1 * sizeof(struct step_), "initStorage: last step");
/* set up all except the oldest generation with 2 steps */
for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
generations[g].n_steps = RtsFlags.GcFlags.steps;
generations[g].steps =
stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct _step),
stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct step_),
"initStorage: steps");
}
} else {
/* single generation, i.e. a two-space collector */
g0->n_steps = 1;
g0->steps = stgMallocBytes (sizeof(struct _step), "initStorage: steps");
g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps");
}
#ifdef SMP
n_nurseries = RtsFlags.ParFlags.nNodes;
nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
"initStorage: nurseries");
#else
n_nurseries = 1;
nurseries = g0->steps; // just share nurseries[0] with g0s0
#endif
/* Initialise all steps */
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (s = 0; s < generations[g].n_steps; s++) {
stp = &generations[g].steps[s];
stp->no = s;
stp->blocks = NULL;
stp->n_to_blocks = 0;
stp->n_blocks = 0;
stp->gen = &generations[g];
stp->gen_no = g;
stp->hp = NULL;
stp->hpLim = NULL;
stp->hp_bd = NULL;
stp->scan = NULL;
stp->scan_bd = NULL;
stp->large_objects = NULL;
stp->n_large_blocks = 0;
stp->new_large_objects = NULL;
stp->scavenged_large_objects = NULL;
stp->n_scavenged_large_blocks = 0;
stp->is_compacted = 0;
stp->bitmap = NULL;
initStep(&generations[g].steps[s], g, s);
}
}
#ifdef SMP
for (s = 0; s < n_nurseries; s++) {
initStep(&nurseries[s], 0, s);
}
#endif
/* Set up the destination pointers in each younger gen. step */
for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
for (s = 0; s < generations[g].n_steps-1; s++) {
......@@ -184,8 +206,15 @@ initStorage( void )
}
generations[g].steps[s].to = &generations[g+1].steps[0];
}
oldest_gen->steps[0].to = &oldest_gen->steps[0];
/* The oldest generation has one step and it is compacted. */
#ifdef SMP
for (s = 0; s < n_nurseries; s++) {
nurseries[s].to = generations[0].steps[0].to;
}
#endif
/* The oldest generation has one step. */
if (RtsFlags.GcFlags.compact) {
if (RtsFlags.GcFlags.generations == 1) {
errorBelch("WARNING: compaction is incompatible with -G1; disabled");
......@@ -193,7 +222,18 @@ initStorage( void )
oldest_gen->steps[0].is_compacted = 1;
}
}
oldest_gen->steps[0].to = &oldest_gen->steps[0];
#ifdef SMP
if (RtsFlags.GcFlags.generations == 1) {
errorBelch("-G1 is incompatible with SMP");
stg_exit(1);
}
// No -H, for now
if (RtsFlags.GcFlags.heapSizeSuggestion > 0) {
errorBelch("-H<size> is incompatible with SMP");
stg_exit(1);
}
#endif
/* generation 0 is special: that's the nursery */
generations[0].max_blocks = 0;
......@@ -341,114 +381,123 @@ newDynCAF(StgClosure *caf)
Nursery management.
-------------------------------------------------------------------------- */
void
allocNurseries( void )
{
/*
 * Extend a nursery block list by 'blocks' freshly allocated blocks.
 *
 *   stp     step recorded in each new block's descriptor (bd->step)
 *   tail    current head of the list the new blocks are consed onto;
 *           may be NULL to start a new list
 *   blocks  number of blocks to allocate
 *
 * Returns the new head of the list.  The list is doubly linked through
 * bd->link (forwards) and bd->u.back (backwards); the returned head has
 * its back pointer set to NULL.
 *
 * If 'tail' is NULL and 'blocks' is 0 there is nothing to link, and NULL
 * is returned rather than dereferencing the empty list.
 */
static bdescr *
allocNursery (step *stp, bdescr *tail, nat blocks)
{
    bdescr *bd;
    nat i;

    // Allocate a nursery: we allocate fresh blocks one at a time and
    // cons them on to the front of the list, not forgetting to update
    // the back pointer on the tail of the list to point to the new block.
    for (i=0; i < blocks; i++) {
        // @LDV profiling
        /*
          processNursery() in LdvProfile.c assumes that every block group in
          the nursery contains only a single block. So, if a block group is
          given multiple blocks, change processNursery() accordingly.
        */
        bd = allocBlock();
        bd->link = tail;
        // double-link the nursery: we might need to insert blocks
        if (tail != NULL) {
            tail->u.back = bd;
        }
        bd->step = stp;
        bd->gen_no = 0;
        bd->flags = 0;
        bd->free = bd->start;
        tail = bd;
    }

    // The head of the list has no predecessor.  Guard against the
    // empty-list case (tail == NULL on entry and blocks == 0), where
    // there is no head block to update.
    if (tail != NULL) {
        tail->u.back = NULL;
    }
    return tail;
}
static void
assignNurseriesToCapabilities (void)
{
#ifdef SMP
Capability *cap;
nat i;
g0s0->blocks = NULL;
g0s0->n_blocks = 0;
for (cap = free_capabilities; cap != NULL; cap = cap->link) {
cap->r.rNursery = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
cap->r.rCurrentNursery = cap->r.rNursery;
}
for (i = 0; i < n_nurseries; i++) {
capabilities[i].r.rNursery = &nurseries[i];
capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
}
#else /* SMP */
g0s0->blocks = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
g0s0->n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
g0s0->to_blocks = NULL;
g0s0->n_to_blocks = 0;
MainCapability.r.rNursery = g0s0->blocks;
MainCapability.r.rCurrentNursery = g0s0->blocks;
/* hp, hpLim, hp_bd, to_space etc. aren't used in G0S0 */
MainCapability.r.rNursery = &nurseries[0];
MainCapability.r.rCurrentNursery = nurseries[0].blocks;
#endif
}
void
allocNurseries( void )
{
nat i;
for (i = 0; i < n_nurseries; i++) {
nurseries[i].blocks =
allocNursery(&nurseries[i], NULL,
RtsFlags.GcFlags.minAllocAreaSize);
nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
nurseries[i].to_blocks = NULL;
nurseries[i].n_to_blocks = 0;
/* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */
}
assignNurseriesToCapabilities();
}
void
resetNurseries( void )
{
bdescr *bd;
Capability *cap;
nat i;
bdescr *bd;
step *stp;
#ifdef SMP
/* All tasks must be stopped */
ASSERT(rts_n_free_capabilities == RtsFlags.ParFlags.nNodes);
for (cap = free_capabilities; cap != NULL; cap = cap->link)
#else
cap = &MainCapability;
ASSERT(cap->r.rNursery == g0s0->blocks);
#endif
{
for (bd = cap->r.rNursery; bd; bd = bd->link) {
bd->free = bd->start;
ASSERT(bd->gen_no == 0);
ASSERT(bd->step == g0s0);
IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
for (i = 0; i < n_nurseries; i++) {
stp = &nurseries[i];
for (bd = stp->blocks; bd; bd = bd->link) {
bd->free = bd->start;
ASSERT(bd->gen_no == 0);
ASSERT(bd->step == stp);
IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
}
}
cap->r.rCurrentNursery = cap->r.rNursery;
}
assignNurseriesToCapabilities();
}
bdescr *
allocNursery (bdescr *tail, nat blocks)
lnat
countNurseryBlocks (void)
{
bdescr *bd;
nat i;
nat i;
lnat blocks = 0;
// Allocate a nursery: we allocate fresh blocks one at a time and
// cons them on to the front of the list, not forgetting to update
// the back pointer on the tail of the list to point to the new block.
for (i=0; i < blocks; i++) {
// @LDV profiling
/*
processNursery() in LdvProfile.c assumes that every block group in
the nursery contains only a single block. So, if a block group is
given multiple blocks, change processNursery() accordingly.
*/
bd = allocBlock();
bd->link = tail;
// double-link the nursery: we might need to insert blocks
if (tail != NULL) {
tail->u.back = bd;
for (i = 0; i < n_nurseries; i++) {
blocks += nurseries[i].n_blocks;
}
bd->step = g0s0;
bd->gen_no = 0;
bd->flags = 0;
bd->free = bd->start;
tail = bd;
}
tail->u.back = NULL;
return tail;
return blocks;
}
void
resizeNursery ( nat blocks )
static void
resizeNursery ( step *stp, nat blocks )
{
bdescr *bd;
nat nursery_blocks;
#ifdef SMP
barf("resizeNursery: can't resize in SMP mode");
#endif
nursery_blocks = g0s0->n_blocks;
if (nursery_blocks == blocks) {
return;
}
nursery_blocks = stp->n_blocks;
if (nursery_blocks == blocks) return;
else if (nursery_blocks < blocks) {
if (nursery_blocks < blocks) {
IF_DEBUG(gc, debugBelch("Increasing size of nursery to %d blocks\n",
blocks));
g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
}
else {
bdescr *next_bd;
IF_DEBUG(gc, debugBelch("Decreasing size of nursery to %d blocks\n",
blocks));
bd = g0s0->blocks;
bd = stp->blocks;
while (nursery_blocks > blocks) {
next_bd = bd->link;
next_bd->u.back = NULL;
......@@ -456,18 +505,28 @@ resizeNursery ( nat blocks )
freeGroup(bd);
bd = next_bd;
}
g0s0->blocks = bd;
stp->blocks = bd;
// might have gone just under, by freeing a large block, so make
// up the difference.
if (nursery_blocks < blocks) {
g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
}
}
g0s0->n_blocks = blocks;
ASSERT(countBlocks(g0s0->blocks) == g0s0->n_blocks);
stp->n_blocks = blocks;
ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
}
MainCapability.r.rNursery = g0s0->blocks;
//
// Resize each of the nurseries to the specified size.
//
void
resizeNurseries