Reorganisation to fix problems related to the gct register variable

  - GCAux.c contains code not compiled with the gct register enabled,
    it is callable from outside the GC
  - marking functions are moved to their relevant subsystems, outside
    the GC
  - mark_root needs to save the gct register, as it is called from
    outside the GC
parent ae267d04
......@@ -59,8 +59,8 @@ extern void exitStablePtrTable ( void );
extern void enlargeStablePtrTable ( void );
extern StgWord lookupStableName ( StgPtr p );
extern void markStablePtrTable ( evac_fn evac );
extern void threadStablePtrTable ( evac_fn evac );
extern void markStablePtrTable ( evac_fn evac, void *user );
extern void threadStablePtrTable ( evac_fn evac, void *user );
extern void gcStablePtrTable ( void );
extern void updateStablePtrTable ( rtsBool full );
......
......@@ -536,16 +536,17 @@ extern void resizeNurseries ( nat blocks );
extern void resizeNurseriesFixed ( nat blocks );
extern lnat countNurseryBlocks ( void );
/* -----------------------------------------------------------------------------
Functions from GC.c
-------------------------------------------------------------------------- */
typedef void (*evac_fn)(StgClosure **);
typedef void (*evac_fn)(void *user, StgClosure **root);
extern void threadPaused ( Capability *cap, StgTSO * );
extern StgClosure * isAlive ( StgClosure *p );
extern void markCAFs ( evac_fn evac );
extern void GetRoots ( evac_fn evac );
extern void markCAFs ( evac_fn evac, void *user );
extern void GetRoots ( evac_fn evac, void *user );
/* -----------------------------------------------------------------------------
Stats 'n' DEBUG stuff
......
......@@ -759,3 +759,53 @@ freeCapability (Capability *cap) {
#endif
}
/* ---------------------------------------------------------------------------
   Mark everything directly reachable from the Capabilities.  When
   using multiple GC threads, GC thread i (of n threads) marks every
   Capability c for which (c `mod` n == i).
   ------------------------------------------------------------------------ */
/* Evacuate all roots reachable from a subset of the Capabilities.
 *
 * evac/user: the evacuation callback and its opaque argument.
 * i0/delta:  start index and stride; the caller (one of possibly several
 *            GC threads) covers Capabilities i0, i0+delta, i0+2*delta, ...
 *            so that the threads partition the Capability array between
 *            them even when there are more Capabilities than GC threads.
 */
void
markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta)
{
    nat idx;
    Capability *capability;
    Task *tsk;

    for (idx = i0; idx < n_capabilities; idx += delta) {
        capability = &capabilities[idx];

        // the run queue of this Capability
        evac(user, (StgClosure **)(void *)&capability->run_queue_hd);
        evac(user, (StgClosure **)(void *)&capability->run_queue_tl);
#if defined(THREADED_RTS)
        evac(user, (StgClosure **)(void *)&capability->wakeup_queue_hd);
        evac(user, (StgClosure **)(void *)&capability->wakeup_queue_tl);
#endif

        // TSOs currently suspended while making safe C calls are roots too
        for (tsk = capability->suspended_ccalling_tasks; tsk != NULL;
             tsk = tsk->next) {
            debugTrace(DEBUG_sched,
                       "evac'ing suspended TSO %lu", (unsigned long)tsk->suspended_tso->id);
            evac(user, (StgClosure **)(void *)&tsk->suspended_tso);
        }

#if defined(THREADED_RTS)
        markSparkQueue (evac, user, capability);
#endif
    }

#if !defined(THREADED_RTS)
    // the single-threaded RTS keeps global blocked/sleeping thread queues
    evac(user, (StgClosure **)(void *)&blocked_queue_hd);
    evac(user, (StgClosure **)(void *)&blocked_queue_tl);
    evac(user, (StgClosure **)(void *)&sleeping_queue);
#endif
}
/* Mark the roots of every Capability: markSomeCapabilities over the
 * whole array (start index 0, stride 1). */
void
markCapabilities (evac_fn evac, void *user)
{
    markSomeCapabilities(evac, user, 0, 1);
}
......@@ -235,6 +235,10 @@ extern void grabCapability (Capability **pCap);
// Free a capability on exit
void freeCapability (Capability *cap);
// For the GC:
void markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta);
void markCapabilities (evac_fn evac, void *user);
/* -----------------------------------------------------------------------------
* INLINE functions... private below here
* -------------------------------------------------------------------------- */
......
......@@ -1800,7 +1800,7 @@ inner_loop:
* Compute the retainer set for every object reachable from *tl.
* -------------------------------------------------------------------------- */
static void
retainRoot( StgClosure **tl )
retainRoot(void *user STG_UNUSED, StgClosure **tl)
{
StgClosure *c;
......@@ -1837,7 +1837,7 @@ computeRetainerSet( void )
RetainerSet tmpRetainerSet;
#endif
GetRoots(retainRoot); // for scheduler roots
markCapabilities(retainRoot, NULL); // for scheduler roots
// This function is called after a major GC, when key, value, and finalizer
// all are guaranteed to be valid, or reachable.
......@@ -1846,10 +1846,10 @@ computeRetainerSet( void )
// for retainer profilng.
for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
// retainRoot((StgClosure *)weak);
retainRoot((StgClosure **)&weak);
retainRoot((StgClosure **)&weak, NULL);
// Consider roots from the stable ptr table.
markStablePtrTable(retainRoot);
markStablePtrTable(retainRoot, NULL);
// The following code resets the rs field of each unvisited mutable
// object (computing sumOfNewCostExtra and updating costArray[] when
......
......@@ -73,7 +73,7 @@ extern void awaitUserSignals(void);
* Evacuate the handler queue. _Assumes_ that console event delivery
* has already been blocked.
*/
extern void markSignalHandlers (evac_fn evac);
extern void markSignalHandlers (evac_fn evac, void *user);
#endif /* RTS_USER_SIGNALS */
......
......@@ -162,6 +162,74 @@ newSpark (StgRegTable *reg, StgClosure *p)
return 1;
}
/* -----------------------------------------------------------------------------
 * Mark all nodes pointed to by sparks in the spark queues (for GC).  Does an
 * implicit slide, i.e. after marking all sparks are at the beginning of the
 * spark pool and the spark pool only contains sparkable closures.
 * -------------------------------------------------------------------------- */
/* Evacuate every spark in cap's spark pool via evac(user, ...), and
 * "slide" the pool in place at the same time: sparks that are still
 * worth running are kept (compacted towards the head of the circular
 * buffer), sparks for which closure_SHOULD_SPARK is false are dropped.
 * On return the pool contains only sparkable closures, contiguous from
 * pool->hd to the new pool->tl. */
void
markSparkQueue (evac_fn evac, void *user, Capability *cap)
{
    StgClosure **sparkp, **to_sparkp;
    nat n, pruned_sparks; // stats only
    StgSparkPool *pool;

    PAR_TICKY_MARK_SPARK_QUEUE_START();

    n = 0;
    pruned_sparks = 0;

    pool = &(cap->r.rSparks);

    ASSERT_SPARK_POOL_INVARIANTS(pool);

#if defined(PARALLEL_HASKELL)
    // stats only
    n = 0;
    pruned_sparks = 0;
#endif

    // sparkp scans the existing queue entries; to_sparkp is where the
    // retained sparks are written back.  to_sparkp never overtakes
    // sparkp, so the in-place slide is safe.  Both pointers wrap from
    // pool->lim back to pool->base (circular buffer).
    sparkp = pool->hd;
    to_sparkp = pool->hd;
    while (sparkp != pool->tl) {
        ASSERT(*sparkp!=NULL);
        ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp)));
        // ToDo?: statistics gathering here (also for GUM!)
        if (closure_SHOULD_SPARK(*sparkp)) {
            // still worth sparking: treat it as a GC root and keep it
            evac(user, sparkp);
            *to_sparkp++ = *sparkp;
            if (to_sparkp == pool->lim) {
                to_sparkp = pool->base;
            }
            n++;
        } else {
            // no longer sparkable: drop it
            pruned_sparks++;
        }
        sparkp++;
        if (sparkp == pool->lim) {
            sparkp = pool->base;
        }
    }
    // the retained sparks end at to_sparkp; everything beyond is garbage
    pool->tl = to_sparkp;

    PAR_TICKY_MARK_SPARK_QUEUE_END(n);

#if defined(PARALLEL_HASKELL)
    debugTrace(DEBUG_sched,
               "marked %d sparks and pruned %d sparks on [%x]",
               n, pruned_sparks, mytid);
#else
    debugTrace(DEBUG_sched,
               "marked %d sparks and pruned %d sparks",
               n, pruned_sparks);
#endif

    debugTrace(DEBUG_sched,
               "new spark queue len=%d; (hd=%p; tl=%p)\n",
               sparkPoolSize(pool), pool->hd, pool->tl);
}
#else
StgInt
......@@ -171,6 +239,7 @@ newSpark (StgRegTable *reg STG_UNUSED, StgClosure *p STG_UNUSED)
return 1;
}
#endif /* PARALLEL_HASKELL || THREADED_RTS */
......
......@@ -14,6 +14,7 @@ StgClosure * findSpark (Capability *cap);
void initSparkPools (void);
void freeSparkPool (StgSparkPool *pool);
void createSparkThread (Capability *cap, StgClosure *p);
void markSparkQueue (evac_fn evac, void *user, Capability *cap);
INLINE_HEADER void discardSparks (StgSparkPool *pool);
INLINE_HEADER nat sparkPoolSize (StgSparkPool *pool);
......
......@@ -323,7 +323,7 @@ enlargeStablePtrTable(void)
* -------------------------------------------------------------------------- */
void
markStablePtrTable(evac_fn evac)
markStablePtrTable(evac_fn evac, void *user)
{
snEntry *p, *end_stable_ptr_table;
StgPtr q;
......@@ -347,7 +347,7 @@ markStablePtrTable(evac_fn evac)
// if the ref is non-zero, treat addr as a root
if (p->ref != 0) {
evac((StgClosure **)&p->addr);
evac(user, (StgClosure **)&p->addr);
}
}
}
......@@ -362,7 +362,7 @@ markStablePtrTable(evac_fn evac)
* -------------------------------------------------------------------------- */
void
threadStablePtrTable( evac_fn evac )
threadStablePtrTable( evac_fn evac, void *user )
{
snEntry *p, *end_stable_ptr_table;
StgPtr q;
......@@ -372,12 +372,12 @@ threadStablePtrTable( evac_fn evac )
for (p = stable_ptr_table+1; p < end_stable_ptr_table; p++) {
if (p->sn_obj != NULL) {
evac((StgClosure **)&p->sn_obj);
evac(user, (StgClosure **)&p->sn_obj);
}
q = p->addr;
if (q && (q < (P_)stable_ptr_table || q >= (P_)end_stable_ptr_table)) {
evac((StgClosure **)&p->addr);
evac(user, (StgClosure **)&p->addr);
}
}
}
......
......@@ -17,8 +17,6 @@
#include "Profiling.h"
#include "GetTime.h"
#include "GC.h"
#include "GCUtils.h"
#include "Evac.h"
#if USE_PAPI
#include "Papi.h"
......
......@@ -392,19 +392,19 @@ startSignalHandlers(Capability *cap)
#if !defined(THREADED_RTS)
void
markSignalHandlers (evac_fn evac)
markSignalHandlers (evac_fn evac, void *user)
{
StgPtr *p;
p = next_pending_handler;
while (p != pending_handler_buf) {
p--;
evac((StgClosure **)p);
evac(user, (StgClosure **)p);
}
}
#else
void
markSignalHandlers (evac_fn evac STG_UNUSED)
markSignalHandlers (evac_fn evac STG_UNUSED, void *user STG_UNUSED)
{
}
#endif
......
......@@ -109,6 +109,12 @@ thread (StgClosure **p)
}
}
// Adapter giving thread() the evac_fn signature (void *user first);
// the user argument is unused.
static void
thread_root (void *user STG_UNUSED, StgClosure **root)
{
    thread(root);
}
// This version of thread() takes a (void *), used to circumvent
// warnings from gcc about pointer punning and strict aliasing.
STATIC_INLINE void thread_ (void *p) { thread((StgClosure **)p); }
......@@ -955,13 +961,13 @@ update_bkwd_compact( step *stp )
}
void
compact(void)
compact(StgClosure *static_objects)
{
nat g, s, blocks;
step *stp;
// 1. thread the roots
GetRoots((evac_fn)thread);
markCapabilities((evac_fn)thread_root, NULL);
// the weak pointer lists...
if (weak_ptr_list != NULL) {
......@@ -999,13 +1005,13 @@ compact(void)
}
// the static objects
thread_static(gct->scavenged_static_objects /* ToDo: ok? */);
thread_static(static_objects /* ToDo: ok? */);
// the stable pointer table
threadStablePtrTable((evac_fn)thread);
threadStablePtrTable((evac_fn)thread_root, NULL);
// the CAF list (used by GHCi)
markCAFs((evac_fn)thread);
markCAFs((evac_fn)thread_root, NULL);
// 2. update forward ptrs
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
......
......@@ -74,6 +74,6 @@ is_marked(StgPtr p, bdescr *bd)
return (*bitmap_word & bit_mask);
}
void compact(void);
extern void compact (StgClosure *static_objects);
#endif /* GCCOMPACT_H */
......@@ -16,6 +16,7 @@
#include "MBlock.h"
#include "Evac.h"
#include "GC.h"
#include "GCThread.h"
#include "GCUtils.h"
#include "Compact.h"
#include "Prelude.h"
......
......@@ -31,7 +31,3 @@ REGPARM1 void evacuate (StgClosure **p);
REGPARM1 void evacuate1 (StgClosure **p);
extern lnat thunk_selector_depth;
#if defined(PROF_SPIN) && defined(THREADED_RTS)
StgWord64 whitehole_spin;
#endif
This diff is collapsed.
......@@ -14,172 +14,8 @@
#ifndef GC_H
#define GC_H
#include "OSThreads.h"
/* -----------------------------------------------------------------------------
General scheme
ToDo: move this to the wiki when the implementation is done.
We're only going to try to parallelise the copying GC for now. The
Plan is as follows.
Each thread has a gc_thread structure (see below) which holds its
thread-local data. We'll keep a pointer to this in a thread-local
variable, or possibly in a register.
In the gc_thread structure is a step_workspace for each step. The
primary purpose of the step_workspace is to hold evacuated objects;
when an object is evacuated, it is copied to the "todo" block in
the thread's workspace for the appropriate step. When the todo
block is full, it is pushed to the global step->todos list, which
is protected by a lock. (in fact we intervene a one-place buffer
here to reduce contention).
A thread repeatedly grabs a block of work from one of the
step->todos lists, scavenges it, and keeps the scavenged block on
its own ws->scavd_list (this is to avoid unnecessary contention
returning the completed buffers back to the step: we can just
collect them all later).
When there is no global work to do, we start scavenging the todo
blocks in the workspaces. This is where the scan_bd field comes
in: we can scan the contents of the todo block, when we have
scavenged the contents of the todo block (up to todo_bd->free), we
don't want to move this block immediately to the scavd_list,
because it is probably only partially full. So we remember that we
have scanned up to this point by saving the block in ws->scan_bd,
with the current scan pointer in ws->scan. Later, when more
objects have been copied to this block, we can come back and scan
the rest. When we visit this workspace again in the future,
scan_bd may still be the same as todo_bd, or it might be different:
if enough objects were copied into this block that it filled up,
then we will have allocated a new todo block, but *not* pushed the
old one to the step, because it is partially scanned.
The reason to leave scanning the todo blocks until last is that we
want to deal with full blocks as far as possible.
------------------------------------------------------------------------- */
/* -----------------------------------------------------------------------------
Step Workspace
A step workspace exists for each step for each GC thread. The GC
thread takes a block from the todos list of the step into the
scanbd and then scans it. Objects referred to by those in the scan
block are copied into the todo or scavd blocks of the relevant step.
------------------------------------------------------------------------- */
// One step_workspace exists per (step, GC thread) pair.  It holds the
// thread-private block lists used while evacuating and scavenging
// objects belonging to that step, avoiding contention on the step's
// shared lists.
typedef struct step_workspace_ {
    step * step;                 // the step this workspace belongs to
    struct gc_thread_ * gct;     // the gc_thread that contains this workspace

    // where objects to be scavenged go: the block currently being
    // filled, with cached free/limit pointers for fast allocation
    bdescr *     todo_bd;
    StgPtr       todo_free;      // free ptr for todo_bd
    StgPtr       todo_lim;       // lim for todo_bd

    bdescr *     buffer_todo_bd; // one-place buffer to reduce contention
                                 // on the step's todos list

    // where large objects to be scavenged go
    bdescr *     todo_large_objects;

    // objects that have already been scavenged
    bdescr *     scavd_list;
    nat          n_scavd_blocks; // count of blocks in this list

    // partially-full, scavenged blocks
    bdescr *     part_list;
    unsigned int n_part_blocks;  // count of above
} step_workspace;
/* ----------------------------------------------------------------------------
   GC thread object

   Every GC thread has one of these.  It contains all the step-specific
   workspaces and other GC-thread-local information.  At some later
   point it may be useful to move this data into the TLS store of the
   GC threads.
   ------------------------------------------------------------------------- */
typedef struct gc_thread_ {
#ifdef THREADED_RTS
    OSThreadId id;               // The OS thread that this struct belongs to
    Mutex      wake_mutex;
    Condition  wake_cond;        // So we can go to sleep between GCs
    rtsBool    wakeup;           // flag: this thread has been asked to run
    rtsBool    exit;             // flag: this thread should terminate
#endif
    nat thread_index;            // a zero based index identifying the thread

    bdescr * free_blocks;        // a buffer of free blocks for this thread
                                 //  during GC without accessing the block
                                 //   allocators spin lock.

    StgClosure* static_objects;            // live static objects
    StgClosure* scavenged_static_objects;  // static objects scavenged so far

    lnat gc_count;               // number of GCs this thread has done

    // block that is currently being scanned
    bdescr *     scan_bd;

    // --------------------
    // evacuate flags

    step *evac_step;             // Youngest generation that objects
                                 // should be evacuated to in
                                 // evacuate().  (Logically an
                                 // argument to evacuate, but it's
                                 // static a lot of the time so we
                                 // optimise it into a per-thread
                                 // variable).

    rtsBool failed_to_evac;      // failure to evacuate an object typically
                                 // Causes it to be recorded in the mutable
                                 // object list

    rtsBool eager_promotion;     // forces promotion to the evac gen
                                 // instead of the to-space
                                 // corresponding to the object

    lnat thunk_selector_depth;   // ummm.... not used as of now

#ifdef USE_PAPI
    int papi_events;             // handle for this thread's PAPI event set
#endif

    // -------------------
    // stats

    lnat copied;
    lnat scanned;
    lnat any_work;
    lnat no_work;
    lnat scav_find_work;

    // -------------------
    // workspaces

    // array of workspaces, indexed by stp->abs_no.  This is placed
    // directly at the end of the gc_thread structure so that we can get from
    // the gc_thread pointer to a workspace using only pointer
    // arithmetic, no memory access.  This happens in the inner loop
    // of the GC, see Evac.c:alloc_for_copy().
    step_workspace steps[];
} gc_thread;
extern nat N;
extern rtsBool major_gc;
extern nat n_gc_threads;
extern gc_thread **gc_threads;
register gc_thread *gct __asm__("%rbx");
// extern gc_thread *gct; // this thread's gct TODO: make thread-local
extern bdescr *mark_stack_bdescr;
extern StgPtr *mark_stack;
......@@ -196,7 +32,15 @@ extern long copied;
extern nat mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS;
#endif
StgClosure * isAlive(StgClosure *p);
extern void markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta);
#ifdef THREADED_RTS
extern SpinLock gc_alloc_block_sync;
#endif
#if defined(PROF_SPIN) && defined(THREADED_RTS)
StgWord64 whitehole_spin;
#endif
#define WORK_UNIT_WORDS 128
......
/* -----------------------------------------------------------------------------
*
* (c) The GHC Team 1998-2008
*
* Functions called from outside the GC need to be separate from GC.c,
* because GC.c is compiled with register variable(s).
*
* ---------------------------------------------------------------------------*/
#include "Rts.h"
#include "Storage.h"
#include "MBlock.h"
#include "GC.h"
#include "Compact.h"
#include "Task.h"
#include "Capability.h"
#include "Trace.h"
#include "Schedule.h"
// DO NOT include "GCThread.h", we don't want the register variable
/* -----------------------------------------------------------------------------
isAlive determines whether the given closure is still alive (after
a garbage collection) or not. It returns the new address of the
closure if it is alive, or NULL otherwise.
NOTE: Use it before compaction only!
It untags and (if needed) retags pointers to closures.
-------------------------------------------------------------------------- */
StgClosure *
isAlive(StgClosure *p)
{
    const StgInfoTable *info;
    bdescr *bd;
    StgWord tag;
    StgClosure *q;

    // Loop rather than recurse: indirections and relocated TSOs are
    // followed by updating p and continuing.
    while (1) {
        /* The tag and the pointer are split, to be merged later when needed. */
        tag = GET_CLOSURE_TAG(p);
        q = UNTAG_CLOSURE(p);

        ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
        info = get_itbl(q);

        // ignore static closures
        //
        // ToDo: for static closures, check the static link field.
        // Problem here is that we sometimes don't set the link field, eg.
        // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs.
        //
        if (!HEAP_ALLOCED(q)) {
            return p;
        }

        // ignore closures in generations that we're not collecting.
        bd = Bdescr((P_)q);
        if (bd->gen_no > N) {
            return p;
        }

        // if it's a pointer into to-space, then we're done
        if (bd->flags & BF_EVACUATED) {
            return p;
        }

        // large objects use the evacuated flag
        // (we just checked it above, so reaching here means the large
        // object was not evacuated: it is dead)
        if (bd->flags & BF_LARGE) {
            return NULL;
        }

        // check the mark bit for compacted steps
        if ((bd->flags & BF_COMPACTED) && is_marked((P_)q,bd)) {
            return p;
        }

        switch (info->type) {

        case IND:
        case IND_STATIC:
        case IND_PERM:
        case IND_OLDGEN:            // rely on compatible layout with StgInd
        case IND_OLDGEN_PERM:
            // follow indirections
            p = ((StgInd *)q)->indirectee;
            continue;

        case EVACUATED:
            // alive!  return the closure's new location
            return ((StgEvacuated *)q)->evacuee;

        case TSO:
            if (((StgTSO *)q)->what_next == ThreadRelocated) {
                // the TSO has been moved; chase the forwarding link
                p = (StgClosure *)((StgTSO *)q)->link;
                continue;
            }
            return NULL;

        default:
            // dead.
            return NULL;
        }
    }
}
/* -----------------------------------------------------------------------------
Reverting CAFs
-------------------------------------------------------------------------- */
/* Restore every CAF on revertible_caf_list to its pre-entry state by
 * reinstating the saved info pointer, then empty the list.  Used e.g.
 * when GHCi unloads code. */
void
revertCAFs( void )
{
    StgIndStatic *caf = (StgIndStatic *)revertible_caf_list;

    while (caf != NULL) {
        SET_INFO(caf, caf->saved_info);
        caf->saved_info = NULL;
        // could, but not necessary: caf->static_link = NULL;
        caf = (StgIndStatic *)caf->static_link;
    }
    revertible_caf_list = NULL;
}
void
markCAFs (evac_fn evac, void *user)
{
StgIndStatic *c;
for (c = (StgIndStatic *)caf_list; c != NULL;
c = (StgIndStatic *)c->static_link)
{
evac(user, &c->indirectee);
}
for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
c = (StgIndStatic *)c->static_link)
{
evac(user, &c->indirectee);
}
}
/* -----------------------------------------------------------------------------
*
* (c) The GHC Team 1998-2006
*
* Generational garbage collector
*
* Documentation on the architecture of the Garbage Collector can be
* found in the online commentary:
*
* http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
*
* ---------------------------------------------------------------------------*/
#ifndef GCTHREAD_H
#define GCTHREAD_H
#include "OSThreads.h"
/* -----------------------------------------------------------------------------
General scheme
ToDo: move this to the wiki when the implementation is done.
We're only going to try to parallelise the copying GC for now. The
Plan is as follows.