/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-2008
 *
 * Generational garbage collector
 *
 * Documentation on the architecture of the Garbage Collector can be
 * found in the online commentary:
 *
 *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
 *
 * ---------------------------------------------------------------------------*/

#ifndef SM_GCTHREAD_H
#define SM_GCTHREAD_H

#include "WSDeque.h"

#include "BeginPrivate.h"

/* -----------------------------------------------------------------------------
   General scheme

   ToDo: move this to the wiki when the implementation is done.

   We're only going to try to parallelise the copying GC for now.  The
   Plan is as follows.

   Each thread has a gc_thread structure (see below) which holds its
   thread-local data.  We'll keep a pointer to this in a thread-local
   variable, or possibly in a register.

   In the gc_thread structure is a gen_workspace for each generation.  The
   primary purpose of the gen_workspace is to hold evacuated objects;
   when an object is evacuated, it is copied to the "todo" block in
   the thread's workspace for the appropriate generation.  When the todo
   block is full, it is pushed to the global gen->todos list, which
   is protected by a lock.  (in fact we intervene a one-place buffer
   here to reduce contention).

   A thread repeatedly grabs a block of work from one of the
   gen->todos lists, scavenges it, and keeps the scavenged block on
   its own ws->scavd_list (this is to avoid unnecessary contention
   returning the completed buffers back to the generation: we can just
   collect them all later).

   When there is no global work to do, we start scavenging the todo
   blocks in the workspaces.  This is where the scan_bd field comes
   in: we can scan the contents of the todo block, when we have
   scavenged the contents of the todo block (up to todo_bd->free), we
   don't want to move this block immediately to the scavd_list,
   because it is probably only partially full.  So we remember that we
   have scanned up to this point by saving the block in ws->scan_bd,
   with the current scan pointer in ws->scan.  Later, when more
   objects have been copied to this block, we can come back and scan
   the rest.  When we visit this workspace again in the future,
   scan_bd may still be the same as todo_bd, or it might be different:
   if enough objects were copied into this block that it filled up,
   then we will have allocated a new todo block, but *not* pushed the
   old one to the generation, because it is partially scanned.

   The reason to leave scanning the todo blocks until last is that we
   want to deal with full blocks as far as possible.
   ------------------------------------------------------------------------- */


/* -----------------------------------------------------------------------------
   Generation Workspace

   A generation workspace exists for each generation for each GC
   thread. The GC thread takes a block from the todos list of the
   generation into the scanbd and then scans it.  Objects referred to
   by those in the scan block are copied into the todo or scavd blocks
   of the relevant generation.

   ------------------------------------------------------------------------- */

typedef struct gen_workspace_ {
    generation * gen;		// the gen for this workspace 
80
    struct gc_thread_ * my_gct; // the gc_thread that contains this workspace
81 82 83 84 85 86

    // where objects to be scavenged go
    bdescr *     todo_bd;
    StgPtr       todo_free;            // free ptr for todo_bd
    StgPtr       todo_lim;             // lim for todo_bd

87 88 89
    WSDeque *    todo_q;
    bdescr *     todo_overflow;
    nat          n_todo_overflow;
90 91 92 93

    // where large objects to be scavenged go
    bdescr *     todo_large_objects;

94
    // Objects that have already been scavenged.
95 96 97 98 99 100 101
    bdescr *     scavd_list;
    nat          n_scavd_blocks;     // count of blocks in this list

    // Partially-full, scavenged, blocks
    bdescr *     part_list;
    unsigned int n_part_blocks;      // count of above

102
    StgWord pad[3];
103

Simon Marlow's avatar
Simon Marlow committed
104 105
} gen_workspace ATTRIBUTE_ALIGNED(64);
// align so that computing gct->gens[n] is a shift, not a multiply
106
// fails if the size is <64, which is why we need the pad above

/* ----------------------------------------------------------------------------
   GC thread object

   Every GC thread has one of these. It contains all the generation
   specific workspaces and other GC thread local information. At some
   later point it may be useful to move this and other data into the
   TLS store of the GC threads.
   ------------------------------------------------------------------------- */

typedef struct gc_thread_ {
#ifdef THREADED_RTS
    OSThreadId id;                 // The OS thread that this struct belongs to
120 121 122
    SpinLock   gc_spin;
    SpinLock   mut_spin;
    volatile rtsBool wakeup;
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
#endif
    nat thread_index;              // a zero based index identifying the thread

    bdescr * free_blocks;          // a buffer of free blocks for this thread
                                   //  during GC without accessing the block
                                   //   allocators spin lock. 

    StgClosure* static_objects;      // live static objects
    StgClosure* scavenged_static_objects;   // static objects scavenged so far

    lnat gc_count;                 // number of GCs this thread has done

    // block that is currently being scanned
    bdescr *     scan_bd;

138 139 140 141 142 143 144 145
    // Remembered sets on this CPU.  Each GC thread has its own
    // private per-generation remembered sets, so it can add an item
    // to the remembered set without taking a lock.  The mut_lists
    // array on a gc_thread is the same as the one on the
    // corresponding Capability; we stash it here too for easy access
    // during GC; see recordMutableGen_GC().
    bdescr **    mut_lists;

146 147 148
    // --------------------
    // evacuate flags

Simon Marlow's avatar
Simon Marlow committed
149
    generation *evac_gen;          // Youngest generation that objects
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
                                   // should be evacuated to in
                                   // evacuate().  (Logically an
                                   // argument to evacuate, but it's
                                   // static a lot of the time so we
                                   // optimise it into a per-thread
                                   // variable).

    rtsBool failed_to_evac;        // failure to evacuate an object typically 
                                   // Causes it to be recorded in the mutable 
                                   // object list

    rtsBool eager_promotion;       // forces promotion to the evac gen
                                   // instead of the to-space
                                   // corresponding to the object

    lnat thunk_selector_depth;     // ummm.... not used as of now

#ifdef USE_PAPI
    int papi_events;
#endif

    // -------------------
    // stats

    lnat copied;
    lnat scanned;
    lnat any_work;
    lnat no_work;
    lnat scav_find_work;

    // -------------------
    // workspaces

    // array of workspaces, indexed by stp->abs_no.  This is placed
    // directly at the end of the gc_thread structure so that we can get from
    // the gc_thread pointer to a workspace using only pointer
    // arithmetic, no memory access.  This happens in the inner loop
    // of the GC, see Evac.c:alloc_for_copy().
Simon Marlow's avatar
Simon Marlow committed
188
    gen_workspace gens[];
189 190 191 192 193
} gc_thread;


// Number of GC threads (defined in the GC implementation, not here).
extern nat n_gc_threads;

194 195 196
/* -----------------------------------------------------------------------------
   The gct variable is thread-local and points to the current thread's
   gc_thread structure.  It is heavily accessed, so we try to put gct
   into a global register variable if possible; if we don't have a
   register then use gcc's __thread extension to create a thread-local
   variable.

   Even on x86 where registers are scarce, it is worthwhile using a
   register variable here: I measured about a 2-5% slowdown with the
   __thread version.
   -------------------------------------------------------------------------- */

206 207 208 209
extern gc_thread **gc_threads;

#if defined(THREADED_RTS)

210 211
#define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg);

212 213
#define SET_GCT(to) gct = (to)

214

215 216

#if (defined(i386_HOST_ARCH) && defined(linux_HOST_OS))
217
// Using __thread is better than stealing a register on x86/Linux, because
218 219 220
// we have too few registers available.  In my tests it was worth
// about 5% in GC performance, but of course that might change as gcc
// improves. -- SDM 2009/04/03
221 222 223
//
// We ought to do the same on MacOS X, but __thread is not
// supported there yet (gcc 4.0.1).
224

225 226 227
extern __thread gc_thread* gct;
#define DECLARE_GCT __thread gc_thread* gct;

228

229
#elif defined(sparc_HOST_ARCH)
230 231 232 233 234 235 236 237 238 239 240 241 242 243
// On SPARC we can't pin gct to a register. Names like %l1 are just offsets
//	into the register window, which change on each function call.
//	
//	There are eight global (non-window) registers, but they're used for other purposes.
//	%g0     -- always zero
//	%g1     -- volatile over function calls, used by the linker
//	%g2-%g3 -- used as scratch regs by the C compiler (caller saves)
//	%g4	-- volatile over function calls, used by the linker
//	%g5-%g7	-- reserved by the OS

extern __thread gc_thread* gct;
#define DECLARE_GCT __thread gc_thread* gct;


244
#elif defined(REG_Base) && !defined(i386_HOST_ARCH)
245 246
// on i386, REG_Base is %ebx which is also used for PIC, so we don't
// want to steal it
247 248 249 250

GLOBAL_REG_DECL(gc_thread*, gct, REG_Base)
#define DECLARE_GCT /* nothing */

251

252 253 254 255 256
#elif defined(REG_R1)

GLOBAL_REG_DECL(gc_thread*, gct, REG_R1)
#define DECLARE_GCT /* nothing */

257

258 259 260 261 262 263 264 265 266 267
#elif defined(__GNUC__)

extern __thread gc_thread* gct;
#define DECLARE_GCT __thread gc_thread* gct;

#else

#error Cannot find a way to declare the thread-local gct

#endif
268

269 270 271 272 273 274 275 276 277 278
#else  // not the threaded RTS

extern StgWord8 the_gc_thread[];

#define gct ((gc_thread*)&the_gc_thread)
#define SET_GCT(to) /*nothing*/
#define DECLARE_GCT /*nothing*/

#endif

#include "EndPrivate.h"

#endif // SM_GCTHREAD_H