GC.c 52.7 KB
Newer Older
1 2
/* -----------------------------------------------------------------------------
 *
3
 * (c) The GHC Team 1998-2008
4 5 6
 *
 * Generational garbage collector
 *
7 8
 * Documentation on the architecture of the Garbage Collector can be
 * found in the online commentary:
thoughtpolice's avatar
thoughtpolice committed
9
 *
10
 *   http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11
 *
12 13
 * ---------------------------------------------------------------------------*/

Simon Marlow's avatar
Simon Marlow committed
14
#include "PosixSource.h"
15
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
16 17
#include "HsFFI.h"

18 19 20 21 22 23 24 25 26 27 28 29 30
#include "GC.h"
#include "GCThread.h"
#include "GCTDecl.h"            // NB. before RtsSignals.h which
                                // clobbers REG_R1 on arm/Linux
#include "Compact.h"
#include "Evac.h"
#include "Scav.h"
#include "GCUtils.h"
#include "MarkStack.h"
#include "MarkWeak.h"
#include "Sparks.h"
#include "Sweep.h"

Simon Marlow's avatar
Simon Marlow committed
31
#include "Storage.h"
32 33 34 35 36 37 38 39 40 41 42 43 44 45
#include "RtsUtils.h"
#include "Apply.h"
#include "Updates.h"
#include "Stats.h"
#include "Schedule.h"
#include "Sanity.h"
#include "BlockAlloc.h"
#include "ProfHeap.h"
#include "Weak.h"
#include "Prelude.h"
#include "RtsSignals.h"
#include "STM.h"
#include "Trace.h"
#include "RetainerProfile.h"
Simon Marlow's avatar
Simon Marlow committed
46
#include "LdvProfile.h"
47
#include "RaiseAsync.h"
48
#include "Papi.h"
Simon Marlow's avatar
Simon Marlow committed
49
#include "Stable.h"
50
#include "CheckUnload.h"
51 52

#include <string.h> // for memset()
53
#include <unistd.h>
54

55 56 57 58
/* -----------------------------------------------------------------------------
   Global variables
   -------------------------------------------------------------------------- */

/* STATIC OBJECT LIST.
 *
 * During GC:
 * We maintain a linked list of static objects that are still live.
 * The requirements for this list are:
 *
 *  - we need to scan the list while adding to it, in order to
 *    scavenge all the static objects (in the same way that
 *    breadth-first scavenging works for dynamic objects).
 *
 *  - we need to be able to tell whether an object is already on
 *    the list, to break loops.
 *
 * Each static object has a "static link field", which we use for
 * linking objects on to the list.  We use a stack-type list, consing
 * objects on the front as they are added (this means that the
 * scavenge phase is depth-first, not breadth-first, but that
 * shouldn't matter).
 *
 * A separate list is kept for objects that have been scavenged
 * already - this is so that we can zero all the marks afterwards.
 *
 * An object is on the list if its static link field is non-zero; this
 * means that we have to mark the end of the list with '1', not NULL.
 *
 * Extra notes for generational GC:
 *
 * Each generation has a static object list associated with it.  When
 * collecting generations up to N, we treat the static object lists
 * from generations > N as roots.
 *
 * We build up a static object list while collecting generations 0..N,
 * which is then appended to the static object list of generation N+1.
 */

/* N is the oldest generation being collected, where the generations
 * are numbered starting at 0.  A major GC (indicated by the major_gc
 * flag) is when we're collecting all generations.  We only attempt to
 * deal with static objects and GC CAFs when doing a major GC.
 */
nat N;
rtsBool major_gc;

/* Data used for allocation area sizing.
 */
static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC

/* Mut-list stats */
#ifdef DEBUG
nat mutlist_MUTVARS,
    mutlist_MUTARRS,
    mutlist_MVARS,
    mutlist_TVAR,
    mutlist_TVAR_WATCH_QUEUE,
    mutlist_TREC_CHUNK,
    mutlist_TREC_HEADER,
    mutlist_ATOMIC_INVARIANT,
    mutlist_INVARIANT_CHECK_QUEUE,
    mutlist_OTHERS;
#endif

/* Thread-local data for each GC thread
 */
gc_thread **gc_threads = NULL;

#if !defined(THREADED_RTS)
// Static storage for the single GC thread in the non-threaded RTS.
// NOTE(review): the trailing space is sized as 64 gen_workspaces —
// presumably an upper bound on the generation count; confirm elsewhere.
StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
#endif

// Number of threads running in *this* GC.  Affects how many
// step->todos[] lists we have to look in to find work.
nat n_gc_threads;

// For stats:
long copied;        // *words* copied & scavenged during this GC

rtsBool work_stealing;

DECLARE_GCT

/* -----------------------------------------------------------------------------
   Static function declarations
   -------------------------------------------------------------------------- */

static void mark_root               (void *user, StgClosure **root);
static void zero_static_object_list (StgClosure* first_static);
static void prepare_collected_gen   (generation *gen);
static void prepare_uncollected_gen (generation *gen);
static void init_gc_thread          (gc_thread *t);
static void resize_generations      (void);
static void resize_nursery          (void);
static void start_gc_threads        (void);
static void scavenge_until_all_done (void);
static StgWord inc_running          (void);
static StgWord dec_running          (void);
static void wakeup_gc_threads       (nat me);
static void shutdown_gc_threads     (nat me);
static void collect_gct_blocks      (void);
static void collect_pinned_object_blocks (void);

#if defined(DEBUG)
static void gcCAFs                  (void);
#endif

/* -----------------------------------------------------------------------------
   The mark stack.
   -------------------------------------------------------------------------- */

bdescr *mark_stack_top_bd; // topmost block in the mark stack
bdescr *mark_stack_bd;     // current block in the mark stack
StgPtr mark_sp;            // pointer to the next unallocated mark stack entry
170 171

/* -----------------------------------------------------------------------------
   GarbageCollect: the main entry point to the garbage collector.

   The collect_gen parameter is gotten by calling calcNeeded().

   collect_gen    - the oldest generation to collect (becomes the global N);
                    collecting the oldest generation makes this a major GC.
   do_heap_census - if true, perform a heap census after collection (before
                    resurrectThreads(), which would leave slop behind).
   gc_type        - SYNC_GC_PAR selects a parallel GC using all capabilities
                    (THREADED_RTS only); otherwise a single-threaded GC.
   cap            - the Capability of the thread driving this GC; its
                    gc_thread becomes the main GC thread.

   Locks held: all capabilities are held throughout GarbageCollect().
   -------------------------------------------------------------------------- */

void
GarbageCollect (nat collect_gen,
                rtsBool do_heap_census,
                nat gc_type USED_IF_THREADS,
                Capability *cap)
{
  bdescr *bd;
  generation *gen;
  StgWord live_blocks, live_words, par_max_copied, par_tot_copied;
#if defined(THREADED_RTS)
  gc_thread *saved_gct;
#endif
  nat g, n;

  // necessary if we stole a callee-saves register for gct:
#if defined(THREADED_RTS)
  saved_gct = gct;
#endif

#ifdef PROFILING
  CostCentreStack *save_CCS[n_capabilities];
#endif

  ACQUIRE_SM_LOCK;

#if defined(RTS_USER_SIGNALS)
  if (RtsFlags.MiscFlags.install_signal_handlers) {
    // block signals
    blockUserSignals();
  }
#endif

  ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
  // otherwise adjust the padding in gen_workspace.

  // this is the main thread
  SET_GCT(gc_threads[cap->no]);

  // tell the stats department that we've started a GC
  stat_startGC(cap, gct);

  // lock the StablePtr table
  stableLock();

#ifdef DEBUG
  mutlist_MUTVARS = 0;
  mutlist_MUTARRS = 0;
  mutlist_MVARS = 0;
  mutlist_TVAR = 0;
  mutlist_TVAR_WATCH_QUEUE = 0;
  mutlist_TREC_CHUNK = 0;
  mutlist_TREC_HEADER = 0;
  mutlist_ATOMIC_INVARIANT = 0;
  mutlist_INVARIANT_CHECK_QUEUE = 0;
  mutlist_OTHERS = 0;
#endif

  // attribute any costs to CCS_GC
#ifdef PROFILING
  for (n = 0; n < n_capabilities; n++) {
      save_CCS[n] = capabilities[n]->r.rCCCS;
      capabilities[n]->r.rCCCS = CCS_GC;
  }
#endif

  /* Figure out which generation to collect
   */
  N = collect_gen;
  major_gc = (N == RtsFlags.GcFlags.generations-1);

#if defined(THREADED_RTS)
  work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
                  N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
      // It's not always a good idea to do load balancing in parallel
      // GC.  In particular, for a parallel program we don't want to
      // lose locality by moving cached data into another CPU's cache
      // (this effect can be quite significant).
      //
      // We could have a more complex way to determine whether to do
      // work stealing or not, e.g. it might be a good idea to do it
      // if the heap is big.  For now, we just turn it on or off with
      // a flag.
#endif

  /* Start threads, so they can be spinning up while we finish initialisation.
   */
  start_gc_threads();

#if defined(THREADED_RTS)
  /* How many threads will be participating in this GC?
   * We don't try to parallelise minor GCs (unless the user asks for
   * it with +RTS -gn0), or mark/compact/sweep GC.
   */
  if (gc_type == SYNC_GC_PAR) {
      n_gc_threads = n_capabilities;
  } else {
      n_gc_threads = 1;
  }
#else
  n_gc_threads = 1;
#endif

  debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
             N, n_gc_threads);

#ifdef DEBUG
  // check for memory leaks if DEBUG is on
  memInventory(DEBUG_gc);
#endif

  // do this *before* we start scavenging
  collectFreshWeakPtrs();

  // check sanity *before* GC
  IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));

  // gather blocks allocated using allocatePinned() from each capability
  // and put them on the g0->large_object list.
  collect_pinned_object_blocks();

  // Initialise all the generations/steps that we're collecting.
  for (g = 0; g <= N; g++) {
      prepare_collected_gen(&generations[g]);
  }
  // Initialise all the generations/steps that we're *not* collecting.
  for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
      prepare_uncollected_gen(&generations[g]);
  }

  // Prepare this gc_thread
  init_gc_thread(gct);

  /* Allocate a mark stack if we're doing a major collection.
   */
  if (major_gc && oldest_gen->mark) {
      mark_stack_bd     = allocBlock();
      mark_stack_top_bd = mark_stack_bd;
      mark_stack_bd->link = NULL;
      mark_stack_bd->u.back = NULL;
      mark_sp           = mark_stack_bd->start;
  } else {
      mark_stack_bd     = NULL;
      mark_stack_top_bd = NULL;
      mark_sp           = NULL;
  }

  /* -----------------------------------------------------------------------
   * follow all the roots that we know about:
   */

  // the main thread is running: this prevents any other threads from
  // exiting prematurely, so we can start them now.
  // NB. do this after the mutable lists have been saved above, otherwise
  // the other GC threads will be writing into the old mutable lists.
  inc_running();
  wakeup_gc_threads(gct->thread_index);

  traceEventGcWork(gct->cap);

  // scavenge the capability-private mutable lists.  This isn't part
  // of markSomeCapabilities() because markSomeCapabilities() can only
  // call back into the GC via mark_root() (due to the gct register
  // variable).
  if (n_gc_threads == 1) {
      for (n = 0; n < n_capabilities; n++) {
#if defined(THREADED_RTS)
          scavenge_capability_mut_Lists1(capabilities[n]);
#else
          scavenge_capability_mut_lists(capabilities[n]);
#endif
      }
  } else {
      scavenge_capability_mut_lists(gct->cap);
      for (n = 0; n < n_capabilities; n++) {
          if (gc_threads[n]->idle) {
              // idle threads won't scavenge their own mutable list,
              // so do it for them here (marking their capability too).
              markCapability(mark_root, gct, capabilities[n],
                             rtsTrue/*don't mark sparks*/);
              scavenge_capability_mut_lists(capabilities[n]);
          }
      }
  }

  // follow roots from the CAF list (used by GHCi)
  gct->evac_gen_no = 0;
  markCAFs(mark_root, gct);

  // follow all the roots that the application knows about.
  gct->evac_gen_no = 0;
  if (n_gc_threads == 1) {
      for (n = 0; n < n_capabilities; n++) {
          markCapability(mark_root, gct, capabilities[n],
                         rtsTrue/*don't mark sparks*/);
      }
  } else {
      markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
  }

  markScheduler(mark_root, gct);

#if defined(RTS_USER_SIGNALS)
  // mark the signal handlers (signals should be already blocked)
  markSignalHandlers(mark_root, gct);
#endif

  // Mark the weak pointer list, and prepare to detect dead weak pointers.
  markWeakPtrList();
  initWeakForGC();

  // Mark the stable pointer table.
  markStableTables(mark_root, gct);

  /* -------------------------------------------------------------------------
   * Repeatedly scavenge all the areas we know about until there's no
   * more scavenging to be done.
   */
  for (;;)
  {
      scavenge_until_all_done();
      // The other threads are now stopped.  We might recurse back to
      // here, but from now on this is the only thread.

      // must be last...  invariant is that everything is fully
      // scavenged at this point.
      if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
          inc_running();
          continue;
      }

      // If we get to here, there's really nothing left to do.
      break;
  }

  shutdown_gc_threads(gct->thread_index);

  // Now see which stable names are still alive.
  gcStableTables();

#ifdef THREADED_RTS
  if (n_gc_threads == 1) {
      for (n = 0; n < n_capabilities; n++) {
          pruneSparkQueue(capabilities[n]);
      }
  } else {
      for (n = 0; n < n_capabilities; n++) {
          if (n == cap->no || gc_threads[n]->idle) {
              pruneSparkQueue(capabilities[n]);
         }
      }
  }
#endif

#ifdef PROFILING
  // We call processHeapClosureForDead() on every closure destroyed during
  // the current garbage collection, so we invoke LdvCensusForDead().
  if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
      || RtsFlags.ProfFlags.bioSelector != NULL) {
      RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
      LdvCensusForDead(N);
      ACQUIRE_SM_LOCK;
  }
#endif

  // NO MORE EVACUATION AFTER THIS POINT!

  // Finally: compact or sweep the oldest generation.
  if (major_gc && oldest_gen->mark) {
      if (oldest_gen->compact)
          compact(gct->scavenged_static_objects);
      else
          sweep(oldest_gen);
  }

  // Gather per-thread copy statistics into the globals used by stat_endGC().
  copied = 0;
  par_max_copied = 0;
  par_tot_copied = 0;
  {
      nat i;
      for (i=0; i < n_gc_threads; i++) {
          if (n_gc_threads > 1) {
              debugTrace(DEBUG_gc,"thread %d:", i);
              debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
              debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
              debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
              debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
              debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
          }
          copied += gc_threads[i]->copied;
          par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
      }
      par_tot_copied = copied;
      if (n_gc_threads == 1) {
          // par_* stats are only meaningful for a parallel GC
          par_max_copied = 0;
          par_tot_copied = 0;
      }
  }

  // Run through all the generations/steps and tidy up.
  // We're going to:
  //   - count the amount of "live" data (live_words, live_blocks)
  //   - count the amount of "copied" data in this GC (copied)
  //   - free from-space
  //   - make to-space the new from-space (set BF_EVACUATED on all blocks)
  //
  live_words = 0;
  live_blocks = 0;

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {

    if (g == N) {
      generations[g].collections++; // for stats
      if (n_gc_threads > 1) generations[g].par_collections++;
    }

    // Count the mutable list as bytes "copied" for the purposes of
    // stats.  Every mutable list is copied during every GC.
    if (g > 0) {
        W_ mut_list_size = 0;
        for (n = 0; n < n_capabilities; n++) {
            mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
        }
        copied +=  mut_list_size;

        debugTrace(DEBUG_gc,
                   "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
                   (unsigned long)(mut_list_size * sizeof(W_)),
                   mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
                   mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
                   mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
                   mutlist_ATOMIC_INVARIANT,
                   mutlist_INVARIANT_CHECK_QUEUE,
                   mutlist_OTHERS);
    }

    bdescr *next, *prev;
    gen = &generations[g];

    // for generations we collected...
    if (g <= N) {

        /* free old memory and shift to-space into from-space for all
         * the collected steps (except the allocation area).  These
         * freed blocks will probably be quickly recycled.
         */
        if (gen->mark)
        {
            // tack the new blocks on the end of the existing blocks
            if (gen->old_blocks != NULL) {

                prev = NULL;
                for (bd = gen->old_blocks; bd != NULL; bd = next) {

                    next = bd->link;

                    if (!(bd->flags & BF_MARKED))
                    {
                        // unmarked block: unlink and free it
                        if (prev == NULL) {
                            gen->old_blocks = next;
                        } else {
                            prev->link = next;
                        }
                        freeGroup(bd);
                        gen->n_old_blocks--;
                    }
                    else
                    {
                        gen->n_words += bd->free - bd->start;

                        // NB. this step might not be compacted next
                        // time, so reset the BF_MARKED flags.
                        // They are set before GC if we're going to
                        // compact.  (search for BF_MARKED above).
                        bd->flags &= ~BF_MARKED;

                        // between GCs, all blocks in the heap except
                        // for the nursery have the BF_EVACUATED flag set.
                        bd->flags |= BF_EVACUATED;

                        prev = bd;
                    }
                }

                if (prev != NULL) {
                    prev->link = gen->blocks;
                    gen->blocks = gen->old_blocks;
                }
            }
            // add the new blocks to the block tally
            gen->n_blocks += gen->n_old_blocks;
            ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
            ASSERT(countOccupied(gen->blocks) == gen->n_words);
        }
        else // not compacted
        {
            freeChain(gen->old_blocks);
        }

        gen->old_blocks = NULL;
        gen->n_old_blocks = 0;

        /* LARGE OBJECTS.  The current live large objects are chained on
         * scavenged_large, having been moved during garbage
         * collection from large_objects.  Any objects left on the
         * large_objects list are therefore dead, so we free them here.
         */
        freeChain(gen->large_objects);
        gen->large_objects  = gen->scavenged_large_objects;
        gen->n_large_blocks = gen->n_scavenged_large_blocks;
        gen->n_large_words  = countOccupied(gen->large_objects);
        gen->n_new_large_words = 0;
    }
    else // for generations > N
    {
        /* For older generations, we need to append the
         * scavenged_large_object list (i.e. large objects that have been
         * promoted during this GC) to the large_object list for that step.
         */
        for (bd = gen->scavenged_large_objects; bd; bd = next) {
            next = bd->link;
            dbl_link_onto(bd, &gen->large_objects);
            gen->n_large_words += bd->free - bd->start;
        }

        // add the new blocks we promoted during this GC
        gen->n_large_blocks += gen->n_scavenged_large_blocks;
    }

    ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
    ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);

    gen->scavenged_large_objects = NULL;
    gen->n_scavenged_large_blocks = 0;

    // Count "live" data
    live_words  += genLiveWords(gen);
    live_blocks += genLiveBlocks(gen);

    // add in the partial blocks in the gen_workspaces, but ignore gen 0
    // if this is a local GC (we can't count another capability's part_list)
    {
        nat i;
        for (i = 0; i < n_capabilities; i++) {
            live_words  += gcThreadLiveWords(i, gen->no);
            live_blocks += gcThreadLiveBlocks(i, gen->no);
        }
    }
  } // for all generations

  // update the max size of older generations after a major GC
  resize_generations();

  // Free the mark stack.
  if (mark_stack_top_bd != NULL) {
      debugTrace(DEBUG_gc, "mark stack: %d blocks",
                 countBlocks(mark_stack_top_bd));
      freeChain(mark_stack_top_bd);
  }

  // Free any bitmaps.
  for (g = 0; g <= N; g++) {
      gen = &generations[g];
      if (gen->bitmap != NULL) {
          freeGroup(gen->bitmap);
          gen->bitmap = NULL;
      }
  }

  resize_nursery();

  resetNurseries();

 // mark the garbage collected CAFs as dead
#if defined(DEBUG)
  if (major_gc) { gcCAFs(); }
#endif

  // Update the stable pointer hash table.
  updateStableTables(major_gc);

  // unlock the StablePtr table.  Must be before scheduleFinalizers(),
  // because a finalizer may call hs_free_fun_ptr() or
  // hs_free_stable_ptr(), both of which access the StablePtr table.
  stableUnlock();

  // Must be after stableUnlock(), because it might free stable ptrs.
  if (major_gc) {
      checkUnload (gct->scavenged_static_objects);
  }

#ifdef PROFILING
  // resetStaticObjectForRetainerProfiling() must be called before
  // zeroing below.

  // ToDo: fix the gct->scavenged_static_objects below
  resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
#endif

  // zero the scavenged static object list
  if (major_gc) {
      nat i;
      if (n_gc_threads == 1) {
          zero_static_object_list(gct->scavenged_static_objects);
      } else {
          for (i = 0; i < n_gc_threads; i++) {
              if (!gc_threads[i]->idle) {
                  zero_static_object_list(gc_threads[i]->scavenged_static_objects);
              }
          }
      }
  }

  // Start any pending finalizers.  Must be after
  // updateStableTables() and stableUnlock() (see #4221).
  RELEASE_SM_LOCK;
  scheduleFinalizers(cap, dead_weak_ptr_list);
  ACQUIRE_SM_LOCK;

  // check sanity after GC
  // before resurrectThreads(), because that might overwrite some
  // closures, which will cause problems with THREADED where we don't
  // fill slop.
  IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));

  // If a heap census is due, we need to do it before
  // resurrectThreads(), for the same reason as checkSanity above:
  // resurrectThreads() will overwrite some closures and leave slop
  // behind.
  if (do_heap_census) {
      debugTrace(DEBUG_sched, "performing heap census");
      RELEASE_SM_LOCK;
      heapCensus(gct->gc_start_cpu);
      ACQUIRE_SM_LOCK;
  }

  // send exceptions to any threads which were about to die
  RELEASE_SM_LOCK;
  resurrectThreads(resurrected_threads);
  ACQUIRE_SM_LOCK;

  if (major_gc) {
      W_ need, got;
      need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
      got = mblocks_allocated;
      /* If the amount of data remains constant, next major GC we'll
         require (F+1)*need. We leave (F+2)*need in order to reduce
         repeated deallocation and reallocation. */
      need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
      if (got > need) {
          returnMemoryToOS(got - need);
      }
  }

  // extra GC trace info
  IF_DEBUG(gc, statDescribeGens());

#ifdef DEBUG
  // symbol-table based profiling
  /*  heapCensus(to_blocks); */ /* ToDo */
#endif

  // restore enclosing cost centre
#ifdef PROFILING
  for (n = 0; n < n_capabilities; n++) {
      capabilities[n]->r.rCCCS = save_CCS[n];
  }
#endif

#ifdef DEBUG
  // check for memory leaks if DEBUG is on
  memInventory(DEBUG_gc);
#endif

  // ok, GC over: tell the stats department what happened.
  stat_endGC(cap, gct, live_words, copied,
             live_blocks * BLOCK_SIZE_W - live_words /* slop */,
             N, n_gc_threads, par_max_copied, par_tot_copied);

#if defined(RTS_USER_SIGNALS)
  if (RtsFlags.MiscFlags.install_signal_handlers) {
    // unblock signals again
    unblockUserSignals();
  }
#endif

  RELEASE_SM_LOCK;

  SET_GCT(saved_gct);
}

767 768 769 770
/* -----------------------------------------------------------------------------
   Initialise the gc_thread structures.
   -------------------------------------------------------------------------- */

// States of a gc_thread's wakeup field (set to GC_THREAD_INACTIVE in
// new_gc_thread; see wakeup_gc_threads / shutdown_gc_threads for the
// handshake protocol).
#define GC_THREAD_INACTIVE             0
#define GC_THREAD_STANDING_BY          1
#define GC_THREAD_RUNNING              2
#define GC_THREAD_WAITING_TO_CONTINUE  3

776 777
// Initialise the gc_thread structure for capability n: per-thread
// spin locks, counters, and one gen_workspace per generation (each
// seeded with a fresh todo block, since alloc_todo_block() can't be
// used before gct is set up).
static void
new_gc_thread (nat n, gc_thread *t)
{
    nat g;
    gen_workspace *ws;

    t->cap = capabilities[n];

#ifdef THREADED_RTS
    t->id = 0;
    initSpinLock(&t->gc_spin);
    initSpinLock(&t->mut_spin);
    // both locks start held by the "owner" side of the handshake
    ACQUIRE_SPIN_LOCK(&t->gc_spin);
    ACQUIRE_SPIN_LOCK(&t->mut_spin);
    t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
                          // thread to start up, see wakeup_gc_threads
#endif

    t->thread_index = n;
    t->idle = rtsFalse;
    t->free_blocks = NULL;
    t->gc_count = 0;

    init_gc_thread(t);

#ifdef USE_PAPI
    t->papi_events = -1;
#endif

    for (g = 0; g < RtsFlags.GcFlags.generations; g++)
    {
        ws = &t->gens[g];
        ws->gen = &generations[g];
        ASSERT(g == ws->gen->no);
        ws->my_gct = t;

        // We want to call
        //   alloc_todo_block(ws,0);
        // but can't, because it uses gct which isn't set up at this point.
        // Hence, allocate a block for todo_bd manually:
        {
            bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
            initBdescr(bd, ws->gen, ws->gen->to);
            bd->flags = BF_EVACUATED;
            bd->u.scan = bd->free = bd->start;

            ws->todo_bd = bd;
            ws->todo_free = bd->free;
            ws->todo_lim = bd->start + BLOCK_SIZE_W;
        }

        ws->todo_q = newWSDeque(128);
        ws->todo_overflow = NULL;
        ws->n_todo_overflow = 0;
        ws->todo_large_objects = NULL;

        ws->part_list = NULL;
        ws->n_part_blocks = 0;

        ws->scavd_list = NULL;
        ws->n_scavd_blocks = 0;
    }
}


841
void
842
initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
843 844
{
#if defined(THREADED_RTS)
845
    nat i;
846

847 848 849 850 851 852 853
    if (from > 0) {
        gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
                                      "initGcThreads");
    } else {
        gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
                                     "initGcThreads");
    }
854

855 856 857 858 859 860 861 862
    for (i = from; i < to; i++) {
        gc_threads[i] =
            stgMallocBytes(sizeof(gc_thread) +
                           RtsFlags.GcFlags.generations * sizeof(gen_workspace),
                           "alloc_gc_threads");

        new_gc_thread(i, gc_threads[i]);
    }
863
#else
864 865 866 867
    ASSERT(from == 0 && to == 1);
    gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
    gc_threads[0] = gct;
    new_gc_thread(0,gc_threads[0]);
868 869 870
#endif
}

871 872 873
void
freeGcThreads (void)
{
874
    nat g;
875 876 877
    if (gc_threads != NULL) {
#if defined(THREADED_RTS)
        nat i;
878
        for (i = 0; i < n_capabilities; i++) {
879
            for (g = 0; g < RtsFlags.GcFlags.generations; g++)
880
            {
881
                freeWSDeque(gc_threads[i]->gens[g].todo_q);
882
            }
883
            stgFree (gc_threads[i]);
884
        }
885 886
        stgFree (gc_threads);
#else
887
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
888
        {
889
            freeWSDeque(gc_threads[0]->gens[g].todo_q);
890
        }
891 892 893 894 895 896
        stgFree (gc_threads);
#endif
        gc_threads = NULL;
    }
}

Simon Marlow's avatar
Simon Marlow committed
897 898 899 900
/* ----------------------------------------------------------------------------
   Start GC threads
   ------------------------------------------------------------------------- */

901
static volatile StgWord gc_running_threads;
Simon Marlow's avatar
Simon Marlow committed
902

903
static StgWord
Simon Marlow's avatar
Simon Marlow committed
904 905
inc_running (void)
{
906
    StgWord new;
907
    new = atomic_inc(&gc_running_threads, 1);
908 909
    ASSERT(new <= n_gc_threads);
    return new;
Simon Marlow's avatar
Simon Marlow committed
910 911
}

912
static StgWord
Simon Marlow's avatar
Simon Marlow committed
913 914
dec_running (void)
{
915 916
    ASSERT(gc_running_threads != 0);
    return atomic_dec(&gc_running_threads);
Simon Marlow's avatar
Simon Marlow committed
917 918
}

919 920 921
static rtsBool
any_work (void)
{
922 923
    int g;
    gen_workspace *ws;
924 925 926 927 928 929

    gct->any_work++;

    write_barrier();

    // scavenge objects in compacted generation
930
    if (mark_stack_bd != NULL && !mark_stack_empty()) {
931
        return rtsTrue;
932
    }
thoughtpolice's avatar
thoughtpolice committed
933

934 935 936
    // Check for global work in any step.  We don't need to check for
    // local work, because we have already exited scavenge_loop(),
    // which means there is no local work for this thread.
937 938
    for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
        ws = &gct->gens[g];
939
        if (ws->todo_large_objects) return rtsTrue;
940 941 942 943 944 945 946 947 948 949
        if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
        if (ws->todo_overflow) return rtsTrue;
    }

#if defined(THREADED_RTS)
    if (work_stealing) {
        nat n;
        // look for work to steal
        for (n = 0; n < n_gc_threads; n++) {
            if (n == gct->thread_index) continue;
950 951
            for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
                ws = &gc_threads[n]->gens[g];
952 953 954
                if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
            }
        }
955
    }
956
#endif
957 958

    gct->no_work++;
959 960 961
#if defined(THREADED_RTS)
    yieldThread();
#endif
962 963

    return rtsFalse;
thoughtpolice's avatar
thoughtpolice committed
964
}
965

Simon Marlow's avatar
Simon Marlow committed
966
static void
967
scavenge_until_all_done (void)
Simon Marlow's avatar
Simon Marlow committed
968
{
969
    DEBUG_ONLY( nat r );
thoughtpolice's avatar
thoughtpolice committed
970

Simon Marlow's avatar
Simon Marlow committed
971 972

loop:
973 974 975 976 977 978 979
#if defined(THREADED_RTS)
    if (n_gc_threads > 1) {
        scavenge_loop();
    } else {
        scavenge_loop1();
    }
#else
Simon Marlow's avatar
Simon Marlow committed
980
    scavenge_loop();
981 982
#endif

Simon Marlow's avatar
Simon Marlow committed
983 984
    collect_gct_blocks();

Simon Marlow's avatar
Simon Marlow committed
985
    // scavenge_loop() only exits when there's no work to do
986 987

#ifdef DEBUG
Simon Marlow's avatar
Simon Marlow committed
988
    r = dec_running();
989 990 991 992
#else
    dec_running();
#endif

Simon Marlow's avatar
Simon Marlow committed
993
    traceEventGcIdle(gct->cap);
994 995

    debugTrace(DEBUG_gc, "%d GC threads still running", r);
thoughtpolice's avatar
thoughtpolice committed
996

Simon Marlow's avatar
Simon Marlow committed
997
    while (gc_running_threads != 0) {
998
        // usleep(1);
999 1000
        if (any_work()) {
            inc_running();
Simon Marlow's avatar
Simon Marlow committed
1001
            traceEventGcWork(gct->cap);
1002 1003 1004 1005
            goto loop;
        }
        // any_work() does not remove the work from the queue, it
        // just checks for the presence of work.  If we find any,
thoughtpolice's avatar
thoughtpolice committed
1006
        // then we increment gc_running_threads and go back to
1007
        // scavenge_loop() to perform any pending work.
Simon Marlow's avatar
Simon Marlow committed
1008
    }
thoughtpolice's avatar
thoughtpolice committed
1009

Simon Marlow's avatar
Simon Marlow committed
1010
    traceEventGcDone(gct->cap);
Simon Marlow's avatar
Simon Marlow committed
1011 1012 1013
}

#if defined(THREADED_RTS)
1014 1015 1016

void
gcWorkerThread (Capability *cap)
1017
{
1018 1019 1020 1021 1022
    gc_thread *saved_gct;

    // necessary if we stole a callee-saves register for gct:
    saved_gct = gct;

1023
    SET_GCT(gc_threads[cap->no]);
1024
    gct->id = osThreadId();
1025

1026 1027
    // Wait until we're told to wake up
    RELEASE_SPIN_LOCK(&gct->mut_spin);
1028 1029 1030 1031 1032
    // yieldThread();
    //    Strangely, adding a yieldThread() here makes the CPU time
    //    measurements more accurate on Linux, perhaps because it syncs
    //    the CPU time across the multiple cores.  Without this, CPU time
    //    is heavily skewed towards GC rather than MUT.
1033 1034 1035
    gct->wakeup = GC_THREAD_STANDING_BY;
    debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
    ACQUIRE_SPIN_LOCK(&gct->gc_spin);
thoughtpolice's avatar
thoughtpolice committed
1036

1037 1038 1039 1040 1041 1042 1043
#ifdef USE_PAPI
    // start performance counters in this thread...
    if (gct->papi_events == -1) {
        papi_init_eventset(&gct->papi_events);
    }
    papi_thread_start_gc1_count(gct->papi_events);
#endif
Simon Marlow's avatar
Simon Marlow committed
1044 1045 1046 1047 1048

    init_gc_thread(gct);

    traceEventGcWork(gct->cap);

1049
    // Every thread evacuates some roots.
Simon Marlow's avatar
Simon Marlow committed
1050
    gct->evac_gen_no = 0;
Simon Marlow's avatar
Simon Marlow committed
1051 1052
    markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
    scavenge_capability_mut_lists(cap);
1053 1054

    scavenge_until_all_done();
thoughtpolice's avatar
thoughtpolice committed
1055