/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-2008
 *
 * Generational garbage collector
 *
 * Documentation on the architecture of the Garbage Collector can be
 * found in the online commentary:
 *
 *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
 *
 * ---------------------------------------------------------------------------*/

// #include "PosixSource.h"
#include "Rts.h"
#include "RtsFlags.h"
#include "RtsUtils.h"
#include "Apply.h"
#include "OSThreads.h"
#include "LdvProfile.h"
#include "Updates.h"
#include "Stats.h"
#include "Schedule.h"
#include "Sanity.h"
#include "BlockAlloc.h"
#include "MBlock.h"
#include "ProfHeap.h"
#include "SchedAPI.h"
#include "Weak.h"
#include "Prelude.h"
#include "ParTicky.h"		// ToDo: move into Rts.h
#include "RtsSignals.h"
#include "STM.h"
#include "HsFFI.h"
#include "Linker.h"
#if defined(RTS_GTK_FRONTPANEL)
#include "FrontPanel.h"
#endif
#include "Trace.h"
#include "RetainerProfile.h"
#include "RaiseAsync.h"
#include "Papi.h"

#include "GC.h"
#include "GCThread.h"
#include "Compact.h"
#include "Evac.h"
#include "Scav.h"
#include "GCUtils.h"
#include "MarkWeak.h"
#include "Sparks.h"
#include "Sweep.h"

#include <string.h> // for memset()
#include <unistd.h>

/* -----------------------------------------------------------------------------
   Global variables
   -------------------------------------------------------------------------- */

/* STATIC OBJECT LIST.
 *
 * During GC:
 * We maintain a linked list of static objects that are still live.
 * The requirements for this list are:
 *
 *  - we need to scan the list while adding to it, in order to
 *    scavenge all the static objects (in the same way that
 *    breadth-first scavenging works for dynamic objects).
 *
 *  - we need to be able to tell whether an object is already on
 *    the list, to break loops.
 *
 * Each static object has a "static link field", which we use for
 * linking objects on to the list.  We use a stack-type list, consing
 * objects on the front as they are added (this means that the
 * scavenge phase is depth-first, not breadth-first, but that
 * shouldn't matter).  
 *
 * A separate list is kept for objects that have been scavenged
 * already - this is so that we can zero all the marks afterwards.
 *
 * An object is on the list if its static link field is non-zero; this
 * means that we have to mark the end of the list with '1', not NULL.  
 *
 * Extra notes for generational GC:
 *
 * Each generation has a static object list associated with it.  When
 * collecting generations up to N, we treat the static object lists
 * from generations > N as roots.
 *
 * We build up a static object list while collecting generations 0..N,
 * which is then appended to the static object list of generation N+1.
 */
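
/* Illustrative sketch (not compiled): consing a static object onto the
 * front of the list, as described above.  STATIC_LINK() is the macro
 * (from ClosureMacros.h) that names an object's static link field; the
 * exact variable holding the list head is assumed here:
 *
 *     if (*STATIC_LINK(info,p) == NULL) {   // non-zero => already listed
 *         *STATIC_LINK(info,p) = static_objects;  // cons onto the front
 *         static_objects = p;               // list ends in '1', not NULL
 *     }
 *
 * The real linking is done during evacuation (see Evac.c).
 */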

/* N is the oldest generation being collected, where the generations
 * are numbered starting at 0.  A major GC (indicated by the major_gc
 * flag) is when we're collecting all generations.  We only attempt to
 * deal with static objects and GC CAFs when doing a major GC.
 */
nat N;
rtsBool major_gc;

/* Data used for allocation area sizing.
 */
static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC 

/* Mut-list stats */
#ifdef DEBUG
nat mutlist_MUTVARS,
    mutlist_MUTARRS,
    mutlist_MVARS,
    mutlist_OTHERS;
#endif

/* Thread-local data for each GC thread
 */
gc_thread **gc_threads = NULL;

#if !defined(THREADED_RTS)
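// In the non-threaded RTS there is a single, statically-allocated
// gc_thread; the extra space after it holds its step workspaces
// (sized, we assume, for at most 64 steps).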
StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(step_workspace)];
#endif

// Number of threads running in *this* GC.  Affects how many
// step->todos[] lists we have to look in to find work.
nat n_gc_threads;

// For stats:
long copied;        // *words* copied & scavenged during this GC

rtsBool work_stealing;

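// DECLARE_GCT declares 'gct', the pointer to the current GC thread's
// gc_thread structure; on some platforms it lives in a reserved
// callee-saves register (see GCThread.h).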
DECLARE_GCT

/* -----------------------------------------------------------------------------
   Static function declarations
   -------------------------------------------------------------------------- */

static void mark_root               (void *user, StgClosure **root);
static void zero_static_object_list (StgClosure* first_static);
static nat  initialise_N            (rtsBool force_major_gc);
static void init_collected_gen      (nat g, nat threads);
static void init_uncollected_gen    (nat g, nat threads);
static void init_gc_thread          (gc_thread *t);
static void update_task_list        (void);
static void resize_generations      (void);
static void resize_nursery          (void);
static void start_gc_threads        (void);
static void scavenge_until_all_done (void);
static nat  inc_running             (void);
static nat  dec_running             (void);
static void wakeup_gc_threads       (nat n_threads, nat me);
static void shutdown_gc_threads     (nat n_threads, nat me);

#if 0 && defined(DEBUG)
static void gcCAFs                  (void);
#endif

/* -----------------------------------------------------------------------------
   The mark bitmap & stack.
   -------------------------------------------------------------------------- */

#define MARK_STACK_BLOCKS 4

bdescr *mark_stack_bdescr;
StgPtr *mark_stack;
StgPtr *mark_sp;
StgPtr *mark_splim;

// Flag and pointers used for falling back to a linear scan when the
// mark stack overflows.
rtsBool mark_stack_overflowed;
bdescr *oldgen_scan_bd;
StgPtr  oldgen_scan;
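
/* Illustrative sketch (not compiled) of how the mark stack is operated;
 * the real inline operations live elsewhere in the GC, and the helper
 * names here are assumed:
 *
 *     push_mark_stack(p):   if (mark_sp >= mark_splim)
 *                               mark_stack_overflowed = rtsTrue;
 *                           else
 *                               *mark_sp++ = p;
 *     pop_mark_stack():     return *--mark_sp;
 *     mark_stack_empty():   return mark_sp == mark_stack;
 *
 * After an overflow, the collector rescans the old generation linearly,
 * tracking its position in oldgen_scan_bd/oldgen_scan.
 */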

/* -----------------------------------------------------------------------------
   GarbageCollect: the main entry point to the garbage collector.

   Locks held: all capabilities are held throughout GarbageCollect().
   -------------------------------------------------------------------------- */

void
GarbageCollect (rtsBool force_major_gc, 
                nat gc_type USED_IF_THREADS,
                Capability *cap)
{
  bdescr *bd;
  step *stp;
  lnat live, allocated, max_copied, avg_copied, slop;
  gc_thread *saved_gct;
  nat g, s, t, n;

  // necessary if we stole a callee-saves register for gct:
  saved_gct = gct;

#ifdef PROFILING
  CostCentreStack *prev_CCS;
#endif

  ACQUIRE_SM_LOCK;

#if defined(RTS_USER_SIGNALS)
  if (RtsFlags.MiscFlags.install_signal_handlers) {
    // block signals
    blockUserSignals();
  }
#endif

  ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord));
  // otherwise adjust the padding in step_workspace.

  // tell the stats department that we've started a GC 
  stat_startGC();

  // tell the STM to discard any cached closures it's hoping to re-use
  stmPreGCHook();

  // lock the StablePtr table
  stablePtrPreGC();

#ifdef DEBUG
  mutlist_MUTVARS = 0;
  mutlist_MUTARRS = 0;
  mutlist_OTHERS = 0;
#endif

  // attribute any costs to CCS_GC 
#ifdef PROFILING
  prev_CCS = CCCS;
  CCCS = CCS_GC;
#endif

  /* Approximate how much we allocated.  
   * Todo: only when generating stats? 
   */
  allocated = calcAllocated();

  /* Figure out which generation to collect
   */
  n = initialise_N(force_major_gc);

#if defined(THREADED_RTS)
  work_stealing = RtsFlags.ParFlags.parGcLoadBalancing;
      // It's not always a good idea to do load balancing in parallel
      // GC.  In particular, for a parallel program we don't want to
      // lose locality by moving cached data into another CPU's cache
      // (this effect can be quite significant). 
      //
      // We could have a more complex way to determine whether to do
      // work stealing or not, e.g. it might be a good idea to do it
      // if the heap is big.  For now, we just turn it on or off with
      // a flag.
#endif

  /* Start threads, so they can be spinning up while we finish initialisation.
   */
  start_gc_threads();

#if defined(THREADED_RTS)
  /* How many threads will be participating in this GC?
   * We don't try to parallelise minor GCs (unless the user asks for
   * it with +RTS -gn0), or mark/compact/sweep GC.
   */
  if (gc_type == PENDING_GC_PAR) {
      n_gc_threads = RtsFlags.ParFlags.nNodes;
  } else {
      n_gc_threads = 1;
  }
#else
  n_gc_threads = 1;
#endif

  debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
        N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);

#ifdef RTS_GTK_FRONTPANEL
  if (RtsFlags.GcFlags.frontpanel) {
      updateFrontPanelBeforeGC(N);
  }
#endif

#ifdef DEBUG
  // check for memory leaks if DEBUG is on 
  memInventory(traceClass(DEBUG_gc));
#endif

  // check stack sanity *before* GC
  IF_DEBUG(sanity, checkFreeListSanity());
  IF_DEBUG(sanity, checkMutableLists(rtsTrue));

  // Initialise all our gc_thread structures
  for (t = 0; t < n_gc_threads; t++) {
      init_gc_thread(gc_threads[t]);
  }

  // Initialise all the generations/steps that we're collecting.
  for (g = 0; g <= N; g++) {
      init_collected_gen(g,n_gc_threads);
  }

  // Initialise all the generations/steps that we're *not* collecting.
  for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
      init_uncollected_gen(g,n_gc_threads);
  }

  /* Allocate a mark stack if we're doing a major collection.
   */
  if (major_gc && oldest_gen->steps[0].mark) {
      nat mark_stack_blocks;
      mark_stack_blocks = stg_max(MARK_STACK_BLOCKS, 
                                  oldest_gen->steps[0].n_old_blocks / 100);
      mark_stack_bdescr = allocGroup(mark_stack_blocks);
      mark_stack = (StgPtr *)mark_stack_bdescr->start;
      mark_sp    = mark_stack;
      mark_splim = mark_stack + (mark_stack_blocks * BLOCK_SIZE_W);
  } else {
      mark_stack_bdescr = NULL;
  }

  // this is the main thread
#ifdef THREADED_RTS
  if (n_gc_threads == 1) {
      SET_GCT(gc_threads[0]);
  } else {
      SET_GCT(gc_threads[cap->no]);
  }
#else
SET_GCT(gc_threads[0]);
#endif

  /* -----------------------------------------------------------------------
   * follow all the roots that we know about:
   */

  // the main thread is running: this prevents any other threads from
  // exiting prematurely, so we can start them now.
  // NB. do this after the mutable lists have been saved above, otherwise
  // the other GC threads will be writing into the old mutable lists.
  inc_running();
  wakeup_gc_threads(n_gc_threads, gct->thread_index);

  // Mutable lists from each generation > N
  // we want to *scavenge* these roots, not evacuate them: they're not
  // going to move in this GC.
  // Also do them in reverse generation order, for the usual reason:
  // namely to reduce the likelihood of spurious old->new pointers.
  //
  for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
      scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
      freeChain_sync(generations[g].saved_mut_list);
      generations[g].saved_mut_list = NULL;
  }

  // scavenge the capability-private mutable lists.  This isn't part
  // of markSomeCapabilities() because markSomeCapabilities() can only
  // call back into the GC via mark_root() (due to the gct register
  // variable).
  if (n_gc_threads == 1) {
      for (n = 0; n < n_capabilities; n++) {
          scavenge_capability_mut_lists(&capabilities[n]);
      }
  } else {
      scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
  }

  // follow roots from the CAF list (used by GHCi)
  gct->evac_step = 0;
  markCAFs(mark_root, gct);

  // follow all the roots that the application knows about.
  gct->evac_step = 0;
  markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
                       rtsTrue/*prune sparks*/);

#if defined(RTS_USER_SIGNALS)
  // mark the signal handlers (signals should be already blocked)
  markSignalHandlers(mark_root, gct);
#endif

  // Mark the weak pointer list, and prepare to detect dead weak pointers.
  markWeakPtrList();
  initWeakForGC();

  // Mark the stable pointer table.
  markStablePtrTable(mark_root, gct);

  /* -------------------------------------------------------------------------
   * Repeatedly scavenge all the areas we know about until there's no
   * more scavenging to be done.
   */
  for (;;)
  {
      scavenge_until_all_done();
      // The other threads are now stopped.  We might recurse back to
      // here, but from now on this is the only thread.
      
      // if any blackholes are alive, make the threads that wait on
      // them alive too.
      if (traverseBlackholeQueue()) {
	  inc_running(); 
	  continue;
      }
  
      // must be last...  invariant is that everything is fully
      // scavenged at this point.
      if (traverseWeakPtrList()) { // returns rtsTrue if evaced something 
	  inc_running();
	  continue;
      }

      // If we get to here, there's really nothing left to do.
      break;
  }

  shutdown_gc_threads(n_gc_threads, gct->thread_index);

  // Update pointers from the Task list
  update_task_list();

  // Now see which stable names are still alive.
  gcStablePtrTable();

#ifdef PROFILING
  // We call processHeapClosureForDead() on every closure destroyed during
  // the current garbage collection, so we invoke LdvCensusForDead().
  if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
      || RtsFlags.ProfFlags.bioSelector != NULL)
    LdvCensusForDead(N);
#endif

  // NO MORE EVACUATION AFTER THIS POINT!

  // Two-space collector: free the old to-space.
  // g0s0->old_blocks is the old nursery
  // g0s0->blocks is to-space from the previous GC
  if (RtsFlags.GcFlags.generations == 1) {
      if (g0s0->blocks != NULL) {
	  freeChain(g0s0->blocks);
	  g0s0->blocks = NULL;
      }
  }

  // For each workspace, in each thread, move the copied blocks to the step
  {
      gc_thread *thr;
      step_workspace *ws;
      bdescr *prev, *next;

      for (t = 0; t < n_gc_threads; t++) {
          thr = gc_threads[t];

          // not step 0
          if (RtsFlags.GcFlags.generations == 1) {
              s = 0;
          } else {
              s = 1;
          }
          for (; s < total_steps; s++) {
              ws = &thr->steps[s];

              // Push the final block
              if (ws->todo_bd) { 
                  push_scanned_block(ws->todo_bd, ws);
              }

              ASSERT(gct->scan_bd == NULL);
              ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);

              prev = NULL;
              for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
                  ws->step->n_words += bd->free - bd->start;
                  prev = bd;
              }
              if (prev != NULL) {
                  prev->link = ws->step->blocks;
                  ws->step->blocks = ws->scavd_list;
              }
              ws->step->n_blocks += ws->n_scavd_blocks;
          }
      }

      // Add all the partial blocks *after* we've added all the full
      // blocks.  This is so that we can grab the partial blocks back
      // again and try to fill them up in the next GC.
      for (t = 0; t < n_gc_threads; t++) {
          thr = gc_threads[t];

          // not step 0
          if (RtsFlags.GcFlags.generations == 1) {
              s = 0;
          } else {
              s = 1;
          }
          for (; s < total_steps; s++) {
              ws = &thr->steps[s];

              prev = NULL;
              for (bd = ws->part_list; bd != NULL; bd = next) {
                  next = bd->link;
                  if (bd->free == bd->start) {
                      if (prev == NULL) {
                          ws->part_list = next;
                      } else {
                          prev->link = next;
                      }
                      freeGroup(bd);
                      ws->n_part_blocks--;
                  } else {
                      ws->step->n_words += bd->free - bd->start;
                      prev = bd;
                  }
              }

              if (prev != NULL) {
                  prev->link = ws->step->blocks;
                  ws->step->blocks = ws->part_list;
              }
              ws->step->n_blocks += ws->n_part_blocks;

              ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
              ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
          }
      }
  }

  // Finally: compact or sweep the oldest generation.
  if (major_gc && oldest_gen->steps[0].mark) {
      if (oldest_gen->steps[0].compact) 
          compact(gct->scavenged_static_objects);
      else
          sweep(&oldest_gen->steps[0]);
  }

  /* run through all the generations/steps and tidy up 
   */
  copied = 0;
  max_copied = 0;
  avg_copied = 0;
  { 
      nat i;
      for (i=0; i < n_gc_threads; i++) {
          if (n_gc_threads > 1) {
              debugTrace(DEBUG_gc,"thread %d:", i);
              debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
              debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
              debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
              debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
              debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
          }
          copied += gc_threads[i]->copied;
          max_copied = stg_max(gc_threads[i]->copied, max_copied);
      }
      if (n_gc_threads == 1) {
          max_copied = 0;
          avg_copied = 0;
      } else {
          avg_copied = copied;
      }
  }

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {

    if (g == N) {
      generations[g].collections++; // for stats 
      if (n_gc_threads > 1) generations[g].par_collections++;
    }

    // Count the mutable list as bytes "copied" for the purposes of
    // stats.  Every mutable list is copied during every GC.
    if (g > 0) {
        nat mut_list_size = 0;
        for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
            mut_list_size += bd->free - bd->start;
        }
        for (n = 0; n < n_capabilities; n++) {
            for (bd = capabilities[n].mut_lists[g]; 
                 bd != NULL; bd = bd->link) {
                mut_list_size += bd->free - bd->start;
            }
        }
        copied += mut_list_size;

        debugTrace(DEBUG_gc,
                   "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
                   (unsigned long)(mut_list_size * sizeof(W_)),
                   mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
    }

    for (s = 0; s < generations[g].n_steps; s++) {
      bdescr *next, *prev;
      stp = &generations[g].steps[s];

      // for generations we collected... 
      if (g <= N) {

        /* free old memory and shift to-space into from-space for all
         * the collected steps (except the allocation area).  These
         * freed blocks will probably be quickly recycled.
         */
        if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
            if (stp->mark)
            {
                // tack the new blocks on the end of the existing blocks
                if (stp->old_blocks != NULL) {

                    prev = NULL;
                    for (bd = stp->old_blocks; bd != NULL; bd = next) {

                        next = bd->link;

                        if (!(bd->flags & BF_MARKED))
                        {
                            if (prev == NULL) {
                                stp->old_blocks = next;
                            } else {
                                prev->link = next;
                            }
                            freeGroup(bd);
                            stp->n_old_blocks--;
                        }
                        else
                        {
                            stp->n_words += bd->free - bd->start;

                            // NB. this step might not be compacted next
                            // time, so reset the BF_MARKED flags.
                            // They are set before GC if we're going to
                            // compact.  (search for BF_MARKED above).
                            bd->flags &= ~BF_MARKED;
                            
                            // between GCs, all blocks in the heap except
                            // for the nursery have the BF_EVACUATED flag set.
                            bd->flags |= BF_EVACUATED;

                            prev = bd;
                        }
                    }

                    if (prev != NULL) {
                        prev->link = stp->blocks;
                        stp->blocks = stp->old_blocks;
                    }
		}
		// add the new blocks to the block tally
		stp->n_blocks += stp->n_old_blocks;
		ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
                ASSERT(countOccupied(stp->blocks) == stp->n_words);
            }
            else // not compacted
            {
		freeChain(stp->old_blocks);
	    }
	    stp->old_blocks = NULL;
	    stp->n_old_blocks = 0;
	}

	/* LARGE OBJECTS.  The current live large objects are chained on
	 * scavenged_large, having been moved during garbage
	 * collection from large_objects.  Any objects left on
	 * large_objects list are therefore dead, so we free them here.
	 */
        for (bd = stp->large_objects; bd != NULL; bd = next) {
          next = bd->link;
          freeGroup(bd);
        }

	stp->large_objects  = stp->scavenged_large_objects;
	stp->n_large_blocks = stp->n_scavenged_large_blocks;

      }
      else // for older generations... 
      {
	/* For older generations, we need to append the
	 * scavenged_large_object list (i.e. large objects that have been
	 * promoted during this GC) to the large_object list for that step.
	 */
	for (bd = stp->scavenged_large_objects; bd; bd = next) {
	  next = bd->link;
	  dbl_link_onto(bd, &stp->large_objects);
	}

	// add the new blocks we promoted during this GC 
	stp->n_large_blocks += stp->n_scavenged_large_blocks;
      }
    }
  }

  // update the max size of older generations after a major GC
  resize_generations();
  
  // Calculate the amount of live data for stats.
  live = calcLiveWords();

  // Free the small objects allocated via allocate(), since these will
  // all have been copied into G0S1 now.
  if (RtsFlags.GcFlags.generations > 1) {
      if (g0s0->blocks != NULL) {
          freeChain(g0s0->blocks);
          g0s0->blocks = NULL;
      }
      g0s0->n_blocks = 0;
      g0s0->n_words = 0;
  }
  alloc_blocks = 0;
  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;

  // Start a new pinned_object_block
  pinned_object_block = NULL;

  // Free the mark stack.
  if (mark_stack_bdescr != NULL) {
      freeGroup(mark_stack_bdescr);
  }

  // Free any bitmaps.
  for (g = 0; g <= N; g++) {
      for (s = 0; s < generations[g].n_steps; s++) {
	  stp = &generations[g].steps[s];
	  if (stp->bitmap != NULL) {
	      freeGroup(stp->bitmap);
	      stp->bitmap = NULL;
	  }
      }
  }

  resize_nursery();

 // mark the garbage collected CAFs as dead 
#if 0 && defined(DEBUG) // doesn't work at the moment 
  if (major_gc) { gcCAFs(); }
#endif
  
#ifdef PROFILING
  // resetStaticObjectForRetainerProfiling() must be called before
  // zeroing below.
  if (n_gc_threads > 1) {
      barf("profiling is currently broken with multi-threaded GC");
      // ToDo: fix the gct->scavenged_static_objects below
  }
  resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
#endif

  // zero the scavenged static object list 
  if (major_gc) {
      nat i;
      for (i = 0; i < n_gc_threads; i++) {
          zero_static_object_list(gc_threads[i]->scavenged_static_objects);
      }
  }

  // Reset the nursery
  resetNurseries();

  // start any pending finalizers 
  RELEASE_SM_LOCK;
  scheduleFinalizers(cap, old_weak_ptr_list);
  ACQUIRE_SM_LOCK;
  
  // send exceptions to any threads which were about to die 
  RELEASE_SM_LOCK;
  resurrectThreads(resurrected_threads);
  performPendingThrowTos(exception_threads);
  ACQUIRE_SM_LOCK;

  // Update the stable pointer hash table.
  updateStablePtrTable(major_gc);

  // check sanity after GC 
  IF_DEBUG(sanity, checkSanity());

  // extra GC trace info 
  IF_DEBUG(gc, statDescribeGens());

#ifdef DEBUG
  // symbol-table based profiling 
  /*  heapCensus(to_blocks); */ /* ToDo */
#endif

  // restore enclosing cost centre 
#ifdef PROFILING
  CCCS = prev_CCS;
#endif

#ifdef DEBUG
  // check for memory leaks if DEBUG is on 
  memInventory(traceClass(DEBUG_gc));
#endif

#ifdef RTS_GTK_FRONTPANEL
  if (RtsFlags.GcFlags.frontpanel) {
      updateFrontPanelAfterGC( N, live );
  }
#endif

  // ok, GC over: tell the stats department what happened. 
  slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
  stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
  // unlock the StablePtr table
  stablePtrPostGC();

  // Guess which generation we'll collect *next* time
  initialise_N(force_major_gc);

#if defined(RTS_USER_SIGNALS)
  if (RtsFlags.MiscFlags.install_signal_handlers) {
    // unblock signals again
    unblockUserSignals();
  }
#endif

  RELEASE_SM_LOCK;
  SET_GCT(saved_gct);
}

/* -----------------------------------------------------------------------------
   Figure out which generation to collect, initialise N and major_gc.

   Also returns the total number of blocks in generations that will be
   collected.
   -------------------------------------------------------------------------- */

static nat
initialise_N (rtsBool force_major_gc)
{
    int g;
    nat s, blocks, blocks_total;

    blocks = 0;
    blocks_total = 0;

    if (force_major_gc) {
        N = RtsFlags.GcFlags.generations - 1;
    } else {
        N = 0;
    }

    for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
        blocks = 0;
        for (s = 0; s < generations[g].n_steps; s++) {
            blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
            blocks += generations[g].steps[s].n_large_blocks;
        }
        if (blocks >= generations[g].max_blocks) {
            N = stg_max(N,g);
        }
        if ((nat)g <= N) {
            blocks_total += blocks;
        }
    }

    blocks_total += countNurseryBlocks();

    major_gc = (N == RtsFlags.GcFlags.generations-1);
    return blocks_total;
}
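
/* For example, with the default two generations (+RTS -G2): if
 * generation 1 has grown past its max_blocks limit, the loop above sets
 * N = 1 and major_gc becomes rtsTrue; otherwise N stays at 0 (unless
 * force_major_gc was passed) and only the young generation is collected.
 */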

/* -----------------------------------------------------------------------------
   Initialise the gc_thread structures.
   -------------------------------------------------------------------------- */

#define GC_THREAD_INACTIVE             0
#define GC_THREAD_STANDING_BY          1
#define GC_THREAD_RUNNING              2
#define GC_THREAD_WAITING_TO_CONTINUE  3
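
// A worker's wakeup field moves through these states in order: it starts
// INACTIVE, advertises STANDING_BY once it has started up, is set to
// RUNNING when woken for a collection (see wakeup_gc_threads), and parks
// itself at WAITING_TO_CONTINUE when the collection is over (see
// gcWorkerThread below).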

static void
new_gc_thread (nat n, gc_thread *t)
{
    nat s;
    step_workspace *ws;

#ifdef THREADED_RTS
    t->id = 0;
    initSpinLock(&t->gc_spin);
    initSpinLock(&t->mut_spin);
    ACQUIRE_SPIN_LOCK(&t->gc_spin);
    t->wakeup = GC_THREAD_INACTIVE;  // starts INACTIVE, so we can wait for
                          // the thread to start up; see wakeup_gc_threads
#endif

    t->thread_index = n;
    t->free_blocks = NULL;
    t->gc_count = 0;

    init_gc_thread(t);
    
#ifdef USE_PAPI
    t->papi_events = -1;
#endif

    for (s = 0; s < total_steps; s++)
    {
        ws = &t->steps[s];
        ws->step = &all_steps[s];
        ASSERT(s == ws->step->abs_no);
        ws->my_gct = t;

        ws->todo_bd = NULL;
        ws->todo_q = newWSDeque(128);
        ws->todo_overflow = NULL;
        ws->n_todo_overflow = 0;

        ws->part_list = NULL;
        ws->n_part_blocks = 0;

        ws->scavd_list = NULL;
        ws->n_scavd_blocks = 0;
    }
}


void
initGcThreads (void)
{
    if (gc_threads == NULL) {
#if defined(THREADED_RTS)
        nat i;
        gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes * 
                                     sizeof(gc_thread*), 
                                     "alloc_gc_threads");

        for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
            gc_threads[i] = 
                stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
                               "alloc_gc_threads");

            new_gc_thread(i, gc_threads[i]);
        }
#else
        gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
        gc_threads[0] = gct;
        new_gc_thread(0,gc_threads[0]);
#endif
    }
}

/* ----------------------------------------------------------------------------
   Start GC threads
   ------------------------------------------------------------------------- */

static nat gc_running_threads;

#if defined(THREADED_RTS)
static Mutex gc_running_mutex;
#endif

static nat
inc_running (void)
{
    nat n_running;
    ACQUIRE_LOCK(&gc_running_mutex);
    n_running = ++gc_running_threads;
    RELEASE_LOCK(&gc_running_mutex);
    ASSERT(n_running <= n_gc_threads);
    return n_running;
}

static nat
dec_running (void)
{
    nat n_running;
    ACQUIRE_LOCK(&gc_running_mutex);
    ASSERT(n_gc_threads != 0);
    n_running = --gc_running_threads;
    RELEASE_LOCK(&gc_running_mutex);
    return n_running;
}

static rtsBool
any_work (void)
{
    int s;
    step_workspace *ws;

    gct->any_work++;

    write_barrier();

    // scavenge objects in compacted generation
    if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
	(mark_stack_bdescr != NULL && !mark_stack_empty())) {
	return rtsTrue;
    }
    
    // Check for global work in any step.  We don't need to check for
    // local work, because we have already exited scavenge_loop(),
    // which means there is no local work for this thread.
    for (s = total_steps-1; s >= 0; s--) {
        if (s == 0 && RtsFlags.GcFlags.generations > 1) { 
            continue; 
        }
        ws = &gct->steps[s];
        if (ws->todo_large_objects) return rtsTrue;
        if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
        if (ws->todo_overflow) return rtsTrue;
    }

#if defined(THREADED_RTS)
    if (work_stealing) {
        nat n;
        // look for work to steal
        for (n = 0; n < n_gc_threads; n++) {
            if (n == gct->thread_index) continue;
            for (s = total_steps-1; s >= 0; s--) {
                ws = &gc_threads[n]->steps[s];
                if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
            }
        }
    }
#endif

    gct->no_work++;

    return rtsFalse;
}    

static void
scavenge_until_all_done (void)
{
    nat r;
	
    debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);

loop:
#if defined(THREADED_RTS)
    if (n_gc_threads > 1) {
        scavenge_loop();
    } else {
        scavenge_loop1();
    }
#else
    scavenge_loop();
#endif

    // scavenge_loop() only exits when there's no work to do
    r = dec_running();
    
    debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)", 
               gct->thread_index, r);
    
    while (gc_running_threads != 0) {
        // usleep(1);
        if (any_work()) {
            inc_running();
            goto loop;
        }
        // any_work() does not remove the work from the queue, it
        // just checks for the presence of work.  If we find any,
        // then we increment gc_running_threads and go back to 
        // scavenge_loop() to perform any pending work.
    }
    
    // All threads are now stopped
    debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
}

#if defined(THREADED_RTS)

void
gcWorkerThread (Capability *cap)
{
    cap->in_gc = rtsTrue;

    gct = gc_threads[cap->no];
    gct->id = osThreadId();

    // Wait until we're told to wake up
    RELEASE_SPIN_LOCK(&gct->mut_spin);
    gct->wakeup = GC_THREAD_STANDING_BY;
    debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
    ACQUIRE_SPIN_LOCK(&gct->gc_spin);
    
#ifdef USE_PAPI
    // start performance counters in this thread...
    if (gct->papi_events == -1) {
        papi_init_eventset(&gct->papi_events);
    }
    papi_thread_start_gc1_count(gct->papi_events);
#endif
    
    // Every thread evacuates some roots.
    gct->evac_step = 0;
    markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
                         rtsTrue/*prune sparks*/);
    scavenge_capability_mut_lists(&capabilities[gct->thread_index]);

    scavenge_until_all_done();

#ifdef USE_PAPI
    // count events in this thread towards the GC totals
    papi_thread_stop_gc1_count(gct->papi_events);
#endif

    // Wait until we're told to continue
    RELEASE_SPIN_LOCK(&gct->gc_spin);
    gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
    debugTrace(DEBUG_gc, "GC thread %d waiting to continue...", 
               gct->thread_index);
    ACQUIRE_SPIN_LOCK(&gct->mut_spin);
    debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
}