/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-2008
 *
 * Generational garbage collector
 *
 * Documentation on the architecture of the Garbage Collector can be
 * found in the online commentary:
 * 
 *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"
#include "HsFFI.h"

#include "Storage.h"
#include "RtsUtils.h"
#include "Apply.h"
#include "Updates.h"
#include "Stats.h"
#include "Schedule.h"
#include "Sanity.h"
#include "BlockAlloc.h"
#include "ProfHeap.h"
#include "Weak.h"
#include "Prelude.h"
#include "RtsSignals.h"
#include "STM.h"
#if defined(RTS_GTK_FRONTPANEL)
#include "FrontPanel.h"
#endif
#include "Trace.h"
#include "RetainerProfile.h"
#include "LdvProfile.h"
#include "RaiseAsync.h"
#include "Papi.h"
#include "Stable.h"

#include "GC.h"
#include "GCThread.h"
#include "GCTDecl.h"
#include "Compact.h"
#include "Evac.h"
#include "Scav.h"
#include "GCUtils.h"
#include "MarkStack.h"
#include "MarkWeak.h"
#include "Sparks.h"
#include "Sweep.h"

#include <string.h> // for memset()
#include <unistd.h>

/* -----------------------------------------------------------------------------
   Global variables
   -------------------------------------------------------------------------- */

/* STATIC OBJECT LIST.
 *
 * During GC:
 * We maintain a linked list of static objects that are still live.
 * The requirements for this list are:
 *
 *  - we need to scan the list while adding to it, in order to
 *    scavenge all the static objects (in the same way that
 *    breadth-first scavenging works for dynamic objects).
 *
 *  - we need to be able to tell whether an object is already on
 *    the list, to break loops.
 *
 * Each static object has a "static link field", which we use for
 * linking objects on to the list.  We use a stack-type list, consing
 * objects on the front as they are added (this means that the
 * scavenge phase is depth-first, not breadth-first, but that
 * shouldn't matter).  
 *
 * A separate list is kept for objects that have been scavenged
 * already - this is so that we can zero all the marks afterwards.
 *
 * An object is on the list if its static link field is non-zero; this
 * means that we have to mark the end of the list with '1', not NULL.  
 *
 * Extra notes for generational GC:
 *
 * Each generation has a static object list associated with it.  When
 * collecting generations up to N, we treat the static object lists
 * from generations > N as roots.
 *
 * We build up a static object list while collecting generations 0..N,
 * which is then appended to the static object list of generation N+1.
 */

/* N is the oldest generation being collected, where the generations
 * are numbered starting at 0.  A major GC (indicated by the major_gc
 * flag) is when we're collecting all generations.  We only attempt to
 * deal with static objects and GC CAFs when doing a major GC.
 */
nat N;
rtsBool major_gc;

/* Data used for allocation area sizing.
 */
static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC

/* Mut-list stats */
#ifdef DEBUG
nat mutlist_MUTVARS,
    mutlist_MUTARRS,
    mutlist_MVARS,
    mutlist_OTHERS;
#endif

/* Thread-local data for each GC thread
 */
gc_thread **gc_threads = NULL;

#if !defined(THREADED_RTS)
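// In the non-threaded RTS there is a single gc_thread, allocated statically
// below.  The buffer reserves space for the gc_thread structure itself plus
// what appears to be room for up to 64 gen_workspaces (one per generation).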
StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
#endif

// Number of threads running in *this* GC.  Affects how many
// step->todos[] lists we have to look in to find work.
nat n_gc_threads;

// For stats:
long copied;        // *words* copied & scavenged during this GC

rtsBool work_stealing;

DECLARE_GCT

/* -----------------------------------------------------------------------------
   Static function declarations
   -------------------------------------------------------------------------- */

static void mark_root               (void *user, StgClosure **root);
static void zero_static_object_list (StgClosure* first_static);
static nat  initialise_N            (rtsBool force_major_gc);
static void prepare_collected_gen   (generation *gen);
static void prepare_uncollected_gen (generation *gen);
static void init_gc_thread          (gc_thread *t);
static void resize_generations      (void);
static void resize_nursery          (void);
static void start_gc_threads        (void);
static void scavenge_until_all_done (void);
static StgWord inc_running          (void);
static StgWord dec_running          (void);
static void wakeup_gc_threads       (nat me);
static void shutdown_gc_threads     (nat me);
static void collect_gct_blocks      (void);

#if 0 && defined(DEBUG)
static void gcCAFs                  (void);
#endif

/* -----------------------------------------------------------------------------
   The mark stack.
   -------------------------------------------------------------------------- */

bdescr *mark_stack_top_bd; // topmost block in the mark stack
bdescr *mark_stack_bd;     // current block in the mark stack
StgPtr mark_sp;            // pointer to the next unallocated mark stack entry

/* -----------------------------------------------------------------------------
   GarbageCollect: the main entry point to the garbage collector.

   Locks held: all capabilities are held throughout GarbageCollect().
   -------------------------------------------------------------------------- */

void
GarbageCollect (rtsBool force_major_gc, 
                nat gc_type USED_IF_THREADS,
                Capability *cap)
{
  bdescr *bd;
  generation *gen;
  lnat live_blocks, live_words, allocated, max_copied, avg_copied;
#if defined(THREADED_RTS)
  gc_thread *saved_gct;
#endif
  nat g, n;

  // necessary if we stole a callee-saves register for gct:
#if defined(THREADED_RTS)
  saved_gct = gct;
#endif

#ifdef PROFILING
  CostCentreStack *prev_CCS;
#endif

  ACQUIRE_SM_LOCK;

#if defined(RTS_USER_SIGNALS)
  if (RtsFlags.MiscFlags.install_signal_handlers) {
    // block signals
    blockUserSignals();
  }
#endif

  ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
  // otherwise adjust the padding in gen_workspace.

  // this is the main thread
  SET_GCT(gc_threads[cap->no]);

  // tell the stats department that we've started a GC 
  stat_startGC(gct);

  // lock the StablePtr table
  stablePtrPreGC();

#ifdef DEBUG
  mutlist_MUTVARS = 0;
  mutlist_MUTARRS = 0;
  mutlist_OTHERS = 0;
#endif

  // attribute any costs to CCS_GC 
#ifdef PROFILING
  prev_CCS = CCCS;
  CCCS = CCS_GC;
#endif

  /* Approximate how much we allocated.  
   * Todo: only when generating stats? 
   */
  allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);

  /* Figure out which generation to collect
   */
  n = initialise_N(force_major_gc);

#if defined(THREADED_RTS)
  work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
                  N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
      // It's not always a good idea to do load balancing in parallel
      // GC.  In particular, for a parallel program we don't want to
      // lose locality by moving cached data into another CPU's cache
      // (this effect can be quite significant). 
      //
      // We could have a more complex way to determine whether to do
      // work stealing or not, e.g. it might be a good idea to do it
      // if the heap is big.  For now, we just turn it on or off with
      // a flag.
#endif

  /* Start threads, so they can be spinning up while we finish initialisation.
   */
  start_gc_threads();

#if defined(THREADED_RTS)
  /* How many threads will be participating in this GC?
   * We don't try to parallelise minor GCs (unless the user asks for
   * it with +RTS -gn0), or mark/compact/sweep GC.
   */
  if (gc_type == PENDING_GC_PAR) {
      n_gc_threads = RtsFlags.ParFlags.nNodes;
  } else {
      n_gc_threads = 1;
  }
#else
  n_gc_threads = 1;
#endif

  debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
        N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);

#ifdef RTS_GTK_FRONTPANEL
  if (RtsFlags.GcFlags.frontpanel) {
      updateFrontPanelBeforeGC(N);
  }
#endif

#ifdef DEBUG
  // check for memory leaks if DEBUG is on 
  memInventory(DEBUG_gc);
#endif

  // check sanity *before* GC
  IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));

  // Initialise all the generations/steps that we're collecting.
  for (g = 0; g <= N; g++) {
      prepare_collected_gen(&generations[g]);
  }
  // Initialise all the generations/steps that we're *not* collecting.
  for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
      prepare_uncollected_gen(&generations[g]);
  }

  // Prepare this gc_thread
  init_gc_thread(gct);

  /* Allocate a mark stack if we're doing a major collection.
   */
  if (major_gc && oldest_gen->mark) {
      mark_stack_bd     = allocBlock();
      mark_stack_top_bd = mark_stack_bd;
      mark_stack_bd->link = NULL;
      mark_stack_bd->u.back = NULL;
      mark_sp           = mark_stack_bd->start;
  } else {
      mark_stack_bd     = NULL;
      mark_stack_top_bd = NULL;
      mark_sp           = NULL;
  }

  /* -----------------------------------------------------------------------
   * follow all the roots that we know about:
   */

  // the main thread is running: this prevents any other threads from
  // exiting prematurely, so we can start them now.
  // NB. do this after the mutable lists have been saved above, otherwise
  // the other GC threads will be writing into the old mutable lists.
  inc_running();
  wakeup_gc_threads(gct->thread_index);

  traceEventGcWork(gct->cap);

  // scavenge the capability-private mutable lists.  This isn't part
  // of markSomeCapabilities() because markSomeCapabilities() can only
  // call back into the GC via mark_root() (due to the gct register
  // variable).
  if (n_gc_threads == 1) {
      for (n = 0; n < n_capabilities; n++) {
#if defined(THREADED_RTS)
          scavenge_capability_mut_Lists1(&capabilities[n]);
#else
          scavenge_capability_mut_lists(&capabilities[n]);
#endif
      }
  } else {
      scavenge_capability_mut_lists(gct->cap);
  }

  // follow roots from the CAF list (used by GHCi)
  gct->evac_gen_no = 0;
  markCAFs(mark_root, gct);

  // follow all the roots that the application knows about.
  gct->evac_gen_no = 0;
  if (n_gc_threads == 1) {
      for (n = 0; n < n_capabilities; n++) {
          markCapability(mark_root, gct, &capabilities[n],
                         rtsTrue/*don't mark sparks*/);
      }
  } else {
      markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
  }

  markScheduler(mark_root, gct);

#if defined(RTS_USER_SIGNALS)
  // mark the signal handlers (signals should be already blocked)
  markSignalHandlers(mark_root, gct);
#endif

  // Mark the weak pointer list, and prepare to detect dead weak pointers.
  markWeakPtrList();
  initWeakForGC();

  // Mark the stable pointer table.
  markStablePtrTable(mark_root, gct);

  /* -------------------------------------------------------------------------
   * Repeatedly scavenge all the areas we know about until there's no
   * more scavenging to be done.
   */
  for (;;)
  {
      scavenge_until_all_done();
      // The other threads are now stopped.  We might recurse back to
      // here, but from now on this is the only thread.
      
      // must be last...  invariant is that everything is fully
      // scavenged at this point.
      if (traverseWeakPtrList()) { // returns rtsTrue if evaced something 
	  inc_running();
	  continue;
      }

      // If we get to here, there's really nothing left to do.
      break;
  }

  shutdown_gc_threads(gct->thread_index);

  // Now see which stable names are still alive.
  gcStablePtrTable();

#ifdef THREADED_RTS
  if (n_gc_threads == 1) {
      for (n = 0; n < n_capabilities; n++) {
          pruneSparkQueue(&capabilities[n]);
      }
  } else {
      pruneSparkQueue(gct->cap);
  }
#endif

#ifdef PROFILING
  // We call processHeapClosureForDead() on every closure destroyed during
  // the current garbage collection, so we invoke LdvCensusForDead().
  if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
      || RtsFlags.ProfFlags.bioSelector != NULL)
    LdvCensusForDead(N);
#endif

  // NO MORE EVACUATION AFTER THIS POINT!

  // Finally: compact or sweep the oldest generation.
  if (major_gc && oldest_gen->mark) {
      if (oldest_gen->compact) 
          compact(gct->scavenged_static_objects);
      else
          sweep(oldest_gen);
  }

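  // Gather copy/scan statistics from the per-thread gc_thread structures.
  // With a single GC thread, max_copied and avg_copied are simply left at
  // zero; the per-thread breakdown is only traced for a parallel GC.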
  copied = 0;
  max_copied = 0;
  avg_copied = 0;
  { 
      nat i;
      for (i=0; i < n_gc_threads; i++) {
          if (n_gc_threads > 1) {
              debugTrace(DEBUG_gc,"thread %d:", i);
              debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
              debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
              debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
              debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
              debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
          }
          copied += gc_threads[i]->copied;
          max_copied = stg_max(gc_threads[i]->copied, max_copied);
      }
      if (n_gc_threads == 1) {
          max_copied = 0;
          avg_copied = 0;
      } else {
          avg_copied = copied;
      }
  }

  // Run through all the generations/steps and tidy up.
  // We're going to:
  //   - count the amount of "live" data (live_words, live_blocks)
  //   - count the amount of "copied" data in this GC (copied)
  //   - free from-space
  //   - make to-space the new from-space (set BF_EVACUATED on all blocks)
  //
  live_words = 0;
  live_blocks = 0;

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {

    if (g == N) {
      generations[g].collections++; // for stats 
      if (n_gc_threads > 1) generations[g].par_collections++;
    }

    // Count the mutable list as bytes "copied" for the purposes of
    // stats.  Every mutable list is copied during every GC.
    if (g > 0) {
	nat mut_list_size = 0;
        for (n = 0; n < n_capabilities; n++) {
            mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
        }
	copied +=  mut_list_size;

	debugTrace(DEBUG_gc,
		   "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
		   (unsigned long)(mut_list_size * sizeof(W_)),
		   mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
    }

    bdescr *next, *prev;
    gen = &generations[g];

    // for generations we collected... 
    if (g <= N) {

	/* free old memory and shift to-space into from-space for all
	 * the collected steps (except the allocation area).  These
	 * freed blocks will probably be quickly recycled.
	 */
        if (gen->mark)
        {
            // tack the new blocks on the end of the existing blocks
            if (gen->old_blocks != NULL) {
                
                prev = NULL;
                for (bd = gen->old_blocks; bd != NULL; bd = next) {
                    
                    next = bd->link;
                    
                    if (!(bd->flags & BF_MARKED))
                    {
                        if (prev == NULL) {
                            gen->old_blocks = next;
                        } else {
                            prev->link = next;
506
                        }
                        freeGroup(bd);
                        gen->n_old_blocks--;
                    }
                    else
                    {
                        gen->n_words += bd->free - bd->start;
                        
                        // NB. this step might not be compacted next
                        // time, so reset the BF_MARKED flags.
                        // They are set before GC if we're going to
                        // compact.  (search for BF_MARKED above).
                        bd->flags &= ~BF_MARKED;
                        
                        // between GCs, all blocks in the heap except
                        // for the nursery have the BF_EVACUATED flag set.
                        bd->flags |= BF_EVACUATED;
                        
                        prev = bd;
                    }
                }

                if (prev != NULL) {
                    prev->link = gen->blocks;
                    gen->blocks = gen->old_blocks;
                }
            }
            // add the new blocks to the block tally
            gen->n_blocks += gen->n_old_blocks;
            ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
            ASSERT(countOccupied(gen->blocks) == gen->n_words);
        }
        else // not compacted
        {
            freeChain(gen->old_blocks);
        }

        gen->old_blocks = NULL;
        gen->n_old_blocks = 0;

        /* LARGE OBJECTS.  The current live large objects are chained on
         * scavenged_large, having been moved during garbage
         * collection from large_objects.  Any objects left on the
         * large_objects list are therefore dead, so we free them here.
         */
        freeChain(gen->large_objects);
        gen->large_objects  = gen->scavenged_large_objects;
        gen->n_large_blocks = gen->n_scavenged_large_blocks;
        gen->n_new_large_words = 0;
    }
    else // for generations > N
    {
	/* For older generations, we need to append the
	 * scavenged_large_object list (i.e. large objects that have been
	 * promoted during this GC) to the large_object list for that step.
	 */
	for (bd = gen->scavenged_large_objects; bd; bd = next) {
            next = bd->link;
            dbl_link_onto(bd, &gen->large_objects);
	}
        
	// add the new blocks we promoted during this GC 
	gen->n_large_blocks += gen->n_scavenged_large_blocks;
    }

    ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);

    gen->scavenged_large_objects = NULL;
    gen->n_scavenged_large_blocks = 0;

    // Count "live" data
    live_words  += genLiveWords(gen);
    live_blocks += genLiveBlocks(gen);

    // add in the partial blocks in the gen_workspaces, but ignore gen 0
    // if this is a local GC (we can't count another capability's part_list)
    {
        nat i;
        for (i = 0; i < n_capabilities; i++) {
            live_words  += gcThreadLiveWords(i, gen->no);
            live_blocks += gcThreadLiveBlocks(i, gen->no);
        }
    }
  } // for all generations

  // update the max size of older generations after a major GC
  resize_generations();
  
  // Free the mark stack.
  if (mark_stack_top_bd != NULL) {
      debugTrace(DEBUG_gc, "mark stack: %d blocks",
                 countBlocks(mark_stack_top_bd));
      freeChain(mark_stack_top_bd);
  }

  // Free any bitmaps.
  for (g = 0; g <= N; g++) {
      gen = &generations[g];
      if (gen->bitmap != NULL) {
          freeGroup(gen->bitmap);
          gen->bitmap = NULL;
      }
  }

  // Reset the nursery: make the blocks empty
  allocated += clearNurseries();

  resize_nursery();

  resetNurseries();

 // mark the garbage collected CAFs as dead
#if 0 && defined(DEBUG) // doesn't work at the moment 
  if (major_gc) { gcCAFs(); }
#endif
  
#ifdef PROFILING
  // resetStaticObjectForRetainerProfiling() must be called before
  // zeroing below.
  if (n_gc_threads > 1) {
      barf("profiling is currently broken with multi-threaded GC");
      // ToDo: fix the gct->scavenged_static_objects below
  }
  resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
#endif

  // zero the scavenged static object list 
  if (major_gc) {
      nat i;
      if (n_gc_threads == 1) {
          zero_static_object_list(gct->scavenged_static_objects);
      } else {
          for (i = 0; i < n_gc_threads; i++) {
              zero_static_object_list(gc_threads[i]->scavenged_static_objects);
          }
      }
  }

  // Update the stable pointer hash table.
  updateStablePtrTable(major_gc);

  // unlock the StablePtr table.  Must be before scheduleFinalizers(),
  // because a finalizer may call hs_free_fun_ptr() or
  // hs_free_stable_ptr(), both of which access the StablePtr table.
  stablePtrPostGC();

  // Start any pending finalizers.  Must be after
  // updateStablePtrTable() and stablePtrPostGC() (see #4221).
  RELEASE_SM_LOCK;
  scheduleFinalizers(cap, old_weak_ptr_list);
  ACQUIRE_SM_LOCK;

  // check sanity after GC
  // before resurrectThreads(), because that might overwrite some
  // closures, which will cause problems with THREADED where we don't
  // fill slop.
  IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));

  // send exceptions to any threads which were about to die
  RELEASE_SM_LOCK;
  resurrectThreads(resurrected_threads);
  ACQUIRE_SM_LOCK;

  if (major_gc) {
      nat need, got;
      need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
      got = mblocks_allocated;
      /* If the amount of data remains constant, next major GC we'll
         require (F+1)*need. We leave (F+2)*need in order to reduce
         repeated deallocation and reallocation. */
      need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
      if (got > need) {
          returnMemoryToOS(got - need);
      }
  }

  // extra GC trace info
  IF_DEBUG(gc, statDescribeGens());

#ifdef DEBUG
  // symbol-table based profiling 
  /*  heapCensus(to_blocks); */ /* ToDo */
#endif

  // restore enclosing cost centre 
#ifdef PROFILING
  CCCS = prev_CCS;
#endif

#ifdef DEBUG
  // check for memory leaks if DEBUG is on 
  memInventory(DEBUG_gc);
#endif

#ifdef RTS_GTK_FRONTPANEL
  if (RtsFlags.GcFlags.frontpanel) {
      updateFrontPanelAfterGC( N, live );
  }
#endif

  // ok, GC over: tell the stats department what happened. 
  stat_endGC(gct, allocated, live_words,
             copied, N, max_copied, avg_copied,
             live_blocks * BLOCK_SIZE_W - live_words /* slop */);

  // Guess which generation we'll collect *next* time
  initialise_N(force_major_gc);

#if defined(RTS_USER_SIGNALS)
  if (RtsFlags.MiscFlags.install_signal_handlers) {
    // unblock signals again
    unblockUserSignals();
  }
#endif

  RELEASE_SM_LOCK;

  SET_GCT(saved_gct);
}

/* -----------------------------------------------------------------------------
   Figure out which generation to collect, initialise N and major_gc.

   Also returns the total number of blocks in generations that will be
   collected.
   -------------------------------------------------------------------------- */

static nat
initialise_N (rtsBool force_major_gc)
{
    int g;
    nat blocks, blocks_total;

    blocks = 0;
    blocks_total = 0;

    if (force_major_gc) {
        N = RtsFlags.GcFlags.generations - 1;
    } else {
        N = 0;
    }
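
    // A generation is due for collection when the number of blocks it
    // occupies (its live words expressed in blocks, plus its large-object
    // blocks) has reached its max_blocks limit.  Collecting generation g
    // implies collecting all younger generations too, hence N = max(N, g).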

    for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {

        blocks = generations[g].n_words / BLOCK_SIZE_W
               + generations[g].n_large_blocks;

        if (blocks >= generations[g].max_blocks) {
            N = stg_max(N,g);
        }
        if ((nat)g <= N) {
            blocks_total += blocks;
        }
    }

    blocks_total += countNurseryBlocks();

    major_gc = (N == RtsFlags.GcFlags.generations-1);
    return blocks_total;
}

/* -----------------------------------------------------------------------------
   Initialise the gc_thread structures.
   -------------------------------------------------------------------------- */

#define GC_THREAD_INACTIVE             0
#define GC_THREAD_STANDING_BY          1
#define GC_THREAD_RUNNING              2
#define GC_THREAD_WAITING_TO_CONTINUE  3

static void
new_gc_thread (nat n, gc_thread *t)
{
    nat g;
    gen_workspace *ws;

    t->cap = &capabilities[n];

#ifdef THREADED_RTS
    t->id = 0;
    initSpinLock(&t->gc_spin);
    initSpinLock(&t->mut_spin);
    ACQUIRE_SPIN_LOCK(&t->gc_spin);
    t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
                          // thread to start up, see wakeup_gc_threads
#endif

    t->thread_index = n;
    t->free_blocks = NULL;
    t->gc_count = 0;

    init_gc_thread(t);
    
#ifdef USE_PAPI
    t->papi_events = -1;
#endif

    for (g = 0; g < RtsFlags.GcFlags.generations; g++)
    {
        ws = &t->gens[g];
        ws->gen = &generations[g];
        ASSERT(g == ws->gen->no);
        ws->my_gct = t;
        
        // We want to call
        //   alloc_todo_block(ws,0);
        // but can't, because it uses gct which isn't set up at this point.
        // Hence, allocate a block for todo_bd manually:
        {
            bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
            initBdescr(bd, ws->gen, ws->gen->to);
            bd->flags = BF_EVACUATED;
            bd->u.scan = bd->free = bd->start;

            ws->todo_bd = bd;
            ws->todo_free = bd->free;
            ws->todo_lim = bd->start + BLOCK_SIZE_W;
        }

        ws->todo_q = newWSDeque(128);
        ws->todo_overflow = NULL;
        ws->n_todo_overflow = 0;
        ws->todo_large_objects = NULL;

        ws->part_list = NULL;
        ws->n_part_blocks = 0;

        ws->scavd_list = NULL;
        ws->n_scavd_blocks = 0;
    }
}


void
initGcThreads (void)
{
    if (gc_threads == NULL) {
#if defined(THREADED_RTS)
        nat i;
	gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes * 
				     sizeof(gc_thread*), 
				     "alloc_gc_threads");

	for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
            gc_threads[i] = 
                stgMallocBytes(sizeof(gc_thread) + 
                               RtsFlags.GcFlags.generations * sizeof(gen_workspace),
                               "alloc_gc_threads");

            new_gc_thread(i, gc_threads[i]);
	}
#else
        gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
	gc_threads[0] = gct;
        new_gc_thread(0,gc_threads[0]);
#endif
    }
}

void
freeGcThreads (void)
{
    nat g;
    if (gc_threads != NULL) {
#if defined(THREADED_RTS)
        nat i;
	for (i = 0; i < n_capabilities; i++) {
            for (g = 0; g < RtsFlags.GcFlags.generations; g++)
            {
                freeWSDeque(gc_threads[i]->gens[g].todo_q);
            }
            stgFree (gc_threads[i]);
	}
        stgFree (gc_threads);
#else
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
        {
            freeWSDeque(gc_threads[0]->gens[g].todo_q);
        }
        stgFree (gc_threads);
#endif
        gc_threads = NULL;
    }
}

/* ----------------------------------------------------------------------------
   Start GC threads
   ------------------------------------------------------------------------- */

static volatile StgWord gc_running_threads;
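// gc_running_threads counts the GC threads that currently have (or may yet
// find) work to do; inc_running()/dec_running() below adjust it atomically,
// and the GC is complete once it drops to zero.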

static StgWord
inc_running (void)
{
900 901 902 903
    StgWord new;
    new = atomic_inc(&gc_running_threads);
    ASSERT(new <= n_gc_threads);
    return new;
Simon Marlow's avatar
Simon Marlow committed
904 905
}

static StgWord
dec_running (void)
{
909 910
    ASSERT(gc_running_threads != 0);
    return atomic_dec(&gc_running_threads);
Simon Marlow's avatar
Simon Marlow committed
911 912
}

static rtsBool
any_work (void)
{
    int g;
    gen_workspace *ws;
918 919 920 921 922 923

    gct->any_work++;

    write_barrier();

    // scavenge objects in compacted generation
924
    if (mark_stack_bd != NULL && !mark_stack_empty()) {
925 926 927 928 929 930
	return rtsTrue;
    }
    
    // Check for global work in any step.  We don't need to check for
    // local work, because we have already exited scavenge_loop(),
    // which means there is no local work for this thread.
    for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
        ws = &gct->gens[g];
        if (ws->todo_large_objects) return rtsTrue;
        if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
        if (ws->todo_overflow) return rtsTrue;
    }

#if defined(THREADED_RTS)
    if (work_stealing) {
        nat n;
        // look for work to steal
        for (n = 0; n < n_gc_threads; n++) {
            if (n == gct->thread_index) continue;
            for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
                ws = &gc_threads[n]->gens[g];
                if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
            }
        }
    }
#endif

    gct->no_work++;
#if defined(THREADED_RTS)
    yieldThread();
#endif

    return rtsFalse;
}    

static void
scavenge_until_all_done (void)
{
    DEBUG_ONLY( nat r );
	

loop:
#if defined(THREADED_RTS)
    if (n_gc_threads > 1) {
        scavenge_loop();
    } else {
        scavenge_loop1();
    }
#else
    scavenge_loop();
#endif

    collect_gct_blocks();

    // scavenge_loop() only exits when there's no work to do

#ifdef DEBUG
    r = dec_running();
#else
    dec_running();
#endif

    traceEventGcIdle(gct->cap);

    debugTrace(DEBUG_gc, "%d GC threads still running", r);
    
    while (gc_running_threads != 0) {
        // usleep(1);
        if (any_work()) {
            inc_running();
            traceEventGcWork(gct->cap);
            goto loop;
        }
        // any_work() does not remove the work from the queue, it
        // just checks for the presence of work.  If we find any,
        // then we increment gc_running_threads and go back to 
        // scavenge_loop() to perform any pending work.
    }
    
    traceEventGcDone(gct->cap);
}

#if defined(THREADED_RTS)
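// gcWorkerThread(): the body of a worker GC thread.  It waits in the
// GC_THREAD_STANDING_BY state until woken, evacuates its own roots,
// scavenges until all threads are done, prunes its spark queue, and then
// waits in GC_THREAD_WAITING_TO_CONTINUE until it is released.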

void
gcWorkerThread (Capability *cap)
{
    gc_thread *saved_gct;

    // necessary if we stole a callee-saves register for gct:
    saved_gct = gct;

    gct = gc_threads[cap->no];
    gct->id = osThreadId();

    stat_gcWorkerThreadStart(gct);

    // Wait until we're told to wake up
    RELEASE_SPIN_LOCK(&gct->mut_spin);
    gct->wakeup = GC_THREAD_STANDING_BY;
    debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
    ACQUIRE_SPIN_LOCK(&gct->gc_spin);
    
#ifdef USE_PAPI
    // start performance counters in this thread...
    if (gct->papi_events == -1) {
        papi_init_eventset(&gct->papi_events);
    }
    papi_thread_start_gc1_count(gct->papi_events);
#endif

    init_gc_thread(gct);

    traceEventGcWork(gct->cap);

    // Every thread evacuates some roots.
    gct->evac_gen_no = 0;
    markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
    scavenge_capability_mut_lists(cap);

    scavenge_until_all_done();
    
#ifdef THREADED_RTS
    // Now that the whole heap is marked, we discard any sparks that
    // were found to be unreachable.  The main GC thread is currently
    // marking heap reachable via weak pointers, so it is
    // non-deterministic whether a spark will be retained if it is
    // only reachable via weak pointers.  To fix this problem would
    // require another GC barrier, which is too high a price.
    pruneSparkQueue(cap);
#endif

#ifdef USE_PAPI
    // count events in this thread towards the GC totals
    papi_thread_stop_gc1_count(gct->papi_events);
#endif

    // Wait until we're told to continue
    RELEASE_SPIN_LOCK(&gct->gc_spin);
    gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
    debugTrace(DEBUG_gc, "GC thread %d waiting to continue...", 
               gct->thread_index);
    ACQUIRE_SPIN_LOCK(&gct->mut_spin);
    debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);

    // record the time spent doing GC in the Task structure
    stat_gcWorkerThreadDone(gct);

    SET_GCT(saved_gct);
}

#endif