/* -----------------------------------------------------------------------------
 * $Id: GC.c,v 1.139 2002/09/05 16:26:33 simonmar Exp $
 *
 * (c) The GHC Team 1998-1999
 *
 * Generational garbage collector
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"
#include "RtsFlags.h"
#include "RtsUtils.h"
#include "Storage.h"
#include "StoragePriv.h"
#include "Stats.h"
#include "Schedule.h"
#include "SchedAPI.h"		// for RevertCAFs prototype
#include "Sanity.h"
#include "BlockAlloc.h"
#include "MBlock.h"
#include "Main.h"
#include "ProfHeap.h"
#include "SchedAPI.h"
#include "Weak.h"
#include "StablePriv.h"
#include "Prelude.h"
#include "ParTicky.h"		// ToDo: move into Rts.h
#include "GCCompact.h"
#if defined(GRAN) || defined(PAR)
# include "GranSimRts.h"
# include "ParallelRts.h"
# include "FetchMe.h"
# if defined(DEBUG)
#  include "Printer.h"
#  include "ParallelDebug.h"
# endif
#endif
#include "HsFFI.h"
#include "Linker.h"
#if defined(RTS_GTK_FRONTPANEL)
#include "FrontPanel.h"
#endif

#include "RetainerProfile.h"
#include "LdvProfile.h"

#include <string.h>

/* STATIC OBJECT LIST.
 *
 * During GC:
 * We maintain a linked list of static objects that are still live.
 * The requirements for this list are:
 *
 *  - we need to scan the list while adding to it, in order to
 *    scavenge all the static objects (in the same way that
 *    breadth-first scavenging works for dynamic objects).
 *
 *  - we need to be able to tell whether an object is already on
 *    the list, to break loops.
 *
 * Each static object has a "static link field", which we use for
 * linking objects on to the list.  We use a stack-type list, consing
 * objects on the front as they are added (this means that the
 * scavenge phase is depth-first, not breadth-first, but that
 * shouldn't matter).  
 *
 * A separate list is kept for objects that have been scavenged
 * already - this is so that we can zero all the marks afterwards.
 *
 * An object is on the list if its static link field is non-zero; this
 * means that we have to mark the end of the list with '1', not NULL.  
 *
 * Extra notes for generational GC:
 *
 * Each generation has a static object list associated with it.  When
 * collecting generations up to N, we treat the static object lists
 * from generations > N as roots.
 *
 * We build up a static object list while collecting generations 0..N,
 * which is then appended to the static object list of generation N+1.
 */
static StgClosure* static_objects;      // live static objects
StgClosure* scavenged_static_objects;   // static objects scavenged so far
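/* Illustrative sketch (not part of the collector): the consing idiom
 * described above, written with a hypothetical static_link() accessor for
 * an object's static link field -- the real code uses per-closure-type
 * macros defined elsewhere in the RTS:
 *
 *     if (static_link(p) == NULL) {          // not on any list yet
 *         static_link(p) = static_objects;   // cons onto the front
 *         static_objects = p;
 *     }
 *
 * Because END_OF_STATIC_LIST is a non-NULL sentinel, a NULL link still
 * means "not on a list", which is what breaks the loops mentioned above.
 */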

/* N is the oldest generation being collected, where the generations
 * are numbered starting at 0.  A major GC (indicated by the major_gc
 * flag) is when we're collecting all generations.  We only attempt to
 * deal with static objects and GC CAFs when doing a major GC.
 */
static nat N;
static rtsBool major_gc;

/* Youngest generation that objects should be evacuated to in
 * evacuate().  (Logically an argument to evacuate, but it stays the
 * same for long stretches, so we optimise it into a global variable).
 */
static nat evac_gen;

/* Weak pointers
 */
StgWeak *old_weak_ptr_list; // also pending finaliser list

/* Which stage of processing various kinds of weak pointer are we at?
 * (see traverse_weak_ptr_list() below for discussion).
 */
typedef enum { WeakPtrs, WeakThreads, WeakDone } WeakStage;
static WeakStage weak_stage;

/* List of all threads during GC
 */
static StgTSO *old_all_threads;
StgTSO *resurrected_threads;

/* Flag indicating failure to evacuate an object to the desired
 * generation.
 */
static rtsBool failed_to_evac;

/* Old to-space (used for two-space collector only)
 */
static bdescr *old_to_blocks;

/* Data used for allocation area sizing.
 */
static lnat new_blocks;		 // blocks allocated during this GC 
static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC 

/* Used to avoid long recursion due to selector thunks
 */
static lnat thunk_selector_depth = 0;
#define MAX_THUNK_SELECTOR_DEPTH 256
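// (Sketch of the intended use: evacuate() bumps thunk_selector_depth while
// it follows chains of selector thunks; once the depth would exceed
// MAX_THUNK_SELECTOR_DEPTH it stops trying to evaluate through them and
// just copies the thunk, so the recursion stays bounded.)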

/* -----------------------------------------------------------------------------
   Static function declarations
   -------------------------------------------------------------------------- */

static void         mark_root               ( StgClosure **root );
static StgClosure * evacuate                ( StgClosure *q );
static void         zero_static_object_list ( StgClosure* first_static );
static void         zero_mutable_list       ( StgMutClosure *first );

static rtsBool      traverse_weak_ptr_list  ( void );
static void         mark_weak_ptr_list      ( StgWeak **list );

static void         scavenge                ( step * );
static void         scavenge_mark_stack     ( void );
static void         scavenge_stack          ( StgPtr p, StgPtr stack_end );
static rtsBool      scavenge_one            ( StgPtr p );
static void         scavenge_large          ( step * );
static void         scavenge_static         ( void );
static void         scavenge_mutable_list   ( generation *g );
static void         scavenge_mut_once_list  ( generation *g );

#if 0 && defined(DEBUG)
static void         gcCAFs                  ( void );
#endif

/* -----------------------------------------------------------------------------
   inline functions etc. for dealing with the mark bitmap & stack.
   -------------------------------------------------------------------------- */

#define MARK_STACK_BLOCKS 4

static bdescr *mark_stack_bdescr;
static StgPtr *mark_stack;
static StgPtr *mark_sp;
static StgPtr *mark_splim;

// Flag and pointers used for falling back to a linear scan when the
// mark stack overflows.
static rtsBool mark_stack_overflowed;
static bdescr *oldgen_scan_bd;
static StgPtr  oldgen_scan;

static inline rtsBool
mark_stack_empty(void)
{
    return mark_sp == mark_stack;
}

static inline rtsBool
mark_stack_full(void)
{
    return mark_sp >= mark_splim;
}

static inline void
reset_mark_stack(void)
{
    mark_sp = mark_stack;
}

static inline void
push_mark_stack(StgPtr p)
{
    *mark_sp++ = p;
}

static inline StgPtr
pop_mark_stack(void)
{
    return *--mark_sp;
}
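/* Illustrative sketch (not the real scavenger) of how these helpers are
 * meant to be used together:
 *
 *     push_mark_stack((StgPtr)p);           // object p needs scanning
 *     if (mark_stack_full()) {              // overflow: remember it and
 *         mark_stack_overflowed = rtsTrue;  //  fall back to the linear
 *         reset_mark_stack();               //  scan described above
 *     }
 *     ...
 *     while (!mark_stack_empty()) {
 *         StgPtr q = pop_mark_stack();
 *         // scan the object at q, possibly pushing more objects
 *     }
 *
 * scavenge_mark_stack(), defined later in this file, is the real consumer.
 */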

/* -----------------------------------------------------------------------------
   GarbageCollect

   For garbage collecting generation N (and all younger generations):

     - follow all pointers in the root set.  the root set includes all 
       mutable objects in all steps in all generations.

     - for each pointer, evacuate the object it points to into either
       + to-space in the next higher step in that generation, if one exists,
       + if the object's generation == N, then evacuate it to the next
         generation if one exists, or else to-space in the current
	 generation.
       + if the object's generation < N, then evacuate it to to-space
         in the next generation.

     - repeatedly scavenge to-space from each step in each generation
       being collected until no more objects can be evacuated.
      
     - free from-space in each step, and set from-space = to-space.

   Locks held: sched_mutex

   -------------------------------------------------------------------------- */

void
GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
{
  bdescr *bd;
  step *stp;
  lnat live, allocated, collected = 0, copied = 0;
  lnat oldgen_saved_blocks = 0;
  nat g, s;

#ifdef PROFILING
  CostCentreStack *prev_CCS;
#endif

#if defined(DEBUG) && defined(GRAN)
  IF_DEBUG(gc, belch("@@ Starting garbage collection at %ld (%lx)\n", 
		     Now, Now));
#endif

  // tell the stats department that we've started a GC 
  stat_startGC();

  // Init stats and print par specific (timing) info 
  PAR_TICKY_PAR_START();

  // attribute any costs to CCS_GC 
#ifdef PROFILING
  prev_CCS = CCCS;
  CCCS = CCS_GC;
#endif

  /* Approximate how much we allocated.  
   * Todo: only when generating stats? 
   */
  allocated = calcAllocated();

  /* Figure out which generation to collect
   */
  if (force_major_gc) {
    N = RtsFlags.GcFlags.generations - 1;
    major_gc = rtsTrue;
  } else {
    N = 0;
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      if (generations[g].steps[0].n_blocks +
	  generations[g].steps[0].n_large_blocks
	  >= generations[g].max_blocks) {
        N = g;
      }
    }
    major_gc = (N == RtsFlags.GcFlags.generations-1);
  }

#ifdef RTS_GTK_FRONTPANEL
  if (RtsFlags.GcFlags.frontpanel) {
      updateFrontPanelBeforeGC(N);
  }
#endif

  // check stack sanity *before* GC (ToDo: check all threads) 
#if defined(GRAN)
  // ToDo!: check sanity  IF_DEBUG(sanity, checkTSOsSanity());
#endif
  IF_DEBUG(sanity, checkFreeListSanity());

  /* Initialise the static object lists
   */
  static_objects = END_OF_STATIC_LIST;
  scavenged_static_objects = END_OF_STATIC_LIST;

  /* zero the mutable list for the oldest generation (see comment by
   * zero_mutable_list below).
   */
  if (major_gc) { 
    zero_mutable_list(generations[RtsFlags.GcFlags.generations-1].mut_once_list);
  }

  /* Save the old to-space if we're doing a two-space collection
   */
  if (RtsFlags.GcFlags.generations == 1) {
    old_to_blocks = g0s0->to_blocks;
    g0s0->to_blocks = NULL;
  }

  /* Keep a count of how many new blocks we allocated during this GC
   * (used for resizing the allocation area, later).
   */
  new_blocks = 0;

  /* Initialise to-space in all the generations/steps that we're
   * collecting.
   */
  for (g = 0; g <= N; g++) {
    generations[g].mut_once_list = END_MUT_LIST;
    generations[g].mut_list = END_MUT_LIST;

    for (s = 0; s < generations[g].n_steps; s++) {

      // generation 0, step 0 doesn't need to-space 
      if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { 
	continue; 
      }

      /* Get a free block for to-space.  Extra blocks will be chained on
       * as necessary.
       */
      bd = allocBlock();
      stp = &generations[g].steps[s];
      ASSERT(stp->gen_no == g);
      ASSERT(stp->hp ? Bdescr(stp->hp)->step == stp : rtsTrue);
      bd->gen_no = g;
      bd->step = stp;
      bd->link = NULL;
      bd->flags        = BF_EVACUATED;	// it's a to-space block 
      stp->hp          = bd->start;
      stp->hpLim       = stp->hp + BLOCK_SIZE_W;
      stp->hp_bd       = bd;
      stp->to_blocks   = bd;
      stp->n_to_blocks = 1;
      stp->scan        = bd->start;
      stp->scan_bd     = bd;
      stp->new_large_objects = NULL;
      stp->scavenged_large_objects = NULL;
      stp->n_scavenged_large_blocks = 0;
      new_blocks++;
      // mark the large objects as not evacuated yet 
      for (bd = stp->large_objects; bd; bd = bd->link) {
	bd->flags = BF_LARGE;
      }

      // for a compacted step, we need to allocate the bitmap
      if (stp->is_compacted) {
	  nat bitmap_size; // in bytes
	  bdescr *bitmap_bdescr;
	  StgWord *bitmap;

	  bitmap_size = stp->n_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
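	  // One mark bit per word: with (say) 4k blocks and 4-byte words
	  // that is 4096 / (4*8) = 128 bytes of bitmap per block.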

	  if (bitmap_size > 0) {
	      bitmap_bdescr = allocGroup((nat)BLOCK_ROUND_UP(bitmap_size) 
					 / BLOCK_SIZE);
	      stp->bitmap = bitmap_bdescr;
	      bitmap = bitmap_bdescr->start;
	      
	      IF_DEBUG(gc, belch("bitmap_size: %d, bitmap: %p",
				   bitmap_size, bitmap););
	      
	      // don't forget to fill it with zeros!
	      memset(bitmap, 0, bitmap_size);
	      
	      // for each block in this step, point to its bitmap from the
	      // block descriptor.
	      for (bd=stp->blocks; bd != NULL; bd = bd->link) {
		  bd->u.bitmap = bitmap;
		  bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
	      }
	  }
      }
    }
  }

  /* make sure the older generations have at least one block to
   * allocate into (this makes things easier for copy(), see below).
   */
  for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      stp = &generations[g].steps[s];
      if (stp->hp_bd == NULL) {
	  ASSERT(stp->blocks == NULL);
	  bd = allocBlock();
	  bd->gen_no = g;
	  bd->step = stp;
	  bd->link = NULL;
	  bd->flags = 0;	// *not* a to-space block or a large object
	  stp->hp = bd->start;
	  stp->hpLim = stp->hp + BLOCK_SIZE_W;
	  stp->hp_bd = bd;
	  stp->blocks = bd;
	  stp->n_blocks = 1;
	  new_blocks++;
      }
      /* Set the scan pointer for older generations: remember we
       * still have to scavenge objects that have been promoted. */
      stp->scan = stp->hp;
      stp->scan_bd = stp->hp_bd;
      stp->to_blocks = NULL;
      stp->n_to_blocks = 0;
      stp->new_large_objects = NULL;
      stp->scavenged_large_objects = NULL;
      stp->n_scavenged_large_blocks = 0;
    }
  }

  /* Allocate a mark stack if we're doing a major collection.
   */
  if (major_gc) {
      mark_stack_bdescr = allocGroup(MARK_STACK_BLOCKS);
      mark_stack = (StgPtr *)mark_stack_bdescr->start;
      mark_sp    = mark_stack;
      mark_splim = mark_stack + (MARK_STACK_BLOCKS * BLOCK_SIZE_W);
  } else {
      mark_stack_bdescr = NULL;
  }

  /* -----------------------------------------------------------------------
   * follow all the roots that we know about:
   *   - mutable lists from each generation > N
   * we want to *scavenge* these roots, not evacuate them: they're not
   * going to move in this GC.
   * Also: do them in reverse generation order.  This is because we
   * often want to promote objects that are pointed to by older
   * generations early, so we don't have to repeatedly copy them.
   * Doing the generations in reverse order ensures that we don't end
   * up in the situation where we want to evac an object to gen 3 and
   * it has already been evaced to gen 2.
   */
  { 
    int st;
    for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
      generations[g].saved_mut_list = generations[g].mut_list;
      generations[g].mut_list = END_MUT_LIST;
    }

    // Do the mut-once lists first 
    for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
      IF_PAR_DEBUG(verbose,
		   printMutOnceList(&generations[g]));
      scavenge_mut_once_list(&generations[g]);
      evac_gen = g;
      for (st = generations[g].n_steps-1; st >= 0; st--) {
	scavenge(&generations[g].steps[st]);
      }
    }

    for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
      IF_PAR_DEBUG(verbose,
		   printMutableList(&generations[g]));
      scavenge_mutable_list(&generations[g]);
      evac_gen = g;
      for (st = generations[g].n_steps-1; st >= 0; st--) {
	scavenge(&generations[g].steps[st]);
      }
    }
  }

  /* follow roots from the CAF list (used by GHCi)
   */
  evac_gen = 0;
  markCAFs(mark_root);

  /* follow all the roots that the application knows about.
   */
  evac_gen = 0;
  get_roots(mark_root);

#if defined(PAR)
  /* And don't forget to mark the TSO if we got here direct from
   * Haskell! */
  /* Not needed in a seq version?
  if (CurrentTSO) {
    CurrentTSO = (StgTSO *)MarkRoot((StgClosure *)CurrentTSO);
  }
  */

  // Mark the entries in the GALA table of the parallel system 
  markLocalGAs(major_gc);
  // Mark all entries on the list of pending fetches 
  markPendingFetches(major_gc);
#endif

  /* Mark the weak pointer list, and prepare to detect dead weak
   * pointers.
   */
  mark_weak_ptr_list(&weak_ptr_list);
  old_weak_ptr_list = weak_ptr_list;
  weak_ptr_list = NULL;
  weak_stage = WeakPtrs;

  /* The all_threads list is like the weak_ptr_list.  
   * See traverse_weak_ptr_list() for the details.
   */
  old_all_threads = all_threads;
  all_threads = END_TSO_QUEUE;
  resurrected_threads = END_TSO_QUEUE;

  /* Mark the stable pointer table.
   */
  markStablePtrTable(mark_root);

#ifdef INTERPRETER
  { 
      /* ToDo: To fix the caf leak, we need to make the commented out
       * parts of this code do something sensible - as described in 
       * the CAF document.
       */
      extern void markHugsObjects(void);
      markHugsObjects();
  }
#endif

  /* -------------------------------------------------------------------------
   * Repeatedly scavenge all the areas we know about until there's no
   * more scavenging to be done.
   */
  { 
    rtsBool flag;
  loop:
    flag = rtsFalse;

    // scavenge static objects 
    if (major_gc && static_objects != END_OF_STATIC_LIST) {
	IF_DEBUG(sanity, checkStaticObjects(static_objects));
	scavenge_static();
    }

    /* When scavenging the older generations:  Objects may have been
     * evacuated from generations <= N into older generations, and we
     * need to scavenge these objects.  We're going to try to ensure that
     * any evacuations that occur move the objects into at least the
     * same generation as the object being scavenged, otherwise we
     * have to create new entries on the mutable list for the older
     * generation.
     */

    // scavenge each step in generations 0..maxgen 
    { 
      long gen;
      int st; 

    loop2:
      // scavenge objects in compacted generation
      if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
	  (mark_stack_bdescr != NULL && !mark_stack_empty())) {
	  scavenge_mark_stack();
	  flag = rtsTrue;
      }

      for (gen = RtsFlags.GcFlags.generations; --gen >= 0; ) {
	for (st = generations[gen].n_steps; --st >= 0; ) {
	  if (gen == 0 && st == 0 && RtsFlags.GcFlags.generations > 1) { 
	    continue; 
	  }
	  stp = &generations[gen].steps[st];
	  evac_gen = gen;
	  if (stp->hp_bd != stp->scan_bd || stp->scan < stp->hp) {
	    scavenge(stp);
	    flag = rtsTrue;
	    goto loop2;
	  }
	  if (stp->new_large_objects != NULL) {
	    scavenge_large(stp);
	    flag = rtsTrue;
	    goto loop2;
	  }
	}
      }
    }

    if (flag) { goto loop; }

    // must be last...  invariant is that everything is fully
    // scavenged at this point.
    if (traverse_weak_ptr_list()) { // returns rtsTrue if evaced something 
      goto loop;
    }
  }

  /* Update the pointers from the "main thread" list - these are
   * treated as weak pointers because we want to allow a main thread
   * to get a BlockedOnDeadMVar exception in the same way as any other
   * thread.  Note that the threads should all have been retained by
   * GC by virtue of being on the all_threads list, we're just
   * updating pointers here.
   */
  {
      StgMainThread *m;
      StgTSO *tso;
      for (m = main_threads; m != NULL; m = m->link) {
	  tso = (StgTSO *) isAlive((StgClosure *)m->tso);
	  if (tso == NULL) {
	      barf("main thread has been GC'd");
	  }
	  m->tso = tso;
      }
  }

#if defined(PAR)
  // Reconstruct the Global Address tables used in GUM 
  rebuildGAtables(major_gc);
  IF_DEBUG(sanity, checkLAGAtable(rtsTrue/*check closures, too*/));
#endif

  // Now see which stable names are still alive.
  gcStablePtrTable();

  // Tidy the end of the to-space chains 
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      for (s = 0; s < generations[g].n_steps; s++) {
	  stp = &generations[g].steps[s];
	  if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
	      stp->hp_bd->free = stp->hp;
	      stp->hp_bd->link = NULL;
	  }
      }
  }

#ifdef PROFILING
  // We call processHeapClosureForDead() on every closure destroyed during
  // the current garbage collection, so we invoke LdvCensusForDead().
  if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
      || RtsFlags.ProfFlags.bioSelector != NULL)
    LdvCensusForDead(N);
#endif

  // NO MORE EVACUATION AFTER THIS POINT!
  // Finally: compaction of the oldest generation.
  if (major_gc && oldest_gen->steps[0].is_compacted) {
      // save number of blocks for stats
      oldgen_saved_blocks = oldest_gen->steps[0].n_blocks;
      compact(get_roots);
  }

  IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse));

  /* run through all the generations/steps and tidy up 
   */
  copied = new_blocks * BLOCK_SIZE_W;
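  // new_blocks counts whole to-space blocks, so this over-estimates; the
  // unused tail of each collected step's current block is subtracted in
  // the loop below.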
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {

    if (g <= N) {
      generations[g].collections++; // for stats 
    }

    for (s = 0; s < generations[g].n_steps; s++) {
      bdescr *next;
      stp = &generations[g].steps[s];

      if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
	// stats information: how much we copied 
	if (g <= N) {
	  copied -= stp->hp_bd->start + BLOCK_SIZE_W -
	    stp->hp_bd->free;
	}
      }

      // for generations we collected... 
      if (g <= N) {

	  // rough calculation of garbage collected, for stats output
	  if (stp->is_compacted) {
	      collected += (oldgen_saved_blocks - stp->n_blocks) * BLOCK_SIZE_W;
	  } else {
	      collected += stp->n_blocks * BLOCK_SIZE_W;
	  }

	/* free old memory and shift to-space into from-space for all
	 * the collected steps (except the allocation area).  These
	 * freed blocks will probably be quickly recycled.
	 */
	if (!(g == 0 && s == 0)) {
	    if (stp->is_compacted) {
		// for a compacted step, just shift the new to-space
		// onto the front of the now-compacted existing blocks.
		for (bd = stp->to_blocks; bd != NULL; bd = bd->link) {
		    bd->flags &= ~BF_EVACUATED;	// now from-space 
		}
		// tack the new blocks on the end of the existing blocks
		if (stp->blocks == NULL) {
		    stp->blocks = stp->to_blocks;
		} else {
		    for (bd = stp->blocks; bd != NULL; bd = next) {
			next = bd->link;
			if (next == NULL) {
			    bd->link = stp->to_blocks;
			}
		    }
		}
		// add the new blocks to the block tally
		stp->n_blocks += stp->n_to_blocks;
	    } else {
		freeChain(stp->blocks);
		stp->blocks = stp->to_blocks;
		stp->n_blocks = stp->n_to_blocks;
		for (bd = stp->blocks; bd != NULL; bd = bd->link) {
		    bd->flags &= ~BF_EVACUATED;	// now from-space 
		}
	    }
	    stp->to_blocks = NULL;
	    stp->n_to_blocks = 0;
	}

	/* LARGE OBJECTS.  The current live large objects are chained on
	 * scavenged_large, having been moved during garbage
	 * collection from large_objects.  Any objects left on
	 * large_objects list are therefore dead, so we free them here.
	 */
	for (bd = stp->large_objects; bd != NULL; bd = next) {
	  next = bd->link;
	  freeGroup(bd);
	  bd = next;
	}

	// update the count of blocks used by large objects
	for (bd = stp->scavenged_large_objects; bd != NULL; bd = bd->link) {
	  bd->flags &= ~BF_EVACUATED;
	}
	stp->large_objects  = stp->scavenged_large_objects;
	stp->n_large_blocks = stp->n_scavenged_large_blocks;

      } else {
	// for older generations... 
	
	/* For older generations, we need to append the
	 * scavenged_large_object list (i.e. large objects that have been
	 * promoted during this GC) to the large_object list for that step.
	 */
	for (bd = stp->scavenged_large_objects; bd; bd = next) {
	  next = bd->link;
	  bd->flags &= ~BF_EVACUATED;
	  dbl_link_onto(bd, &stp->large_objects);
	}

	// add the new blocks we promoted during this GC 
	stp->n_blocks += stp->n_to_blocks;
	stp->n_large_blocks += stp->n_scavenged_large_blocks;
      }
    }
  }

  /* Reset the sizes of the older generations when we do a major
   * collection.
   *
   * CURRENT STRATEGY: make all generations except zero the same size.
   * We have to stay within the maximum heap size, and leave a certain
   * percentage of the maximum heap size available to allocate into.
   */
  if (major_gc && RtsFlags.GcFlags.generations > 1) {
      nat live, size, min_alloc;
      nat max  = RtsFlags.GcFlags.maxHeapSize;
      nat gens = RtsFlags.GcFlags.generations;

      // live in the oldest generations
      live = oldest_gen->steps[0].n_blocks +
	     oldest_gen->steps[0].n_large_blocks;

      // default max size for all generations except zero
      size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
		     RtsFlags.GcFlags.minOldGenSize);

      // minimum size for generation zero
      min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
			  RtsFlags.GcFlags.minAllocAreaSize);

      // Auto-enable compaction when the residency reaches a
      // certain percentage of the maximum heap size (default: 30%).
      if (RtsFlags.GcFlags.generations > 1 &&
	  (RtsFlags.GcFlags.compact ||
	   (max > 0 &&
	    oldest_gen->steps[0].n_blocks > 
	    (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
	  oldest_gen->steps[0].is_compacted = 1;
//	  fprintf(stderr,"compaction: on\n", live);
      } else {
	  oldest_gen->steps[0].is_compacted = 0;
//	  fprintf(stderr,"compaction: off\n", live);
      }

      // if we're going to go over the maximum heap size, reduce the
      // size of the generations accordingly.  The calculation is
      // different if compaction is turned on, because we don't need
      // to double the space required to collect the old generation.
      if (max != 0) {

	  // this test is necessary to ensure that the calculations
	  // below don't have any negative results - we're working
	  // with unsigned values here.
	  if (max < min_alloc) {
	      heapOverflow();
	  }

	  if (oldest_gen->steps[0].is_compacted) {
	      if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
		  size = (max - min_alloc) / ((gens - 1) * 2 - 1);
	      }
	  } else {
	      if ( (size * (gens - 1) * 2) + min_alloc > max ) {
		  size = (max - min_alloc) / ((gens - 1) * 2);
	      }
	  }

	  if (size < live) {
	      heapOverflow();
	  }
      }
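      // Worked example (illustrative numbers, non-compacting case): with
      // gens == 2 the constraint is size*2 + min_alloc <= max, since the
      // old generation may need twice its size while being copied, so
      // size is capped at (max - min_alloc) / 2.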

#if 0
      fprintf(stderr,"live: %d, min_alloc: %d, size : %d, max = %d\n", live,
	      min_alloc, size, max);
#endif

      for (g = 0; g < gens; g++) {
	  generations[g].max_blocks = size;
      }
  }

  // Guess the amount of live data for stats.
  live = calcLive();

  /* Free the small objects allocated via allocate(), since this will
   * all have been copied into G0S1 now.  
   */
  if (small_alloc_list != NULL) {
    freeChain(small_alloc_list);
  }
  small_alloc_list = NULL;
  alloc_blocks = 0;
  alloc_Hp = NULL;
  alloc_HpLim = NULL;
  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;

  // Start a new pinned_object_block
  pinned_object_block = NULL;

  /* Free the mark stack.
   */
  if (mark_stack_bdescr != NULL) {
      freeGroup(mark_stack_bdescr);
  }

  /* Free any bitmaps.
   */
  for (g = 0; g <= N; g++) {
      for (s = 0; s < generations[g].n_steps; s++) {
	  stp = &generations[g].steps[s];
	  if (stp->is_compacted && stp->bitmap != NULL) {
	      freeGroup(stp->bitmap);
	  }
      }
  }

  /* Two-space collector:
   * Free the old to-space, and estimate the amount of live data.
   */
  if (RtsFlags.GcFlags.generations == 1) {
    nat blocks;
    
    if (old_to_blocks != NULL) {
      freeChain(old_to_blocks);
    }
    for (bd = g0s0->to_blocks; bd != NULL; bd = bd->link) {
      bd->flags = 0;	// now from-space 
    }

    /* For a two-space collector, we need to resize the nursery. */
    
    /* set up a new nursery.  Allocate a nursery size based on a
     * function of the amount of live data (by default a factor of 2)
     * Use the blocks from the old nursery if possible, freeing up any
     * left over blocks.
     *
     * If we get near the maximum heap size, then adjust our nursery
     * size accordingly.  If the nursery is the same size as the live
     * data (L), then we need 3L bytes.  We can reduce the size of the
     * nursery to bring the required memory down near 2L bytes.
     * 
     * A normal 2-space collector would need 4L bytes to give the same
     * performance we get from 3L bytes, reducing to the same
     * performance at 2L bytes.
     */
    blocks = g0s0->n_to_blocks;

    if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
	 blocks * RtsFlags.GcFlags.oldGenFactor * 2 > 
	   RtsFlags.GcFlags.maxHeapSize ) {
      long adjusted_blocks;  // signed on purpose 
      int pc_free; 
      
      adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
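      // Roughly 2 * blocks is reserved for the collection itself (the live
      // data plus its to-space copy); what remains of the maximum heap
      // becomes the nursery.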
      IF_DEBUG(gc, belch("@@ Near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld", RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks));
      pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
      if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */ {
	heapOverflow();
      }
      blocks = adjusted_blocks;
      
    } else {
      blocks *= RtsFlags.GcFlags.oldGenFactor;
      if (blocks < RtsFlags.GcFlags.minAllocAreaSize) {
	blocks = RtsFlags.GcFlags.minAllocAreaSize;
      }
    }
    resizeNursery(blocks);
    
  } else {
    /* Generational collector:
     * If the user has given us a suggested heap size, adjust our
     * allocation area to make best use of the memory available.
     */

    if (RtsFlags.GcFlags.heapSizeSuggestion) {
      long blocks;
      nat needed = calcNeeded(); 	// approx blocks needed at next GC 

      /* Guess how much will be live in generation 0 step 0 next time.
       * A good approximation is obtained by finding the
       * percentage of g0s0 that was live at the last minor GC.
       */
      if (N == 0) {
	g0s0_pcnt_kept = (new_blocks * 100) / g0s0->n_blocks;
      }

      /* Estimate a size for the allocation area based on the
       * information available.  We might end up going slightly under
       * or over the suggested heap size, but we should be pretty
       * close on average.
       *
       * Formula:            suggested - needed
       *                ----------------------------
       *                    1 + g0s0_pcnt_kept/100
       *
       * where 'needed' is the amount of memory needed at the next
       * collection for collecting all steps except g0s0.
       */
      blocks = 
	(((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
	(100 + (long)g0s0_pcnt_kept);
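      /* For example (illustrative numbers): suggestion = 6000 blocks,
       * needed = 1000, g0s0_pcnt_kept = 30 gives
       * blocks = (6000-1000)*100/130 ~= 3846, so at the next collection
       * roughly needed + blocks*(1 + 30/100) ~= 6000 blocks are in use,
       * matching the suggestion.
       */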
      
      if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
	blocks = RtsFlags.GcFlags.minAllocAreaSize;
      }
      
      resizeNursery((nat)blocks);

    } else {
      // we might have added extra large blocks to the nursery, so
      // resize back to minAllocAreaSize again.
      resizeNursery(RtsFlags.GcFlags.minAllocAreaSize);
    }
  }

 // mark the garbage collected CAFs as dead 
#if 0 && defined(DEBUG) // doesn't work at the moment 
  if (major_gc) { gcCAFs(); }
#endif
  
#ifdef PROFILING
  // resetStaticObjectForRetainerProfiling() must be called before
  // zeroing below.
  resetStaticObjectForRetainerProfiling();
#endif

  // zero the scavenged static object list 
  if (major_gc) {
    zero_static_object_list(scavenged_static_objects);
  }

  // Reset the nursery
  resetNurseries();

  RELEASE_LOCK(&sched_mutex);
  
  // start any pending finalizers 
  scheduleFinalizers(old_weak_ptr_list);
  
  // send exceptions to any threads which were about to die 
  resurrectThreads(resurrected_threads);
  
  ACQUIRE_LOCK(&sched_mutex);

  // Update the stable pointer hash table.
  updateStablePtrTable(major_gc);

  // check sanity after GC 
  IF_DEBUG(sanity, checkSanity());

  // extra GC trace info 
  IF_DEBUG(gc, statDescribeGens());

#ifdef DEBUG
  // symbol-table based profiling 
  /*  heapCensus(to_blocks); */ /* ToDo */
#endif

  // restore enclosing cost centre 
#ifdef PROFILING
  CCCS = prev_CCS;
#endif

  // check for memory leaks if sanity checking is on 
  IF_DEBUG(sanity, memInventory());

#ifdef RTS_GTK_FRONTPANEL
  if (RtsFlags.GcFlags.frontpanel) {
      updateFrontPanelAfterGC( N, live );
  }
#endif

  // ok, GC over: tell the stats department what happened. 
  stat_endGC(allocated, collected, live, copied, N);

  //PAR_TICKY_TP();
}


/* -----------------------------------------------------------------------------
   Weak Pointers

   traverse_weak_ptr_list is called possibly many times during garbage
   collection.  It returns a flag indicating whether it did any work
   (i.e. called evacuate on any live pointers).

   Invariant: traverse_weak_ptr_list is called when the heap is in an
   idempotent state.  That means that there are no pending
   evacuate/scavenge operations.  This invariant helps the weak
   pointer code decide which weak pointers are dead - if there are no
   new live weak pointers, then all the currently unreachable ones are
   dead.

   For generational GC: we just don't try to finalize weak pointers in
   older generations than the one we're collecting.  This could
   probably be optimised by keeping per-generation lists of weak
   pointers, but for a few weak pointers this scheme will work.