/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2012
 *
 * Storage manager front end
 *
 * Documentation on the architecture of the Storage Manager can be
 * found in the online commentary:
 *
 *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"

#include "Storage.h"
#include "GCThread.h"
#include "RtsUtils.h"
#include "Stats.h"
#include "BlockAlloc.h"
#include "Weak.h"
#include "Sanity.h"
#include "Arena.h"
#include "Capability.h"
#include "Schedule.h"
#include "RetainerProfile.h"        // for counting memory blocks (memInventory)
#include "OSMem.h"
#include "Trace.h"
#include "GC.h"
#include "Evac.h"
#if defined(ios_HOST_OS)
#include "Hash.h"
#endif

#include <string.h>

#include "ffi.h"

/*
 * All these globals require sm_mutex to access in THREADED_RTS mode.
 */
StgClosure    *caf_list         = NULL;
StgClosure    *revertible_caf_list = NULL;
rtsBool       keepCAFs;

W_ large_alloc_lim;    /* GC if n_large_blocks in any nursery
                        * reaches this. */

bdescr *exec_block;

generation *generations = NULL; /* all the generations */
generation *g0          = NULL; /* generation 0, for convenience */
generation *oldest_gen  = NULL; /* oldest generation, for convenience */

nursery *nurseries = NULL;     /* array of nurseries, size == n_capabilities */

#ifdef THREADED_RTS
/*
 * Storage manager mutex:  protects all the above state from
 * simultaneous access by two STG threads.
 */
Mutex sm_mutex;
#endif

static void allocNurseries (nat from, nat to);

static void
initGeneration (generation *gen, int g)
{
    gen->no = g;
    gen->collections = 0;
    gen->par_collections = 0;
    gen->failed_promotions = 0;
    gen->max_blocks = 0;
    gen->blocks = NULL;
    gen->n_blocks = 0;
    gen->n_words = 0;
    gen->live_estimate = 0;
    gen->old_blocks = NULL;
    gen->n_old_blocks = 0;
    gen->large_objects = NULL;
    gen->n_large_blocks = 0;
    gen->n_large_words = 0;
    gen->n_new_large_words = 0;
    gen->scavenged_large_objects = NULL;
    gen->n_scavenged_large_blocks = 0;
    gen->mark = 0;
    gen->compact = 0;
    gen->bitmap = NULL;
#ifdef THREADED_RTS
    initSpinLock(&gen->sync);
#endif
    gen->threads = END_TSO_QUEUE;
    gen->old_threads = END_TSO_QUEUE;
    gen->weak_ptr_list = NULL;
    gen->old_weak_ptr_list = NULL;
}

void
initStorage (void)
{
  nat g;

  if (generations != NULL) {
      // multi-init protection
      return;
  }

  initMBlocks();

  /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
   * doing something reasonable.
   */
  /* We use the NOT_NULL variant or gcc warns that the test is always true */
  ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
  ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
  ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));

  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.heapSizeSuggestion >
      RtsFlags.GcFlags.maxHeapSize) {
      RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
  }

  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.minAllocAreaSize >
      RtsFlags.GcFlags.maxHeapSize) {
      errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
      RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
  }

  initBlockAllocator();

#if defined(THREADED_RTS)
  initMutex(&sm_mutex);
#endif

  ACQUIRE_SM_LOCK;

  /* allocate generation info array */
  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
                                             * sizeof(struct generation_),
                                             "initStorage: gens");

  /* Initialise all generations */
  for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
      initGeneration(&generations[g], g);
  }

  /* A couple of convenience pointers */
  g0 = &generations[0];
  oldest_gen = &generations[RtsFlags.GcFlags.generations-1];

  /* Set up the destination pointers in each younger gen. step */
  for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
      generations[g].to = &generations[g+1];
  }
  oldest_gen->to = oldest_gen;

  /* The oldest generation has one step. */
  if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
      if (RtsFlags.GcFlags.generations == 1) {
          errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
      } else {
          oldest_gen->mark = 1;
          if (RtsFlags.GcFlags.compact)
              oldest_gen->compact = 1;
      }
  }

  generations[0].max_blocks = 0;

  caf_list = END_OF_STATIC_LIST;
  revertible_caf_list = END_OF_STATIC_LIST;

  /* initialise the allocate() interface */
  large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;

  exec_block = NULL;

#ifdef THREADED_RTS
  initSpinLock(&gc_alloc_block_sync);
  whitehole_spin = 0;
#endif

  N = 0;

  storageAddCapabilities(0, n_capabilities);

  IF_DEBUG(gc, statDescribeGens());

  RELEASE_SM_LOCK;

  traceEventHeapInfo(CAPSET_HEAP_DEFAULT,
                     RtsFlags.GcFlags.generations,
                     RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE_W * sizeof(W_),
                     RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W * sizeof(W_),
                     MBLOCK_SIZE_W * sizeof(W_),
                     BLOCK_SIZE_W  * sizeof(W_));
}
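
/*
   Worked example (not from this file; assumes the default of two
   generations, i.e. -G2): after initStorage() above has run,
   generations[0].to == &generations[1], oldest_gen == &generations[1],
   and oldest_gen->to points to itself.  Objects surviving a collection
   of g0 are therefore promoted into g1, and objects already in the
   oldest generation stay where they are.
*/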

void storageAddCapabilities (nat from, nat to)
{
    nat n, g, i;

    if (from > 0) {
        nurseries = stgReallocBytes(nurseries, to * sizeof(struct nursery_),
                                    "storageAddCapabilities");
    } else {
        nurseries = stgMallocBytes(to * sizeof(struct nursery_),
                                   "storageAddCapabilities");
    }

    // we've moved the nurseries, so we have to update the rNursery
    // pointers from the Capabilities.
    for (i = 0; i < to; i++) {
        capabilities[i]->r.rNursery = &nurseries[i];
    }

    /* The allocation area.  Policy: keep the allocation area
     * small to begin with, even if we have a large suggested heap
     * size.  Reason: we're going to do a major collection first, and we
     * don't want it to be a big one.  This vague idea is borne out by
     * rigorous experimental evidence.
     */
    allocNurseries(from, to);

    // allocate a block for each mut list
    for (n = from; n < to; n++) {
        for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
            capabilities[n]->mut_lists[g] = allocBlock();
        }
    }

#if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR)
    newThreadLocalKey(&gctKey);
#endif

    initGcThreads(from, to);
}


void
exitStorage (void)
{
    updateNurseriesStats();
    stat_exit();
}

void
freeStorage (rtsBool free_heap)
{
    stgFree(generations);
    if (free_heap) freeAllMBlocks();
#if defined(THREADED_RTS)
    closeMutex(&sm_mutex);
#endif
    stgFree(nurseries);
#if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR)
    freeThreadLocalKey(&gctKey);
#endif
    freeGcThreads();
}

/* -----------------------------------------------------------------------------
   CAF management.

   The entry code for every CAF does the following:

      - builds a CAF_BLACKHOLE in the heap

      - calls newCaf, which atomically updates the CAF with
        IND_STATIC pointing to the CAF_BLACKHOLE

      - if newCaf returns zero, it re-enters the CAF (see Note [atomic
        CAF entry])

      - pushes an update frame pointing to the CAF_BLACKHOLE

   Why do we build a BLACKHOLE in the heap rather than just updating
   the thunk directly?  It's so that we only need one kind of update
   frame - otherwise we'd need a static version of the update frame
   too, and various other parts of the RTS that deal with update
   frames would also need special cases for static update frames.

   newCaf() does the following:

      - it updates the CAF with an IND_STATIC pointing to the
        CAF_BLACKHOLE, atomically.

      - it puts the CAF on the oldest generation's mutable list.
        This is so that we treat the CAF as a root when collecting
        younger generations.

   ------------------
   Note [atomic CAF entry]

   With THREADED_RTS, newCaf() is required to be atomic (see
   #5558). This is because if two threads happened to enter the same
   CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
   and so the normal threadPaused() machinery for detecting duplicate
   evaluation will not detect this.  Hence in lockCAF() below, we
   atomically lock the CAF with WHITEHOLE before updating it with
   IND_STATIC, and return zero if another thread locked the CAF first.
   In the event that we lost the race, CAF entry code will re-enter
   the CAF and block on the other thread's CAF_BLACKHOLE.

   ------------------
   Note [GHCi CAFs]

   For GHCi, we have additional requirements when dealing with CAFs:

      - we must *retain* all dynamically-loaded CAFs ever entered,
        just in case we need them again.
      - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.

      To do this, we use an additional CAF list.  When newCaf() is
      called on a dynamically-loaded CAF, we add it to the CAF list
      instead of the old-generation mutable list, and save away its
      old info pointer (in caf->saved_info) for later reversion.

      To revert all the CAFs, we traverse the CAF list and reset the
      info pointer to caf->saved_info, then throw away the CAF list.
      (see GC.c:revertCAFs()).

      -- SDM 29/1/01

   -------------------------------------------------------------------------- */

STATIC_INLINE StgWord lockCAF (StgClosure *caf, StgClosure *bh)
{
    const StgInfoTable *orig_info;

    orig_info = caf->header.info;

#ifdef THREADED_RTS
    const StgInfoTable *cur_info;

    if (orig_info == &stg_IND_STATIC_info ||
        orig_info == &stg_WHITEHOLE_info) {
        // already claimed by another thread; re-enter the CAF
        return 0;
    }

    cur_info = (const StgInfoTable *)
        cas((StgVolatilePtr)&caf->header.info,
            (StgWord)orig_info,
            (StgWord)&stg_WHITEHOLE_info);

    if (cur_info != orig_info) {
        // already claimed by another thread; re-enter the CAF
        return 0;
    }

    // successfully claimed by us; overwrite with IND_STATIC
#endif

    // For the benefit of revertCAFs(), save the original info pointer
    ((StgIndStatic *)caf)->saved_info  = orig_info;

    ((StgIndStatic*)caf)->indirectee = bh;
    write_barrier();
    SET_INFO(caf,&stg_IND_STATIC_info);

    return 1;
}

StgWord
newCAF(StgRegTable *reg, StgClosure *caf, StgClosure *bh)
{
    if (lockCAF(caf,bh) == 0) return 0;

    if(keepCAFs)
    {
        // HACK:
        // If we are in GHCi _and_ we are using dynamic libraries,
        // then we can't redirect newCAF calls to newDynCAF (see below),
        // so we make newCAF behave almost like newDynCAF.
        // The dynamic libraries might be used by both the interpreted
        // program and GHCi itself, so they must not be reverted.
        // This also means that in GHCi with dynamic libraries, CAFs are not
        // garbage collected. If this turns out to be a problem, we could
        // do another hack here and do an address range test on caf to figure
        // out whether it is from a dynamic library.

        ACQUIRE_SM_LOCK; // caf_list is global, locked by sm_mutex
        ((StgIndStatic *)caf)->static_link = caf_list;
        caf_list = caf;
        RELEASE_SM_LOCK;
    }
    else
    {
        // Put this CAF on the mutable list for the old generation.
        ((StgIndStatic *)caf)->saved_info = NULL;
        if (oldest_gen->no != 0) {
            recordMutableCap(caf, regTableToCapability(reg), oldest_gen->no);
        }
    }
    return 1;
}

// External API for setting the keepCAFs flag. see #3900.
void
setKeepCAFs (void)
{
    keepCAFs = 1;
}

// An alternate version of newCaf which is used for dynamically loaded
// object code in GHCi.  In this case we want to retain *all* CAFs in
// the object code, because they might be demanded at any time from an
// expression evaluated on the command line.
// Also, GHCi might want to revert CAFs, so we add these to the
// revertible_caf_list.
//
// The linker hackily arranges that references to newCaf from dynamic
// code end up pointing to newDynCAF.
StgWord
newDynCAF (StgRegTable *reg STG_UNUSED, StgClosure *caf, StgClosure *bh)
{
    if (lockCAF(caf,bh) == 0) return 0;

    ACQUIRE_SM_LOCK;

    ((StgIndStatic *)caf)->static_link = revertible_caf_list;
    revertible_caf_list = caf;

    RELEASE_SM_LOCK;

    return 1;
}
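
/*
   Illustrative sketch (not part of this file): the reversion described
   in Note [GHCi CAFs] lives in GC.c:revertCAFs().  Roughly, it walks
   the revertible CAF list and restores each closure's saved info
   pointer, along the lines of:

       StgIndStatic *c;
       for (c = (StgIndStatic *)revertible_caf_list; ...;
            c = (StgIndStatic *)c->static_link) {
           SET_INFO((StgClosure *)c, c->saved_info);
           c->saved_info = NULL;
       }

   The exact loop termination and how the list is reset afterwards are
   details of GC.c and are not reproduced here.
*/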

/* -----------------------------------------------------------------------------
   Nursery management.
   -------------------------------------------------------------------------- */

static bdescr *
allocNursery (bdescr *tail, W_ blocks)
{
    bdescr *bd = NULL;
    W_ i, n;

    // We allocate the nursery as a single contiguous block and then
    // divide it into single blocks manually.  This way we guarantee
    // that the nursery blocks are adjacent, so that the processor's
    // automatic prefetching works across nursery blocks.  This is a
    // tiny optimisation (~0.5%), but it's free.

    while (blocks > 0) {
        n = stg_min(BLOCKS_PER_MBLOCK, blocks);
        // allocLargeChunk will prefer large chunks, but will pick up
        // small chunks if there are any available.  We must allow
        // single blocks here to avoid fragmentation (#7257)
        bd = allocLargeChunk(1, n);
        n = bd->blocks;
        blocks -= n;

        for (i = 0; i < n; i++) {
            initBdescr(&bd[i], g0, g0);

            bd[i].blocks = 1;
            bd[i].flags = 0;

            if (i > 0) {
                bd[i].u.back = &bd[i-1];
            } else {
                bd[i].u.back = NULL;
            }

            if (i+1 < n) {
                bd[i].link = &bd[i+1];
            } else {
                bd[i].link = tail;
                if (tail != NULL) {
                    tail->u.back = &bd[i];
                }
            }

            bd[i].free = bd[i].start;
        }

        tail = &bd[0];
    }

    return &bd[0];
}

static void
assignNurseriesToCapabilities (nat from, nat to)
{
    nat i;

    for (i = from; i < to; i++) {
        capabilities[i]->r.rCurrentNursery = nurseries[i].blocks;
        capabilities[i]->r.rCurrentAlloc   = NULL;
    }
}

static void
allocNurseries (nat from, nat to)
{
    nat i;

    for (i = from; i < to; i++) {
        nurseries[i].blocks =
            allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
        nurseries[i].n_blocks =
            RtsFlags.GcFlags.minAllocAreaSize;
    }
    assignNurseriesToCapabilities(from, to);
}

void
clearNursery (Capability *cap)
{
    bdescr *bd;

    for (bd = nurseries[cap->no].blocks; bd; bd = bd->link) {
        cap->total_allocated += (W_)(bd->free - bd->start);
        bd->free = bd->start;
        ASSERT(bd->gen_no == 0);
        ASSERT(bd->gen == g0);
        IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
    }
}

void
resetNurseries (void)
{
    assignNurseriesToCapabilities(0, n_capabilities);
}

W_
countNurseryBlocks (void)
{
    nat i;
    W_ blocks = 0;

    for (i = 0; i < n_capabilities; i++) {
        blocks += nurseries[i].n_blocks;
    }
    return blocks;
}

static void
resizeNursery (nursery *nursery, W_ blocks)
{
  bdescr *bd;
  W_ nursery_blocks;

  nursery_blocks = nursery->n_blocks;
  if (nursery_blocks == blocks) return;

  if (nursery_blocks < blocks) {
      debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
                 blocks);
    nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
  }
  else {
    bdescr *next_bd;

    debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
               blocks);

    bd = nursery->blocks;
    while (nursery_blocks > blocks) {
        next_bd = bd->link;
        next_bd->u.back = NULL;
        nursery_blocks -= bd->blocks; // might be a large block
        freeGroup(bd);
        bd = next_bd;
    }
    nursery->blocks = bd;
    // might have gone just under, by freeing a large block, so make
    // up the difference.
    if (nursery_blocks < blocks) {
        nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
    }
  }

  nursery->n_blocks = blocks;
  ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
}

//
// Resize each of the nurseries to the specified size.
//
void
resizeNurseriesFixed (W_ blocks)
{
    nat i;
    for (i = 0; i < n_capabilities; i++) {
        resizeNursery(&nurseries[i], blocks);
    }
}

//
// Resize the nurseries to the total specified size.
//
void
resizeNurseries (W_ blocks)
{
    // If there are multiple nurseries, then we just divide the number
    // of available blocks between them.
    resizeNurseriesFixed(blocks / n_capabilities);
}
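
/*
   Worked example (assuming the standard 4KB block size): a call
   resizeNurseries(1024) in a program running with 4 capabilities calls
   resizeNurseriesFixed(256), so each of the 4 nurseries is grown or
   trimmed to 256 blocks, i.e. roughly 1MB of allocation area per
   capability.
*/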

/* -----------------------------------------------------------------------------
   move_STACK is called to update the TSO structure after it has been
   moved from one place to another.
   -------------------------------------------------------------------------- */

void
move_STACK (StgStack *src, StgStack *dest)
{
    ptrdiff_t diff;

    // relocate the stack pointer...
    diff = (StgPtr)dest - (StgPtr)src; // In *words*
    dest->sp = (StgPtr)dest->sp + diff;
}

/* -----------------------------------------------------------------------------
   allocate()

   This allocates memory in the current thread - it is intended for
   use primarily from STG-land where we have a Capability.  It does
   not require taking the sm_mutex lock in the common case.

   Memory is allocated directly from the nursery if possible (but not
   from the current nursery block, so as not to interfere with
   Hp/HpLim).
   -------------------------------------------------------------------------- */

StgPtr
allocate (Capability *cap, W_ n)
{
    bdescr *bd;
    StgPtr p;

    TICK_ALLOC_HEAP_NOCTR(WDS(n));
    CCS_ALLOC(cap->r.rCCCS,n);

    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
        W_ req_blocks =  (W_)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;

        // Attempting to allocate an object larger than maxHeapSize
        // should definitely be disallowed.  (bug #1791)
        if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
             req_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
            req_blocks >= HS_INT32_MAX)   // avoid overflow when
                                          // calling allocGroup() below
        {
            heapOverflow();
            // heapOverflow() doesn't exit (see #2592), but we aren't
            // in a position to do a clean shutdown here: we
            // either have to allocate the memory or exit now.
            // Allocating the memory would be bad, because the user
            // has requested that we not exceed maxHeapSize, so we
            // just exit.
            stg_exit(EXIT_HEAPOVERFLOW);
        }

        ACQUIRE_SM_LOCK
        bd = allocGroup(req_blocks);
        dbl_link_onto(bd, &g0->large_objects);
        g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
        g0->n_new_large_words += n;
        RELEASE_SM_LOCK;
        initBdescr(bd, g0, g0);
        bd->flags = BF_LARGE;
        bd->free = bd->start + n;
        cap->total_allocated += n;
        return bd->start;
    }

    /* small allocation (<LARGE_OBJECT_THRESHOLD) */

    bd = cap->r.rCurrentAlloc;
    if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {

        // The CurrentAlloc block is full, we need to find another
        // one.  First, we try taking the next block from the
        // nursery:
        bd = cap->r.rCurrentNursery->link;

        if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
            // The nursery is empty, or the next block is already
            // full: allocate a fresh block (we can't fail here).
            ACQUIRE_SM_LOCK;
            bd = allocBlock();
            cap->r.rNursery->n_blocks++;
            RELEASE_SM_LOCK;
            initBdescr(bd, g0, g0);
            bd->flags = 0;
            // If we had to allocate a new block, then we'll GC
            // pretty quickly now, because MAYBE_GC() will
            // notice that CurrentNursery->link is NULL.
        } else {
            // we have a block in the nursery: take it and put
            // it at the *front* of the nursery list, and use it
            // to allocate() from.
            cap->r.rCurrentNursery->link = bd->link;
            if (bd->link != NULL) {
                bd->link->u.back = cap->r.rCurrentNursery;
            }
        }
        dbl_link_onto(bd, &cap->r.rNursery->blocks);
        cap->r.rCurrentAlloc = bd;
        IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
    }
    p = bd->free;
    bd->free += n;

    IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
    return p;
}
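
/*
   Illustrative sketch (not part of this file): RTS code that needs a
   fresh, unpinned heap object calls allocate() with a size in words and
   then writes the header and payload itself.  Building a
   MutableByteArray# of `bytes` bytes might look roughly like this (the
   surrounding glue is hypothetical):

       W_ words = sizeofW(StgArrWords) + ROUNDUP_BYTES_TO_WDS(bytes);
       StgArrWords *arr = (StgArrWords *)allocate(cap, words);
       SET_HDR(arr, &stg_ARR_WORDS_info, CCS_SYSTEM);
       arr->bytes = bytes;

   The StgArrWords layout and macro names here are assumptions about
   the surrounding RTS headers, not definitions made by this file.
*/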

/* ---------------------------------------------------------------------------
   Allocate a fixed/pinned object.

   We allocate small pinned objects into a single block, allocating a
   new block when the current one overflows.  The block is chained
   onto the large_object_list of generation 0.

   NOTE: The GC can't in general handle pinned objects.  This
   interface is only safe to use for ByteArrays, which have no
   pointers and don't require scavenging.  It works because the
   block's descriptor has the BF_LARGE flag set, so the block is
   treated as a large object and chained onto various lists, rather
   than the individual objects being copied.  However, when it comes
   to scavenge the block, the GC will only scavenge the first object.
   The reason is that the GC can't linearly scan a block of pinned
   objects at the moment (doing so would require using the
   mostly-copying techniques).  But since we're restricting ourselves
   to pinned ByteArrays, not scavenging is ok.

   This function is called by newPinnedByteArray# which immediately
   fills the allocated memory with a MutableByteArray#.
   ------------------------------------------------------------------------- */

StgPtr
allocatePinned (Capability *cap, W_ n)
{
    StgPtr p;
    bdescr *bd;

    // If the request is for a large object, then allocate()
    // will give us a pinned object anyway.
    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
        p = allocate(cap, n);
        Bdescr(p)->flags |= BF_PINNED;
        return p;
    }

    TICK_ALLOC_HEAP_NOCTR(WDS(n));
    CCS_ALLOC(cap->r.rCCCS,n);

    bd = cap->pinned_object_block;

    // If we don't have a block of pinned objects yet, or the current
    // one isn't large enough to hold the new object, get a new one.
    if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {

        // stash the old block on cap->pinned_object_blocks.  On the
        // next GC cycle these objects will be moved to
        // g0->large_objects.
        if (bd != NULL) {
            dbl_link_onto(bd, &cap->pinned_object_blocks);
            // add it to the allocation stats when the block is full
            cap->total_allocated += bd->free - bd->start;
        }

        // We need to find another block.  We could just allocate one,
        // but that means taking a global lock and we really want to
        // avoid that (benchmarks that allocate a lot of pinned
        // objects scale really badly if we do this).
        //
        // So first, we try taking the next block from the nursery, in
        // the same way as allocate(), but note that we can only take
        // an *empty* block, because we're about to mark it as
        // BF_PINNED | BF_LARGE.
        bd = cap->r.rCurrentNursery->link;
        if (bd == NULL || bd->free != bd->start) { // must be empty!
            // The nursery is empty, or the next block is non-empty:
            // allocate a fresh block (we can't fail here).

            // XXX in the case when the next nursery block is
            // non-empty we aren't exerting any pressure to GC soon,
            // so if this case ever happens then we could in theory
            // keep allocating for ever without calling the GC. We
            // can't bump g0->n_new_large_words because that will be
            // counted towards allocation, and we're already counting
            // our pinned objects as allocation in
            // collect_pinned_object_blocks in the GC.
            ACQUIRE_SM_LOCK;
            bd = allocBlock();
            RELEASE_SM_LOCK;
            initBdescr(bd, g0, g0);
        } else {
            // we have a block in the nursery: steal it
            cap->r.rCurrentNursery->link = bd->link;
            if (bd->link != NULL) {
                bd->link->u.back = cap->r.rCurrentNursery;
            }
            cap->r.rNursery->n_blocks -= bd->blocks;
        }

        cap->pinned_object_block = bd;
        bd->flags  = BF_PINNED | BF_LARGE | BF_EVACUATED;

        // The pinned_object_block remains attached to the capability
        // until it is full, even if a GC occurs.  We want this
        // behaviour because otherwise the unallocated portion of the
        // block would be forever slop, and under certain workloads
        // (allocating a few ByteStrings per GC) we accumulate a lot
        // of slop.
        //
        // So, the pinned_object_block is initially marked
        // BF_EVACUATED so the GC won't touch it.  When it is full,
        // we place it on the large_objects list, and at the start of
        // the next GC the BF_EVACUATED flag will be cleared, and the
        // block will be promoted as usual (if anything in it is
        // live).
    }

    p = bd->free;
    bd->free += n;
    return p;
}
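
/*
   Worked example (assuming the usual 4KB block size):
   LARGE_OBJECT_THRESHOLD is 8/10 of a block, i.e. 3276 bytes, so on a
   64-bit machine pinned requests below roughly 409 words are carved
   out of the capability's pinned_object_block, while anything at or
   above the threshold falls through to allocate() earlier in this file
   and is BF_LARGE (and hence never moved) anyway.
*/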

/* -----------------------------------------------------------------------------
   Write Barriers
   -------------------------------------------------------------------------- */

/*
   This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
   MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
   is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
   and is put on the mutable list.
*/
void
dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
{
    Capability *cap = regTableToCapability(reg);
    if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
        p->header.info = &stg_MUT_VAR_DIRTY_info;
        recordClosureMutated(cap,p);
    }
}
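
/*
   Illustrative sketch (not part of the RTS proper): a C-land mutator
   that overwrites a MUT_VAR's payload performs the store and then
   invokes the barrier, roughly:

       StgMutVar *mv = ...;        // hypothetical
       mv->var = (StgClosure *)new_value;
       dirty_MUT_VAR(&cap->r, (StgClosure *)mv);

   In practice the store is done from compiled Cmm code (the
   writeMutVar# primop), which calls dirty_MUT_VAR with the
   capability's register table.
*/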

void
dirty_TVAR(Capability *cap, StgTVar *p)
{
    if (p->header.info == &stg_TVAR_CLEAN_info) {
        p->header.info = &stg_TVAR_DIRTY_info;
        recordClosureMutated(cap,(StgClosure*)p);
    }
}

// Setting a TSO's link field with a write barrier.
// It is *not* necessary to call this function when
//    * setting the link field to END_TSO_QUEUE
//    * putting a TSO on the blackhole_queue
//    * setting the link field of the currently running TSO, as it
//      will already be dirty.
void
setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
{
    if (tso->dirty == 0) {
        tso->dirty = 1;
        recordClosureMutated(cap,(StgClosure*)tso);
    }
    tso->_link = target;
}

void
setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
{
    if (tso->dirty == 0) {
        tso->dirty = 1;
        recordClosureMutated(cap,(StgClosure*)tso);
    }
    tso->block_info.prev = target;
}

void
dirty_TSO (Capability *cap, StgTSO *tso)
{
    if (tso->dirty == 0) {
        tso->dirty = 1;
        recordClosureMutated(cap,(StgClosure*)tso);
    }
}

void
dirty_STACK (Capability *cap, StgStack *stack)
{
    if (stack->dirty == 0) {
        stack->dirty = 1;
        recordClosureMutated(cap,(StgClosure*)stack);
    }
}

/*
   This is the write barrier for MVARs.  An MVAR_CLEAN object is not
   on the mutable list; an MVAR_DIRTY is.  When written to, an
   MVAR_CLEAN turns into an MVAR_DIRTY and is put on the mutable list.
   The check for MVAR_CLEAN is inlined at the call site for speed,
   this really does make a difference on concurrency-heavy benchmarks
   such as Chameneos and cheap-concurrency.
*/
void
dirty_MVAR(StgRegTable *reg, StgClosure *p)
{
    recordClosureMutated(regTableToCapability(reg),p);
}

/* -----------------------------------------------------------------------------
 * Stats and stuff
 * -------------------------------------------------------------------------- */

/* -----------------------------------------------------------------------------
 * updateNurseriesStats()
 *
 * Update the per-cap total_allocated numbers with an approximation of
 * the amount of memory used in each cap's nursery.
 *
 * Since this update is also performed by clearNurseries(), we only
 * need this function for the final stats when the RTS is shutting down.
 * -------------------------------------------------------------------------- */

void updateNurseriesStats (void)
{
    nat i;

    for (i = 0; i < n_capabilities; i++) {
        capabilities[i]->total_allocated += countOccupied(nurseries[i].blocks);
    }
}

W_ countOccupied (bdescr *bd)
{
    W_ words;

    words = 0;
    for (; bd != NULL; bd = bd->link) {
        ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
        words += bd->free - bd->start;
    }
    return words;
}

W_ genLiveWords (generation *gen)
{
    return gen->n_words + gen->n_large_words;
}

W_ genLiveBlocks (generation *gen)
{
    return gen->n_blocks + gen->n_large_blocks;
}

W_ gcThreadLiveWords (nat i, nat g)
{
    W_ words;

    words   = countOccupied(gc_threads[i]->gens[g].todo_bd);
    words  += countOccupied(gc_threads[i]->gens[g].part_list);
    words  += countOccupied(gc_threads[i]->gens[g