/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2012
 *
 * Storage manager front end
 *
 * Documentation on the architecture of the Storage Manager can be
 * found in the online commentary:
 *
 *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/storage
 *
 * ---------------------------------------------------------------------------*/

#include <ghcconfig.h>
#if RTS_LINKER_USE_MMAP
/*
 * On FreeBSD and Darwin, when _XOPEN_SOURCE is defined, MAP_ANONYMOUS is not
 * exposed from <sys/mman.h>.  Include <sys/mman.h> before "PosixSource.h".
 *
 * Alternatively, we could drop "PosixSource.h" from this file, but for just
 * one non-POSIX macro, that seems a needless price to pay.
 */
#include <sys/mman.h>
#endif

#include "PosixSource.h"
#include "Rts.h"

#include "Storage.h"
#include "GCThread.h"
#include "RtsUtils.h"
#include "Stats.h"
#include "BlockAlloc.h"
#include "Weak.h"
#include "Sanity.h"
#include "Arena.h"
#include "Capability.h"
#include "Schedule.h"
#include "RetainerProfile.h"        // for counting memory blocks (memInventory)
#include "OSMem.h"
#include "Trace.h"
#include "GC.h"
#include "Evac.h"
#include "NonMoving.h"
#if defined(ios_HOST_OS) || defined(darwin_HOST_OS)
#include "Hash.h"
#endif

#if RTS_LINKER_USE_MMAP
#include "LinkerInternals.h"
#endif

#include <string.h>

#include "ffi.h"

/*
 * All these globals require sm_mutex to access in THREADED_RTS mode.
 */
StgIndStatic  *dyn_caf_list        = NULL;
StgIndStatic  *debug_caf_list      = NULL;
StgIndStatic  *revertible_caf_list = NULL;
bool           keepCAFs;
bool           highMemDynamic;

W_ large_alloc_lim;    /* GC if n_large_blocks in any nursery
                        * reaches this. */

bdescr *exec_block;

generation *generations = NULL; /* all the generations */
generation *g0          = NULL; /* generation 0, for convenience */
generation *oldest_gen  = NULL; /* oldest generation, for convenience */

/*
 * Array of nurseries, size == n_capabilities
 *
 * nursery[i] belongs to NUMA node (i % n_numa_nodes)
 * This is chosen to be the same convention as capabilities[i], so
 * that when not using nursery chunks (+RTS -n), we just map
 * capabilities to nurseries 1:1.
 */
nursery *nurseries = NULL;
uint32_t n_nurseries;

/* Pinned Nursery Size, the number of blocks that we reserve for
 * pinned data. The number chosen here decides whether pinned objects
 * are allocated from the free_list (if n < BLOCKS_PER_MBLOCK) or whether
 * a fresh mblock is allocated each time.
 * See Note [Sources of Block Level Fragmentation]
 * */

#define PINNED_EMPTY_SIZE BLOCKS_PER_MBLOCK


/*
 * When we are using nursery chunks, we need a separate next_nursery
 * pointer for each NUMA node.
 */
volatile StgWord next_nursery[MAX_NUMA_NODES];

#if defined(THREADED_RTS)
/*
 * Storage manager mutex:  protects all the above state from
 * simultaneous access by two STG threads.
 */
Mutex sm_mutex;
#endif

static void allocNurseries (uint32_t from, uint32_t to);
static void assignNurseriesToCapabilities (uint32_t from, uint32_t to);

void
initGeneration (generation *gen, int g)
{
    gen->no = g;
    gen->collections = 0;
    gen->par_collections = 0;
    gen->failed_promotions = 0;
    gen->max_blocks = 0;
    gen->blocks = NULL;
    gen->n_blocks = 0;
    gen->n_words = 0;
    gen->live_estimate = 0;
    gen->old_blocks = NULL;
    gen->n_old_blocks = 0;
    gen->large_objects = NULL;
    gen->n_large_blocks = 0;
    gen->n_large_words = 0;
    gen->n_new_large_words = 0;
    gen->compact_objects = NULL;
    gen->n_compact_blocks = 0;
    gen->compact_blocks_in_import = NULL;
    gen->n_compact_blocks_in_import = 0;
    gen->scavenged_large_objects = NULL;
    gen->n_scavenged_large_blocks = 0;
    gen->live_compact_objects = NULL;
    gen->n_live_compact_blocks = 0;
    gen->compact_blocks_in_import = NULL;
    gen->n_compact_blocks_in_import = 0;
    gen->mark = 0;
    gen->compact = 0;
    gen->bitmap = NULL;
#if defined(THREADED_RTS)
    initSpinLock(&gen->sync);
#endif
    gen->threads = END_TSO_QUEUE;
    gen->old_threads = END_TSO_QUEUE;
    gen->weak_ptr_list = NULL;
    gen->old_weak_ptr_list = NULL;
}


#if defined(TRACING)
// Defined as its own top-level function so it can be passed to traceInitEvent
static void
traceHeapInfo (void){
  traceEventHeapInfo(CAPSET_HEAP_DEFAULT,
                     RtsFlags.GcFlags.generations,
                     RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE,
                     RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE,
                     MBLOCK_SIZE,
                     BLOCK_SIZE);
}
#else
#define traceHeapInfo
#endif

void
initStorage (void)
{
  uint32_t g, n;

  if (generations != NULL) {
      // multi-init protection
      return;
  }

  initMBlocks();

  /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
   * doing something reasonable.
   */
  /* We use the NOT_NULL variant or gcc warns that the test is always true */
  ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
  ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
  ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));

  initBlockAllocator();

#if defined(THREADED_RTS)
  initMutex(&sm_mutex);
#endif

  ACQUIRE_SM_LOCK;

  /* allocate generation info array */
  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
                                             * sizeof(struct generation_),
                                             "initStorage: gens");

  /* Initialise all generations */
  for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
      initGeneration(&generations[g], g);
  }

  /* A couple of convenience pointers */
  g0 = &generations[0];
  oldest_gen = &generations[RtsFlags.GcFlags.generations-1];

  /* Set up the destination pointers in each younger gen. step */
  for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
      generations[g].to = &generations[g+1];
  }
  oldest_gen->to = oldest_gen;

  // Nonmoving heap uses oldest_gen so initialize it after initializing oldest_gen
  nonmovingInit();

#if defined(THREADED_RTS)
  // nonmovingAddCapabilities allocates segments, which requires taking the gc
  // sync lock, so initialize it before nonmovingAddCapabilities
  initSpinLock(&gc_alloc_block_sync);
#endif

  if (RtsFlags.GcFlags.useNonmoving)
      nonmovingAddCapabilities(n_capabilities);

  /* The oldest generation has one step. */
  if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
      if (RtsFlags.GcFlags.generations == 1) {
          errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
      } else {
          oldest_gen->mark = 1;
          if (RtsFlags.GcFlags.compact)
              oldest_gen->compact = 1;
      }
  }

  generations[0].max_blocks = 0;

  dyn_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
  debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
  revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST;

  if (RtsFlags.GcFlags.largeAllocLim > 0) {
      large_alloc_lim = RtsFlags.GcFlags.largeAllocLim * BLOCK_SIZE_W;
  } else {
      large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
  }

  exec_block = NULL;

  N = 0;

  for (n = 0; n < n_numa_nodes; n++) {
      next_nursery[n] = n;
  }
  storageAddCapabilities(0, n_capabilities);

  IF_DEBUG(gc, statDescribeGens());

  RELEASE_SM_LOCK;

  traceInitEvent(traceHeapInfo);

}

void storageAddCapabilities (uint32_t from, uint32_t to)
{
    uint32_t n, g, i, new_n_nurseries;
    nursery *old_nurseries;

    if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
        new_n_nurseries = to;
    } else {
        memcount total_alloc = to * RtsFlags.GcFlags.minAllocAreaSize;
        new_n_nurseries =
            stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
    }

    old_nurseries = nurseries;
    if (from > 0) {
        nurseries = stgReallocBytes(nurseries,
                                    new_n_nurseries * sizeof(struct nursery_),
                                    "storageAddCapabilities");
    } else {
        nurseries = stgMallocBytes(new_n_nurseries * sizeof(struct nursery_),
                                   "storageAddCapabilities");
    }

    // we've moved the nurseries, so we have to update the rNursery
    // pointers from the Capabilities.
    for (i = 0; i < from; i++) {
        uint32_t index = capabilities[i]->r.rNursery - old_nurseries;
        capabilities[i]->r.rNursery = &nurseries[index];
    }

    /* The allocation area.  Policy: keep the allocation area
     * small to begin with, even if we have a large suggested heap
     * size.  Reason: we're going to do a major collection first, and we
     * don't want it to be a big one.  This vague idea is borne out by
     * rigorous experimental evidence.
     */
    allocNurseries(n_nurseries, new_n_nurseries);
    n_nurseries = new_n_nurseries;

    /*
     * Assign each of the new capabilities a nursery.  Remember to start from
     * next_nursery, because we may have already consumed some of the earlier
     * nurseries.
     */
    assignNurseriesToCapabilities(from,to);

    // allocate a block for each mut list
    for (n = from; n < to; n++) {
        for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
            capabilities[n]->mut_lists[g] =
                allocBlockOnNode(capNoToNumaNode(n));
        }
    }

    // Initialize NonmovingAllocators and UpdRemSets
    if (RtsFlags.GcFlags.useNonmoving) {
        nonmovingAddCapabilities(to);
        for (i = 0; i < to; ++i) {
            init_upd_rem_set(&capabilities[i]->upd_rem_set);
        }
    }

#if defined(THREADED_RTS) && defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 0)
    newThreadLocalKey(&gctKey);
#endif

    initGcThreads(from, to);
}


void
exitStorage (void)
{
    nonmovingExit();
    updateNurseriesStats();
    stat_exitReport();
}

void
freeStorage (bool free_heap)
{
    stgFree(generations);
    if (free_heap) freeAllMBlocks();
#if defined(THREADED_RTS)
    closeMutex(&sm_mutex);
#endif
    stgFree(nurseries);
#if defined(THREADED_RTS) && defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 0)
    freeThreadLocalKey(&gctKey);
#endif
    freeGcThreads();
}

static void
listGenBlocks (ListBlocksCb cb, void *user, generation* gen)
{
    cb(user, gen->blocks);
    cb(user, gen->large_objects);
    cb(user, gen->compact_objects);
    cb(user, gen->compact_blocks_in_import);
}

// Traverse all the different places that the rts stores blocks
// and call a callback on each of them.
void listAllBlocks (ListBlocksCb cb, void *user)
{
  uint32_t g, i;
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      for (i = 0; i < n_capabilities; i++) {
          cb(user, capabilities[i]->mut_lists[g]);
          cb(user, gc_threads[i]->gens[g].part_list);
          cb(user, gc_threads[i]->gens[g].scavd_list);
          cb(user, gc_threads[i]->gens[g].todo_bd);
      }
      listGenBlocks(cb, user, &generations[g]);
  }

  for (i = 0; i < n_nurseries; i++) {
      cb(user, nurseries[i].blocks);
  }
  for (i = 0; i < n_capabilities; i++) {
      if (capabilities[i]->pinned_object_block != NULL) {
          cb(user, capabilities[i]->pinned_object_block);
      }
      cb(user, capabilities[i]->pinned_object_blocks);
      cb(user, capabilities[i]->pinned_object_empty);
  }
}
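/* A minimal usage sketch for listAllBlocks (illustrative only; countBlocksCb is
 * a hypothetical callback, and this assumes the ListBlocksCb type declared in
 * Storage.h takes (void *user, bdescr *bd), where bd may be NULL or a chain):
 *
 *     static void countBlocksCb (void *user, bdescr *bd)
 *     {
 *         W_ *total = (W_ *)user;
 *         for (; bd != NULL; bd = bd->link) {
 *             *total += bd->blocks;
 *         }
 *     }
 *
 *     W_ total = 0;
 *     listAllBlocks(countBlocksCb, &total);
 */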


/* -----------------------------------------------------------------------------
   Note [CAF management]
   ~~~~~~~~~~~~~~~~~~~~~

   The entry code for every CAF does the following:

      - calls newCAF, which builds a CAF_BLACKHOLE on the heap and atomically
        updates the CAF with IND_STATIC pointing to the CAF_BLACKHOLE

      - if newCAF returns zero, it re-enters the CAF (see Note [atomic
        CAF entry])

      - pushes an update frame pointing to the CAF_BLACKHOLE

   Why do we build a BLACKHOLE in the heap rather than just updating
   the thunk directly?  It's so that we only need one kind of update
   frame - otherwise we'd need a static version of the update frame
   too, and various other parts of the RTS that deal with update
   frames would also need special cases for static update frames.

   newCAF() does the following:

      - atomically locks the CAF (see [atomic CAF entry])

      - it builds a CAF_BLACKHOLE on the heap

      - it updates the CAF with an IND_STATIC pointing to the
        CAF_BLACKHOLE, atomically.

      - it puts the CAF on the oldest generation's mutable list.
        This is so that we treat the CAF as a root when collecting
        younger generations.

      - links the CAF onto the CAF list (see below)

   ------------------
   Note [atomic CAF entry]
   ~~~~~~~~~~~~~~~~~~~~~~~

   With THREADED_RTS, newCAF() is required to be atomic (see
   #5558). This is because if two threads happened to enter the same
   CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
   and so the normal threadPaused() machinery for detecting duplicate
   evaluation will not detect this.  Hence in lockCAF() below, we
   atomically lock the CAF with WHITEHOLE before updating it with
   IND_STATIC, and return zero if another thread locked the CAF first.
   In the event that we lost the race, CAF entry code will re-enter
   the CAF and block on the other thread's CAF_BLACKHOLE.

   ------------------
   Note [GHCi CAFs]
   ~~~~~~~~~~~~~~~~

   For GHCI, we have additional requirements when dealing with CAFs:

      - we must *retain* all dynamically-loaded CAFs ever entered,
        just in case we need them again.
      - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.

      To do this, we use an additional CAF list.  When newCAF() is
      called on a dynamically-loaded CAF, we add it to the CAF list
      instead of the old-generation mutable list, and save away its
      old info pointer (in caf->saved_info) for later reversion.

      To revert all the CAFs, we traverse the CAF list and reset the
      info pointer to caf->saved_info, then throw away the CAF list.
      (see GC.c:revertCAFs()).

      -- SDM 29/1/01
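
      A minimal sketch of that reversion loop (illustrative only; the real
      code lives in GC.c:revertCAFs() and may differ in detail, e.g. in how
      the STATIC_FLAG_LIST tag bits on the list links are cleared):

          StgIndStatic *c = revertible_caf_list;
          while (c != (StgIndStatic *)END_OF_CAF_LIST) {
              c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c);
              StgIndStatic *next = (StgIndStatic *)c->static_link;
              SET_INFO((StgClosure *)c, c->saved_info); // restore original info
              c->saved_info = NULL;
              c = next;
          }
          revertible_caf_list = (StgIndStatic *)END_OF_CAF_LIST;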

   ------------------
   Note [Static objects under the nonmoving collector]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   Static object management is a bit tricky under the nonmoving collector as we
   need to maintain a bit more state than in the moving collector. In
   particular, the moving collector uses the low bits of the STATIC_LINK field
   to determine whether the object has been moved to the scavenger's work list
   (see Note [STATIC_LINK fields] in Storage.h).

   However, the nonmoving collector also needs a place to keep its mark bit.
   This is problematic as we therefore need at least three bits of state
   but can assume only two bits are available in STATIC_LINK (due to 32-bit
   systems).

   To accommodate this we move handling of static objects entirely to the
   oldest generation when the nonmoving collector is in use. To do this safely
   and efficiently we allocate the blackhole created by lockCAF() directly in
   the non-moving heap. This means that the moving collector can completely
   ignore static objects in minor collections since they are guaranteed not to
   have any references into the moving heap. Of course, the blackhole itself
   likely will contain a reference into the moving heap but this is
   significantly easier to handle, being a heap-allocated object (see Note
   [Aging under the non-moving collector] in NonMoving.c for details).

   During the moving phase of a major collection we treat static objects
   as we do any other reference into the non-moving heap by pushing them
   to the non-moving mark queue (see Note [Aging under the non-moving
   collector]).

   This allows the non-moving collector to have full control over the flags
   in STATIC_LINK, which it uses as described in Note [STATIC_LINK fields].
   This is implemented by NonMovingMark.c:bump_static_flag.

   In short, the plan is:

     - lockCAF allocates its blackhole in the nonmoving heap. This is important
       to ensure that we do not need to place the static object on the mut_list
       lest we would need some way to ensure that it is evacuated only once during
       a moving collection.

     - evacuate_static_object merely pushes objects to the mark queue

     - the nonmoving collector uses the flags in STATIC_LINK as its mark bit.

   -------------------------------------------------------------------------- */

STATIC_INLINE StgInd *
lockCAF (StgRegTable *reg, StgIndStatic *caf)
{
    const StgInfoTable *orig_info;
    Capability *cap = regTableToCapability(reg);
    StgInd *bh;

    orig_info = RELAXED_LOAD(&caf->header.info);

#if defined(THREADED_RTS)
    const StgInfoTable *cur_info;

    if (orig_info == &stg_IND_STATIC_info ||
        orig_info == &stg_WHITEHOLE_info) {
        // already claimed by another thread; re-enter the CAF
        return NULL;
    }

    cur_info = (const StgInfoTable *)
        cas((StgVolatilePtr)&caf->header.info,
            (StgWord)orig_info,
            (StgWord)&stg_WHITEHOLE_info);

    if (cur_info != orig_info) {
        // already claimed by another thread; re-enter the CAF
        return NULL;
    }

    // successfully claimed by us; overwrite with IND_STATIC
#endif

548 549 550 551 552 553 554 555
    // Push stuff that will become unreachable after updating to UpdRemSet to
    // maintain snapshot invariant
    const StgInfoTable *orig_info_tbl = INFO_PTR_TO_STRUCT(orig_info);
    // OSA: Assertions to make sure my understanding of static thunks is correct
    ASSERT(orig_info_tbl->type == THUNK_STATIC);
    // Secondly I think static thunks can't have payload: anything that they
    // reference should be in SRTs
    ASSERT(orig_info_tbl->layout.payload.ptrs == 0);
    // Because the payload is empty we just push the SRT
    IF_NONMOVING_WRITE_BARRIER_ENABLED {
        StgThunkInfoTable *thunk_info = itbl_to_thunk_itbl(orig_info_tbl);
        if (thunk_info->i.srt) {
            updateRemembSetPushClosure(cap, GET_SRT(thunk_info));
        }
    }

    // For the benefit of revertCAFs(), save the original info pointer
    caf->saved_info = orig_info;

    // Allocate the blackhole indirection closure
    if (RtsFlags.GcFlags.useNonmoving) {
        // See Note [Static objects under the nonmoving collector].
        ACQUIRE_SM_LOCK;
        bh = (StgInd *)nonmovingAllocate(cap, sizeofW(*bh));
        RELEASE_SM_LOCK;
        recordMutableCap((StgClosure*)bh,
                         regTableToCapability(reg), oldest_gen->no);
    } else {
        bh = (StgInd *)allocate(cap, sizeofW(*bh));
    }
    bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
    SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);

    // RELEASE ordering to ensure that above writes are visible before we
    // introduce reference as CAF indirectee.
    RELEASE_STORE(&caf->indirectee, (StgClosure *) bh);
    SET_INFO_RELEASE((StgClosure*)caf, &stg_IND_STATIC_info);

    return bh;
}

StgInd *
newCAF(StgRegTable *reg, StgIndStatic *caf)
{
    StgInd *bh;

    bh = lockCAF(reg, caf);
    if (!bh) return NULL;

    if(keepCAFs && !(highMemDynamic && (void*) caf > (void*) 0x80000000))
    {
        // Note [dyn_caf_list]
        // If we are in GHCi _and_ we are using dynamic libraries,
        // then we can't redirect newCAF calls to newRetainedCAF (see below),
        // so we make newCAF behave almost like newRetainedCAF.
        // The dynamic libraries might be used by both the interpreted
        // program and GHCi itself, so they must not be reverted.
        // This also means that in GHCi with dynamic libraries, CAFs are not
        // garbage collected. If this turns out to be a problem, we could
        // do another hack here and do an address range test on caf to figure
        // out whether it is from a dynamic library.

610 611
        ACQUIRE_SM_LOCK; // dyn_caf_list is global, locked by sm_mutex
        caf->static_link = (StgClosure*)dyn_caf_list;
612
        dyn_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
Simon Marlow's avatar
Simon Marlow committed
613 614 615 616 617
        RELEASE_SM_LOCK;
    }
    else
    {
        // Put this CAF on the mutable list for the old generation.
618 619 620
        // N.B. the nonmoving collector works a bit differently: see
        // Note [Static objects under the nonmoving collector].
        if (oldest_gen->no != 0 && !RtsFlags.GcFlags.useNonmoving) {
621 622
            recordMutableCap((StgClosure*)caf,
                             regTableToCapability(reg), oldest_gen->no);
Simon Marlow's avatar
Simon Marlow committed
623
        }
624

Ben Gamari's avatar
Ben Gamari committed
625
#if defined(DEBUG)
626 627 628 629 630 631 632 633 634 635 636 637 638
        // In the DEBUG rts, we keep track of live CAFs by chaining them
        // onto a list debug_caf_list.  This is so that we can tell if we
        // ever enter a GC'd CAF, and emit a suitable barf().
        //
        // The saved_info field of the CAF is used as the link field for
        // debug_caf_list, because this field is only used by newDynCAF
        // for revertible CAFs, and we don't put those on the
        // debug_caf_list.
        ACQUIRE_SM_LOCK; // debug_caf_list is global, locked by sm_mutex
        ((StgIndStatic *)caf)->saved_info = (const StgInfoTable*)debug_caf_list;
        debug_caf_list = (StgIndStatic*)caf;
        RELEASE_SM_LOCK;
#endif
    }

    return bh;
}

// External API for setting the keepCAFs flag. see #3900.
void
setKeepCAFs (void)
{
    keepCAFs = 1;
}

void
setHighMemDynamic (void)
{
    highMemDynamic = 1;
}

// An alternate version of newCAF which is used for dynamically loaded
// object code in GHCi.  In this case we want to retain *all* CAFs in
// the object code, because they might be demanded at any time from an
// expression evaluated on the command line.
// Also, GHCi might want to revert CAFs, so we add these to the
// revertible_caf_list.
//
// The linker hackily arranges that references to newCAF from dynamic
// code end up pointing to newRetainedCAF.
//
StgInd* newRetainedCAF (StgRegTable *reg, StgIndStatic *caf)
{
    StgInd *bh;

    bh = lockCAF(reg, caf);
    if (!bh) return NULL;

    ACQUIRE_SM_LOCK;

    caf->static_link = (StgClosure*)revertible_caf_list;
    revertible_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);

    RELEASE_SM_LOCK;

    return bh;
}

// If we are using loadObj/unloadObj in the linker, then we want to
//
//  - retain all CAFs in statically linked code (keepCAFs == true),
//    because we might link a new object that uses any of these CAFs.
//
//  - GC CAFs in dynamically-linked code, so that we can detect when
//    a dynamically-linked object is unloadable.
//
// So for this case, we set keepCAFs to true, and link newCAF to newGCdCAF
// for dynamically-linked code.
//
StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf)
{
    StgInd *bh;

    bh = lockCAF(reg, caf);
    if (!bh) return NULL;

    // Put this CAF on the mutable list for the old generation.
    // N.B. the nonmoving collector works a bit differently:
    // see Note [Static objects under the nonmoving collector].
    if (oldest_gen->no != 0 && !RtsFlags.GcFlags.useNonmoving) {
        recordMutableCap((StgClosure*)caf,
                         regTableToCapability(reg), oldest_gen->no);
    }

    return bh;
}

/* -----------------------------------------------------------------------------
   Nursery management.
   -------------------------------------------------------------------------- */

static bdescr *
allocNursery (uint32_t node, bdescr *tail, W_ blocks)
{
    bdescr *bd = NULL;
    W_ i, n;

    // We allocate the nursery as a single contiguous block and then
    // divide it into single blocks manually.  This way we guarantee
    // that the nursery blocks are adjacent, so that the processor's
    // automatic prefetching works across nursery blocks.  This is a
    // tiny optimisation (~0.5%), but it's free.

    while (blocks > 0) {
        n = stg_min(BLOCKS_PER_MBLOCK, blocks);
        // allocLargeChunk will prefer large chunks, but will pick up
        // small chunks if there are any available.  We must allow
        // single blocks here to avoid fragmentation (#7257)
        bd = allocLargeChunkOnNode(node, 1, n);
        n = bd->blocks;
        blocks -= n;

        for (i = 0; i < n; i++) {
            initBdescr(&bd[i], g0, g0);

            bd[i].blocks = 1;
            bd[i].flags = 0;

            if (i > 0) {
                bd[i].u.back = &bd[i-1];
            } else {
                bd[i].u.back = NULL;
            }

            if (i+1 < n) {
                bd[i].link = &bd[i+1];
            } else {
                bd[i].link = tail;
                if (tail != NULL) {
                    tail->u.back = &bd[i];
                }
            }

            bd[i].free = bd[i].start;
        }

        tail = &bd[0];
    }

    return &bd[0];
}

STATIC_INLINE void
assignNurseryToCapability (Capability *cap, uint32_t n)
{
    ASSERT(n < n_nurseries);
    cap->r.rNursery = &nurseries[n];
    cap->r.rCurrentNursery = nurseries[n].blocks;
    newNurseryBlock(nurseries[n].blocks);
    cap->r.rCurrentAlloc   = NULL;
    ASSERT(cap->r.rCurrentNursery->node == cap->node);
}

/*
 * Give each Capability a nursery from the pool. No need to do atomic increments
 * here, everything must be stopped to call this function.
 */
static void
assignNurseriesToCapabilities (uint32_t from, uint32_t to)
{
    uint32_t i, node;

    for (i = from; i < to; i++) {
        node = capabilities[i]->node;
        assignNurseryToCapability(capabilities[i], next_nursery[node]);
        next_nursery[node] += n_numa_nodes;
    }
}

static void
allocNurseries (uint32_t from, uint32_t to)
{
    uint32_t i;
    memcount n_blocks;

    if (RtsFlags.GcFlags.nurseryChunkSize) {
        n_blocks = RtsFlags.GcFlags.nurseryChunkSize;
    } else {
        n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
    }

    for (i = from; i < to; i++) {
        nurseries[i].blocks = allocNursery(capNoToNumaNode(i), NULL, n_blocks);
        nurseries[i].n_blocks = n_blocks;
    }
}

void
resetNurseries (void)
{
    uint32_t n;

    for (n = 0; n < n_numa_nodes; n++) {
        next_nursery[n] = n;
    }
    assignNurseriesToCapabilities(0, n_capabilities);

#if defined(DEBUG)
    bdescr *bd;
    for (n = 0; n < n_nurseries; n++) {
        for (bd = nurseries[n].blocks; bd; bd = bd->link) {
            ASSERT(bd->gen_no == 0);
            ASSERT(bd->gen == g0);
Simon Marlow's avatar
Simon Marlow committed
829
            ASSERT(bd->node == capNoToNumaNode(n));
Tobias Guggenmos's avatar
Tobias Guggenmos committed
830
            IF_DEBUG(zero_on_gc, memset(bd->start, 0xaa, BLOCK_SIZE));
831
        }
832
    }
Simon Marlow's avatar
Simon Marlow committed
833
#endif
834 835
}

836
W_
837
countNurseryBlocks (void)
838
{
839
    uint32_t i;
840
    W_ blocks = 0;
841

842
    for (i = 0; i < n_nurseries; i++) {
Gabor Greif's avatar
Gabor Greif committed
843
        blocks += nurseries[i].n_blocks;
844
    }
845
    return blocks;
846 847
}

Simon Marlow's avatar
Simon Marlow committed
848
//
849 850
// Resize each of the nurseries to the specified size.
//
851 852
static void
resizeNurseriesEach (W_ blocks)
853
{
Simon Marlow's avatar
Simon Marlow committed
854 855 856 857
    uint32_t i, node;
    bdescr *bd;
    W_ nursery_blocks;
    nursery *nursery;
858 859

    for (i = 0; i < n_nurseries; i++) {
Simon Marlow's avatar
Simon Marlow committed
860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895
        nursery = &nurseries[i];
        nursery_blocks = nursery->n_blocks;
        if (nursery_blocks == blocks) continue;

        node = capNoToNumaNode(i);
        if (nursery_blocks < blocks) {
            debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
                       blocks);
            nursery->blocks = allocNursery(node, nursery->blocks,
                                           blocks-nursery_blocks);
        }
        else
        {
            bdescr *next_bd;

            debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
                       blocks);

            bd = nursery->blocks;
            while (nursery_blocks > blocks) {
                next_bd = bd->link;
                next_bd->u.back = NULL;
                nursery_blocks -= bd->blocks; // might be a large block
                freeGroup(bd);
                bd = next_bd;
            }
            nursery->blocks = bd;
            // might have gone just under, by freeing a large block, so make
            // up the difference.
            if (nursery_blocks < blocks) {
                nursery->blocks = allocNursery(node, nursery->blocks,
                                               blocks-nursery_blocks);
            }
        }
        nursery->n_blocks = blocks;
        ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
    }
}

void
resizeNurseriesFixed (void)
{
    uint32_t blocks;

    if (RtsFlags.GcFlags.nurseryChunkSize) {
        blocks = RtsFlags.GcFlags.nurseryChunkSize;
    } else {
        blocks = RtsFlags.GcFlags.minAllocAreaSize;
    }

    resizeNurseriesEach(blocks);
}

//
// Resize the nurseries to the total specified size.
//
void
Simon Marlow's avatar
Simon Marlow committed
917
resizeNurseries (W_ blocks)
918 919 920
{
    // If there are multiple nurseries, then we just divide the number
    // of available blocks between them.
921
    resizeNurseriesEach(blocks / n_nurseries);
922 923
}

Ben Gamari's avatar
Ben Gamari committed
924
bool
925 926
getNewNursery (Capability *cap)
{
927
    StgWord i;
Simon Marlow's avatar
Simon Marlow committed
928 929
    uint32_t node = cap->node;
    uint32_t n;
930 931

    for(;;) {
Simon Marlow's avatar
Simon Marlow committed
932 933
        i = next_nursery[node];
        if (i < n_nurseries) {
Simon Marlow's avatar
Simon Marlow committed
934
            if (cas(&next_nursery[node], i, i+n_numa_nodes) == i) {
Simon Marlow's avatar
Simon Marlow committed
935
                assignNurseryToCapability(cap, i);
Ben Gamari's avatar
Ben Gamari committed
936
                return true;
Simon Marlow's avatar
Simon Marlow committed
937
            }
Simon Marlow's avatar
Simon Marlow committed
938
        } else if (n_numa_nodes > 1) {