/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2012
 *
 * Storage manager front end
 *
 * Documentation on the architecture of the Storage Manager can be
 * found in the online commentary:
 *
 *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/storage
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"

#include "Storage.h"
#include "GCThread.h"
#include "RtsUtils.h"
#include "Stats.h"
#include "BlockAlloc.h"
#include "Weak.h"
#include "Sanity.h"
#include "Arena.h"
#include "Capability.h"
#include "Schedule.h"
#include "RetainerProfile.h"        // for counting memory blocks (memInventory)
#include "OSMem.h"
#include "Trace.h"
#include "GC.h"
#include "Evac.h"
#include "NonMoving.h"
#if defined(ios_HOST_OS)
#include "Hash.h"
#endif

#include <string.h>

#include "ffi.h"

/*
 * All these globals require sm_mutex to access in THREADED_RTS mode.
 */
StgIndStatic  *dyn_caf_list        = NULL;
StgIndStatic  *debug_caf_list      = NULL;
StgIndStatic  *revertible_caf_list = NULL;
bool           keepCAFs;

W_ large_alloc_lim;    /* GC if n_large_blocks in any nursery
                        * reaches this. */

bdescr *exec_block;

generation *generations = NULL; /* all the generations */
generation *g0          = NULL; /* generation 0, for convenience */
generation *oldest_gen  = NULL; /* oldest generation, for convenience */

/*
 * Array of nurseries, size == n_capabilities
 *
 * nursery[i] belongs to NUMA node (i % n_numa_nodes)
 * This is chosen to be the same convention as capabilities[i], so
 * that when not using nursery chunks (+RTS -n), we just map
 * capabilities to nurseries 1:1.
 */
nursery *nurseries = NULL;
uint32_t n_nurseries;
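
// For example (illustrative): with n_numa_nodes == 2, nurseries 0, 2, 4, ...
// live on NUMA node 0 and nurseries 1, 3, 5, ... on node 1, mirroring the
// round-robin placement of capabilities across nodes.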

/*
 * When we are using nursery chunks, we need a separate next_nursery
 * pointer for each NUMA node.
 */
volatile StgWord next_nursery[MAX_NUMA_NODES];

#if defined(THREADED_RTS)
/*
 * Storage manager mutex:  protects all the above state from
 * simultaneous access by two STG threads.
 */
Mutex sm_mutex;
#endif

static void allocNurseries (uint32_t from, uint32_t to);
static void assignNurseriesToCapabilities (uint32_t from, uint32_t to);

void
initGeneration (generation *gen, int g)
{
    gen->no = g;
    gen->collections = 0;
    gen->par_collections = 0;
    gen->failed_promotions = 0;
    gen->max_blocks = 0;
    gen->blocks = NULL;
    gen->n_blocks = 0;
    gen->n_words = 0;
    gen->live_estimate = 0;
    gen->old_blocks = NULL;
    gen->n_old_blocks = 0;
    gen->large_objects = NULL;
    gen->n_large_blocks = 0;
    gen->n_large_words = 0;
    gen->n_new_large_words = 0;
    gen->compact_objects = NULL;
    gen->n_compact_blocks = 0;
    gen->compact_blocks_in_import = NULL;
    gen->n_compact_blocks_in_import = 0;
    gen->scavenged_large_objects = NULL;
    gen->n_scavenged_large_blocks = 0;
    gen->live_compact_objects = NULL;
    gen->n_live_compact_blocks = 0;
    gen->mark = 0;
    gen->compact = 0;
    gen->bitmap = NULL;
#if defined(THREADED_RTS)
    initSpinLock(&gen->sync);
#endif
    gen->threads = END_TSO_QUEUE;
    gen->old_threads = END_TSO_QUEUE;
    gen->weak_ptr_list = NULL;
    gen->old_weak_ptr_list = NULL;
}

void
initStorage (void)
{
  uint32_t g, n;

  if (generations != NULL) {
      // multi-init protection
      return;
  }

  initMBlocks();

  /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
   * doing something reasonable.
   */
  /* We use the NOT_NULL variant or gcc warns that the test is always true */
  ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
  ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
  ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));

  initBlockAllocator();

#if defined(THREADED_RTS)
  initMutex(&sm_mutex);
#endif

  ACQUIRE_SM_LOCK;

  /* allocate generation info array */
  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
                                             * sizeof(struct generation_),
                                             "initStorage: gens");

  /* Initialise all generations */
  for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
      initGeneration(&generations[g], g);
  }

  /* A couple of convenience pointers */
  g0 = &generations[0];
  oldest_gen = &generations[RtsFlags.GcFlags.generations-1];

  /* Set up the destination pointers in each younger gen. step */
  for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
      generations[g].to = &generations[g+1];
  }
  oldest_gen->to = oldest_gen;

  // Nonmoving heap uses oldest_gen so initialize it after initializing oldest_gen
  nonmovingInit();

#if defined(THREADED_RTS)
  // nonmovingAddCapabilities allocates segments, which requires taking the gc
  // sync lock, so initialize it before nonmovingAddCapabilities
  initSpinLock(&gc_alloc_block_sync);
#endif

  if (RtsFlags.GcFlags.useNonmoving)
      nonmovingAddCapabilities(n_capabilities);

  /* The oldest generation has one step. */
  if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
      if (RtsFlags.GcFlags.generations == 1) {
          errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
      } else {
          oldest_gen->mark = 1;
          if (RtsFlags.GcFlags.compact)
              oldest_gen->compact = 1;
      }
  }

  generations[0].max_blocks = 0;

  dyn_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
  debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
  revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST;

  if (RtsFlags.GcFlags.largeAllocLim > 0) {
      large_alloc_lim = RtsFlags.GcFlags.largeAllocLim * BLOCK_SIZE_W;
  } else {
      large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
  }
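  // e.g. (illustrative, 64-bit, 4k blocks): with a 1MB allocation area
  // (256 blocks), large_alloc_lim is one megabyte's worth of words, so
  // accumulating more than ~1MB of large-object allocation triggers a GC.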

  exec_block = NULL;

  N = 0;

  for (n = 0; n < n_numa_nodes; n++) {
      next_nursery[n] = n;
  }
  storageAddCapabilities(0, n_capabilities);

  IF_DEBUG(gc, statDescribeGens());

  RELEASE_SM_LOCK;

  traceEventHeapInfo(CAPSET_HEAP_DEFAULT,
                     RtsFlags.GcFlags.generations,
                     RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE,
                     RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE,
                     MBLOCK_SIZE,
                     BLOCK_SIZE);
}

void storageAddCapabilities (uint32_t from, uint32_t to)
{
    uint32_t n, g, i, new_n_nurseries;
    nursery *old_nurseries;

    if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
        new_n_nurseries = to;
    } else {
        memcount total_alloc = to * RtsFlags.GcFlags.minAllocAreaSize;
        new_n_nurseries =
            stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
    }
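    // For example (illustrative, 4k blocks): 4 capabilities with a 1MB
    // allocation area each (-A1m, 256 blocks) and -n256k chunks (64 blocks)
    // give total_alloc = 1024 blocks and max(4, 1024/64) = 16 nursery chunks.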

    old_nurseries = nurseries;
    if (from > 0) {
        nurseries = stgReallocBytes(nurseries,
                                    new_n_nurseries * sizeof(struct nursery_),
                                    "storageAddCapabilities");
    } else {
        nurseries = stgMallocBytes(new_n_nurseries * sizeof(struct nursery_),
                                   "storageAddCapabilities");
    }

    // we've moved the nurseries, so we have to update the rNursery
    // pointers from the Capabilities.
    for (i = 0; i < from; i++) {
        uint32_t index = capabilities[i]->r.rNursery - old_nurseries;
        capabilities[i]->r.rNursery = &nurseries[index];
    }

    /* The allocation area.  Policy: keep the allocation area
     * small to begin with, even if we have a large suggested heap
     * size.  Reason: we're going to do a major collection first, and we
     * don't want it to be a big one.  This vague idea is borne out by
     * rigorous experimental evidence.
     */
    allocNurseries(n_nurseries, new_n_nurseries);
    n_nurseries = new_n_nurseries;

    /*
     * Assign each of the new capabilities a nursery.  Remember to start from
     * next_nursery, because we may have already consumed some of the earlier
     * nurseries.
     */
    assignNurseriesToCapabilities(from,to);

    // allocate a block for each mut list
    for (n = from; n < to; n++) {
        for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
            capabilities[n]->mut_lists[g] =
                allocBlockOnNode(capNoToNumaNode(n));
        }
    }

    // Initialize NonmovingAllocators and UpdRemSets
    if (RtsFlags.GcFlags.useNonmoving) {
        nonmovingAddCapabilities(to);
        for (i = 0; i < to; ++i) {
            init_upd_rem_set(&capabilities[i]->upd_rem_set);
        }
    }
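    // (Each capability gets its own update remembered set so that mutator
    // threads can record writes for the concurrent nonmoving mark without
    // contending on a shared structure; see the uses of
    // updateRemembSetPushClosure in lockCAF below.)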

#if defined(THREADED_RTS) && defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 0)
    newThreadLocalKey(&gctKey);
#endif

    initGcThreads(from, to);
}


void
exitStorage (void)
{
    nonmovingExit();
    updateNurseriesStats();
    stat_exit();
}

void
freeStorage (bool free_heap)
{
    stgFree(generations);
    if (free_heap) freeAllMBlocks();
#if defined(THREADED_RTS)
    closeMutex(&sm_mutex);
#endif
    stgFree(nurseries);
#if defined(THREADED_RTS) && defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 0)
    freeThreadLocalKey(&gctKey);
#endif
    freeGcThreads();
}

/* -----------------------------------------------------------------------------
   Note [CAF management]
   ~~~~~~~~~~~~~~~~~~~~~

   The entry code for every CAF does the following:

      - calls newCAF, which builds a CAF_BLACKHOLE on the heap and atomically
        updates the CAF with IND_STATIC pointing to the CAF_BLACKHOLE

      - if newCAF returns zero, it re-enters the CAF (see Note [atomic
        CAF entry])

      - pushes an update frame pointing to the CAF_BLACKHOLE

   Why do we build a BLACKHOLE in the heap rather than just updating
   the thunk directly?  It's so that we only need one kind of update
   frame - otherwise we'd need a static version of the update frame
   too, and various other parts of the RTS that deal with update
   frames would also need special cases for static update frames.

   newCAF() does the following:

      - atomically locks the CAF (see [atomic CAF entry])

      - it builds a CAF_BLACKHOLE on the heap

      - it updates the CAF with an IND_STATIC pointing to the
        CAF_BLACKHOLE, atomically.

      - it puts the CAF on the oldest generation's mutable list.
        This is so that we treat the CAF as a root when collecting
        younger generations.

      - links the CAF onto the CAF list (see below)
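
   In pictures (an illustrative summary of the above; the WHITEHOLE step
   exists only in THREADED_RTS):

      THUNK_STATIC --lock--> WHITEHOLE --update--> IND_STATIC --> CAF_BLACKHOLE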

   ------------------
   Note [atomic CAF entry]
   ~~~~~~~~~~~~~~~~~~~~~~~

   With THREADED_RTS, newCAF() is required to be atomic (see
   #5558). This is because if two threads happened to enter the same
   CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
   and so the normal threadPaused() machinery for detecting duplicate
   evaluation will not detect this.  Hence in lockCAF() below, we
   atomically lock the CAF with WHITEHOLE before updating it with
   IND_STATIC, and return zero if another thread locked the CAF first.
   In the event that we lost the race, CAF entry code will re-enter
   the CAF and block on the other thread's CAF_BLACKHOLE.

   ------------------
   Note [GHCi CAFs]
   ~~~~~~~~~~~~~~~~

   For GHCi, we have additional requirements when dealing with CAFs:

      - we must *retain* all dynamically-loaded CAFs ever entered,
        just in case we need them again.
      - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.

      To do this, we use an additional CAF list.  When newCAF() is
      called on a dynamically-loaded CAF, we add it to the CAF list
      instead of the old-generation mutable list, and save away its
      old info pointer (in caf->saved_info) for later reversion.

      To revert all the CAFs, we traverse the CAF list and reset the
      info pointer to caf->saved_info, then throw away the CAF list.
      (see GC.c:revertCAFs()).

      -- SDM 29/1/01

   ------------------
   Note [Static objects under the nonmoving collector]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   Static object management is a bit tricky under the nonmoving collector as we
   need to maintain a bit more state than in the moving collector. In
   particular, the moving collector uses the low bits of the STATIC_LINK field
   to determine whether the object has been moved to the scavenger's work list
   (see Note [STATIC_LINK fields] in Storage.h).

   However, the nonmoving collector also needs a place to keep its mark bit.
   This is problematic as we therefore need at least three bits of state
   but can assume only two bits are available in STATIC_LINK (due to 32-bit
   systems).

   To accommodate this we move handling of static objects entirely to the
   oldest generation when the nonmoving collector is in use. To do this safely
   and efficiently we allocate the blackhole created by lockCAF() directly in
   the non-moving heap. This means that the moving collector can completely
   ignore static objects in minor collections since they are guaranteed not to
   have any references into the moving heap. Of course, the blackhole itself
   likely will contain a reference into the moving heap but this is
   significantly easier to handle, being a heap-allocated object (see Note
   [Aging under the non-moving collector] in NonMoving.c for details).

   During the moving phase of a major collection we treat static objects
   as we do any other reference into the non-moving heap by pushing them
   to the non-moving mark queue (see Note [Aging under the non-moving
   collector]).

   This allows the non-moving collector to have full control over the flags
   in STATIC_LINK, which it uses as described in Note [STATIC_LINK fields]).
   This is implemented by NonMovingMark.c:bump_static_flag.

   In short, the plan is:

     - lockCAF allocates its blackhole in the nonmoving heap. This is important
       to ensure that we do not need to place the static object on the mut_list
       lest we would need some way to ensure that it is evacuated only once
       during a moving collection.

     - evacuate_static_object merely pushes objects to the mark queue

     - the nonmoving collector uses the flags in STATIC_LINK as its mark bit.

   -------------------------------------------------------------------------- */

STATIC_INLINE StgInd *
lockCAF (StgRegTable *reg, StgIndStatic *caf)
{
    const StgInfoTable *orig_info;
    Capability *cap = regTableToCapability(reg);
    StgInd *bh;

    orig_info = caf->header.info;

#if defined(THREADED_RTS)
    const StgInfoTable *cur_info;

    if (orig_info == &stg_IND_STATIC_info ||
        orig_info == &stg_WHITEHOLE_info) {
        // already claimed by another thread; re-enter the CAF
        return NULL;
    }

    cur_info = (const StgInfoTable *)
        cas((StgVolatilePtr)&caf->header.info,
            (StgWord)orig_info,
            (StgWord)&stg_WHITEHOLE_info);

    if (cur_info != orig_info) {
        // already claimed by another thread; re-enter the CAF
        return NULL;
    }

    // successfully claimed by us; overwrite with IND_STATIC
#endif

    // Push stuff that will become unreachable after updating to UpdRemSet to
    // maintain snapshot invariant
    const StgInfoTable *orig_info_tbl = INFO_PTR_TO_STRUCT(orig_info);
    // OSA: Assertions to make sure my understanding of static thunks is correct
    ASSERT(orig_info_tbl->type == THUNK_STATIC);
    // Secondly I think static thunks can't have payload: anything that they
    // reference should be in SRTs
    ASSERT(orig_info_tbl->layout.payload.ptrs == 0);
    // Because the payload is empty we just push the SRT
    IF_NONMOVING_WRITE_BARRIER_ENABLED {
        StgThunkInfoTable *thunk_info = itbl_to_thunk_itbl(orig_info_tbl);
        if (thunk_info->i.srt) {
            updateRemembSetPushClosure(cap, GET_SRT(thunk_info));
        }
    }

    // For the benefit of revertCAFs(), save the original info pointer
    caf->saved_info = orig_info;

    // Allocate the blackhole indirection closure
    if (RtsFlags.GcFlags.useNonmoving) {
        // See Note [Static objects under the nonmoving collector].
        ACQUIRE_SM_LOCK;
        bh = (StgInd *)nonmovingAllocate(cap, sizeofW(*bh));
        RELEASE_SM_LOCK;
        recordMutableCap((StgClosure*)bh,
                         regTableToCapability(reg), oldest_gen->no);
    } else {
        bh = (StgInd *)allocate(cap, sizeofW(*bh));
    }
    bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
    SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
    // Ensure that above writes are visible before we introduce reference as CAF indirectee.
    write_barrier();

    caf->indirectee = (StgClosure *)bh;
    write_barrier();
    SET_INFO((StgClosure*)caf,&stg_IND_STATIC_info);

    return bh;
}

StgInd *
newCAF(StgRegTable *reg, StgIndStatic *caf)
{
    StgInd *bh;

    bh = lockCAF(reg, caf);
    if (!bh) return NULL;

    if(keepCAFs)
    {
        // Note [dyn_caf_list]
        // If we are in GHCi _and_ we are using dynamic libraries,
        // then we can't redirect newCAF calls to newRetainedCAF (see below),
        // so we make newCAF behave almost like newRetainedCAF.
        // The dynamic libraries might be used by both the interpreted
        // program and GHCi itself, so they must not be reverted.
        // This also means that in GHCi with dynamic libraries, CAFs are not
        // garbage collected. If this turns out to be a problem, we could
        // do another hack here and do an address range test on caf to figure
        // out whether it is from a dynamic library.

        ACQUIRE_SM_LOCK; // dyn_caf_list is global, locked by sm_mutex
        caf->static_link = (StgClosure*)dyn_caf_list;
        dyn_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
        RELEASE_SM_LOCK;
    }
    else
    {
        // Put this CAF on the mutable list for the old generation.
        // N.B. the nonmoving collector works a bit differently: see
        // Note [Static objects under the nonmoving collector].
        if (oldest_gen->no != 0 && !RtsFlags.GcFlags.useNonmoving) {
            recordMutableCap((StgClosure*)caf,
                             regTableToCapability(reg), oldest_gen->no);
        }

#if defined(DEBUG)
        // In the DEBUG rts, we keep track of live CAFs by chaining them
        // onto a list debug_caf_list.  This is so that we can tell if we
        // ever enter a GC'd CAF, and emit a suitable barf().
        //
        // The saved_info field of the CAF is used as the link field for
        // debug_caf_list, because this field is only used by newDynCAF
        // for revertible CAFs, and we don't put those on the
        // debug_caf_list.
        ACQUIRE_SM_LOCK; // debug_caf_list is global, locked by sm_mutex
        ((StgIndStatic *)caf)->saved_info = (const StgInfoTable*)debug_caf_list;
        debug_caf_list = (StgIndStatic*)caf;
        RELEASE_SM_LOCK;
#endif
    }

    return bh;
}

// External API for setting the keepCAFs flag. see #3900.
void
setKeepCAFs (void)
{
    keepCAFs = 1;
}

// An alternate version of newCAF which is used for dynamically loaded
// object code in GHCi.  In this case we want to retain *all* CAFs in
// the object code, because they might be demanded at any time from an
// expression evaluated on the command line.
//
// Also, GHCi might want to revert CAFs, so we add these to the
// revertible_caf_list.
//
// The linker hackily arranges that references to newCAF from dynamic
// code end up pointing to newRetainedCAF.
//
StgInd* newRetainedCAF (StgRegTable *reg, StgIndStatic *caf)
{
    StgInd *bh;

    bh = lockCAF(reg, caf);
    if (!bh) return NULL;

    ACQUIRE_SM_LOCK;
    caf->static_link = (StgClosure*)revertible_caf_list;
    revertible_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);

    RELEASE_SM_LOCK;

    return bh;
}

// If we are using loadObj/unloadObj in the linker, then we want to
//
//  - retain all CAFs in statically linked code (keepCAFs == true),
//    because we might link a new object that uses any of these CAFs.
//
//  - GC CAFs in dynamically-linked code, so that we can detect when
//    a dynamically-linked object is unloadable.
//
// So for this case, we set keepCAFs to true, and link newCAF to newGCdCAF
// for dynamically-linked code.
//
StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf)
{
    StgInd *bh;

    bh = lockCAF(reg, caf);
    if (!bh) return NULL;

    // Put this CAF on the mutable list for the old generation.
    // N.B. the nonmoving collector works a bit differently:
    // see Note [Static objects under the nonmoving collector].
    if (oldest_gen->no != 0 && !RtsFlags.GcFlags.useNonmoving) {
        recordMutableCap((StgClosure*)caf,
                         regTableToCapability(reg), oldest_gen->no);
    }

    return bh;
}

/* -----------------------------------------------------------------------------
   Nursery management.
   -------------------------------------------------------------------------- */

static bdescr *
allocNursery (uint32_t node, bdescr *tail, W_ blocks)
{
    bdescr *bd = NULL;
    W_ i, n;

    // We allocate the nursery as a single contiguous block and then
    // divide it into single blocks manually.  This way we guarantee
    // that the nursery blocks are adjacent, so that the processor's
    // automatic prefetching works across nursery blocks.  This is a
    // tiny optimisation (~0.5%), but it's free.

    while (blocks > 0) {
        n = stg_min(BLOCKS_PER_MBLOCK, blocks);
        // allocLargeChunk will prefer large chunks, but will pick up
        // small chunks if there are any available.  We must allow
        // single blocks here to avoid fragmentation (#7257)
        bd = allocLargeChunkOnNode(node, 1, n);
        n = bd->blocks;
        blocks -= n;

        for (i = 0; i < n; i++) {
            initBdescr(&bd[i], g0, g0);

            bd[i].blocks = 1;
            bd[i].flags = 0;

            if (i > 0) {
                bd[i].u.back = &bd[i-1];
            } else {
                bd[i].u.back = NULL;
            }

            if (i+1 < n) {
                bd[i].link = &bd[i+1];
            } else {
                bd[i].link = tail;
                if (tail != NULL) {
                    tail->u.back = &bd[i];
                }
            }

            bd[i].free = bd[i].start;
        }

        tail = &bd[0];
    }

    return &bd[0];
}

STATIC_INLINE void
assignNurseryToCapability (Capability *cap, uint32_t n)
{
    ASSERT(n < n_nurseries);
    cap->r.rNursery = &nurseries[n];
    cap->r.rCurrentNursery = nurseries[n].blocks;
    newNurseryBlock(nurseries[n].blocks);
    cap->r.rCurrentAlloc   = NULL;
    ASSERT(cap->r.rCurrentNursery->node == cap->node);
}

/*
 * Give each Capability a nursery from the pool. No need to do atomic increments
 * here, everything must be stopped to call this function.
 */
static void
assignNurseriesToCapabilities (uint32_t from, uint32_t to)
{
    uint32_t i, node;

    for (i = from; i < to; i++) {
        node = capabilities[i]->node;
        assignNurseryToCapability(capabilities[i], next_nursery[node]);
        next_nursery[node] += n_numa_nodes;
    }
}

static void
allocNurseries (uint32_t from, uint32_t to)
{
    uint32_t i;
    memcount n_blocks;

    if (RtsFlags.GcFlags.nurseryChunkSize) {
        n_blocks = RtsFlags.GcFlags.nurseryChunkSize;
    } else {
        n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
    }

    for (i = from; i < to; i++) {
        nurseries[i].blocks = allocNursery(capNoToNumaNode(i), NULL, n_blocks);
        nurseries[i].n_blocks = n_blocks;
    }
}

void
resetNurseries (void)
{
    uint32_t n;

    for (n = 0; n < n_numa_nodes; n++) {
        next_nursery[n] = n;
    }
    assignNurseriesToCapabilities(0, n_capabilities);

#if defined(DEBUG)
    bdescr *bd;
    for (n = 0; n < n_nurseries; n++) {
        for (bd = nurseries[n].blocks; bd; bd = bd->link) {
            ASSERT(bd->gen_no == 0);
            ASSERT(bd->gen == g0);
            ASSERT(bd->node == capNoToNumaNode(n));
            IF_DEBUG(zero_on_gc, memset(bd->start, 0xaa, BLOCK_SIZE));
        }
    }
#endif
}

W_
countNurseryBlocks (void)
{
    uint32_t i;
    W_ blocks = 0;

    for (i = 0; i < n_nurseries; i++) {
        blocks += nurseries[i].n_blocks;
    }
    return blocks;
}

//
// Resize each of the nurseries to the specified size.
//
static void
resizeNurseriesEach (W_ blocks)
{
    uint32_t i, node;
    bdescr *bd;
    W_ nursery_blocks;
    nursery *nursery;

    for (i = 0; i < n_nurseries; i++) {
        nursery = &nurseries[i];
        nursery_blocks = nursery->n_blocks;
        if (nursery_blocks == blocks) continue;

        node = capNoToNumaNode(i);
        if (nursery_blocks < blocks) {
            debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
                       blocks);
            nursery->blocks = allocNursery(node, nursery->blocks,
                                           blocks-nursery_blocks);
        }
        else
        {
            bdescr *next_bd;

            debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
                       blocks);

            bd = nursery->blocks;
            while (nursery_blocks > blocks) {
                next_bd = bd->link;
                next_bd->u.back = NULL;
                nursery_blocks -= bd->blocks; // might be a large block
                freeGroup(bd);
                bd = next_bd;
            }
            nursery->blocks = bd;
            // might have gone just under, by freeing a large block, so make
            // up the difference.
            if (nursery_blocks < blocks) {
                nursery->blocks = allocNursery(node, nursery->blocks,
                                               blocks-nursery_blocks);
            }
        }
        nursery->n_blocks = blocks;
        ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
    }
}

void
resizeNurseriesFixed (void)
{
    uint32_t blocks;

    if (RtsFlags.GcFlags.nurseryChunkSize) {
        blocks = RtsFlags.GcFlags.nurseryChunkSize;
    } else {
        blocks = RtsFlags.GcFlags.minAllocAreaSize;
    }

    resizeNurseriesEach(blocks);
}

// Resize the nurseries to the total specified size.
//
void
resizeNurseries (W_ blocks)
{
    // If there are multiple nurseries, then we just divide the number
    // of available blocks between them.
    resizeNurseriesEach(blocks / n_nurseries);
}

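/*
 * Claim the next unused nursery chunk for this Capability, preferring
 * chunks on the Capability's own NUMA node.  Returns false if all the
 * chunks are already in use, in which case it is time to GC.
 */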
bool
getNewNursery (Capability *cap)
{
    StgWord i;
    uint32_t node = cap->node;
    uint32_t n;

    for(;;) {
        i = next_nursery[node];
        if (i < n_nurseries) {
            if (cas(&next_nursery[node], i, i+n_numa_nodes) == i) {
                assignNurseryToCapability(cap, i);
                return true;
            }
        } else if (n_numa_nodes > 1) {
            // Try to find an unused nursery chunk on other nodes.  We'll get
            // remote memory, but the rationale is that avoiding GC is better
            // than avoiding remote memory access.
            bool lost = false;
            for (n = 0; n < n_numa_nodes; n++) {
                if (n == node) continue;
                i = next_nursery[n];
                if (i < n_nurseries) {
                    if (cas(&next_nursery[n], i, i+n_numa_nodes) == i) {
                        assignNurseryToCapability(cap, i);
                        return true;
                    } else {
                        lost = true; /* lost a race */
                    }
                }
            }
            if (!lost) return false;
        } else {
            return false;
        }
    }
}

/* -----------------------------------------------------------------------------
   move_STACK is called to update the TSO structure after it has been
   moved from one place to another.
   -------------------------------------------------------------------------- */

void
move_STACK (StgStack *src, StgStack *dest)
{
    ptrdiff_t diff;

    // relocate the stack pointer...
    diff = (StgPtr)dest - (StgPtr)src; // In *words*
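    // sp is an interior pointer into the stack object itself, so it is the
    // only field that has to be slid by the move; the other StgStack fields
    // are position-independent.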
    dest->sp = (StgPtr)dest->sp + diff;
}

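// Account for an allocation of n words on this Capability: bump the ticky
// allocation counter, charge the current cost centre (when profiling), and
// decrement the running thread's allocation limit, if there is one.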
STATIC_INLINE void
accountAllocation(Capability *cap, W_ n)
{
    TICK_ALLOC_HEAP_NOCTR(WDS(n));
    CCS_ALLOC(cap->r.rCCCS,n);
    if (cap->r.rCurrentTSO != NULL) {
        // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
        ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
                     (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
                      - n*sizeof(W_)));
    }

}

/* -----------------------------------------------------------------------------
   StgPtr allocate (Capability *cap, W_ n)

   Allocates an area of memory n *words* large, from the nursery of
   the supplied Capability, or from the global block pool if the area
   requested is larger than LARGE_OBJECT_THRESHOLD.  Memory is not
   allocated from the current nursery block, so as not to interfere
   with Hp/HpLim.

   The address of the allocated memory is returned. allocate() never
   fails; if it returns, the returned value is a valid address.  If
   the nursery is already full, then another block is allocated from
   the global block pool.  If we need to get memory from the OS and
   that operation fails, then the whole process will be killed.
   -------------------------------------------------------------------------- */
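
/* For example (illustrative): a typical call is

       StgPtr p = allocate(cap, sizeofW(StgHeader) + 2);

   which reserves room for a closure with a two-word payload; the caller
   must initialise the header and payload before the memory can be
   traversed by the GC.
*/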

/*
 * Allocate some n words of heap memory; terminating
 * on heap overflow
 */
StgPtr
allocate (Capability *cap, W_ n)
{
    StgPtr p = allocateMightFail(cap, n);
    if (p == NULL) {
        reportHeapOverflow();
        // heapOverflow() doesn't exit (see #2592), but we aren't
        // in a position to do a clean shutdown here: we
        // either have to allocate the memory or exit now.
        // Allocating the memory would be bad, because the user
        // has requested that we not exceed maxHeapSize, so we
        // just exit.
        stg_exit(EXIT_HEAPOVERFLOW);
    }
    return p;
}

/*
 * Allocate some n words of heap memory; returning NULL
 * on heap overflow
 */
StgPtr
allocateMightFail (Capability *cap, W_ n)
{
    bdescr *bd;
    StgPtr p;

    if (RTS_UNLIKELY(n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))) {
        // The largest number of words such that
        // the computation of req_blocks will not overflow.
        W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_);
        W_ req_blocks;

        if (n > max_words)
            req_blocks = HS_WORD_MAX; // signal overflow below
        else
            req_blocks = (W_)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
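        // e.g. (illustrative, 64-bit, 4k blocks): n = 1000 words is 8000
        // bytes, which rounds up to two blocks, so req_blocks = 2.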

        // Attempting to allocate an object larger than maxHeapSize
        // should definitely be disallowed.  (bug #1791)
        if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
             req_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
            req_blocks >= HS_INT32_MAX)   // avoid overflow when
                                          // calling allocGroup() below
        {
            return NULL;
        }

        // Only credit allocation after we've passed the size check above
        accountAllocation(cap, n);

        ACQUIRE_SM_LOCK
        bd = allocGroupOnNode(cap->node,req_blocks);
        dbl_link_onto(bd, &g0->large_objects);
        g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
        g0->n_new_large_words += n;
        RELEASE_SM_LOCK;
        initBdescr(bd, g0, g0);
        bd->flags = BF_LARGE;
        bd->free = bd->start + n;
        cap->total_allocated += n;
        return bd->start;
    }

    /* small allocation (<LARGE_OBJECT_THRESHOLD) */

    accountAllocation(cap, n);
    bd = cap->r.rCurrentAlloc;
    if (RTS_UNLIKELY(bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W)) {
        if (bd) finishedNurseryBlock(cap,bd);

        // The CurrentAlloc block is full, we need to find another
        // one.  First, we try taking the next block from the
        // nursery:
        bd = cap->r.rCurrentNursery->link;

        if (bd == NULL) {
            // The nursery is empty: allocate a fresh block (we can't
            // fail here).
            ACQUIRE_SM_LOCK;
            bd = allocBlockOnNode(cap->node);
            cap->r.rNursery->n_blocks++;
            RELEASE_SM_LOCK;
            initBdescr(bd, g0, g0);
            bd->flags = 0;
            // If we had to allocate a new block, then we'll GC
            // pretty quickly now, because MAYBE_GC() will
            // notice that CurrentNursery->link is NULL.
        } else {
            newNurseryBlock(bd);
            // we have a block in the nursery: take it and put
            // it at the *front* of the nursery list, and use it
            // to allocate() from.