/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2012
 *
 * Storage manager front end
 *
 * Documentation on the architecture of the Storage Manager can be
 * found in the online commentary:
 *
 *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"

#include "Storage.h"
#include "GCThread.h"
#include "RtsUtils.h"
#include "Stats.h"
#include "BlockAlloc.h"
#include "Weak.h"
#include "Sanity.h"
#include "Arena.h"
#include "Capability.h"
#include "Schedule.h"
#include "RetainerProfile.h"        // for counting memory blocks (memInventory)
#include "OSMem.h"
#include "Trace.h"
#include "GC.h"
#include "Evac.h"

#include <string.h>

#include "ffi.h"

/*
 * All these globals require sm_mutex to access in THREADED_RTS mode.
 */
StgClosure    *caf_list         = NULL;
StgClosure    *revertible_caf_list = NULL;
rtsBool       keepCAFs;

nat large_alloc_lim;    /* GC if n_large_blocks in any nursery
                         * reaches this. */

bdescr *exec_block;

generation *generations = NULL; /* all the generations */
generation *g0          = NULL; /* generation 0, for convenience */
generation *oldest_gen  = NULL; /* oldest generation, for convenience */

nursery *nurseries = NULL;     /* array of nurseries, size == n_capabilities */

#ifdef THREADED_RTS
/*
 * Storage manager mutex:  protects all the above state from
 * simultaneous access by two STG threads.
 */
Mutex sm_mutex;
#endif

static void allocNurseries (nat from, nat to);
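
// Initialise all of a generation's fields to their empty initial values.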
static void
initGeneration (generation *gen, int g)
{
    gen->no = g;
    gen->collections = 0;
    gen->par_collections = 0;
    gen->failed_promotions = 0;
    gen->max_blocks = 0;
    gen->blocks = NULL;
    gen->n_blocks = 0;
    gen->n_words = 0;
    gen->live_estimate = 0;
    gen->old_blocks = NULL;
    gen->n_old_blocks = 0;
    gen->large_objects = NULL;
    gen->n_large_blocks = 0;
    gen->n_new_large_words = 0;
    gen->scavenged_large_objects = NULL;
    gen->n_scavenged_large_blocks = 0;
    gen->mark = 0;
    gen->compact = 0;
    gen->bitmap = NULL;
#ifdef THREADED_RTS
    initSpinLock(&gen->sync);
#endif
    gen->threads = END_TSO_QUEUE;
    gen->old_threads = END_TSO_QUEUE;
}

void
initStorage (void)
{
  nat g;

  if (generations != NULL) {
      // multi-init protection
      return;
  }

  initMBlocks();

  /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
   * doing something reasonable.
   */
  /* We use the NOT_NULL variant or gcc warns that the test is always true */
  ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
  ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
  ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));

  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.heapSizeSuggestion >
      RtsFlags.GcFlags.maxHeapSize) {
      RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
  }

  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.minAllocAreaSize >
      RtsFlags.GcFlags.maxHeapSize) {
      errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
      RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
  }

  initBlockAllocator();

#if defined(THREADED_RTS)
  initMutex(&sm_mutex);
#endif

  ACQUIRE_SM_LOCK;

  /* allocate generation info array */
  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
                                             * sizeof(struct generation_),
                                             "initStorage: gens");

  /* Initialise all generations */
  for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
      initGeneration(&generations[g], g);
  }

  /* A couple of convenience pointers */
  g0 = &generations[0];
  oldest_gen = &generations[RtsFlags.GcFlags.generations-1];

  /* Set up the destination pointers in each younger gen. step */
  for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
      generations[g].to = &generations[g+1];
  }
  oldest_gen->to = oldest_gen;

  /* The oldest generation has one step. */
  if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
      if (RtsFlags.GcFlags.generations == 1) {
          errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
      } else {
          oldest_gen->mark = 1;
          if (RtsFlags.GcFlags.compact)
              oldest_gen->compact = 1;
      }
  }

  generations[0].max_blocks = 0;

  weak_ptr_list = NULL;
  caf_list = END_OF_STATIC_LIST;
  revertible_caf_list = END_OF_STATIC_LIST;

  /* initialise the allocate() interface */
  large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;

  exec_block = NULL;

#ifdef THREADED_RTS
  initSpinLock(&gc_alloc_block_sync);
  whitehole_spin = 0;
#endif

  N = 0;

  storageAddCapabilities(0, n_capabilities);

  IF_DEBUG(gc, statDescribeGens());

  RELEASE_SM_LOCK;

  traceEventHeapInfo(CAPSET_HEAP_DEFAULT,
                     RtsFlags.GcFlags.generations,
                     RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE_W * sizeof(W_),
                     RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W * sizeof(W_),
                     MBLOCK_SIZE_W * sizeof(W_),
                     BLOCK_SIZE_W  * sizeof(W_));
}

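// Set up nurseries, mutable lists and GC thread state for capabilities
// [from..to).  from == 0 is the initial set-up; from > 0 means capabilities
// are being added to an already-initialised storage manager.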
void storageAddCapabilities (nat from, nat to)
{
    nat n, g, i;

    if (from > 0) {
        nurseries = stgReallocBytes(nurseries, to * sizeof(struct nursery_),
                                    "storageAddCapabilities");
    } else {
        nurseries = stgMallocBytes(to * sizeof(struct nursery_),
                                   "storageAddCapabilities");
    }

    // we've moved the nurseries, so we have to update the rNursery
    // pointers from the Capabilities.
    for (i = 0; i < to; i++) {
        capabilities[i].r.rNursery = &nurseries[i];
    }

    /* The allocation area.  Policy: keep the allocation area
     * small to begin with, even if we have a large suggested heap
     * size.  Reason: we're going to do a major collection first, and we
     * don't want it to be a big one.  This vague idea is borne out by
     * rigorous experimental evidence.
     */
    allocNurseries(from, to);

    // allocate a block for each mut list
    for (n = from; n < to; n++) {
        for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
            capabilities[n].mut_lists[g] = allocBlock();
        }
    }

    initGcThreads(from, to);
}


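// Called at RTS shutdown: account for any allocation still sitting in the
// nurseries before the final statistics are reported.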
void
exitStorage (void)
{
    lnat allocated = updateNurseriesStats();
    stat_exit(allocated);
}

void
freeStorage (rtsBool free_heap)
{
    stgFree(generations);
    if (free_heap) freeAllMBlocks();
#if defined(THREADED_RTS)
    closeMutex(&sm_mutex);
#endif
    stgFree(nurseries);
    freeGcThreads();
}

/* -----------------------------------------------------------------------------
   CAF management.

   The entry code for every CAF does the following:

      - builds a CAF_BLACKHOLE in the heap

      - calls newCaf, which atomically updates the CAF with
        IND_STATIC pointing to the CAF_BLACKHOLE

      - if newCaf returns zero, it re-enters the CAF (see Note [atomic
        CAF entry])

      - pushes an update frame pointing to the CAF_BLACKHOLE

   Why do we build a BLACKHOLE in the heap rather than just updating
   the thunk directly?  It's so that we only need one kind of update
   frame - otherwise we'd need a static version of the update frame
   too, and various other parts of the RTS that deal with update
   frames would also need special cases for static update frames.

   newCaf() does the following:

      - it updates the CAF with an IND_STATIC pointing to the
        CAF_BLACKHOLE, atomically.

      - it puts the CAF on the oldest generation's mutable list.
        This is so that we treat the CAF as a root when collecting
        younger generations.

   ------------------
   Note [atomic CAF entry]

   With THREADED_RTS, newCaf() is required to be atomic (see
   #5558). This is because if two threads happened to enter the same
   CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
   and so the normal threadPaused() machinery for detecting duplicate
   evaluation will not detect this.  Hence in lockCAF() below, we
   atomically lock the CAF with WHITEHOLE before updating it with
   IND_STATIC, and return zero if another thread locked the CAF first.
   In the event that we lost the race, CAF entry code will re-enter
   the CAF and block on the other thread's CAF_BLACKHOLE.

   ------------------
   Note [GHCi CAFs]

   For GHCI, we have additional requirements when dealing with CAFs:

      - we must *retain* all dynamically-loaded CAFs ever entered,
        just in case we need them again.
      - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.

      To do this, we use an additional CAF list.  When newCaf() is
      called on a dynamically-loaded CAF, we add it to the CAF list
      instead of the old-generation mutable list, and save away its
      old info pointer (in caf->saved_info) for later reversion.

      To revert all the CAFs, we traverse the CAF list and reset the
      info pointer to caf->saved_info, then throw away the CAF list.
      (see GC.c:revertCAFs()).

      -- SDM 29/1/01

   -------------------------------------------------------------------------- */

STATIC_INLINE StgWord lockCAF (StgClosure *caf, StgClosure *bh)
{
    const StgInfoTable *orig_info;

    orig_info = caf->header.info;

#ifdef THREADED_RTS
    const StgInfoTable *cur_info;

    if (orig_info == &stg_IND_STATIC_info ||
        orig_info == &stg_WHITEHOLE_info) {
        // already claimed by another thread; re-enter the CAF
        return 0;
    }

    cur_info = (const StgInfoTable *)
        cas((StgVolatilePtr)&caf->header.info,
            (StgWord)orig_info,
            (StgWord)&stg_WHITEHOLE_info);

    if (cur_info != orig_info) {
        // already claimed by another thread; re-enter the CAF
        return 0;
    }

    // successfully claimed by us; overwrite with IND_STATIC
#endif

    // For the benefit of revertCAFs(), save the original info pointer
    ((StgIndStatic *)caf)->saved_info  = orig_info;

    ((StgIndStatic*)caf)->indirectee = bh;
    write_barrier();
    SET_INFO(caf,&stg_IND_STATIC_info);

    return 1;
}

StgWord
newCAF(StgRegTable *reg, StgClosure *caf, StgClosure *bh)
{
    if (lockCAF(caf,bh) == 0) return 0;

    if(keepCAFs)
    {
        // HACK:
        // If we are in GHCi _and_ we are using dynamic libraries,
        // then we can't redirect newCAF calls to newDynCAF (see below),
        // so we make newCAF behave almost like newDynCAF.
        // The dynamic libraries might be used by both the interpreted
        // program and GHCi itself, so they must not be reverted.
        // This also means that in GHCi with dynamic libraries, CAFs are not
        // garbage collected. If this turns out to be a problem, we could
        // do another hack here and do an address range test on caf to figure
        // out whether it is from a dynamic library.

        ACQUIRE_SM_LOCK; // caf_list is global, locked by sm_mutex
        ((StgIndStatic *)caf)->static_link = caf_list;
        caf_list = caf;
        RELEASE_SM_LOCK;
    }
    else
    {
        // Put this CAF on the mutable list for the old generation.
        ((StgIndStatic *)caf)->saved_info = NULL;
        if (oldest_gen->no != 0) {
            recordMutableCap(caf, regTableToCapability(reg), oldest_gen->no);
        }
    }
    return 1;
}

// External API for setting the keepCAFs flag. see #3900.
void
setKeepCAFs (void)
{
    keepCAFs = 1;
}

// An alternate version of newCaf which is used for dynamically loaded
// object code in GHCi.  In this case we want to retain *all* CAFs in
// the object code, because they might be demanded at any time from an
// expression evaluated on the command line.
// Also, GHCi might want to revert CAFs, so we add these to the
// revertible_caf_list.
//
// The linker hackily arranges that references to newCaf from dynamic
// code end up pointing to newDynCAF.
StgWord
newDynCAF (StgRegTable *reg STG_UNUSED, StgClosure *caf, StgClosure *bh)
{
    if (lockCAF(caf,bh) == 0) return 0;

    ACQUIRE_SM_LOCK;

    ((StgIndStatic *)caf)->static_link = revertible_caf_list;
    revertible_caf_list = caf;

    RELEASE_SM_LOCK;

    return 1;
}

/* -----------------------------------------------------------------------------
   Nursery management.
   -------------------------------------------------------------------------- */

static bdescr *
allocNursery (bdescr *tail, nat blocks)
{
    bdescr *bd = NULL;
    nat i, n;

    // We allocate the nursery as a single contiguous block and then
    // divide it into single blocks manually.  This way we guarantee
    // that the nursery blocks are adjacent, so that the processor's
    // automatic prefetching works across nursery blocks.  This is a
    // tiny optimisation (~0.5%), but it's free.

    while (blocks > 0) {
        if (blocks >= BLOCKS_PER_MBLOCK) {
            bd = allocLargeChunk(); // see comment with allocLargeChunk()
            n = bd->blocks;
        } else {
            bd = allocGroup(blocks);
            n = blocks;
        }

        blocks -= n;

        for (i = 0; i < n; i++) {
            initBdescr(&bd[i], g0, g0);

            bd[i].blocks = 1;
            bd[i].flags = 0;

            if (i > 0) {
                bd[i].u.back = &bd[i-1];
            } else {
                bd[i].u.back = NULL;
            }

            if (i+1 < n) {
                bd[i].link = &bd[i+1];
            } else {
                bd[i].link = tail;
                if (tail != NULL) {
                    tail->u.back = &bd[i];
                }
            }

            bd[i].free = bd[i].start;
        }

        tail = &bd[0];
    }

    return &bd[0];
}

static void
assignNurseriesToCapabilities (nat from, nat to)
{
    nat i;

    for (i = from; i < to; i++) {
        capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
        capabilities[i].r.rCurrentAlloc   = NULL;
    }
}

static void
allocNurseries (nat from, nat to)
{
    nat i;

    for (i = from; i < to; i++) {
        nurseries[i].blocks =
            allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
        nurseries[i].n_blocks =
            RtsFlags.GcFlags.minAllocAreaSize;
    }
    assignNurseriesToCapabilities(from, to);
}
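
// Empty the blocks of cap's nursery, returning the number of words that had
// been allocated in them.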
lnat
clearNursery (Capability *cap)
{
    bdescr *bd;
    lnat allocated = 0;

    for (bd = nurseries[cap->no].blocks; bd; bd = bd->link) {
        allocated            += (lnat)(bd->free - bd->start);
        cap->total_allocated += (lnat)(bd->free - bd->start);
        bd->free = bd->start;
        ASSERT(bd->gen_no == 0);
        ASSERT(bd->gen == g0);
        IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
    }

    return allocated;
}

void
resetNurseries (void)
{
    assignNurseriesToCapabilities(0, n_capabilities);
}

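// Total number of blocks currently allocated to nurseries, across all
// capabilities.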
lnat
countNurseryBlocks (void)
{
    nat i;
    lnat blocks = 0;

    for (i = 0; i < n_capabilities; i++) {
        blocks += nurseries[i].n_blocks;
    }
    return blocks;
}

static void
resizeNursery (nursery *nursery, nat blocks)
{
  bdescr *bd;
  nat nursery_blocks;

  nursery_blocks = nursery->n_blocks;
  if (nursery_blocks == blocks) return;

  if (nursery_blocks < blocks) {
      debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
                 blocks);
    nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
  }
  else {
    bdescr *next_bd;

    debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
               blocks);

    bd = nursery->blocks;
    while (nursery_blocks > blocks) {
        next_bd = bd->link;
        next_bd->u.back = NULL;
        nursery_blocks -= bd->blocks; // might be a large block
        freeGroup(bd);
        bd = next_bd;
    }
    nursery->blocks = bd;
    // might have gone just under, by freeing a large block, so make
    // up the difference.
    if (nursery_blocks < blocks) {
        nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
    }
  }

  nursery->n_blocks = blocks;
  ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
}

//
// Resize each of the nurseries to the specified size.
//
void
resizeNurseriesFixed (nat blocks)
{
    nat i;
    for (i = 0; i < n_capabilities; i++) {
        resizeNursery(&nurseries[i], blocks);
    }
}

//
// Resize the nurseries to the total specified size.
//
void
resizeNurseries (nat blocks)
{
    // If there are multiple nurseries, then we just divide the number
    // of available blocks between them.
    resizeNurseriesFixed(blocks / n_capabilities);
}

/* -----------------------------------------------------------------------------
   move_STACK is called to update the TSO structure after it has been
   moved from one place to another.
   -------------------------------------------------------------------------- */

void
move_STACK (StgStack *src, StgStack *dest)
{
    ptrdiff_t diff;

    // relocate the stack pointer... 
    diff = (StgPtr)dest - (StgPtr)src; // In *words* 
    dest->sp = (StgPtr)dest->sp + diff;
}

/* -----------------------------------------------------------------------------
   allocate()

   This allocates memory in the current thread - it is intended for
   use primarily from STG-land where we have a Capability.  It is
   better than allocate() because it doesn't require taking the
   sm_mutex lock in the common case.

   Memory is allocated directly from the nursery if possible (but not
   from the current nursery block, so as not to interfere with
   Hp/HpLim).
   -------------------------------------------------------------------------- */

StgPtr
allocate (Capability *cap, lnat n)
{
    bdescr *bd;
    StgPtr p;

    TICK_ALLOC_HEAP_NOCTR(n);
    CCS_ALLOC(cap->r.rCCCS,n);

    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
        lnat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;

        // Attempting to allocate an object larger than maxHeapSize
        // should definitely be disallowed.  (bug #1791)
        if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
             req_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
            req_blocks >= HS_INT32_MAX)   // avoid overflow when
                                          // calling allocGroup() below
        {
            heapOverflow();
            // heapOverflow() doesn't exit (see #2592), but we aren't
            // in a position to do a clean shutdown here: we
            // either have to allocate the memory or exit now.
            // Allocating the memory would be bad, because the user
            // has requested that we not exceed maxHeapSize, so we
            // just exit.
            stg_exit(EXIT_HEAPOVERFLOW);
        }

        ACQUIRE_SM_LOCK
        bd = allocGroup(req_blocks);
        dbl_link_onto(bd, &g0->large_objects);
        g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
        g0->n_new_large_words += n;
        RELEASE_SM_LOCK;
        initBdescr(bd, g0, g0);
        bd->flags = BF_LARGE;
        bd->free = bd->start + n;
        cap->total_allocated += n;
        return bd->start;
    }

    /* small allocation (<LARGE_OBJECT_THRESHOLD) */

    bd = cap->r.rCurrentAlloc;
    if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
        
        // The CurrentAlloc block is full, we need to find another
        // one.  First, we try taking the next block from the
        // nursery:
        bd = cap->r.rCurrentNursery->link;
        
        if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
            // The nursery is empty, or the next block is already
            // full: allocate a fresh block (we can't fail here).
            ACQUIRE_SM_LOCK;
            bd = allocBlock();
            cap->r.rNursery->n_blocks++;
            RELEASE_SM_LOCK;
            initBdescr(bd, g0, g0);
            bd->flags = 0;
            // If we had to allocate a new block, then we'll GC
            // pretty quickly now, because MAYBE_GC() will
            // notice that CurrentNursery->link is NULL.
        } else {
            // we have a block in the nursery: take it and put
            // it at the *front* of the nursery list, and use it
            // to allocate() from.
            cap->r.rCurrentNursery->link = bd->link;
            if (bd->link != NULL) {
                bd->link->u.back = cap->r.rCurrentNursery;
            }
        }
        dbl_link_onto(bd, &cap->r.rNursery->blocks);
        cap->r.rCurrentAlloc = bd;
        IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
    }
    p = bd->free;
    bd->free += n;

    IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
    return p;
}

/* ---------------------------------------------------------------------------
   Allocate a fixed/pinned object.

   We allocate small pinned objects into a single block, allocating a
   new block when the current one overflows.  The block is chained
   onto the large_object_list of generation 0.

   NOTE: The GC can't in general handle pinned objects.  This
   interface is only safe to use for ByteArrays, which have no
   pointers and don't require scavenging.  It works because the
   block's descriptor has the BF_LARGE flag set, so the block is
   treated as a large object and chained onto various lists, rather
   than the individual objects being copied.  However, when it comes
   to scavenge the block, the GC will only scavenge the first object.
   The reason is that the GC can't linearly scan a block of pinned
   objects at the moment (doing so would require using the
   mostly-copying techniques).  But since we're restricting ourselves
   to pinned ByteArrays, not scavenging is ok.

   This function is called by newPinnedByteArray# which immediately
   fills the allocated memory with a MutableByteArray#.
   ------------------------------------------------------------------------- */

StgPtr
allocatePinned (Capability *cap, lnat n)
{
    StgPtr p;
    bdescr *bd;

    // If the request is for a large object, then allocate()
    // will give us a pinned object anyway.
    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
        p = allocate(cap, n);
        Bdescr(p)->flags |= BF_PINNED;
        return p;
    }

    TICK_ALLOC_HEAP_NOCTR(n);
    CCS_ALLOC(cap->r.rCCCS,n);

    bd = cap->pinned_object_block;

    // If we don't have a block of pinned objects yet, or the current
    // one isn't large enough to hold the new object, get a new one.
    if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {

        // stash the old block on cap->pinned_object_blocks.  On the
        // next GC cycle these objects will be moved to
        // g0->large_objects.
        if (bd != NULL) {
            dbl_link_onto(bd, &cap->pinned_object_blocks);
        }

        // We need to find another block.  We could just allocate one,
        // but that means taking a global lock and we really want to
        // avoid that (benchmarks that allocate a lot of pinned
        // objects scale really badly if we do this).
        //
        // So first, we try taking the next block from the nursery, in
        // the same way as allocate(), but note that we can only take
        // an *empty* block, because we're about to mark it as
        // BF_PINNED | BF_LARGE.
        bd = cap->r.rCurrentNursery->link;
        if (bd == NULL || bd->free != bd->start) { // must be empty!
            // The nursery is empty, or the next block is non-empty:
            // allocate a fresh block (we can't fail here).

            // XXX in the case when the next nursery block is
            // non-empty we aren't exerting any pressure to GC soon,
            // so if this case ever happens then we could in theory
            // keep allocating for ever without calling the GC. We
            // can't bump g0->n_new_large_words because that will be
            // counted towards allocation, and we're already counting
            // our pinned obects as allocation in
            // collect_pinned_object_blocks in the GC.
            ACQUIRE_SM_LOCK;
            bd = allocBlock();
            RELEASE_SM_LOCK;
            initBdescr(bd, g0, g0);
        } else {
            // we have a block in the nursery: steal it
            cap->r.rCurrentNursery->link = bd->link;
            if (bd->link != NULL) {
                bd->link->u.back = cap->r.rCurrentNursery;
            }
            cap->r.rNursery->n_blocks -= bd->blocks;
        }

        cap->pinned_object_block = bd;
        bd->flags  = BF_PINNED | BF_LARGE | BF_EVACUATED;

        // The pinned_object_block remains attached to the capability
        // until it is full, even if a GC occurs.  We want this
        // behaviour because otherwise the unallocated portion of the
        // block would be forever slop, and under certain workloads
        // (allocating a few ByteStrings per GC) we accumulate a lot
        // of slop.
        //
        // So, the pinned_object_block is initially marked
        // BF_EVACUATED so the GC won't touch it.  When it is full,
        // we place it on the large_objects list, and at the start of
        // the next GC the BF_EVACUATED flag will be cleared, and the
        // block will be promoted as usual (if anything in it is
        // live).
    }

    p = bd->free;
    bd->free += n;
    return p;
}

/* -----------------------------------------------------------------------------
   Write Barriers
   -------------------------------------------------------------------------- */

/*
   This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
   MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
   is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
   and is put on the mutable list.
*/
void
dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
{
    Capability *cap = regTableToCapability(reg);
    if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
        p->header.info = &stg_MUT_VAR_DIRTY_info;
        recordClosureMutated(cap,p);
    }
}

// Setting a TSO's link field with a write barrier.
// It is *not* necessary to call this function when
//    * setting the link field to END_TSO_QUEUE
//    * putting a TSO on the blackhole_queue
//    * setting the link field of the currently running TSO, as it
//      will already be dirty.
void
setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
{
    if (tso->dirty == 0) {
        tso->dirty = 1;
        recordClosureMutated(cap,(StgClosure*)tso);
    }
    tso->_link = target;
}

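// As setTSOLink, but for the block_info.prev field.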
void
setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
{
    if (tso->dirty == 0) {
        tso->dirty = 1;
        recordClosureMutated(cap,(StgClosure*)tso);
    }
    tso->block_info.prev = target;
}

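// Mark a TSO as dirty, adding it to the mutable list if it wasn't already
// dirty.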
void
dirty_TSO (Capability *cap, StgTSO *tso)
{
    if (tso->dirty == 0) {
        tso->dirty = 1;
        recordClosureMutated(cap,(StgClosure*)tso);
    }
}

void
dirty_STACK (Capability *cap, StgStack *stack)
{
    if (stack->dirty == 0) {
        stack->dirty = 1;
        recordClosureMutated(cap,(StgClosure*)stack);
    }
}

/*
   This is the write barrier for MVARs.  An MVAR_CLEAN object is not
   on the mutable list; a MVAR_DIRTY is.  When written to, a
   MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
   The check for MVAR_CLEAN is inlined at the call site for speed,
   this really does make a difference on concurrency-heavy benchmarks
   such as Chaneneos and cheap-concurrency.
*/
void
dirty_MVAR(StgRegTable *reg, StgClosure *p)
{
    recordClosureMutated(regTableToCapability(reg),p);
}

/* -----------------------------------------------------------------------------
 * Stats and stuff
 * -------------------------------------------------------------------------- */

/* -----------------------------------------------------------------------------
 * updateNurseriesStats()
 *
 * Update the per-cap total_allocated numbers with an approximation of
 * the amount of memory used in each cap's nursery. Also return the
 * total across all caps.
 *
 * Since this update is also performed by clearNurseries(), we only
 * need this function for the final stats when the RTS is shutting down.
 * -------------------------------------------------------------------------- */

lnat
updateNurseriesStats (void)
{
    lnat allocated = 0;
    nat i;

    for (i = 0; i < n_capabilities; i++) {
        int cap_allocated = countOccupied(nurseries[i].blocks);
        capabilities[i].total_allocated += cap_allocated;
        allocated                       += cap_allocated;
    }

    return allocated;
}

lnat
countLargeAllocated (void)
{
    return g0->n_new_large_words;
}

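// Count the number of words in use in a chain of blocks.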
lnat countOccupied (bdescr *bd)
{
    lnat words;

    words = 0;
    for (; bd != NULL; bd = bd->link) {
        ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
        words += bd->free - bd->start;
    }
    return words;
}

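// Live data in a generation: its ordinary blocks plus its large objects.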
lnat genLiveWords (generation *gen)
{
    return gen->n_words + countOccupied(gen->large_objects);
}

lnat genLiveBlocks (generation *gen)
{
    return gen->n_blocks + gen->n_large_blocks;
}

lnat gcThreadLiveWords (nat i, nat g)
{
    lnat words;

    words   = countOccupied(gc_threads[i]->gens[g].todo_bd);
    words  += countOccupied(gc_threads[i]->gens[g].part_list);
    words  += countOccupied(gc_threads[i]->gens[g].scavd_list);

    return words;
}

lnat gcThreadLiveBlocks (nat i, nat g)
{
    lnat blocks;

    blocks  = countBlocks(gc_threads[i]->gens[g].todo_bd);
    blocks += gc_threads[i]->gens[g].n_part_blocks;
    blocks += gc_threads[i]->gens[g].n_scavd_blocks;

    return blocks;
}

// Return an accurate count of the live data in the heap, excluding
// generation 0.
lnat calcLiveWords (void)
{
    nat g;
    lnat live;

    live = 0;
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        live += genLiveWords(&generations[g]);
    }
    return live;
}

lnat calcLiveBlocks (void)
{
    nat g;
    lnat live;

    live = 0;
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        live += genLiveBlocks(&generations[g]);
    }
    return live;
}

/* Determine which generation will be collected next, and approximate
 * the maximum amount of memory that will be required to do the GC,
 * taking into account data that will be copied, and the space needed
 * to store bitmaps and the mark stack.  Note: blocks_needed does not
 * include the blocks in the nursery.
 *
 * Assume: all data currently live will remain live.  Generations
 * that will be collected next time will therefore need twice as many
 * blocks since all the data will be copied.
 */
extern lnat
calcNeeded (rtsBool force_major, lnat *blocks_needed)
{
    lnat needed = 0, blocks;
    nat g, N;
    generation *gen;
    if (force_major) {
        N = RtsFlags.GcFlags.generations - 1;
    } else {
        N = 0;
    }

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gen = &generations[g];

        blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
               + gen->n_large_blocks;

        // we need at least this much space
        needed += blocks;

        // are we collecting this gen?
        if (g == 0 || // always collect gen 0
            blocks > gen->max_blocks)
        {
            N = stg_max(N,g);

            // we will collect this gen next time
            if (gen->mark) {
                //  bitmap:
                needed += gen->n_blocks / BITS_IN(W_);
                //  mark stack:
                needed += gen->n_blocks / 100;
            }
            if (gen->compact) {
                continue; // no additional space needed for compaction
            } else {
                needed += gen->n_blocks;
            }
        }
    }

    if (blocks_needed != NULL) {
        *blocks_needed = needed;
    }
    return N;
}

/* ----------------------------------------------------------------------------
   Executable memory

   Executable memory must be managed separately from non-executable
   memory.  Most OSs these days require you to jump through hoops to
   dynamically allocate executable memory, due to various security
   measures.

   Here we provide a small memory allocator for executable memory.
   Memory is managed with a page granularity; we allocate linearly
   in the page, and when the page is emptied (all objects on the page
   are free) we free the page again, not forgetting to make it
   non-executable.

   TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
         the linker cannot use allocateExec for loading object code files
         on Windows. Once allocateExec can handle larger objects, the linker
         should be modified to use allocateExec instead of VirtualAlloc.
   ------------------------------------------------------------------------- */

#if defined(linux_HOST_OS)

// On Linux we need to use libffi for allocating executable memory,
// because it knows how to work around the restrictions put in place
// by SELinux.

void *allocateExec (nat bytes, void **exec_ret)
{
    void **ret, **exec;
    ACQUIRE_SM_LOCK;
    ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
    RELEASE_SM_LOCK;
    if (ret == NULL) return ret;
    *ret = ret; // save the address of the writable mapping, for freeExec().
    *exec_ret = exec + 1;
    return (ret + 1);
}

// freeExec gets passed the executable address, not the writable address. 
void freeExec (void *addr)
{
    void *writable;
    writable = *((void**)addr - 1);
    ACQUIRE_SM_LOCK;
    ffi_closure_free (writable);
    RELEASE_SM_LOCK
}

#else

void *allocateExec (nat bytes, void **exec_ret)
{
    void *ret;
    nat n;

    ACQUIRE_SM_LOCK;

    // round up to words.
    n  = (bytes + sizeof(W_) + 1) / sizeof(W_);

    if (n+1 > BLOCK_SIZE_W) {
        barf("allocateExec: can't handle large objects");
    }

    if (exec_block == NULL || 
        exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
        bdescr *bd;
        lnat pagesize = getPageSize();
        bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
        debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
        bd->gen_no = 0;
        bd->flags = BF_EXEC;
        bd->link = exec_block;
        if (exec_block != NULL) {
            exec_block->u.back = bd;
        }
        bd->u.back = NULL;
        setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
        exec_block = bd;
    }
    *(exec_block->free) = n;  // store the size of this chunk
    exec_block->gen_no += n;  // gen_no stores the number of words allocated
    ret = exec_block->free + 1;
    exec_block->free += n + 1;

    RELEASE_SM_LOCK
    *exec_ret = ret;
    return ret;
}

void freeExec (void *addr)
{
    StgPtr p = (StgPtr)addr - 1;
    bdescr *bd = Bdescr((StgPtr)p);

    if ((bd->flags & BF_EXEC) == 0) {
        barf("freeExec: not executable");
    }

    if (*(StgPtr)p == 0) {
        barf("freeExec: already free?");
    }

    ACQUIRE_SM_LOCK;

    bd->gen_no -= *(StgPtr)p;
    *(StgPtr)p = 0;

    if (bd->gen_no == 0) {
        // Free the block if it is empty, but not if it is the block at
        // the head of the queue.
        if (bd != exec_block) {
            debugTrace(DEBUG_gc, "free exec block %p", bd->start);
            dbl_link_remove(bd, &exec_block);
            setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
            freeGroup(bd);
        } else {
            bd->free = bd->start;
        }
    }

    RELEASE_SM_LOCK
}    

#endif /* linux_HOST_OS */

#ifdef DEBUG

// handy function for use in gdb, because Bdescr() is inlined.
extern bdescr *_bdescr (StgPtr p);