/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2008
 *
 * Storage manager front end
 *
 * Documentation on the architecture of the Storage Manager can be
 * found in the online commentary:
 *
 *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"

#include "Storage.h"
#include "RtsUtils.h"
#include "Stats.h"
#include "BlockAlloc.h"
#include "Weak.h"
#include "Sanity.h"
#include "Arena.h"
#include "Capability.h"
#include "Schedule.h"
#include "RetainerProfile.h"	// for counting memory blocks (memInventory)
#include "OSMem.h"
#include "Trace.h"
#include "GC.h"
#include "Evac.h"

#include <string.h>

#include "ffi.h"

/*
 * All these globals require sm_mutex to access in THREADED_RTS mode.
 */
StgClosure    *caf_list         = NULL;
StgClosure    *revertible_caf_list = NULL;
rtsBool       keepCAFs;

nat alloc_blocks_lim;    /* GC if n_large_blocks in any nursery
                          * reaches this. */

bdescr *exec_block;

generation *generations = NULL;	/* all the generations */
generation *g0		= NULL; /* generation 0, for convenience */
generation *oldest_gen  = NULL; /* oldest generation, for convenience */

nursery *nurseries = NULL;     /* array of nurseries, size == n_capabilities */

#ifdef THREADED_RTS
/*
 * Storage manager mutex:  protects all the above state from
 * simultaneous access by two STG threads.
 */
Mutex sm_mutex;
#endif

static void allocNurseries ( void );

static void
initGeneration (generation *gen, int g)
{
    gen->no = g;
    gen->collections = 0;
    gen->par_collections = 0;
    gen->failed_promotions = 0;
    gen->max_blocks = 0;
    gen->blocks = NULL;
    gen->n_blocks = 0;
    gen->n_words = 0;
    gen->live_estimate = 0;
    gen->old_blocks = NULL;
    gen->n_old_blocks = 0;
    gen->large_objects = NULL;
    gen->n_large_blocks = 0;
    gen->n_new_large_blocks = 0;
    gen->mut_list = allocBlock();
    gen->scavenged_large_objects = NULL;
    gen->n_scavenged_large_blocks = 0;
    gen->mark = 0;
    gen->compact = 0;
    gen->bitmap = NULL;
#ifdef THREADED_RTS
    initSpinLock(&gen->sync_large_objects);
#endif
    gen->threads = END_TSO_QUEUE;
    gen->old_threads = END_TSO_QUEUE;
}

void
initStorage( void )
{
  nat g, n;

  if (generations != NULL) {
      // multi-init protection
      return;
  }

  initMBlocks();

  /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
   * doing something reasonable.
   */
  /* We use the NOT_NULL variant or gcc warns that the test is always true */
  ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
  ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
  ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
  
  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.heapSizeSuggestion >
      RtsFlags.GcFlags.maxHeapSize) {
    RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
  }

  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.minAllocAreaSize > 
      RtsFlags.GcFlags.maxHeapSize) {
      errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
      RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
  }

  initBlockAllocator();

#if defined(THREADED_RTS)
  initMutex(&sm_mutex);
#endif

  ACQUIRE_SM_LOCK;

  /* allocate generation info array */
  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
					     * sizeof(struct generation_),
					     "initStorage: gens");

  /* Initialise all generations */
  for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
      initGeneration(&generations[g], g);
  }

  /* A couple of convenience pointers */
  g0 = &generations[0];
  oldest_gen = &generations[RtsFlags.GcFlags.generations-1];

  nurseries = stgMallocBytes(n_capabilities * sizeof(struct nursery_),
                             "initStorage: nurseries");

  /* Set up the destination pointers in each younger gen. step */
  for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
      generations[g].to = &generations[g+1];
  }
  oldest_gen->to = oldest_gen;

  /* The oldest generation has one step. */
  if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
      if (RtsFlags.GcFlags.generations == 1) {
	  errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
      } else {
	  oldest_gen->mark = 1;
          if (RtsFlags.GcFlags.compact)
              oldest_gen->compact = 1;
      }
  }

  generations[0].max_blocks = 0;

  /* The allocation area.  Policy: keep the allocation area
   * small to begin with, even if we have a large suggested heap
   * size.  Reason: we're going to do a major collection first, and we
   * don't want it to be a big one.  This vague idea is borne out by 
   * rigorous experimental evidence.
   */
  allocNurseries();

  weak_ptr_list = NULL;
  caf_list = END_OF_STATIC_LIST;
  revertible_caf_list = END_OF_STATIC_LIST;

  /* initialise the allocate() interface */
  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;

  exec_block = NULL;

#ifdef THREADED_RTS
  initSpinLock(&gc_alloc_block_sync);
  whitehole_spin = 0;
#endif

  N = 0;

  // allocate a block for each mut list
  for (n = 0; n < n_capabilities; n++) {
      for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
          capabilities[n].mut_lists[g] = allocBlock();
      }
  }

  initGcThreads();

  IF_DEBUG(gc, statDescribeGens());

  RELEASE_SM_LOCK;
}

void
exitStorage (void)
{
    stat_exit(calcAllocated());
}

void
freeStorage (void)
{
    stgFree(generations);
    freeAllMBlocks();
#if defined(THREADED_RTS)
    closeMutex(&sm_mutex);
#endif
    stgFree(nurseries);
    freeGcThreads();
}

/* -----------------------------------------------------------------------------
   CAF management.

   The entry code for every CAF does the following:

      - builds a BLACKHOLE in the heap
      - pushes an update frame pointing to the BLACKHOLE
      - calls newCaf, below
      - updates the CAF with a static indirection to the BLACKHOLE

   Why do we build a BLACKHOLE in the heap rather than just updating
   the thunk directly?  It's so that we only need one kind of update
   frame - otherwise we'd need a static version of the update frame too.

   newCaf() does the following:
       
      - it puts the CAF on the oldest generation's mutable list.
        This is so that we treat the CAF as a root when collecting
	younger generations.

   For GHCI, we have additional requirements when dealing with CAFs:

      - we must *retain* all dynamically-loaded CAFs ever entered,
        just in case we need them again.
      - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.

      To do this, we use an additional CAF list.  When newCaf() is
      called on a dynamically-loaded CAF, we add it to the CAF list
      instead of the old-generation mutable list, and save away its
      old info pointer (in caf->saved_info) for later reversion.

      To revert all the CAFs, we traverse the CAF list and reset the
      info pointer to caf->saved_info, then throw away the CAF list.
      (see GC.c:revertCAFs()).

      -- SDM 29/1/01

   -------------------------------------------------------------------------- */
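
/* Illustrative sketch only (added commentary, not part of the original
 * source): in rough pseudo-C, the generated CAF entry code described above
 * amounts to something like
 *
 *     StgClosure *bh = allocate(cap, sizeofW(StgInd));  // heap BLACKHOLE
 *     push_update_frame(bh);
 *     newCAF(&cap->r, caf);                             // register with the SM
 *     ((StgIndStatic *)caf)->indirectee = bh;           // static indirection
 *     // ... then evaluate the CAF's body, updating bh with the result
 *
 * push_update_frame is used loosely here; the real sequence is emitted by
 * the code generator as Cmm, not written in C.
 */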

void
newCAF(StgRegTable *reg, StgClosure* caf)
{
  if(keepCAFs)
  {
    // HACK:
    // If we are in GHCi _and_ we are using dynamic libraries,
    // then we can't redirect newCAF calls to newDynCAF (see below),
    // so we make newCAF behave almost like newDynCAF.
    // The dynamic libraries might be used by both the interpreted
    // program and GHCi itself, so they must not be reverted.
    // This also means that in GHCi with dynamic libraries, CAFs are not
    // garbage collected. If this turns out to be a problem, we could
    // do another hack here and do an address range test on caf to figure
    // out whether it is from a dynamic library.
    ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;

    ACQUIRE_SM_LOCK; // caf_list is global, locked by sm_mutex
    ((StgIndStatic *)caf)->static_link = caf_list;
    caf_list = caf;
    RELEASE_SM_LOCK;
  }
  else
  {
    // Put this CAF on the mutable list for the old generation.
    ((StgIndStatic *)caf)->saved_info = NULL;
    if (oldest_gen->no != 0) {
        recordMutableCap(caf, regTableToCapability(reg), oldest_gen->no);
    }
  }
}

// External API for setting the keepCAFs flag. see #3900.
void
setKeepCAFs (void)
{
    keepCAFs = 1;
}

// An alternate version of newCaf which is used for dynamically loaded
// object code in GHCi.  In this case we want to retain *all* CAFs in
// the object code, because they might be demanded at any time from an
// expression evaluated on the command line.
// Also, GHCi might want to revert CAFs, so we add these to the
// revertible_caf_list.
//
// The linker hackily arranges that references to newCaf from dynamic
// code end up pointing to newDynCAF.
void
newDynCAF (StgRegTable *reg STG_UNUSED, StgClosure *caf)
{
    ACQUIRE_SM_LOCK;

    ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
    ((StgIndStatic *)caf)->static_link = revertible_caf_list;
    revertible_caf_list = caf;

    RELEASE_SM_LOCK;
}

/* -----------------------------------------------------------------------------
   Nursery management.
   -------------------------------------------------------------------------- */

static bdescr *
allocNursery (bdescr *tail, nat blocks)
{
    bdescr *bd;
    nat i, n;

    // We allocate the nursery as a single contiguous block and then
    // divide it into single blocks manually.  This way we guarantee
    // that the nursery blocks are adjacent, so that the processor's
    // automatic prefetching works across nursery blocks.  This is a
    // tiny optimisation (~0.5%), but it's free.

    while (blocks > 0) {
        n = stg_min(blocks, BLOCKS_PER_MBLOCK);
        blocks -= n;

        bd = allocGroup(n);
        for (i = 0; i < n; i++) {
            initBdescr(&bd[i], g0, g0);

            bd[i].blocks = 1;
            bd[i].flags = 0;

            if (i > 0) {
                bd[i].u.back = &bd[i-1];
            }

            if (i+1 < n) {
                bd[i].link = &bd[i+1];
            } else {
                bd[i].link = tail;
                if (tail != NULL) {
                    tail->u.back = &bd[i];
                }
            }

            bd[i].free = bd[i].start;
        }

        tail = &bd[0];
    }

    return &bd[0];
}

static void
assignNurseriesToCapabilities (void)
{
    nat i;

    for (i = 0; i < n_capabilities; i++) {
	capabilities[i].r.rNursery        = &nurseries[i];
	capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
	capabilities[i].r.rCurrentAlloc   = NULL;
    }
}

static void
allocNurseries( void )
{ 
    nat i;

    for (i = 0; i < n_capabilities; i++) {
	nurseries[i].blocks =
            allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
	nurseries[i].n_blocks =
            RtsFlags.GcFlags.minAllocAreaSize;
    }
    assignNurseriesToCapabilities();
}
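
/* Note (added commentary, not from the original source): each Capability
 * gets its own nursery of RtsFlags.GcFlags.minAllocAreaSize blocks, so the
 * total initial allocation area is n_capabilities * minAllocAreaSize blocks.
 * For example, assuming the default -A512k and 4KB blocks, each nursery is
 * 128 blocks, and running with +RTS -N4 gives 4 * 128 = 512 blocks (~2MB).
 */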
void
resetNurseries( void )
{
    nat i;
    bdescr *bd;

    for (i = 0; i < n_capabilities; i++) {
	for (bd = nurseries[i].blocks; bd; bd = bd->link) {
	    bd->free = bd->start;
	    ASSERT(bd->gen_no == 0);
	    ASSERT(bd->gen == g0);
	    IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
	}
    }
    assignNurseriesToCapabilities();
}

lnat
countNurseryBlocks (void)
{
    nat i;
    lnat blocks = 0;

    for (i = 0; i < n_capabilities; i++) {
	blocks += nurseries[i].n_blocks;
    }
    return blocks;
}

static void
resizeNursery ( nursery *nursery, nat blocks )
{
  bdescr *bd;
  nat nursery_blocks;

  nursery_blocks = nursery->n_blocks;
  if (nursery_blocks == blocks) return;

  if (nursery_blocks < blocks) {
      debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
		 blocks);
    nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
  } 
  else {
    bdescr *next_bd;
    
    debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
	       blocks);

    bd = nursery->blocks;
    while (nursery_blocks > blocks) {
	next_bd = bd->link;
	next_bd->u.back = NULL;
	nursery_blocks -= bd->blocks; // might be a large block
	freeGroup(bd);
	bd = next_bd;
    }
    nursery->blocks = bd;
    // might have gone just under, by freeing a large block, so make
    // up the difference.
    if (nursery_blocks < blocks) {
	nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
    }
  }
  
  nursery->n_blocks = blocks;
  ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
}

// 
// Resize each of the nurseries to the specified size.
//
void
resizeNurseriesFixed (nat blocks)
{
    nat i;
    for (i = 0; i < n_capabilities; i++) {
	resizeNursery(&nurseries[i], blocks);
    }
}

// 
// Resize the nurseries to the total specified size.
//
void
resizeNurseries (nat blocks)
{
    // If there are multiple nurseries, then we just divide the number
    // of available blocks between them.
    resizeNurseriesFixed(blocks / n_capabilities);
}

/* -----------------------------------------------------------------------------
   move_TSO is called to update the TSO structure after it has been
   moved from one place to another.
   -------------------------------------------------------------------------- */

void
move_TSO (StgTSO *src, StgTSO *dest)
{
    ptrdiff_t diff;

    // relocate the stack pointer... 
    diff = (StgPtr)dest - (StgPtr)src; // In *words* 
    dest->sp = (StgPtr)dest->sp + diff;
}
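
/* Worked example (added commentary, not from the original source): diff is
 * measured in StgWords because both pointers are cast to StgPtr.  If a TSO
 * moves from address 0x1000 to 0x1a00 on a 64-bit machine, diff is
 * (0x1a00 - 0x1000) / 8 = 320 words, and dest->sp ends up the same number
 * of words past dest as src->sp was past src.
 */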

/* -----------------------------------------------------------------------------
   split N blocks off the front of the given bdescr, returning the
   new block group.  We add the remainder to the large_blocks list
   in the same step as the original block.
   -------------------------------------------------------------------------- */

bdescr *
splitLargeBlock (bdescr *bd, nat blocks)
{
    bdescr *new_bd;

    ACQUIRE_SM_LOCK;

    ASSERT(countBlocks(bd->gen->large_objects) == bd->gen->n_large_blocks);

    // subtract the original number of blocks from the counter first
    bd->gen->n_large_blocks -= bd->blocks;

    new_bd = splitBlockGroup (bd, blocks);
    initBdescr(new_bd, bd->gen, bd->gen->to);
    new_bd->flags   = BF_LARGE | (bd->flags & BF_EVACUATED); 
    // if new_bd is in an old generation, we have to set BF_EVACUATED
    new_bd->free    = bd->free;
    dbl_link_onto(new_bd, &bd->gen->large_objects);

    ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W);

    // add the new number of blocks to the counter.  Due to the gaps
    // for block descriptors, new_bd->blocks + bd->blocks might not be
    // equal to the original bd->blocks, which is why we do it this way.
    bd->gen->n_large_blocks += bd->blocks + new_bd->blocks;

    ASSERT(countBlocks(bd->gen->large_objects) == bd->gen->n_large_blocks);

    RELEASE_SM_LOCK;

    return new_bd;
}
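
/* Illustrative sketch only (added commentary, not from the original source):
 * a caller that wants to keep only the first `blocks` blocks of a large
 * object and give the rest back to the generation might do, roughly,
 *
 *     bdescr *rest = splitLargeBlock(bd, blocks);
 *     // bd now covers exactly `blocks` blocks; `rest` holds the remainder
 *     // and has already been linked onto bd->gen->large_objects.
 */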

/* -----------------------------------------------------------------------------
   allocate()

   This allocates memory in the current thread - it is intended for
   use primarily from STG-land where we have a Capability.  It is
   cheaper than a globally-locked allocation because it doesn't require
   taking the sm_mutex lock in the common case.

   Memory is allocated directly from the nursery if possible (but not
   from the current nursery block, so as not to interfere with
   Hp/HpLim).
   -------------------------------------------------------------------------- */

StgPtr
allocate (Capability *cap, lnat n)
{
    bdescr *bd;
    StgPtr p;

    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
	lnat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;

        // Attempting to allocate an object larger than maxHeapSize
        // should definitely be disallowed.  (bug #1791)
        if (RtsFlags.GcFlags.maxHeapSize > 0 && 
            req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
            heapOverflow();
            // heapOverflow() doesn't exit (see #2592), but we aren't
            // in a position to do a clean shutdown here: we
            // either have to allocate the memory or exit now.
            // Allocating the memory would be bad, because the user
            // has requested that we not exceed maxHeapSize, so we
            // just exit.
	    stg_exit(EXIT_HEAPOVERFLOW);
        }

        ACQUIRE_SM_LOCK
	bd = allocGroup(req_blocks);
	dbl_link_onto(bd, &g0->large_objects);
	g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
	g0->n_new_large_blocks += bd->blocks;
        RELEASE_SM_LOCK;
        initBdescr(bd, g0, g0);
	bd->flags = BF_LARGE;
	bd->free = bd->start + n;
	return bd->start;
    }

    /* small allocation (<LARGE_OBJECT_THRESHOLD) */

    TICK_ALLOC_HEAP_NOCTR(n);
    CCS_ALLOC(CCCS,n);
    
    bd = cap->r.rCurrentAlloc;
    if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
        
        // The CurrentAlloc block is full, we need to find another
        // one.  First, we try taking the next block from the
        // nursery:
        bd = cap->r.rCurrentNursery->link;
        
        if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
            // The nursery is empty, or the next block is already
            // full: allocate a fresh block (we can't fail here).
            ACQUIRE_SM_LOCK;
            bd = allocBlock();
            cap->r.rNursery->n_blocks++;
            RELEASE_SM_LOCK;
            initBdescr(bd, g0, g0);
            bd->flags = 0;
            // If we had to allocate a new block, then we'll GC
            // pretty quickly now, because MAYBE_GC() will
            // notice that CurrentNursery->link is NULL.
        } else {
            // we have a block in the nursery: take it and put
            // it at the *front* of the nursery list, and use it
            // to allocate() from.
            cap->r.rCurrentNursery->link = bd->link;
            if (bd->link != NULL) {
                bd->link->u.back = cap->r.rCurrentNursery;
            }
        }
        dbl_link_onto(bd, &cap->r.rNursery->blocks);
        cap->r.rCurrentAlloc = bd;
        IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
    }
    p = bd->free;
    bd->free += n;

    IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
    return p;
}
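
/* Illustrative sketch only (added commentary, not from the original source):
 * a typical caller allocates space for a closure and then fills in its
 * header, e.g. roughly
 *
 *     StgMutVar *mv = (StgMutVar *)allocate(cap, sizeofW(StgMutVar));
 *     SET_HDR(mv, &stg_MUT_VAR_DIRTY_info, CCS_SYSTEM);
 *     mv->var = init;
 *
 * where `init` stands for the initial contents; most real callers are
 * generated Cmm code and the primops.
 */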

/* ---------------------------------------------------------------------------
   Allocate a fixed/pinned object.

   We allocate small pinned objects into a single block, allocating a
   new block when the current one overflows.  The block is chained
   onto the large_object_list of generation 0.

   NOTE: The GC can't in general handle pinned objects.  This
   interface is only safe to use for ByteArrays, which have no
   pointers and don't require scavenging.  It works because the
   block's descriptor has the BF_LARGE flag set, so the block is
   treated as a large object and chained onto various lists, rather
   than the individual objects being copied.  However, when it comes
   to scavenge the block, the GC will only scavenge the first object.
   The reason is that the GC can't linearly scan a block of pinned
   objects at the moment (doing so would require using the
   mostly-copying techniques).  But since we're restricting ourselves
   to pinned ByteArrays, not scavenging is ok.

   This function is called by newPinnedByteArray# which immediately
   fills the allocated memory with a MutableByteArray#.
   ------------------------------------------------------------------------- */

StgPtr
allocatePinned (Capability *cap, lnat n)
{
    StgPtr p;
    bdescr *bd;

    // If the request is for a large object, then allocate()
    // will give us a pinned object anyway.
    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
	p = allocate(cap, n);
        Bdescr(p)->flags |= BF_PINNED;
        return p;
    }

    TICK_ALLOC_HEAP_NOCTR(n);
    CCS_ALLOC(CCCS,n);

    bd = cap->pinned_object_block;

    // If we don't have a block of pinned objects yet, or the current
    // one isn't large enough to hold the new object, allocate a new one.
    if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
        ACQUIRE_SM_LOCK;
	cap->pinned_object_block = bd = allocBlock();
	dbl_link_onto(bd, &g0->large_objects);
	g0->n_large_blocks++;
	g0->n_new_large_blocks++;
        RELEASE_SM_LOCK;
        initBdescr(bd, g0, g0);
	bd->flags  = BF_PINNED | BF_LARGE;
	bd->free   = bd->start;
    }

    p = bd->free;
    bd->free += n;
    return p;
}
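
/* Illustrative sketch only (added commentary, not from the original source):
 * newPinnedByteArray# boils down to something like
 *
 *     W_ words = sizeofW(StgArrWords) + ROUNDUP_BYTES_TO_WDS(bytes);
 *     StgPtr  p = allocatePinned(cap, words);
 *     SET_HDR((StgClosure *)p, &stg_ARR_WORDS_info, CCS_SYSTEM);
 *
 * (the real code lives in the Cmm primops and also records the payload size
 * in the StgArrWords header).
 */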

/* -----------------------------------------------------------------------------
   Write Barriers
   -------------------------------------------------------------------------- */

/*
   This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
   MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
   is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
   and is put on the mutable list.
*/
void
dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
{
    Capability *cap = regTableToCapability(reg);
    if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
	p->header.info = &stg_MUT_VAR_DIRTY_info;
        recordClosureMutated(cap,p);
    }
}
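
/* Illustrative sketch only (added commentary, not from the original source):
 * writeMutVar# conceptually performs
 *
 *     ((StgMutVar *)p)->var = new_value;
 *     dirty_MUT_VAR(reg, p);
 *
 * so a clean MUT_VAR is added to the mutable list the first time it is
 * written between two GCs.
 */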

// Setting a TSO's link field with a write barrier.
// It is *not* necessary to call this function when
//    * setting the link field to END_TSO_QUEUE
//    * putting a TSO on the blackhole_queue
//    * setting the link field of the currently running TSO, as it
//      will already be dirty.
void
setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
{
    if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
        tso->flags |= TSO_LINK_DIRTY;
        recordClosureMutated(cap,(StgClosure*)tso);
    }
    tso->_link = target;
}

void
setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
{
    if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
        tso->flags |= TSO_LINK_DIRTY;
        recordClosureMutated(cap,(StgClosure*)tso);
    }
    tso->block_info.prev = target;
}

void
dirty_TSO (Capability *cap, StgTSO *tso)
{
    if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
        recordClosureMutated(cap,(StgClosure*)tso);
    }
    tso->dirty = 1;
}

/*
   This is the write barrier for MVARs.  An MVAR_CLEAN objects is not
   on the mutable list; a MVAR_DIRTY is.  When written to, a
   MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
   The check for MVAR_CLEAN is inlined at the call site for speed,
   this really does make a difference on concurrency-heavy benchmarks
   such as Chameneos and cheap-concurrency.
*/
void
dirty_MVAR(StgRegTable *reg, StgClosure *p)
{
    recordClosureMutated(regTableToCapability(reg),p);
}
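
/* Illustrative sketch only (added commentary, not from the original source):
 * the inlined check mentioned above looks roughly like
 *
 *     if (mvar->header.info == &stg_MVAR_CLEAN_info) {
 *         dirty_MVAR(reg, (StgClosure *)mvar);
 *     }
 *
 * i.e. the barrier call is only made the first time a clean MVAR is mutated.
 */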

/* -----------------------------------------------------------------------------
 * Stats and stuff
 * -------------------------------------------------------------------------- */

/* -----------------------------------------------------------------------------
 * calcAllocated()
 *
 * Approximate how much we've allocated: number of blocks in the
 * nursery + blocks allocated via allocate() - unused nursery blocks.
 * This leaves a little slop at the end of each block.
 * -------------------------------------------------------------------------- */

lnat
calcAllocated( void )
{
  nat allocated;
  bdescr *bd;
  nat i;

  allocated = countNurseryBlocks() * BLOCK_SIZE_W;

  for (i = 0; i < n_capabilities; i++) {
      Capability *cap;
      for ( bd = capabilities[i].r.rCurrentNursery->link;
	    bd != NULL; bd = bd->link ) {
	  allocated -= BLOCK_SIZE_W;
      }
      cap = &capabilities[i];
      if (cap->r.rCurrentNursery->free < 
	  cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
	  allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
	      - cap->r.rCurrentNursery->free;
      }
      if (cap->pinned_object_block != NULL) {
          allocated -= (cap->pinned_object_block->start + BLOCK_SIZE_W) - 
              cap->pinned_object_block->free;
      }
  }

  allocated += g0->n_new_large_blocks * BLOCK_SIZE_W;

  return allocated;
}  

/* Approximate the amount of live data in the heap.  To be called just
 * after garbage collection (see GarbageCollect()).
 */
lnat calcLiveBlocks (void)
{
  nat g;
  lnat live = 0;
  generation *gen;

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      /* approximate amount of live data (doesn't take into account slop
       * at end of each block).
       */
      gen = &generations[g];
      live += gen->n_large_blocks + gen->n_blocks;
  }
  return live;
}

lnat countOccupied (bdescr *bd)
{
    lnat words;

    words = 0;
    for (; bd != NULL; bd = bd->link) {
        ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
        words += bd->free - bd->start;
    }
    return words;
}

// Return an accurate count of the live data in the heap, excluding
// generation 0.
lnat calcLiveWords (void)
{
    nat g;
    lnat live;
    generation *gen;
    
    live = 0;
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gen = &generations[g];
        live += gen->n_words + countOccupied(gen->large_objects);
    }
    return live;
}

/* Approximate the number of blocks that will be needed at the next
 * garbage collection.
 *
 * Assume: all data currently live will remain live.  Generations
 * that will be collected next time will therefore need twice as many
 * blocks since all the data will be copied.
 */
extern lnat 
calcNeeded(void)
{
    lnat needed = 0;
    nat g;
    generation *gen;
    
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gen = &generations[g];

        // we need at least this much space
        needed += gen->n_blocks + gen->n_large_blocks;
        
        // any additional space needed to collect this gen next time?
        if (g == 0 || // always collect gen 0
            (gen->n_blocks + gen->n_large_blocks > gen->max_blocks)) {
            // we will collect this gen next time
            if (gen->mark) {
                //  bitmap:
                needed += gen->n_blocks / BITS_IN(W_);
                //  mark stack:
                needed += gen->n_blocks / 100;
            }
            if (gen->compact) {
                continue; // no additional space needed for compaction
            } else {
                needed += gen->n_blocks;
            }
        }
    }
    return needed;
}
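
/* Worked example (added commentary, not from the original source): a copying
 * generation holding 100 blocks that has exceeded its max_blocks contributes
 * 100 (current data) + 100 (copy space) = 200 blocks to `needed`; if the same
 * generation were marked/compacted it would instead contribute 100 plus the
 * small bitmap and mark-stack overhead computed above.
 */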

/* ----------------------------------------------------------------------------
   Executable memory

   Executable memory must be managed separately from non-executable
   memory.  Most OSs these days require you to jump through hoops to
   dynamically allocate executable memory, due to various security
   measures.

   Here we provide a small memory allocator for executable memory.
   Memory is managed with a page granularity; we allocate linearly
   in the page, and when the page is emptied (all objects on the page
   are free) we free the page again, not forgetting to make it
   non-executable.

   TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
         the linker cannot use allocateExec for loading object code files
         on Windows. Once allocateExec can handle larger objects, the linker
         should be modified to use allocateExec instead of VirtualAlloc.
   ------------------------------------------------------------------------- */
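
/* Illustrative sketch only (added commentary, not from the original source):
 * a typical client, such as the adjustor/trampoline machinery, pairs the two
 * entry points like this:
 *
 *     void *exec_addr;
 *     void *wr_addr = allocateExec(code_size, &exec_addr);
 *     memcpy(wr_addr, code, code_size);   // write through the writable alias
 *     // ... later, jump to exec_addr ...
 *     freeExec(exec_addr);                // free via the executable address
 */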

#if defined(linux_HOST_OS)

// On Linux we need to use libffi for allocating executable memory,
// because it knows how to work around the restrictions put in place
// by SELinux.

void *allocateExec (nat bytes, void **exec_ret)
{
    void **ret, **exec;
    ACQUIRE_SM_LOCK;
    ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
    RELEASE_SM_LOCK;
    if (ret == NULL) return ret;
    *ret = ret; // save the address of the writable mapping, for freeExec().
    *exec_ret = exec + 1;
    return (ret + 1);
}

// freeExec gets passed the executable address, not the writable address. 
void freeExec (void *addr)
{
    void *writable;
    writable = *((void**)addr - 1);
    ACQUIRE_SM_LOCK;
    ffi_closure_free (writable);
    RELEASE_SM_LOCK
}

#else

void *allocateExec (nat bytes, void **exec_ret)
{
    void *ret;
    nat n;

    ACQUIRE_SM_LOCK;

    // round up to words.
    n  = (bytes + sizeof(W_) + 1) / sizeof(W_);

    if (n+1 > BLOCK_SIZE_W) {
	barf("allocateExec: can't handle large objects");
    }

    if (exec_block == NULL || 
	exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
	bdescr *bd;
	lnat pagesize = getPageSize();
	bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
	debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
	bd->gen_no = 0;
	bd->flags = BF_EXEC;
	bd->link = exec_block;
	if (exec_block != NULL) {
	    exec_block->u.back = bd;
	}
	bd->u.back = NULL;
	setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
	exec_block = bd;
    }
    *(exec_block->free) = n;  // store the size of this chunk
    exec_block->gen_no += n;  // gen_no stores the number of words allocated
    ret = exec_block->free + 1;
    exec_block->free += n + 1;

    RELEASE_SM_LOCK
    *exec_ret = ret;
    return ret;
}

void freeExec (void *addr)
{
    StgPtr p = (StgPtr)addr - 1;
    bdescr *bd = Bdescr((StgPtr)p);

    if ((bd->flags & BF_EXEC) == 0) {
	barf("freeExec: not executable");
    }

    if (*(StgPtr)p == 0) {
	barf("freeExec: already free?");
    }

    ACQUIRE_SM_LOCK;

    bd->gen_no -= *(StgPtr)p;
    *(StgPtr)p = 0;

    if (bd->gen_no == 0) {
        // Free the block if it is empty, but not if it is the block at
        // the head of the queue.
        if (bd != exec_block) {
            debugTrace(DEBUG_gc, "free exec block %p", bd->start);
            dbl_link_remove(bd, &exec_block);
            setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
            freeGroup(bd);
        } else {
            bd->free = bd->start;
        }
    }

    RELEASE_SM_LOCK
}    

#endif /* linux_HOST_OS */

#ifdef DEBUG

// handy function for use in gdb, because Bdescr() is inlined.
extern bdescr *_bdescr( StgPtr p );

bdescr *
_bdescr( StgPtr p )
{
    return Bdescr(p);
}

#endif