/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Storage manager front end
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
#include "Stats.h"
#include "Hooks.h"
#include "BlockAlloc.h"
#include "MBlock.h"
#include "Weak.h"
#include "Sanity.h"
#include "Arena.h"
#include "OSThreads.h"
#include "Capability.h"
#include "Storage.h"
#include "Schedule.h"
#include "RetainerProfile.h"	// for counting memory blocks (memInventory)
#include "OSMem.h"
#include "Trace.h"

#include <stdlib.h>
#include <string.h>

/* 
 * All these globals require sm_mutex to access in THREADED_RTS mode.
 */
StgClosure    *caf_list         = NULL;
StgClosure    *revertible_caf_list = NULL;
rtsBool       keepCAFs;

bdescr *small_alloc_list;	/* allocate()d small objects */
bdescr *pinned_object_block;    /* allocate pinned objects into this block */
nat alloc_blocks;		/* number of allocate()d blocks since GC */
nat alloc_blocks_lim;		/* approximate limit on alloc_blocks */

StgPtr alloc_Hp    = NULL;	/* next free byte in small_alloc_list */
StgPtr alloc_HpLim = NULL;	/* end of block at small_alloc_list   */

generation *generations = NULL;	/* all the generations */
generation *g0		= NULL; /* generation 0, for convenience */
generation *oldest_gen  = NULL; /* oldest generation, for convenience */
step *g0s0 		= NULL; /* generation 0, step 0, for convenience */

ullong total_allocated = 0;	/* total memory allocated during run */

nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS */

#ifdef THREADED_RTS
/*
 * Storage manager mutex:  protects all the above state from
 * simultaneous access by two STG threads.
 */
Mutex sm_mutex;
/*
 * This mutex is used by atomicModifyMutVar# only
 */
Mutex atomic_modify_mutvar_mutex;
#endif


/*
 * Forward references
 */
static void *stgAllocForGMP   (size_t size_in_bytes);
static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
static void  stgDeallocForGMP (void *ptr, size_t size);

static void
initStep (step *stp, int g, int s)
{
    stp->no = s;
    stp->blocks = NULL;
    stp->n_blocks = 0;
    stp->old_blocks = NULL;
    stp->n_old_blocks = 0;
    stp->gen = &generations[g];
    stp->gen_no = g;
    stp->hp = NULL;
    stp->hpLim = NULL;
    stp->hp_bd = NULL;
    stp->scavd_hp = NULL;
    stp->scavd_hpLim = NULL;
    stp->scan = NULL;
    stp->scan_bd = NULL;
    stp->large_objects = NULL;
    stp->n_large_blocks = 0;
    stp->new_large_objects = NULL;
    stp->scavenged_large_objects = NULL;
    stp->n_scavenged_large_blocks = 0;
    stp->is_compacted = 0;
    stp->bitmap = NULL;
}

void
initStorage( void )
{
  nat g, s;
  generation *gen;

  if (generations != NULL) {
      // multi-init protection
      return;
  }

  /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
   * doing something reasonable.
   */
  ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info));
  ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
  ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
  
  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.heapSizeSuggestion > 
      RtsFlags.GcFlags.maxHeapSize) {
    RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
  }

  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
      RtsFlags.GcFlags.minAllocAreaSize > 
      RtsFlags.GcFlags.maxHeapSize) {
      errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
      RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
  }

  initBlockAllocator();
  
#if defined(THREADED_RTS)
  initMutex(&sm_mutex);
  initMutex(&atomic_modify_mutvar_mutex);
#endif

  ACQUIRE_SM_LOCK;

  /* allocate generation info array */
  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations 
					     * sizeof(struct generation_),
					     "initStorage: gens");

  /* Initialise all generations */
  for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
    gen = &generations[g];
    gen->no = g;
    gen->mut_list = allocBlock();
    gen->collections = 0;
    gen->failed_promotions = 0;
    gen->max_blocks = 0;
  }

  /* A couple of convenience pointers */
  g0 = &generations[0];
  oldest_gen = &generations[RtsFlags.GcFlags.generations-1];

  /* Allocate step structures in each generation */
  if (RtsFlags.GcFlags.generations > 1) {
    /* Only for multiple-generations */

    /* Oldest generation: one step */
    oldest_gen->n_steps = 1;
    oldest_gen->steps = 
      stgMallocBytes(1 * sizeof(struct step_), "initStorage: last step");

    /* set up all except the oldest generation with 2 steps */
    for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
      generations[g].n_steps = RtsFlags.GcFlags.steps;
      generations[g].steps  = 
	stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct step_),
			"initStorage: steps");
    }
    
  } else {
    /* single generation, i.e. a two-space collector */
    g0->n_steps = 1;
    g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps");
  }

#ifdef THREADED_RTS
  n_nurseries = n_capabilities;
  nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
			      "initStorage: nurseries");
#else
  n_nurseries = 1;
  nurseries = g0->steps; // just share nurseries[0] with g0s0
#endif  

  /* Initialise all steps */
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
	initStep(&generations[g].steps[s], g, s);
    }
  }
  
#ifdef THREADED_RTS
  for (s = 0; s < n_nurseries; s++) {
      initStep(&nurseries[s], 0, s);
  }
#endif
  
  /* Set up the destination pointers in each younger gen. step */
  for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
    for (s = 0; s < generations[g].n_steps-1; s++) {
      generations[g].steps[s].to = &generations[g].steps[s+1];
    }
    generations[g].steps[s].to = &generations[g+1].steps[0];
  }
  oldest_gen->steps[0].to = &oldest_gen->steps[0];
  
#ifdef THREADED_RTS
  for (s = 0; s < n_nurseries; s++) {
      nurseries[s].to = generations[0].steps[0].to;
  }
#endif
  
  /* The oldest generation has one step. */
  if (RtsFlags.GcFlags.compact) {
      if (RtsFlags.GcFlags.generations == 1) {
	  errorBelch("WARNING: compaction is incompatible with -G1; disabled");
      } else {
	  oldest_gen->steps[0].is_compacted = 1;
      }
  }

#ifdef THREADED_RTS
  if (RtsFlags.GcFlags.generations == 1) {
      errorBelch("-G1 is incompatible with -threaded");
      stg_exit(EXIT_FAILURE);
  }
#endif

  /* generation 0 is special: that's the nursery */
  generations[0].max_blocks = 0;

  /* G0S0: the allocation area.  Policy: keep the allocation area
   * small to begin with, even if we have a large suggested heap
   * size.  Reason: we're going to do a major collection first, and we
   * don't want it to be a big one.  This vague idea is borne out by 
   * rigorous experimental evidence.
   */
  g0s0 = &generations[0].steps[0];

  allocNurseries();

  weak_ptr_list = NULL;
  caf_list = NULL;
  revertible_caf_list = NULL;
   
  /* initialise the allocate() interface */
  small_alloc_list = NULL;
  alloc_blocks = 0;
  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;

  /* Tell GNU multi-precision pkg about our custom alloc functions */
  mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);

  IF_DEBUG(gc, statDescribeGens());

  RELEASE_SM_LOCK;
}

void
exitStorage (void)
{
    stat_exit(calcAllocated());
}

void
freeStorage (void)
{
    nat g;

    for(g = 0; g < RtsFlags.GcFlags.generations; g++)
      stgFree(generations[g].steps);
    stgFree(generations);
    freeAllMBlocks();
#if defined(THREADED_RTS)
    closeMutex(&sm_mutex);
    closeMutex(&atomic_modify_mutvar_mutex);
#endif
}
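
/* Illustrative sketch, not part of the original file: the expected lifecycle
 * of this module as seen from RTS startup/shutdown (the real call sites live
 * in RtsStartup.c).  The function name is hypothetical.
 */
static void
exampleStorageLifecycle (void)
{
    initStorage();    // safe to call twice: it returns early if generations != NULL
    /* ... run the mutator: allocate(), allocateLocal(), GC, ... */
    exitStorage();    // report the final allocation statistics
    freeStorage();    // release the generation/step structures and all megablocks
}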

/* -----------------------------------------------------------------------------
   CAF management.

   The entry code for every CAF does the following:
     
      - builds a CAF_BLACKHOLE in the heap
      - pushes an update frame pointing to the CAF_BLACKHOLE
      - invokes UPD_CAF(), which:
          - calls newCaf, below
	  - updates the CAF with a static indirection to the CAF_BLACKHOLE
      
   Why do we build a BLACKHOLE in the heap rather than just updating
   the thunk directly?  It's so that we only need one kind of update
   frame - otherwise we'd need a static version of the update frame too.

   newCaf() does the following:
       
      - it puts the CAF on the oldest generation's mut-once list.
        This is so that we can treat the CAF as a root when collecting
	younger generations.

   For GHCI, we have additional requirements when dealing with CAFs:

      - we must *retain* all dynamically-loaded CAFs ever entered,
        just in case we need them again.
      - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.

      To do this, we use an additional CAF list.  When newCaf() is
      called on a dynamically-loaded CAF, we add it to the CAF list
      instead of the old-generation mutable list, and save away its
      old info pointer (in caf->saved_info) for later reversion.

      To revert all the CAFs, we traverse the CAF list and reset the
      info pointer to caf->saved_info, then throw away the CAF list.
      (see GC.c:revertCAFs()).

      -- SDM 29/1/01

   -------------------------------------------------------------------------- */

void
newCAF(StgClosure* caf)
{
  ACQUIRE_SM_LOCK;

  if(keepCAFs)
  {
    // HACK:
    // If we are in GHCi _and_ we are using dynamic libraries,
    // then we can't redirect newCAF calls to newDynCAF (see below),
    // so we make newCAF behave almost like newDynCAF.
    // The dynamic libraries might be used by both the interpreted
    // program and GHCi itself, so they must not be reverted.
    // This also means that in GHCi with dynamic libraries, CAFs are not
    // garbage collected. If this turns out to be a problem, we could
    // do another hack here and do an address range test on caf to figure
    // out whether it is from a dynamic library.
    ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
    ((StgIndStatic *)caf)->static_link = caf_list;
    caf_list = caf;
  }
  else
  {
    /* Put this CAF on the mutable list for the old generation.
    * This is a HACK - the IND_STATIC closure doesn't really have
    * a mut_link field, but we pretend it has - in fact we re-use
    * the STATIC_LINK field for the time being, because when we
    * come to do a major GC we won't need the mut_link field
    * any more and can use it as a STATIC_LINK.
    */
    ((StgIndStatic *)caf)->saved_info = NULL;
    recordMutableGen(caf, oldest_gen);
  }
  
  RELEASE_SM_LOCK;

#ifdef PAR
  /* If we are PAR or DIST then  we never forget a CAF */
  { globalAddr *newGA;
    //debugBelch("<##> Globalising CAF %08x %s",caf,info_type(caf));
    newGA=makeGlobal(caf,rtsTrue); /*given full weight*/
    ASSERT(newGA);
  } 
#endif /* PAR */
}

// An alternate version of newCaf which is used for dynamically loaded
// object code in GHCi.  In this case we want to retain *all* CAFs in
// the object code, because they might be demanded at any time from an
// expression evaluated on the command line.
// Also, GHCi might want to revert CAFs, so we add these to the
// revertible_caf_list.
//
// The linker hackily arranges that references to newCaf from dynamic
// code end up pointing to newDynCAF.
void
newDynCAF(StgClosure *caf)
{
    ACQUIRE_SM_LOCK;

    ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
    ((StgIndStatic *)caf)->static_link = revertible_caf_list;
    revertible_caf_list = caf;

    RELEASE_SM_LOCK;
}
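
/* Illustrative sketch, not part of the original file: reverting the CAFs
 * recorded by newDynCAF means walking revertible_caf_list and restoring each
 * closure's saved info pointer, as described in the comment above.  The real
 * code lives in GC.c:revertCAFs(); the function name here is hypothetical.
 */
static void
exampleRevertCAFs (void)
{
    StgIndStatic *c;

    for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
         c = (StgIndStatic *)c->static_link) {
        SET_INFO((StgClosure *)c, c->saved_info);  // back to the unevaluated form
        c->saved_info = NULL;
    }
    revertible_caf_list = NULL;
}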

/* -----------------------------------------------------------------------------
   Nursery management.
   -------------------------------------------------------------------------- */

static bdescr *
allocNursery (step *stp, bdescr *tail, nat blocks)
{
    bdescr *bd;
    nat i;

    // Allocate a nursery: we allocate fresh blocks one at a time and
    // cons them on to the front of the list, not forgetting to update
    // the back pointer on the tail of the list to point to the new block.
    for (i=0; i < blocks; i++) {
	// @LDV profiling
	/*
	  processNursery() in LdvProfile.c assumes that every block group in
	  the nursery contains only a single block. So, if a block group is
	  given multiple blocks, change processNursery() accordingly.
	*/
	bd = allocBlock();
	bd->link = tail;
	// double-link the nursery: we might need to insert blocks
	if (tail != NULL) {
	    tail->u.back = bd;
	}
	bd->step = stp;
	bd->gen_no = 0;
	bd->flags = 0;
	bd->free = bd->start;
	tail = bd;
    }
    tail->u.back = NULL;
    return tail;
}

static void
assignNurseriesToCapabilities (void)
{
#ifdef THREADED_RTS
    nat i;

    for (i = 0; i < n_nurseries; i++) {
	capabilities[i].r.rNursery        = &nurseries[i];
	capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
	capabilities[i].r.rCurrentAlloc   = NULL;
    }
#else /* THREADED_RTS */
    MainCapability.r.rNursery        = &nurseries[0];
    MainCapability.r.rCurrentNursery = nurseries[0].blocks;
    MainCapability.r.rCurrentAlloc   = NULL;
#endif
}

void
allocNurseries( void )
{ 
    nat i;

    for (i = 0; i < n_nurseries; i++) {
	nurseries[i].blocks = 
	    allocNursery(&nurseries[i], NULL, 
			 RtsFlags.GcFlags.minAllocAreaSize);
	nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
	nurseries[i].old_blocks   = NULL;
	nurseries[i].n_old_blocks = 0;
	/* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */
    }
    assignNurseriesToCapabilities();
}
      
void
resetNurseries( void )
{
    nat i;
    bdescr *bd;
    step *stp;

    for (i = 0; i < n_nurseries; i++) {
	stp = &nurseries[i];
	for (bd = stp->blocks; bd; bd = bd->link) {
	    bd->free = bd->start;
	    ASSERT(bd->gen_no == 0);
	    ASSERT(bd->step == stp);
	    IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
	}
    }
    assignNurseriesToCapabilities();
}

lnat
countNurseryBlocks (void)
{
    nat i;
    lnat blocks = 0;

    for (i = 0; i < n_nurseries; i++) {
	blocks += nurseries[i].n_blocks;
    }
    return blocks;
}

static void
resizeNursery ( step *stp, nat blocks )
{
  bdescr *bd;
  nat nursery_blocks;

  nursery_blocks = stp->n_blocks;
  if (nursery_blocks == blocks) return;

  if (nursery_blocks < blocks) {
      debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks", 
		 blocks);
    stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
  } 
  else {
    bdescr *next_bd;
    
    debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks", 
	       blocks);

    bd = stp->blocks;
    while (nursery_blocks > blocks) {
	next_bd = bd->link;
	next_bd->u.back = NULL;
	nursery_blocks -= bd->blocks; // might be a large block
	freeGroup(bd);
	bd = next_bd;
    }
    stp->blocks = bd;
    // might have gone just under, by freeing a large block, so make
    // up the difference.
    if (nursery_blocks < blocks) {
	stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
    }
  }
  
  stp->n_blocks = blocks;
  ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
}

// 
// Resize each of the nurseries to the specified size.
//
void
resizeNurseriesFixed (nat blocks)
{
    nat i;
    for (i = 0; i < n_nurseries; i++) {
	resizeNursery(&nurseries[i], blocks);
    }
}

// 
// Resize the nurseries to the total specified size.
//
void
resizeNurseries (nat blocks)
{
    // If there are multiple nurseries, then we just divide the number
    // of available blocks between them.
    resizeNurseriesFixed(blocks / n_nurseries);
}

/* -----------------------------------------------------------------------------
   The allocate() interface

   allocate(n) always succeeds, and returns a chunk of memory n words
   long.  n can be larger than the size of a block if necessary, in
   which case a contiguous block group will be allocated.
   -------------------------------------------------------------------------- */

StgPtr
allocate( nat n )
{
    bdescr *bd;
    StgPtr p;

    ACQUIRE_SM_LOCK;

    TICK_ALLOC_HEAP_NOCTR(n);
    CCS_ALLOC(CCCS,n);

    /* big allocation (>LARGE_OBJECT_THRESHOLD) */
    /* ToDo: allocate directly into generation 1 */
    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
	nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
	bd = allocGroup(req_blocks);
	dbl_link_onto(bd, &g0s0->large_objects);
	g0s0->n_large_blocks += req_blocks;
	bd->gen_no  = 0;
	bd->step = g0s0;
	bd->flags = BF_LARGE;
	bd->free = bd->start + n;
	alloc_blocks += req_blocks;
	RELEASE_SM_LOCK;
	return bd->start;
	
	/* small allocation (<LARGE_OBJECT_THRESHOLD) */
    } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
	if (small_alloc_list) {
	    small_alloc_list->free = alloc_Hp;
	}
	bd = allocBlock();
	bd->link = small_alloc_list;
	small_alloc_list = bd;
	bd->gen_no = 0;
	bd->step = g0s0;
	bd->flags = 0;
	alloc_Hp = bd->start;
	alloc_HpLim = bd->start + BLOCK_SIZE_W;
	alloc_blocks++;
609
    }
610
611
612
613
614
    
    p = alloc_Hp;
    alloc_Hp += n;
    RELEASE_SM_LOCK;
    return p;
}
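
/* Illustrative sketch, not part of the original file: callers of allocate()
 * work in words, so a byte count must be rounded up to whole words and the
 * closure header added on top (compare stgAllocForGMP below).  The function
 * and parameter names are hypothetical.
 */
static StgPtr
exampleAllocWordsArray (nat n_bytes)
{
    nat data_words  = (n_bytes + sizeof(W_) - 1) / sizeof(W_);  // round up to words
    nat total_words = sizeofW(StgArrWords) + data_words;
    StgArrWords *arr = (StgArrWords *)allocate(total_words);    // always succeeds
    SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_words);
    return (StgPtr)arr->payload;
}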

lnat
allocatedBytes( void )
{
    lnat allocated;

    allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp);
    if (pinned_object_block != NULL) {
	allocated -= (pinned_object_block->start + BLOCK_SIZE_W) - 
	    pinned_object_block->free;
    }
	
    return allocated;
}

void
tidyAllocateLists (void)
{
    if (small_alloc_list != NULL) {
	ASSERT(alloc_Hp >= small_alloc_list->start && 
	       alloc_Hp <= small_alloc_list->start + BLOCK_SIZE);
	small_alloc_list->free = alloc_Hp;
    }
}

/* -----------------------------------------------------------------------------
   allocateLocal()

   This allocates memory in the current thread - it is intended for
   use primarily from STG-land where we have a Capability.  It is
   better than allocate() because it doesn't require taking the
   sm_mutex lock in the common case.

   Memory is allocated directly from the nursery if possible (but not
   from the current nursery block, so as not to interfere with
   Hp/HpLim).
   -------------------------------------------------------------------------- */

StgPtr
allocateLocal (Capability *cap, nat n)
{
    bdescr *bd;
    StgPtr p;

    TICK_ALLOC_HEAP_NOCTR(n);
    CCS_ALLOC(CCCS,n);
    
    /* big allocation (>LARGE_OBJECT_THRESHOLD) */
    /* ToDo: allocate directly into generation 1 */
    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
	nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
	ACQUIRE_SM_LOCK;
	bd = allocGroup(req_blocks);
	dbl_link_onto(bd, &g0s0->large_objects);
	g0s0->n_large_blocks += req_blocks;
	bd->gen_no  = 0;
	bd->step = g0s0;
	bd->flags = BF_LARGE;
	bd->free = bd->start + n;
	alloc_blocks += req_blocks;
	RELEASE_SM_LOCK;
	return bd->start;
	
	/* small allocation (<LARGE_OBJECT_THRESHOLD) */
    } else {

	bd = cap->r.rCurrentAlloc;
	if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {

	    // The CurrentAlloc block is full, we need to find another
	    // one.  First, we try taking the next block from the
	    // nursery:
	    bd = cap->r.rCurrentNursery->link;

	    if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
		// The nursery is empty, or the next block is already
		// full: allocate a fresh block (we can't fail here).
		ACQUIRE_SM_LOCK;
		bd = allocBlock();
		cap->r.rNursery->n_blocks++;
		RELEASE_SM_LOCK;
		bd->gen_no = 0;
		bd->step = cap->r.rNursery;
		bd->flags = 0;
	    } else {
		// we have a block in the nursery: take it and put
		// it at the *front* of the nursery list, and use it
		// to allocate() from.
		cap->r.rCurrentNursery->link = bd->link;
		if (bd->link != NULL) {
		    bd->link->u.back = cap->r.rCurrentNursery;
		}
	    }
	    dbl_link_onto(bd, &cap->r.rNursery->blocks);
	    cap->r.rCurrentAlloc = bd;
	    IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
	}
    }
    p = bd->free;
    bd->free += n;
    return p;
}
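
/* Illustrative sketch, not part of the original file: a thread that already
 * holds a Capability can build a small closure without touching sm_mutex in
 * the common case.  Allocating the cell as MUT_VAR_DIRTY is a conservative
 * choice made only for this sketch; the function name is hypothetical.
 */
static StgMutVar *
exampleNewMutVar (Capability *cap, StgClosure *init)
{
    StgMutVar *mv = (StgMutVar *)allocateLocal(cap, sizeofW(StgMutVar));
    SET_HDR(mv, &stg_MUT_VAR_DIRTY_info, CCCS);  // fresh objects live in gen 0
    mv->var = init;
    return mv;
}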

/* ---------------------------------------------------------------------------
   Allocate a fixed/pinned object.

   We allocate small pinned objects into a single block, allocating a
   new block when the current one overflows.  The block is chained
   onto the large_object_list of generation 0 step 0.

   NOTE: The GC can't in general handle pinned objects.  This
   interface is only safe to use for ByteArrays, which have no
   pointers and don't require scavenging.  It works because the
   block's descriptor has the BF_LARGE flag set, so the block is
   treated as a large object and chained onto various lists, rather
   than the individual objects being copied.  However, when it comes
   to scavenge the block, the GC will only scavenge the first object.
   The reason is that the GC can't linearly scan a block of pinned
   objects at the moment (doing so would require using the
   mostly-copying techniques).  But since we're restricting ourselves
   to pinned ByteArrays, not scavenging is ok.

   This function is called by newPinnedByteArray# which immediately
   fills the allocated memory with a MutableByteArray#.
   ------------------------------------------------------------------------- */

StgPtr
allocatePinned( nat n )
{
    StgPtr p;
    bdescr *bd = pinned_object_block;

    // If the request is for a large object, then allocate()
    // will give us a pinned object anyway.
    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
	return allocate(n);
    }

    ACQUIRE_SM_LOCK;
    
    TICK_ALLOC_HEAP_NOCTR(n);
    CCS_ALLOC(CCCS,n);

    // we always return 8-byte aligned memory.  bd->free must be
    // 8-byte aligned to begin with, so we just round up n to
    // the nearest multiple of 8 bytes.
    if (sizeof(StgWord) == 4) {
	n = (n+1) & ~1;
    }

    // If we don't have a block of pinned objects yet, or the current
    // one isn't large enough to hold the new object, allocate a new one.
    if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
	pinned_object_block = bd = allocBlock();
	dbl_link_onto(bd, &g0s0->large_objects);
	bd->gen_no = 0;
	bd->step   = g0s0;
	bd->flags  = BF_PINNED | BF_LARGE;
	bd->free   = bd->start;
	alloc_blocks++;
    }

    p = bd->free;
    bd->free += n;
    RELEASE_SM_LOCK;
    return p;
}
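
/* Illustrative sketch, not part of the original file: roughly what
 * newPinnedByteArray# boils down to, a pinned ARR_WORDS whose payload address
 * can be handed to foreign code because the GC will never move it.  The
 * function and parameter names are hypothetical.
 */
static StgPtr
exampleAllocPinnedBytes (nat n_bytes)
{
    nat data_words = (n_bytes + sizeof(W_) - 1) / sizeof(W_);
    StgArrWords *arr =
        (StgArrWords *)allocatePinned(sizeofW(StgArrWords) + data_words);
    SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_words);
    return (StgPtr)arr->payload;   // stable for the object's lifetime
}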

/* -----------------------------------------------------------------------------
   This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
   MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
   is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
   and is put on the mutable list.
   -------------------------------------------------------------------------- */

void
dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
{
    Capability *cap = regTableToCapability(reg);
    bdescr *bd;
    if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
	p->header.info = &stg_MUT_VAR_DIRTY_info;
	bd = Bdescr((StgPtr)p);
	if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
    }
}
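
/* Illustrative sketch, not part of the original file: what a write to an
 * IORef amounts to on the RTS side (the real caller is the writeMutVar#
 * primop in Cmm).  The function name is hypothetical.
 */
static void
exampleWriteMutVar (StgRegTable *reg, StgMutVar *mv, StgClosure *new_value)
{
    mv->var = new_value;                   // the store itself
    dirty_MUT_VAR(reg, (StgClosure *)mv);  // then the write barrier above
}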

/* -----------------------------------------------------------------------------
   Allocation functions for GMP.

   These all use the allocate() interface - we can't have any garbage
   collection going on during a gmp operation, so we use allocate()
   which always succeeds.  The gmp operations which might need to
   allocate will ask the storage manager (via doYouWantToGC()) whether
   a garbage collection is required, in case we get into a loop doing
   only allocate() style allocation.
   -------------------------------------------------------------------------- */

static void *
stgAllocForGMP (size_t size_in_bytes)
{
  StgArrWords* arr;
  nat data_size_in_words, total_size_in_words;
  
  /* round up to a whole number of words */
  data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
  total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
  
  /* allocate and fill it in. */
#if defined(THREADED_RTS)
  arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
#else
  arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
#endif
  SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
  
  /* and return a ptr to the goods inside the array */
  return arr->payload;
}

static void *
stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
{
    void *new_stuff_ptr = stgAllocForGMP(new_size);
    nat i = 0;
    char *p = (char *) ptr;
    char *q = (char *) new_stuff_ptr;

    for (; i < old_size; i++, p++, q++) {
	*q = *p;
    }

    return(new_stuff_ptr);
}

static void
stgDeallocForGMP (void *ptr STG_UNUSED, 
		  size_t size STG_UNUSED)
{
    /* easy for us: the garbage collector does the dealloc'n */
}

/* -----------------------------------------------------------------------------
 * Stats and stuff
 * -------------------------------------------------------------------------- */

/* -----------------------------------------------------------------------------
 * calcAllocated()
 *
 * Approximate how much we've allocated: number of blocks in the
 * nursery + blocks allocated via allocate() - unused nursery blocks.
 * This leaves a little slop at the end of each block, and doesn't
 * take into account large objects (ToDo).
 * -------------------------------------------------------------------------- */

lnat
calcAllocated( void )
{
  nat allocated;
  bdescr *bd;

  allocated = allocatedBytes();
  allocated += countNurseryBlocks() * BLOCK_SIZE_W;
  
  {
#ifdef THREADED_RTS
  nat i;
  for (i = 0; i < n_nurseries; i++) {
      Capability *cap;
      for ( bd = capabilities[i].r.rCurrentNursery->link; 
	    bd != NULL; bd = bd->link ) {
	  allocated -= BLOCK_SIZE_W;
      }
      cap = &capabilities[i];
      if (cap->r.rCurrentNursery->free < 
	  cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
	  allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
	      - cap->r.rCurrentNursery->free;
      }
  }
#else
  bdescr *current_nursery = MainCapability.r.rCurrentNursery;

  for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
      allocated -= BLOCK_SIZE_W;
  }
  if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
      allocated -= (current_nursery->start + BLOCK_SIZE_W)
	  - current_nursery->free;
  }
#endif
  }

  total_allocated += allocated;
  return allocated;
}  

/* Approximate the amount of live data in the heap.  To be called just
 * after garbage collection (see GarbageCollect()).
 */
extern lnat 
calcLive(void)
{
  nat g, s;
  lnat live = 0;
  step *stp;

  if (RtsFlags.GcFlags.generations == 1) {
    live = (g0s0->n_blocks - 1) * BLOCK_SIZE_W + 
      ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
    return live;
  }

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
    for (s = 0; s < generations[g].n_steps; s++) {
      /* approximate amount of live data (doesn't take into account slop
       * at end of each block).
       */
      if (g == 0 && s == 0) { 
	  continue; 
      }
      stp = &generations[g].steps[s];
      live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
      if (stp->hp_bd != NULL) {
	  live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start) 
	      / sizeof(W_);
      }
      if (stp->scavd_hp != NULL) {
	  live -= (P_)(BLOCK_ROUND_UP(stp->scavd_hp)) - stp->scavd_hp;
      }
    }
  }
  return live;
}

/* Approximate the number of blocks that will be needed at the next
 * garbage collection.
 *
 * Assume: all data currently live will remain live.  Steps that will
 * be collected next time will therefore need twice as many blocks
 * since all the data will be copied.
 */
extern lnat 
calcNeeded(void)
{
    lnat needed = 0;
    nat g, s;
    step *stp;
    
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
	for (s = 0; s < generations[g].n_steps; s++) {
	    if (g == 0 && s == 0) { continue; }
	    stp = &generations[g].steps[s];
	    if (generations[g].steps[0].n_blocks +
		generations[g].steps[0].n_large_blocks 
		> generations[g].max_blocks
		&& stp->is_compacted == 0) {
		needed += 2 * stp->n_blocks;
	    } else {
		needed += stp->n_blocks;
	    }
	}
    }
    return needed;
}

/* ----------------------------------------------------------------------------
   Executable memory

   Executable memory must be managed separately from non-executable
   memory.  Most OSs these days require you to jump through hoops to
   dynamically allocate executable memory, due to various security
   measures.

   Here we provide a small memory allocator for executable memory.
   Memory is managed with a page granularity; we allocate linearly
   in the page, and when the page is emptied (all objects on the page
   are free) we free the page again, not forgetting to make it
   non-executable.
   ------------------------------------------------------------------------- */

static bdescr *exec_block;

void *allocateExec (nat bytes)
{
    void *ret;
    nat n;

    ACQUIRE_SM_LOCK;

    // round up to words.
    n  = (bytes + sizeof(W_) + 1) / sizeof(W_);

    if (n+1 > BLOCK_SIZE_W) {
	barf("allocateExec: can't handle large objects");
    }

    if (exec_block == NULL || 
	exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
	bdescr *bd;
	lnat pagesize = getPageSize();
	bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
	debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
	bd->gen_no = 0;
	bd->flags = BF_EXEC;
	bd->link = exec_block;
	if (exec_block != NULL) {
	    exec_block->u.back = bd;
	}
	bd->u.back = NULL;
	setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
	exec_block = bd;
    }
    *(exec_block->free) = n;  // store the size of this chunk
    exec_block->gen_no += n;  // gen_no stores the number of words allocated
    ret = exec_block->free + 1;
    exec_block->free += n + 1;

    RELEASE_SM_LOCK
    return ret;
}

void freeExec (void *addr)
{
    StgPtr p = (StgPtr)addr - 1;
    bdescr *bd = Bdescr((StgPtr)p);

    if ((bd->flags & BF_EXEC) == 0) {
	barf("freeExec: not executable");
    }

    if (*(StgPtr)p == 0) {
	barf("freeExec: already free?");
    }

    ACQUIRE_SM_LOCK;

    bd->gen_no -= *(StgPtr)p;
    *(StgPtr)p = 0;

    // Free the block if it is empty, but not if it is the block at
    // the head of the queue.
    if (bd->gen_no == 0 && bd != exec_block) {
	debugTrace(DEBUG_gc, "free exec block %p", bd->start);
	if (bd->u.back) {
	    bd->u.back->link = bd->link;
	} else {
	    exec_block = bd->link;
	}
	if (bd->link) {
	    bd->link->u.back = bd->u.back;
	}
	setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
	freeGroup(bd);
    }

    RELEASE_SM_LOCK
}    
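
/* Illustrative sketch, not part of the original file: allocateExec() and
 * freeExec() are used as a pair, for example for the adjustor code built by
 * "foreign import wrapper".  The 64-byte size and the 0x90 filler byte (an
 * x86 NOP) are hypothetical; real callers copy machine code into the buffer.
 */
static void
exampleExecRoundTrip (void)
{
    void *code = allocateExec(64);  // writable, executable memory
    memset(code, 0x90, 64);         // pretend to emit some code
    freeExec(code);                 // the page is freed once it is empty
}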

/* -----------------------------------------------------------------------------
   Debugging

   memInventory() checks for memory leaks by counting up all the
   blocks we know about and comparing that to the number of blocks
   allegedly floating around in the system.
   -------------------------------------------------------------------------- */

#ifdef DEBUG

static lnat
stepBlocks (step *stp)
{
    lnat total_blocks;
    bdescr *bd;

    total_blocks = stp->n_blocks;    
    total_blocks += stp->n_old_blocks;
    for (bd = stp->large_objects; bd; bd = bd->link) {
	total_blocks += bd->blocks;
	/* hack for megablock groups: they have an extra block or two in
	   the second and subsequent megablocks where the block
	   descriptors would normally go.
	*/
	if (bd->blocks > BLOCKS_PER_MBLOCK) {
	    total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
		* (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
	}
    }
    return total_blocks;
}

void
memInventory(void)
{
  nat g, s, i;
  step *stp;
  bdescr *bd;
  lnat total_blocks = 0, free_blocks = 0;

  /* count the blocks we currently have */

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      for (i = 0; i < n_capabilities; i++) {
	  for (bd = capabilities[i].mut_lists[g]; bd != NULL; bd = bd->link) {
	      total_blocks += bd->blocks;
	  }
      }	  
      for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
	  total_blocks += bd->blocks;
      }
      for (s = 0; s < generations[g].n_steps; s++) {
	  if (g==0 && s==0) continue;
	  stp = &generations[g].steps[s];
	  total_blocks += stepBlocks(stp);
      }
  }

  for (i = 0; i < n_nurseries; i++) {
      total_blocks += stepBlocks(&nurseries[i]);
  }
#ifdef THREADED_RTS
  // We put pinned object blocks in g0s0, so better count blocks there too.
  total_blocks += stepBlocks(g0s0);
#endif

  /* any blocks held by allocate() */
  for (bd = small_alloc_list; bd; bd = bd->link) {
    total_blocks += bd->blocks;
  }

#ifdef PROFILING
  if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
      total_blocks += retainerStackBlocks();
  }
#endif

  // count the blocks allocated by the arena allocator
  total_blocks += arenaBlocks();

  // count the blocks containing executable memory
  for (bd = exec_block; bd; bd = bd->link) {
    total_blocks += bd->blocks;
  }

  /* count the blocks on the free list */
  free_blocks = countFreeList();

  if (total_blocks + free_blocks != mblocks_allocated *
      BLOCKS_PER_MBLOCK) {
    debugBelch("Blocks: %ld live + %ld free  = %ld total (%ld around)\n",
	    total_blocks, free_blocks, total_blocks + free_blocks,
	    mblocks_allocated * BLOCKS_PER_MBLOCK);
  }

  ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
}


nat
countBlocks(bdescr *bd)
{
    nat n;
    for (n=0; bd != NULL; bd=bd->link) {
	n += bd->blocks;
    }
    return n;
}

/* Full heap sanity check. */
void
checkSanity( void )
{
    nat g, s;

    if (RtsFlags.GcFlags.generations == 1) {
	checkHeap(g0s0->blocks);
	checkChain(g0s0->large_objects);
    } else {
	
	for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
	    for (s = 0; s < generations[g].n_steps; s++) {
		if (g == 0 && s == 0) { continue; }
		ASSERT(countBlocks(generations[g].steps[s].blocks)
		       == generations[g].steps[s].n_blocks);
		ASSERT(countBlocks(generations[g].steps[s].large_objects)
		       == generations[g].steps[s].n_large_blocks);
		checkHeap(generations[g].steps[s].blocks);
		checkChain(generations[g].steps[s].large_objects);
		if (g > 0) {
		    checkMutableList(generations[g].mut_list, g);
		}
	    }
	}

	for (s = 0; s < n_nurseries; s++) {
	    ASSERT(countBlocks(nurseries[s].blocks)
		   == nurseries[s].n_blocks);
	    ASSERT(countBlocks(nurseries[s].large_objects)
		   == nurseries[s].n_large_blocks);
	}
	    
	checkFreeListSanity();
    }
}

/* Nursery sanity check */
void
checkNurserySanity( step *stp )
{
    bdescr *bd, *prev;
    nat blocks = 0;

    prev = NULL;
    for (bd = stp->blocks; bd != NULL; bd = bd->link) {
	ASSERT(bd->u.back == prev);
	prev = bd;
	blocks += bd->blocks;
    }
    ASSERT(blocks == stp->n_blocks);
}

// handy function for use in gdb, because Bdescr() is inlined.
extern bdescr *_bdescr( StgPtr p );

bdescr *
_bdescr( StgPtr p )
{
    return Bdescr(p);
}

#endif