BlockAlloc.c 27.5 KB
Newer Older
1
2
/* -----------------------------------------------------------------------------
 *
3
 * (c) The GHC Team 1998-2008
4
 *
5
6
7
 * The block allocator and free list manager.
 *
 * This is the architecture independent part of the block allocator.
8
 * It requires only the following support from the operating system:
9
 *
Simon Marlow's avatar
Simon Marlow committed
10
 *    void *getMBlocks(uint32_t n);
11
 *
Simon Marlow's avatar
Simon Marlow committed
12
13
 * returns the address of an n*MBLOCK_SIZE region of memory, aligned on
 * an MBLOCK_SIZE boundary.  There are no other restrictions on the
Simon Marlow's avatar
Simon Marlow committed
14
 * addresses of memory returned by getMBlocks().
15
16
17
 *
 * ---------------------------------------------------------------------------*/

18
#include "PosixSource.h"
19
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
20
21

#include "Storage.h"
22
23
#include "RtsUtils.h"
#include "BlockAlloc.h"
24
#include "OSMem.h"
25

26
27
#include <string.h>

Simon Marlow's avatar
Simon Marlow committed
28
static void  initMBlock(void *mblock, uint32_t node);
29

Simon Marlow's avatar
Simon Marlow committed
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
/* -----------------------------------------------------------------------------

  Implementation notes
  ~~~~~~~~~~~~~~~~~~~~

  Terminology:
    - bdescr = block descriptor
    - bgroup = block group (1 or more adjacent blocks)
    - mblock = mega block
    - mgroup = mega group (1 or more adjacent mblocks)

   Invariants on block descriptors
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   bd->start always points to the start of the block.

   bd->free is either:
      - zero for a non-group-head; bd->link points to the head
      - (-1) for the head of a free block group
48
      - or it points within the block (group)
Simon Marlow's avatar
Simon Marlow committed
49
50
51
52
53
54
55
56
57
58

   bd->blocks is either:
      - zero for a non-group-head; bd->link points to the head
      - number of blocks in this group otherwise

   bd->link either points to a block descriptor or is NULL

   The following fields are not used by the allocator:
     bd->flags
     bd->gen_no
Simon Marlow's avatar
Simon Marlow committed
59
     bd->gen
60
     bd->dest
Simon Marlow's avatar
Simon Marlow committed
61
62
63
64
65
66
67
68
69

  Exceptions: we don't maintain invariants for all the blocks within a
  group on the free list, because it is expensive to modify every
  bdescr in a group when coalescing.  Just the head and last bdescrs
  will be correct for a group on the free list.


  Free lists
  ~~~~~~~~~~
70

Simon Marlow's avatar
Simon Marlow committed
71
  Preliminaries:
72
    - most allocations are for a small number of blocks
73
74
75
76
77
78
    - sometimes the OS gives us new memory backwards in the address
      space, sometimes forwards, so we should not be biased towards
      any particular layout in the address space
    - We want to avoid fragmentation
    - We want allocation and freeing to be O(1) or close.

Simon Marlow's avatar
Simon Marlow committed
79
  Coalescing trick: when a bgroup is freed (freeGroup()), we can check
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
  whether it can be coalesced with other free bgroups by checking the
  bdescrs for the blocks on either side of it.  This means that we can
  coalesce in O(1) time.  Every free bgroup must have its head and tail
  bdescrs initialised, the rest don't matter.

  We keep the free list in buckets, using a heap-sort strategy.
  Bucket N contains blocks with sizes 2^N - 2^(N+1)-1.  The list of
  blocks in each bucket is doubly-linked, so that if a block is
  coalesced we can easily remove it from its current free list.

  To allocate a new block of size S, grab a block from bucket
  log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
  least as big as S, and split it if necessary.  If there are no
  blocks in that bucket, look at bigger buckets until a block is found
  Allocation is therefore O(logN) time.

  To free a block:
    - coalesce it with neighbours.
    - remove coalesced neighbour(s) from free list(s)
    - add the new (coalesced) block to the front of the appropriate
      bucket, given by log2(S) where S is the size of the block.

  Free is O(1).

104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
  Megablocks
  ~~~~~~~~~~

  Separately from the free list of block groups, which are smaller than
  an mblock, we maintain a free list of mblock groups.  This is the unit
  of memory the operating system gives us, and we may either split mblocks
  into blocks or allocate them directly (when very large contiguous regions
  of memory).  mblocks have a different set of invariants than blocks:

  bd->start points to the start of the block IF the block is in the first mblock
  bd->blocks and bd->link are only valid IF this block is the first block
    of the first mblock
  No other fields are used (in particular, free is not used, meaning that
    space that is not used by the (single) object is wasted.

  This has implications for the free list as well:
  We cannot play the coalescing trick with mblocks, because there is
121
122
123
124
  no requirement that the bdescrs in the second and subsequent mblock
  of an mgroup are initialised (the mgroup might be filled with a
  large array, overwriting the bdescrs for example).

125
  The separate free list for megablocks is thus sorted in *address*
126
127
128
129
  order, so that we can coalesce.  Allocation in this list is best-fit
  by traversing the whole list: we don't expect this list to be long,
  and allocation/freeing of large blocks is rare; avoiding
  fragmentation is more important than performance here.
Simon Marlow's avatar
Simon Marlow committed
130
131
132
133
134
135
136
137

  freeGroup() might end up moving a block from free_list to
  free_mblock_list, if after coalescing we end up with a full mblock.

  checkFreeListSanity() checks all the invariants on the free lists.

  --------------------------------------------------------------------------- */

138
139
140
141
142
/* ---------------------------------------------------------------------------
   WATCH OUT FOR OVERFLOW

   Be very careful with integer overflow here.  If you have an
   expression like (n_blocks * BLOCK_SIZE), and n_blocks is an int or
143
   a uint32_t, then it will very likely overflow on a 64-bit platform.
144
145
146
147
   Always cast to StgWord (or W_ for short) first: ((W_)n_blocks * BLOCK_SIZE).

  --------------------------------------------------------------------------- */

148
// free_list[i] contains blocks that are at least size 2^i, and at
149
150
// most size 2^(i+1) - 1.
//
151
152
// To find the free list in which to place a block, use log_2(size).
// To find a free block of the right size, use log_2_ceil(size).
153
154
155
156
157
158
159
160
//
// The largest free list (free_list[NUM_FREE_LISTS-1]) needs to contain sizes
// from half a megablock up to (but not including) a full megablock.

#define NUM_FREE_LISTS (MBLOCK_SHIFT-BLOCK_SHIFT)

// In THREADED_RTS mode, the free list is protected by sm_mutex.

Simon Marlow's avatar
Simon Marlow committed
161
162
static bdescr *free_list[MAX_NUMA_NODES][NUM_FREE_LISTS];
static bdescr *free_mblock_list[MAX_NUMA_NODES];
163

164
165
W_ n_alloc_blocks;   // currently allocated blocks
W_ hw_alloc_blocks;  // high-water allocated blocks
166

Simon Marlow's avatar
Simon Marlow committed
167
168
W_ n_alloc_blocks_by_node[MAX_NUMA_NODES];

169
170
171
172
173
174
/* -----------------------------------------------------------------------------
   Initialisation
   -------------------------------------------------------------------------- */

void initBlockAllocator(void)
{
Simon Marlow's avatar
Simon Marlow committed
175
176
177
178
179
180
181
    uint32_t i, node;
    for (node = 0; node < MAX_NUMA_NODES; node++) {
        for (i=0; i < NUM_FREE_LISTS; i++) {
            free_list[node][i] = NULL;
        }
        free_mblock_list[node] = NULL;
        n_alloc_blocks_by_node[node] = 0;
182
183
184
    }
    n_alloc_blocks = 0;
    hw_alloc_blocks = 0;
185
186
}

Simon Marlow's avatar
Simon Marlow committed
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
/* -----------------------------------------------------------------------------
   Accounting
   -------------------------------------------------------------------------- */

STATIC_INLINE
void recordAllocatedBlocks(uint32_t node, uint32_t n)
{
    n_alloc_blocks += n;
    n_alloc_blocks_by_node[node] += n;
    if (n > 0 && n_alloc_blocks > hw_alloc_blocks) {
        hw_alloc_blocks = n_alloc_blocks;
    }
}

STATIC_INLINE
void recordFreedBlocks(uint32_t node, uint32_t n)
{
    ASSERT(n_alloc_blocks >= n);
    n_alloc_blocks -= n;
    n_alloc_blocks_by_node[node] -= n;
}

209
210
211
212
/* -----------------------------------------------------------------------------
   Allocation
   -------------------------------------------------------------------------- */

213
214
215
216
217
218
STATIC_INLINE bdescr *
tail_of (bdescr *bd)
{
    return bd + bd->blocks - 1;
}

219
STATIC_INLINE void
simonmarhaskell@gmail.com's avatar
simonmarhaskell@gmail.com committed
220
initGroup(bdescr *head)
221
{
simonmarhaskell@gmail.com's avatar
simonmarhaskell@gmail.com committed
222
223
  head->free   = head->start;
  head->link   = NULL;
224
225
226
227
228
229
230
231

  // If this is a block group (but not a megablock group), we
  // make the last block of the group point to the head.  This is used
  // when coalescing blocks in freeGroup().  We don't do this for
  // megablock groups because blocks in the second and subsequent
  // mblocks don't have bdescrs; freeing these is handled in a
  // different way by free_mblock_group().
  if (head->blocks > 1 && head->blocks <= BLOCKS_PER_MBLOCK) {
232
      bdescr *last = tail_of(head);
233
234
      last->blocks = 0;
      last->link = head;
235
236
237
  }
}

238
239
240
241
242
243
#if SIZEOF_VOID_P == SIZEOF_LONG
#define CLZW(n) (__builtin_clzl(n))
#else
#define CLZW(n) (__builtin_clzll(n))
#endif

244
// log base 2 (floor), needs to support up to (2^NUM_FREE_LISTS)-1
245
STATIC_INLINE uint32_t
246
log_2(W_ n)
247
{
248
    ASSERT(n > 0 && n < (1<<NUM_FREE_LISTS));
249
250
251
252
253
#if defined(__GNUC__)
    return CLZW(n) ^ (sizeof(StgWord)*8 - 1);
    // generates good code on x86.  __builtin_clz() compiles to bsr+xor, but
    // we want just bsr, so the xor here cancels out gcc's xor.
#else
Simon Marlow's avatar
Simon Marlow committed
254
    W_ i, x;
255
    x = n;
256
    for (i=0; i < NUM_FREE_LISTS; i++) {
257
258
        x = x >> 1;
        if (x == 0) return i;
Simon Marlow's avatar
Simon Marlow committed
259
    }
260
    return NUM_FREE_LISTS;
261
#endif
Simon Marlow's avatar
Simon Marlow committed
262
}
263

264
// log base 2 (ceiling), needs to support up to (2^NUM_FREE_LISTS)-1
265
STATIC_INLINE uint32_t
266
log_2_ceil(W_ n)
Simon Marlow's avatar
Simon Marlow committed
267
{
268
    ASSERT(n > 0 && n < (1<<NUM_FREE_LISTS));
269
#if defined(__GNUC__)
270
    uint32_t r = log_2(n);
271
272
    return (n & (n-1)) ? r+1 : r;
#else
Simon Marlow's avatar
Simon Marlow committed
273
    W_ i, x;
274
    x = 1;
275
    for (i=0; i < MAX_FREE_LIST; i++) {
276
277
        if (x >= n) return i;
        x = x << 1;
278
    }
279
    return MAX_FREE_LIST;
280
#endif
Simon Marlow's avatar
Simon Marlow committed
281
282
}

283
STATIC_INLINE void
Simon Marlow's avatar
Simon Marlow committed
284
free_list_insert (uint32_t node, bdescr *bd)
Simon Marlow's avatar
Simon Marlow committed
285
{
286
    uint32_t ln;
Simon Marlow's avatar
Simon Marlow committed
287

288
289
    ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
    ln = log_2(bd->blocks);
290

Simon Marlow's avatar
Simon Marlow committed
291
    dbl_link_onto(bd, &free_list[node][ln]);
Simon Marlow's avatar
Simon Marlow committed
292
293
294
295
}

// After splitting a group, the last block of each group must have a
// tail that points to the head block, to keep our invariants for
296
// coalescing.
Simon Marlow's avatar
Simon Marlow committed
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
STATIC_INLINE void
setup_tail (bdescr *bd)
{
    bdescr *tail;
    tail = tail_of(bd);
    if (tail != bd) {
        tail->blocks = 0;
        tail->free = 0;
        tail->link = bd;
    }
}


// Take a free block group bd, and split off a group of size n from
// it.  Adjust the free list as necessary, and return the new group.
static bdescr *
Simon Marlow's avatar
Simon Marlow committed
313
split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln)
Simon Marlow's avatar
Simon Marlow committed
314
315
316
317
{
    bdescr *fg; // free group

    ASSERT(bd->blocks > n);
Simon Marlow's avatar
Simon Marlow committed
318
    dbl_link_remove(bd, &free_list[node][ln]);
Simon Marlow's avatar
Simon Marlow committed
319
320
321
322
    fg = bd + bd->blocks - n; // take n blocks off the end
    fg->blocks = n;
    bd->blocks -= n;
    setup_tail(bd);
323
    ln = log_2(bd->blocks);
Simon Marlow's avatar
Simon Marlow committed
324
    dbl_link_onto(bd, &free_list[node][ln]);
Simon Marlow's avatar
Simon Marlow committed
325
326
327
    return fg;
}

328
329
330
331
/* Only initializes the start pointers on the first megablock and the
 * blocks field of the first bdescr; callers are responsible for calling
 * initGroup afterwards.
 */
Simon Marlow's avatar
Simon Marlow committed
332
static bdescr *
Simon Marlow's avatar
Simon Marlow committed
333
alloc_mega_group (uint32_t node, StgWord mblocks)
Simon Marlow's avatar
Simon Marlow committed
334
335
{
    bdescr *best, *bd, *prev;
336
    StgWord n;
Simon Marlow's avatar
Simon Marlow committed
337
338
339
340
341

    n = MBLOCK_GROUP_BLOCKS(mblocks);

    best = NULL;
    prev = NULL;
Simon Marlow's avatar
Simon Marlow committed
342
    for (bd = free_mblock_list[node]; bd != NULL; prev = bd, bd = bd->link)
Simon Marlow's avatar
Simon Marlow committed
343
    {
344
        if (bd->blocks == n)
Simon Marlow's avatar
Simon Marlow committed
345
346
347
348
        {
            if (prev) {
                prev->link = bd->link;
            } else {
Simon Marlow's avatar
Simon Marlow committed
349
                free_mblock_list[node] = bd->link;
Simon Marlow's avatar
Simon Marlow committed
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
            }
            return bd;
        }
        else if (bd->blocks > n)
        {
            if (!best || bd->blocks < best->blocks)
            {
                best = bd;
            }
        }
    }

    if (best)
    {
        // we take our chunk off the end here.
365
        StgWord best_mblocks  = BLOCKS_TO_MBLOCKS(best->blocks);
366
        bd = FIRST_BDESCR((StgWord8*)MBLOCK_ROUND_DOWN(best) +
Simon Marlow's avatar
Simon Marlow committed
367
368
369
                          (best_mblocks-mblocks)*MBLOCK_SIZE);

        best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
Simon Marlow's avatar
Simon Marlow committed
370
        initMBlock(MBLOCK_ROUND_DOWN(bd), node);
Simon Marlow's avatar
Simon Marlow committed
371
372
373
    }
    else
    {
Simon Marlow's avatar
Simon Marlow committed
374
375
376
377
378
379
380
        void *mblock;
        if (RtsFlags.GcFlags.numa) {
            mblock = getMBlocksOnNode(node, mblocks);
        } else {
            mblock = getMBlocks(mblocks);
        }
        initMBlock(mblock, node); // only need to init the 1st one
Simon Marlow's avatar
Simon Marlow committed
381
382
383
384
385
386
387
        bd = FIRST_BDESCR(mblock);
    }
    bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
    return bd;
}

bdescr *
Simon Marlow's avatar
Simon Marlow committed
388
allocGroupOnNode (uint32_t node, W_ n)
Simon Marlow's avatar
Simon Marlow committed
389
390
{
    bdescr *bd, *rem;
391
    StgWord ln;
Simon Marlow's avatar
Simon Marlow committed
392
393

    if (n == 0) barf("allocGroup: requested zero blocks");
394

Simon Marlow's avatar
Simon Marlow committed
395
396
    if (n >= BLOCKS_PER_MBLOCK)
    {
397
        StgWord mblocks;
398
399
400
401
402

        mblocks = BLOCKS_TO_MBLOCKS(n);

        // n_alloc_blocks doesn't count the extra blocks we get in a
        // megablock group.
Simon Marlow's avatar
Simon Marlow committed
403
        recordAllocatedBlocks(node, mblocks * BLOCKS_PER_MBLOCK);
404

Simon Marlow's avatar
Simon Marlow committed
405
        bd = alloc_mega_group(node, mblocks);
Simon Marlow's avatar
Simon Marlow committed
406
        // only the bdescrs of the first MB are required to be initialised
simonmarhaskell@gmail.com's avatar
simonmarhaskell@gmail.com committed
407
        initGroup(bd);
408
        goto finish;
Simon Marlow's avatar
Simon Marlow committed
409
    }
410

Simon Marlow's avatar
Simon Marlow committed
411
    recordAllocatedBlocks(node, n);
412

413
414
    ln = log_2_ceil(n);

Simon Marlow's avatar
Simon Marlow committed
415
    while (ln < NUM_FREE_LISTS && free_list[node][ln] == NULL) {
416
417
418
        ln++;
    }

419
    if (ln == NUM_FREE_LISTS) {
420
421
422
#if 0  /* useful for debugging fragmentation */
        if ((W_)mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W
             - (W_)((n_alloc_blocks - n) * BLOCK_SIZE_W) > (2*1024*1024)/sizeof(W_)) {
Simon Marlow's avatar
Simon Marlow committed
423
            debugBelch("Fragmentation, wanted %d blocks, %ld MB free\n", n, ((mblocks_allocated * BLOCKS_PER_MBLOCK) - n_alloc_blocks) / BLOCKS_PER_MBLOCK);
424
425
426
427
428
            RtsFlags.DebugFlags.block_alloc = 1;
            checkFreeListSanity();
        }
#endif

Simon Marlow's avatar
Simon Marlow committed
429
        bd = alloc_mega_group(node,1);
430
        bd->blocks = n;
431
        initGroup(bd);                   // we know the group will fit
432
433
        rem = bd + n;
        rem->blocks = BLOCKS_PER_MBLOCK-n;
Simon Marlow's avatar
Simon Marlow committed
434
435
        initGroup(rem);                  // init the slop
        recordAllocatedBlocks(node,rem->blocks);
436
        freeGroup(rem);                  // add the slop on to the free list
437
        goto finish;
Simon Marlow's avatar
Simon Marlow committed
438
439
    }

Simon Marlow's avatar
Simon Marlow committed
440
    bd = free_list[node][ln];
441

442
    if (bd->blocks == n)                // exactly the right size!
443
    {
Simon Marlow's avatar
Simon Marlow committed
444
        dbl_link_remove(bd, &free_list[node][ln]);
445
        initGroup(bd);
446
447
    }
    else if (bd->blocks >  n)            // block too big...
448
    {
Simon Marlow's avatar
Simon Marlow committed
449
        bd = split_free_block(bd, node, n, ln);
450
451
        ASSERT(bd->blocks == n);
        initGroup(bd);
452
453
454
455
456
    }
    else
    {
        barf("allocGroup: free list corrupted");
    }
457
458
459

finish:
    IF_DEBUG(sanity, memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE));
Simon Marlow's avatar
Simon Marlow committed
460
461
    IF_DEBUG(sanity, checkFreeListSanity());
    return bd;
462
463
}

Simon Marlow's avatar
Simon Marlow committed
464
465
466
467
468
STATIC_INLINE
uint32_t nodeWithLeastBlocks (void)
{
    uint32_t node = 0, i;
    uint32_t min_blocks = n_alloc_blocks_by_node[0];
Simon Marlow's avatar
Simon Marlow committed
469
    for (i = 1; i < n_numa_nodes; i++) {
Simon Marlow's avatar
Simon Marlow committed
470
471
472
473
474
475
476
477
478
479
480
481
482
483
        if (n_alloc_blocks_by_node[i] < min_blocks) {
            min_blocks = n_alloc_blocks_by_node[i];
            node = i;
        }
    }
    return node;
}

bdescr* allocGroup (W_ n)
{
    return allocGroupOnNode(nodeWithLeastBlocks(),n);
}


484
//
485
486
487
// Allocate a chunk of blocks that is at least min and at most max
// blocks in size. This API is used by the nursery allocator that
// wants contiguous memory preferably, but doesn't require it.  When
488
// memory is fragmented we might have lots of chunks that are
489
490
491
492
// less than a full megablock, so allowing the nursery allocator to
// use these reduces fragmentation considerably.  e.g. on a GHC build
// with +RTS -H, I saw fragmentation go from 17MB down to 3MB on a
// single compile.
493
//
494
495
496
497
498
499
// Further to this: in #7257 there is a program that creates serious
// fragmentation such that the heap is full of tiny <4 block chains.
// The nursery allocator therefore has to use single blocks to avoid
// fragmentation, but we make sure that we allocate large blocks
// preferably if there are any.
//
Simon Marlow's avatar
Simon Marlow committed
500
bdescr* allocLargeChunkOnNode (uint32_t node, W_ min, W_ max)
501
502
{
    bdescr *bd;
503
    StgWord ln, lnmax;
504

505
    if (min >= BLOCKS_PER_MBLOCK) {
Simon Marlow's avatar
Simon Marlow committed
506
        return allocGroupOnNode(node,max);
507
508
509
    }

    ln = log_2_ceil(min);
510
    lnmax = log_2_ceil(max);
511

Simon Marlow's avatar
Simon Marlow committed
512
    while (ln < NUM_FREE_LISTS && ln < lnmax && free_list[node][ln] == NULL) {
513
514
        ln++;
    }
515
    if (ln == NUM_FREE_LISTS || ln == lnmax) {
Simon Marlow's avatar
Simon Marlow committed
516
        return allocGroupOnNode(node,max);
517
    }
Simon Marlow's avatar
Simon Marlow committed
518
    bd = free_list[node][ln];
519

520
521
    if (bd->blocks <= max)              // exactly the right size!
    {
Simon Marlow's avatar
Simon Marlow committed
522
        dbl_link_remove(bd, &free_list[node][ln]);
523
524
525
        initGroup(bd);
    }
    else   // block too big...
526
    {
Simon Marlow's avatar
Simon Marlow committed
527
        bd = split_free_block(bd, node, max, ln);
528
529
530
531
        ASSERT(bd->blocks == max);
        initGroup(bd);
    }

Simon Marlow's avatar
Simon Marlow committed
532
    recordAllocatedBlocks(node, bd->blocks);
533
534
535
536
537
538

    IF_DEBUG(sanity, memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE));
    IF_DEBUG(sanity, checkFreeListSanity());
    return bd;
}

Simon Marlow's avatar
Simon Marlow committed
539
540
541
542
543
bdescr* allocLargeChunk (W_ min, W_ max)
{
    return allocLargeChunkOnNode(nodeWithLeastBlocks(), min, max);
}

544
bdescr *
Simon Marlow's avatar
Simon Marlow committed
545
allocGroup_lock(W_ n)
546
547
548
549
550
551
552
553
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocGroup(n);
    RELEASE_SM_LOCK;
    return bd;
}

554
bdescr *
Simon Marlow's avatar
Simon Marlow committed
555
allocBlock_lock(void)
556
{
Simon Marlow's avatar
Simon Marlow committed
557
558
559
560
561
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocBlock();
    RELEASE_SM_LOCK;
    return bd;
562
563
}

564
bdescr *
Simon Marlow's avatar
Simon Marlow committed
565
allocGroupOnNode_lock(uint32_t node, W_ n)
566
567
568
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
Simon Marlow's avatar
Simon Marlow committed
569
570
571
572
573
574
575
576
577
578
579
    bd = allocGroupOnNode(node,n);
    RELEASE_SM_LOCK;
    return bd;
}

bdescr *
allocBlockOnNode_lock(uint32_t node)
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocBlockOnNode(node);
580
581
582
583
    RELEASE_SM_LOCK;
    return bd;
}

584
/* -----------------------------------------------------------------------------
Simon Marlow's avatar
Simon Marlow committed
585
   De-Allocation
586
587
   -------------------------------------------------------------------------- */

Simon Marlow's avatar
Simon Marlow committed
588
589
590
591
592
593
STATIC_INLINE bdescr *
coalesce_mblocks (bdescr *p)
{
    bdescr *q;

    q = p->link;
594
595
596
    if (q != NULL &&
        MBLOCK_ROUND_DOWN(q) ==
        (StgWord8*)MBLOCK_ROUND_DOWN(p) +
Simon Marlow's avatar
Simon Marlow committed
597
        BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
Simon Marlow's avatar
Simon Marlow committed
598
599
600
601
602
        // can coalesce
        p->blocks  = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
                                         BLOCKS_TO_MBLOCKS(q->blocks));
        p->link = q->link;
        return p;
603
    }
Simon Marlow's avatar
Simon Marlow committed
604
    return q;
605
606
}

Simon Marlow's avatar
Simon Marlow committed
607
608
static void
free_mega_group (bdescr *mg)
609
{
Simon Marlow's avatar
Simon Marlow committed
610
    bdescr *bd, *prev;
Simon Marlow's avatar
Simon Marlow committed
611
    uint32_t node;
Simon Marlow's avatar
Simon Marlow committed
612
613
614
615

    // Find the right place in the free list.  free_mblock_list is
    // sorted by *address*, not by size as the free_list is.
    prev = NULL;
Simon Marlow's avatar
Simon Marlow committed
616
617
    node = mg->node;
    bd = free_mblock_list[node];
Simon Marlow's avatar
Simon Marlow committed
618
619
620
621
    while (bd && bd->start < mg->start) {
        prev = bd;
        bd = bd->link;
    }
622

Simon Marlow's avatar
Simon Marlow committed
623
624
    // coalesce backwards
    if (prev)
625
    {
Simon Marlow's avatar
Simon Marlow committed
626
627
628
        mg->link = prev->link;
        prev->link = mg;
        mg = coalesce_mblocks(prev);
629
    }
Simon Marlow's avatar
Simon Marlow committed
630
631
    else
    {
Simon Marlow's avatar
Simon Marlow committed
632
633
        mg->link = free_mblock_list[node];
        free_mblock_list[node] = mg;
Simon Marlow's avatar
Simon Marlow committed
634
635
636
637
638
    }
    // coalesce forwards
    coalesce_mblocks(mg);

    IF_DEBUG(sanity, checkFreeListSanity());
639
}
Simon Marlow's avatar
Simon Marlow committed
640

641
642
643
644

void
freeGroup(bdescr *p)
{
645
  StgWord ln;
Simon Marlow's avatar
Simon Marlow committed
646
  uint32_t node;
647

Simon Marlow's avatar
Simon Marlow committed
648
  // not true in multithreaded GC:
Simon Marlow's avatar
Simon Marlow committed
649
  // ASSERT_SM_LOCK();
650

Simon Marlow's avatar
Simon Marlow committed
651
  ASSERT(p->free != (P_)-1);
652

Simon Marlow's avatar
Simon Marlow committed
653
654
  node = p->node;

655
  p->free = (void *)-1;  /* indicates that this block is free */
Simon Marlow's avatar
Simon Marlow committed
656
  p->gen = NULL;
657
  p->gen_no = 0;
658
  /* fill the block group with garbage if sanity checking is on */
659
  IF_DEBUG(sanity,memset(p->start, 0xaa, (W_)p->blocks * BLOCK_SIZE));
660

Simon Marlow's avatar
Simon Marlow committed
661
662
663
664
  if (p->blocks == 0) barf("freeGroup: block size is zero");

  if (p->blocks >= BLOCKS_PER_MBLOCK)
  {
665
      StgWord mblocks;
666
667

      mblocks = BLOCKS_TO_MBLOCKS(p->blocks);
Simon Marlow's avatar
Simon Marlow committed
668
      // If this is an mgroup, make sure it has the right number of blocks
669
670
      ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(mblocks));

Simon Marlow's avatar
Simon Marlow committed
671
      recordFreedBlocks(node, mblocks * BLOCKS_PER_MBLOCK);
672

Simon Marlow's avatar
Simon Marlow committed
673
674
      free_mega_group(p);
      return;
675
676
  }

Simon Marlow's avatar
Simon Marlow committed
677
  recordFreedBlocks(node, p->blocks);
678

Simon Marlow's avatar
Simon Marlow committed
679
680
681
682
683
684
685
  // coalesce forwards
  {
      bdescr *next;
      next = p + p->blocks;
      if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
      {
          p->blocks += next->blocks;
686
          ln = log_2(next->blocks);
Simon Marlow's avatar
Simon Marlow committed
687
          dbl_link_remove(next, &free_list[node][ln]);
Simon Marlow's avatar
Simon Marlow committed
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
          if (p->blocks == BLOCKS_PER_MBLOCK)
          {
              free_mega_group(p);
              return;
          }
          setup_tail(p);
      }
  }

  // coalesce backwards
  if (p != FIRST_BDESCR(MBLOCK_ROUND_DOWN(p)))
  {
      bdescr *prev;
      prev = p - 1;
      if (prev->blocks == 0) prev = prev->link; // find the head

      if (prev->free == (P_)-1)
      {
706
          ln = log_2(prev->blocks);
Simon Marlow's avatar
Simon Marlow committed
707
          dbl_link_remove(prev, &free_list[node][ln]);
Simon Marlow's avatar
Simon Marlow committed
708
709
710
711
712
713
714
715
716
          prev->blocks += p->blocks;
          if (prev->blocks >= BLOCKS_PER_MBLOCK)
          {
              free_mega_group(prev);
              return;
          }
          p = prev;
      }
  }
717

718
  setup_tail(p);
Simon Marlow's avatar
Simon Marlow committed
719
  free_list_insert(node,p);
720
721
722
723

  IF_DEBUG(sanity, checkFreeListSanity());
}

724
725
726
727
728
729
730
731
void
freeGroup_lock(bdescr *p)
{
    ACQUIRE_SM_LOCK;
    freeGroup(p);
    RELEASE_SM_LOCK;
}

732
733
734
735
736
737
738
739
740
741
742
void
freeChain(bdescr *bd)
{
  bdescr *next_bd;
  while (bd != NULL) {
    next_bd = bd->link;
    freeGroup(bd);
    bd = next_bd;
  }
}

743
744
745
746
747
748
749
750
void
freeChain_lock(bdescr *bd)
{
    ACQUIRE_SM_LOCK;
    freeChain(bd);
    RELEASE_SM_LOCK;
}

751
static void
Simon Marlow's avatar
Simon Marlow committed
752
initMBlock(void *mblock, uint32_t node)
753
{
Simon Marlow's avatar
Simon Marlow committed
754
755
756
757
758
759
760
761
    bdescr *bd;
    StgWord8 *block;

    /* the first few Bdescr's in a block are unused, so we don't want to
     * put them all on the free list.
     */
    block = FIRST_BLOCK(mblock);
    bd    = FIRST_BDESCR(mblock);
762

Simon Marlow's avatar
Simon Marlow committed
763
764
    /* Initialise the start field of each block descriptor
     */
765
    for (; block <= (StgWord8*)LAST_BLOCK(mblock); bd += 1,
Simon Marlow's avatar
Simon Marlow committed
766
767
             block += BLOCK_SIZE) {
        bd->start = (void*)block;
Simon Marlow's avatar
Simon Marlow committed
768
        bd->node = node;
Simon Marlow's avatar
Simon Marlow committed
769
    }
770
771
}

Simon Marlow's avatar
Simon Marlow committed
772
773
774
775
/* -----------------------------------------------------------------------------
   Stats / metrics
   -------------------------------------------------------------------------- */

Simon Marlow's avatar
Simon Marlow committed
776
W_
Simon Marlow's avatar
Simon Marlow committed
777
778
countBlocks(bdescr *bd)
{
Simon Marlow's avatar
Simon Marlow committed
779
    W_ n;
Simon Marlow's avatar
Simon Marlow committed
780
    for (n=0; bd != NULL; bd=bd->link) {
781
        n += bd->blocks;
Simon Marlow's avatar
Simon Marlow committed
782
783
784
785
786
787
788
789
790
    }
    return n;
}

// (*1) Just like countBlocks, except that we adjust the count for a
// megablock group so that it doesn't include the extra few blocks
// that would be taken up by block descriptors in the second and
// subsequent megablock.  This is so we can tally the count with the
// number of blocks allocated in the system, for memInventory().
Simon Marlow's avatar
Simon Marlow committed
791
W_
Simon Marlow's avatar
Simon Marlow committed
792
793
countAllocdBlocks(bdescr *bd)
{
Simon Marlow's avatar
Simon Marlow committed
794
    W_ n;
Simon Marlow's avatar
Simon Marlow committed
795
    for (n=0; bd != NULL; bd=bd->link) {
796
        n += bd->blocks;
gcampax's avatar
gcampax committed
797

798
799
800
801
802
        // hack for megablock groups: see (*1) above
        if (bd->blocks > BLOCKS_PER_MBLOCK) {
            n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
                * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
        }
Simon Marlow's avatar
Simon Marlow committed
803
804
805
806
    }
    return n;
}

807
void returnMemoryToOS(uint32_t n /* megablocks */)
808
{
Simon Marlow's avatar
Simon Marlow committed
809
810
    bdescr *bd;
    uint32_t node;
811
    StgWord size;
812

Simon Marlow's avatar
Simon Marlow committed
813
    // ToDo: not fair, we free all the memory starting with node 0.
Simon Marlow's avatar
Simon Marlow committed
814
    for (node = 0; n > 0 && node < n_numa_nodes; node++) {
Simon Marlow's avatar
Simon Marlow committed
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
        bd = free_mblock_list[node];
        while ((n > 0) && (bd != NULL)) {
            size = BLOCKS_TO_MBLOCKS(bd->blocks);
            if (size > n) {
                StgWord newSize = size - n;
                char *freeAddr = MBLOCK_ROUND_DOWN(bd->start);
                freeAddr += newSize * MBLOCK_SIZE;
                bd->blocks = MBLOCK_GROUP_BLOCKS(newSize);
                freeMBlocks(freeAddr, n);
                n = 0;
            }
            else {
                char *freeAddr = MBLOCK_ROUND_DOWN(bd->start);
                n -= size;
                bd = bd->link;
                freeMBlocks(freeAddr, size);
            }
832
        }
Simon Marlow's avatar
Simon Marlow committed
833
        free_mblock_list[node] = bd;
834
835
    }

836
837
838
839
840
841
842
843
    // Ask the OS to release any address space portion
    // that was associated with the just released MBlocks
    //
    // Historically, we used to ask the OS directly (via
    // osReleaseFreeMemory()) - now the MBlock layer might
    // have a reason to preserve the address space range,
    // so we keep it
    releaseFreeMemory();
844

845
846
847
848
849
850
851
852
    IF_DEBUG(gc,
        if (n != 0) {
            debugBelch("Wanted to free %d more MBlocks than are freeable\n",
                       n);
        }
    );
}

853
854
855
856
/* -----------------------------------------------------------------------------
   Debugging
   -------------------------------------------------------------------------- */

Ben Gamari's avatar
Ben Gamari committed
857
#if defined(DEBUG)
858
static void
Simon Marlow's avatar
Simon Marlow committed
859
check_tail (bdescr *bd)
860
{
Simon Marlow's avatar
Simon Marlow committed
861
    bdescr *tail = tail_of(bd);
862

Simon Marlow's avatar
Simon Marlow committed
863
864
865
866
867
    if (tail != bd)
    {
        ASSERT(tail->blocks == 0);
        ASSERT(tail->free == 0);
        ASSERT(tail->link == bd);
868
869
870
    }
}

871
872
873
void
checkFreeListSanity(void)
{
Simon Marlow's avatar
Simon Marlow committed
874
    bdescr *bd, *prev;
875
    StgWord ln, min;
Simon Marlow's avatar
Simon Marlow committed
876
    uint32_t node;
877

Simon Marlow's avatar
Simon Marlow committed
878
    for (node = 0; node < n_numa_nodes; node++) {
Simon Marlow's avatar
Simon Marlow committed
879
880
        min = 1;
        for (ln = 0; ln < NUM_FREE_LISTS; ln++) {
881
            IF_DEBUG(block_alloc,
Simon Marlow's avatar
Simon Marlow committed
882
                     debugBelch("free block list [%" FMT_Word "]:\n", ln));
Simon Marlow's avatar
Simon Marlow committed
883

Simon Marlow's avatar
Simon Marlow committed
884
885
            prev = NULL;
            for (bd = free_list[node][ln]; bd != NULL; prev = bd, bd = bd->link)
Simon Marlow's avatar
Simon Marlow committed
886
            {
Simon Marlow's avatar
Simon Marlow committed
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
                IF_DEBUG(block_alloc,
                         debugBelch("group at %p, length %ld blocks\n",
                                    bd->start, (long)bd->blocks));
                ASSERT(bd->free == (P_)-1);
                ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
                ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
                ASSERT(bd->link != bd); // catch easy loops
                ASSERT(bd->node == node);

                check_tail(bd);

                if (prev)
                    ASSERT(bd->u.back == prev);
                else
                    ASSERT(bd->u.back == NULL);

903
                {
Simon Marlow's avatar
Simon Marlow committed
904
905
906
907
908
909
                    bdescr *next;
                    next = bd + bd->blocks;
                    if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
                    {
                        ASSERT(next->free != (P_)-1);
                    }
910
                }
Simon Marlow's avatar
Simon Marlow committed
911
            }
Simon Marlow's avatar
Simon Marlow committed
912
            min = min << 1;
Simon Marlow's avatar
Simon Marlow committed
913
914
        }

Simon Marlow's avatar
Simon Marlow committed
915
916
917
918
919
920
        prev = NULL;
        for (bd = free_mblock_list[node]; bd != NULL; prev = bd, bd = bd->link)
        {
            IF_DEBUG(block_alloc,
                     debugBelch("mega group at %p, length %ld blocks\n",
                                bd->start, (long)bd->blocks));
Simon Marlow's avatar
Simon Marlow committed
921

Simon Marlow's avatar
Simon Marlow committed
922
            ASSERT(bd->link != bd); // catch easy loops
Simon Marlow's avatar
Simon Marlow committed
923

Simon Marlow's avatar
Simon Marlow committed
924
925
926
927
928
            if (bd->link != NULL)
            {
                // make sure the list is sorted
                ASSERT(bd->start < bd->link->start);
            }
Simon Marlow's avatar
Simon Marlow committed
929

Simon Marlow's avatar
Simon Marlow committed
930
931
932
            ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK);
            ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks))
                   == bd->blocks);
Simon Marlow's avatar
Simon Marlow committed
933

Simon Marlow's avatar
Simon Marlow committed
934
935
936
937
938
939
940
            // make sure we're fully coalesced
            if (bd->link != NULL)
            {
                ASSERT(MBLOCK_ROUND_DOWN(bd->link) !=
                       (StgWord8*)MBLOCK_ROUND_DOWN(bd) +
                       BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
            }
Simon Marlow's avatar
Simon Marlow committed
941
        }
942
943
    }
}
944

Simon Marlow's avatar
Simon Marlow committed
945
W_ /* BLOCKS */
946
947
948
countFreeList(void)
{
  bdescr *bd;
949
  W_ total_blocks = 0;
950
  StgWord ln;
Simon Marlow's avatar
Simon Marlow committed
951
  uint32_t node;
952

Simon Marlow's avatar
Simon Marlow committed
953
  for (node = 0; node < n_numa_nodes; node++) {
Simon Marlow's avatar
Simon Marlow committed
954
955
956
957
958
959
960
961
962
963
964
      for (ln=0; ln < NUM_FREE_LISTS; ln++) {
          for (bd = free_list[node][ln]; bd != NULL; bd = bd->link) {
              total_blocks += bd->blocks;
          }
      }
      for (bd = free_mblock_list[node]; bd != NULL; bd = bd->link) {
          total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
          // The caller of this function, memInventory(), expects to match
          // the total number of blocks in the system against mblocks *
          // BLOCKS_PER_MBLOCK, so we must subtract the space for the
          // block descriptors from *every* mblock.
965
      }
966
967
968
  }
  return total_blocks;
}
969
970
971
972
973
974
975
976
977
978
979
980
981

void
markBlocks (bdescr *bd)
{
    for (; bd != NULL; bd = bd->link) {
        bd->flags |= BF_KNOWN;
    }
}

void
reportUnmarkedBlocks (void)
{
    void *mblock;
982
    void *state;
983
984
985
    bdescr *bd;

    debugBelch("Unreachable blocks:\n");
986
987
    for (mblock = getFirstMBlock(&state); mblock != NULL;
         mblock = getNextMBlock(&state, mblock)) {
988
989
990
991
992
        for (bd = FIRST_BDESCR(mblock); bd <= LAST_BDESCR(mblock); ) {
            if (!(bd->flags & BF_KNOWN) && bd->free != (P_)-1) {
                debugBelch("  %p\n",bd);
            }
            if (bd->blocks >= BLOCKS_PER_MBLOCK) {
Simon Marlow's avatar
Simon Marlow committed
993
994
                mblock = (StgWord8*)mblock +
                    (BLOCKS_TO_MBLOCKS(bd->blocks) - 1) * MBLOCK_SIZE;
995
996
997
998
999
1000
1001
1002
                break;
            } else {
                bd += bd->blocks;
            }
        }
    }
}

1003
#endif