RetainerProfile.c 71 KB
Newer Older
1 2 3 4 5 6 7 8 9
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2001
 * Author: Sungwoo Park
 *
 * Retainer profiling.
 *
 * ---------------------------------------------------------------------------*/

Ben Gamari's avatar
Ben Gamari committed
10
#if defined(PROFILING)
11

12
// Turn off inlining when debugging - it obfuscates things
Ben Gamari's avatar
Ben Gamari committed
13
#if defined(DEBUG)
14 15 16 17 18
#define INLINE
#else
#define INLINE inline
#endif

Simon Marlow's avatar
Simon Marlow committed
19
#include "PosixSource.h"
20
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
21

22 23 24 25 26 27
#include "RtsUtils.h"
#include "RetainerProfile.h"
#include "RetainerSet.h"
#include "Schedule.h"
#include "Printer.h"
#include "Weak.h"
Simon Marlow's avatar
Simon Marlow committed
28
#include "sm/Sanity.h"
29 30 31
#include "Profiling.h"
#include "Stats.h"
#include "ProfHeap.h"
32
#include "Apply.h"
David Feuer's avatar
David Feuer committed
33 34
#include "StablePtr.h" /* markStablePtrTable */
#include "StableName.h" /* rememberOldStableNameAddresses */
Simon Marlow's avatar
Simon Marlow committed
35
#include "sm/Storage.h" // for END_OF_STATIC_LIST
36

37 38
/* Note [What is a retainer?]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
Retainer profiling is a profiling technique that gives information why
objects can't be freed and lists the consumers that hold pointers to
the heap objects. It does not list all the objects that keeps references
to the other, because then we would keep too much information that will
make the report unusable, for example the cons element of the list would keep
all the tail cells. As a result we are keeping only the objects of the
certain types, see 'isRetainer()' function for more discussion.

More formal definition of the retainer can be given the following way.

An object p is a retainer object of the object l, if all requirements
hold:

  1. p can be a retainer (see `isRetainer()`)
  2. l is reachable from p
  3. There are no other retainers on the path from p to l.

Exact algorithm and additional information can be found the historical
document 'docs/storage-mgt/rp.tex'. Details that are related to the
RTS implementation may be out of date, but the general
information about the retainers is still applicable.
60 61 62
*/


63 64 65 66 67 68 69 70 71 72 73 74 75
/*
  Note: what to change in order to plug-in a new retainer profiling scheme?
    (1) type retainer in ../includes/StgRetainerProf.h
    (2) retainer function R(), i.e., getRetainerFrom()
    (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
        in RetainerSet.h, if needed.
    (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
 */

/* -----------------------------------------------------------------------------
 * Declarations...
 * -------------------------------------------------------------------------- */

76
static uint32_t retainerGeneration;  // generation
77

78 79 80
static uint32_t numObjectVisited;    // total number of objects visited
static uint32_t timesAnyObjectVisited;  // number of times any objects are
                                        // visited
81 82 83 84 85 86 87 88 89

/*
  The rs field in the profile header of any object points to its retainer
  set in an indirect way: if flip is 0, it points to the retainer set;
  if flip is 1, it points to the next byte after the retainer set (even
  for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
  pointer. See retainerSetOf().
 */

90
StgWord flip = 0;     // flip bit
91 92 93 94 95
                      // must be 0 if DEBUG_RETAINER is on (for static closures)

#define setRetainerSetToNull(c)   \
  (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)

Ben Gamari's avatar
Ben Gamari committed
96
#if defined(DEBUG_RETAINER)
97
static uint32_t sumOfNewCost;        // sum of the cost of each object, computed
98
                                // when the object is first visited
99
static uint32_t sumOfNewCostExtra;   // for those objects not visited during
100
                                // retainer profiling, e.g., MUT_VAR
101
static uint32_t costArray[N_CLOSURE_TYPES];
102

103
uint32_t sumOfCostLinear;            // sum of the costs of all object, computed
104 105
                                // when linearly traversing the heap after
                                // retainer profiling
106
uint32_t costArrayLinear[N_CLOSURE_TYPES];
107 108 109 110 111 112 113 114 115 116 117 118 119
#endif

/* -----------------------------------------------------------------------------
 * Retainer stack - header
 *   Note:
 *     Although the retainer stack implementation could be separated *
 *     from the retainer profiling engine, there does not seem to be
 *     any advantage in doing that; retainer stack is an integral part
 *     of retainer profiling engine and cannot be use elsewhere at
 *     all.
 * -------------------------------------------------------------------------- */

typedef enum {
120 121
    // Object with fixed layout. Keeps an information about that
    // element was processed. (stackPos.next.step)
122
    posTypeStep,
123 124
    // Description of the pointers-first heap object. Keeps information
    // about layout. (stackPos.next.ptrs)
125
    posTypePtrs,
126
    // Keeps SRT bitmap (stackPos.next.srt)
127
    posTypeSRT,
128 129 130
    // Keeps a new object that was not inspected yet. Keeps a parent
    // element (stackPos.next.parent)
    posTypeFresh
131 132 133 134 135 136 137 138
} nextPosType;

typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
139 140 141
        // See StgClosureInfo in InfoTables.h
        StgHalfWord pos;
        StgHalfWord ptrs;
142
        StgPtr payload;
143 144 145 146
    } ptrs;

    // SRT
    struct {
147
        StgClosure *srt;
148 149 150
    } srt;
} nextPos;

151 152
// Tagged stack element, that keeps information how to process
// the next element in the traverse stack.
153 154 155 156 157
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

158 159
// Element in the traverse stack, keeps the element, information
// how to continue processing the element, and it's retainer set.
160 161
typedef struct {
    StgClosure *c;
162 163 164
    // parent of the current object, used
    // when posTypeFresh is set
    StgClosure *parent;
165
    retainer c_child_r;
166 167 168
    stackPos info;
} stackElement;

169
typedef struct {
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
/*
  Invariants:
    firstStack points to the first block group.
    currentStack points to the block group currently being used.
    currentStack->free == stackLimit.
    stackTop points to the topmost byte in the stack of currentStack.
    Unless the whole stack is empty, stackTop must point to the topmost
    object (or byte) in the whole stack. Thus, it is only when the whole stack
    is empty that stackTop == stackLimit (not during the execution of push()
    and pop()).
    stackBottom == currentStack->start.
    stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
  Note:
    When a current stack becomes empty, stackTop is set to point to
    the topmost element on the previous block group so as to satisfy
    the invariants described above.
 */
187 188 189
    bdescr *firstStack;
    bdescr *currentStack;
    stackElement *stackBottom, *stackTop, *stackLimit;
190 191 192 193 194 195 196

/*
  currentStackBoundary is used to mark the current stack chunk.
  If stackTop == currentStackBoundary, it means that the current stack chunk
  is empty. It is the responsibility of the user to keep currentStackBoundary
  valid all the time if it is to be employed.
 */
197
    stackElement *currentStackBoundary;
198

199
#if defined(DEBUG_RETAINER)
200 201 202 203 204 205 206 207 208
/*
  stackSize records the current size of the stack.
  maxStackSize records its high water mark.
  Invariants:
    stackSize <= maxStackSize
  Note:
    stackSize is just an estimate measure of the depth of the graph. The reason
    is that some heap objects have only a single child and may not result
    in a new element being pushed onto the stack. Therefore, at the end of
209
    retainer profiling, maxStackSize is some value no greater
210 211
    than the actual depth of the graph.
 */
212
    int stackSize, maxStackSize;
213
#endif
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
} traverseState;

traverseState g_retainerTraverseState;


static void retainStack(traverseState *, StgClosure *, retainer, StgPtr, StgPtr);
static void retainClosure(traverseState *, StgClosure *, StgClosure *, retainer);
static void retainPushClosure(traverseState *, StgClosure *, StgClosure *, retainer);
static void retainActualPush(traverseState *, stackElement *);

#if defined(DEBUG_RETAINER)
static void belongToHeap(StgPtr p);
static uint32_t checkHeapSanityForRetainerProfiling( void );
#endif

229 230 231 232 233 234 235 236 237

// number of blocks allocated for one stack
#define BLOCKS_IN_STACK 1

/* -----------------------------------------------------------------------------
 * Add a new block group to the stack.
 * Invariants:
 *  currentStack->link == s.
 * -------------------------------------------------------------------------- */
238
static INLINE void
239
newStackBlock( traverseState *ts, bdescr *bd )
240
{
241 242 243 244 245
    ts->currentStack = bd;
    ts->stackTop     = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    ts->stackBottom  = (stackElement *)bd->start;
    ts->stackLimit   = (stackElement *)ts->stackTop;
    bd->free     = (StgPtr)ts->stackLimit;
246 247 248 249 250 251 252
}

/* -----------------------------------------------------------------------------
 * Return to the previous block group.
 * Invariants:
 *   s->link == currentStack.
 * -------------------------------------------------------------------------- */
253
static INLINE void
254
returnToOldStack( traverseState *ts, bdescr *bd )
255
{
256 257 258 259 260
    ts->currentStack = bd;
    ts->stackTop = (stackElement *)bd->free;
    ts->stackBottom = (stackElement *)bd->start;
    ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    bd->free = (StgPtr)ts->stackLimit;
261 262 263 264 265 266
}

/* -----------------------------------------------------------------------------
 *  Initializes the traverse stack.
 * -------------------------------------------------------------------------- */
static void
267
initializeTraverseStack( traverseState *ts )
268
{
269 270
    if (ts->firstStack != NULL) {
        freeChain(ts->firstStack);
271 272
    }

273 274 275
    ts->firstStack = allocGroup(BLOCKS_IN_STACK);
    ts->firstStack->link = NULL;
    ts->firstStack->u.back = NULL;
276

277
    newStackBlock(ts, ts->firstStack);
278 279 280 281 282 283 284 285
}

/* -----------------------------------------------------------------------------
 * Frees all the block groups in the traverse stack.
 * Invariants:
 *   firstStack != NULL
 * -------------------------------------------------------------------------- */
static void
286
closeTraverseStack( traverseState *ts )
287
{
288 289
    freeChain(ts->firstStack);
    ts->firstStack = NULL;
290 291 292
}

/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
293
 * Returns true if the whole stack is empty.
294
 * -------------------------------------------------------------------------- */
Ben Gamari's avatar
Ben Gamari committed
295
static INLINE bool
296
isEmptyRetainerStack( traverseState *ts )
297
{
298
    return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit;
299 300
}

sof's avatar
sof committed
301 302 303
/* -----------------------------------------------------------------------------
 * Returns size of stack
 * -------------------------------------------------------------------------- */
304
W_
305
retainerStackBlocks( void )
sof's avatar
sof committed
306 307
{
    bdescr* bd;
308
    W_ res = 0;
309
    traverseState *ts = &g_retainerTraverseState;
sof's avatar
sof committed
310

311
    for (bd = ts->firstStack; bd != NULL; bd = bd->link)
sof's avatar
sof committed
312 313 314 315 316
      res += bd->blocks;

    return res;
}

317
/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
318
 * Returns true if stackTop is at the stack boundary of the current stack,
319 320
 * i.e., if the current stack chunk is empty.
 * -------------------------------------------------------------------------- */
Ben Gamari's avatar
Ben Gamari committed
321
static INLINE bool
322
isOnBoundary( traverseState *ts )
323
{
324
    return ts->stackTop == ts->currentStackBoundary;
325 326 327 328 329 330 331
}

/* -----------------------------------------------------------------------------
 * Initializes *info from ptrs and payload.
 * Invariants:
 *   payload[] begins with ptrs pointers followed by non-pointers.
 * -------------------------------------------------------------------------- */
332
static INLINE void
333
init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
334 335 336 337 338 339 340 341 342 343
{
    info->type              = posTypePtrs;
    info->next.ptrs.pos     = 0;
    info->next.ptrs.ptrs    = ptrs;
    info->next.ptrs.payload = payload;
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
344
static INLINE StgClosure *
345 346 347
find_ptrs( stackPos *info )
{
    if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
348
        return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
349
    } else {
350
        return NULL;
351 352 353 354 355 356
    }
}

/* -----------------------------------------------------------------------------
 *  Initializes *info from SRT information stored in *infoTable.
 * -------------------------------------------------------------------------- */
357
static INLINE void
358
init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
359
{
360 361 362
    info->type = posTypeSRT;
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable);
363
    } else {
364
        info->next.srt.srt = NULL;
365
    }
366 367
}

368
static INLINE void
369
init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
370
{
Ömer Sinan Ağacan's avatar
Ömer Sinan Ağacan committed
371
    info->type = posTypeSRT;
372 373
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_SRT(infoTable);
374
    } else {
375
        info->next.srt.srt = NULL;
376
    }
377 378 379 380 381
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
382
static INLINE StgClosure *
383 384 385
find_srt( stackPos *info )
{
    StgClosure *c;
386
    if (info->type == posTypeSRT) {
387 388 389
        c = info->next.srt.srt;
        info->next.srt.srt = NULL;
        return c;
390 391 392
    }
}

393 394 395 396
/* -----------------------------------------------------------------------------
 * Pushes an element onto traverse stack
 * -------------------------------------------------------------------------- */
static void
397
retainActualPush(traverseState *ts, stackElement *se) {
398
    bdescr *nbd;      // Next Block Descriptor
399
    if (ts->stackTop - 1 < ts->stackBottom) {
400 401 402 403 404
#if defined(DEBUG_RETAINER)
        // debugBelch("push() to the next stack.\n");
#endif
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
405
        ts->currentStack->free = (StgPtr)ts->stackTop;
406

407
        if (ts->currentStack->link == NULL) {
408 409
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
410 411
            nbd->u.back = ts->currentStack;
            ts->currentStack->link = nbd;
412
        } else
413
            nbd = ts->currentStack->link;
414

415
        newStackBlock(ts, nbd);
416 417 418
    }

    // adjust stackTop (acutal push)
419
    ts->stackTop--;
420 421 422 423
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.
424
    *ts->stackTop = *se;
425 426

#if defined(DEBUG_RETAINER)
427 428 429 430
    ts->stackSize++;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
    debugBelch("stackSize = %d\n", ts->stackSize);
431
#endif
432

433 434 435 436 437 438 439 440 441 442 443
}

/* Push an object onto traverse stack. This method can be used anytime
 * instead of calling retainClosure(), it exists in order to use an
 * explicit stack instead of direct recursion.
 *
 *  *p - object's parent
 *  *c - closure
 *  c_child_r - closure retainer.
 */
static INLINE void
444
retainPushClosure( traverseState *ts, StgClosure *c, StgClosure *p, retainer c_child_r) {
445 446 447 448
    stackElement se;

    se.c = c;
    se.c_child_r = c_child_r;
449
    se.parent = p;
450 451
    se.info.type = posTypeFresh;

452
    retainActualPush(ts, &se);
453 454
};

455 456 457 458
/* -----------------------------------------------------------------------------
 *  push() pushes a stackElement representing the next child of *c
 *  onto the traverse stack. If *c has no child, *first_child is set
 *  to NULL and nothing is pushed onto the stack. If *c has only one
459
 *  child, *c_child is set to that child and nothing is pushed onto
460 461 462 463 464 465
 *  the stack.  If *c has more than two children, *first_child is set
 *  to the first child and a stackElement representing the second
 *  child is pushed onto the stack.

 *  Invariants:
 *     *c_child_r is the most recent retainer of *c's children.
466
 *     *c is not any of TSO, AP, PAP, AP_STACK, which means that
467 468 469
 *        there cannot be any stack objects.
 *  Note: SRTs are considered to  be children as well.
 * -------------------------------------------------------------------------- */
470
static INLINE void
471
push( traverseState *ts, StgClosure *c, retainer c_child_r, StgClosure **first_child )
472 473 474 475
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

Ben Gamari's avatar
Ben Gamari committed
476
#if defined(DEBUG_RETAINER)
477
    debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
478 479 480
#endif

    ASSERT(get_itbl(c)->type != TSO);
481
    ASSERT(get_itbl(c)->type != AP_STACK);
482 483 484 485 486 487 488 489 490 491

    //
    // fill in se
    //

    se.c = c;
    se.c_child_r = c_child_r;

    // fill in se.info
    switch (get_itbl(c)->type) {
492
        // no child, no SRT
493 494 495
    case CONSTR_0_1:
    case CONSTR_0_2:
    case ARR_WORDS:
gcampax's avatar
gcampax committed
496
    case COMPACT_NFDATA:
497 498
        *first_child = NULL;
        return;
499

500
        // one child (fixed), no SRT
501 502
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
503 504
        *first_child = ((StgMutVar *)c)->var;
        return;
505
    case THUNK_SELECTOR:
506 507
        *first_child = ((StgSelector *)c)->selectee;
        return;
508
    case BLACKHOLE:
509 510
        *first_child = ((StgInd *)c)->indirectee;
        return;
511 512
    case CONSTR_1_0:
    case CONSTR_1_1:
513 514
        *first_child = c->payload[0];
        return;
515

516 517 518
        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;
519

520 521
        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
522
    case CONSTR_2_0:
523
        *first_child = c->payload[0];         // return the first pointer
524 525
        se.info.type = posTypeStep;
        se.info.next.step = 2;            // 2 = second
526
        break;
527

528 529
        // three children (fixed), no SRT
        // need to push a stackElement
530 531
    case MVAR_CLEAN:
    case MVAR_DIRTY:
532 533 534
        // head must be TSO and the head of a linked list of TSOs.
        // Shoule it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
535
        se.info.type = posTypeStep;
536 537 538 539
        se.info.next.step = 2;            // 2 = second
        break;

        // three children (fixed), no SRT
540
    case WEAK:
541
        *first_child = ((StgWeak *)c)->key;
542
        se.info.type = posTypeStep;
543 544
        se.info.next.step = 2;
        break;
545

546
        // layout.payload.ptrs, no SRT
547
    case TVAR:
548
    case CONSTR:
Simon Marlow's avatar
Simon Marlow committed
549
    case CONSTR_NOCAF:
550
    case PRIM:
551
    case MUT_PRIM:
552
    case BCO:
553 554 555 556 557 558 559 560
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;   // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
561 562
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
563 564
    case MUT_ARR_PTRS_FROZEN_CLEAN:
    case MUT_ARR_PTRS_FROZEN_DIRTY:
565 566 567 568 569 570 571 572
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // StgMutArrPtr.ptrs, no SRT
573 574
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
575 576
    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
577 578 579 580 581 582
        init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;
583

584
    // layout.payload.ptrs, SRT
585
    case FUN_STATIC:
586 587
    case FUN:           // *c is a heap object.
    case FUN_2_0:
588 589 590 591 592 593
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;
594

595 596
    case THUNK:
    case THUNK_2_0:
597 598 599 600 601 602 603 604 605
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
606 607
    case FUN_1_0:
    case FUN_1_1:
608 609 610 611
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;
612

613 614
    case THUNK_1_0:
    case THUNK_1_1:
615 616 617 618
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;
619

620
    case FUN_0_1:      // *c is a heap object.
621
    case FUN_0_2:
622 623
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
624 625 626 627
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;
628 629 630

    // SRT only
    case THUNK_STATIC:
631
        ASSERT(get_itbl(c)->srt != 0);
632 633
    case THUNK_0_1:
    case THUNK_0_2:
634 635
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
636 637 638 639 640
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

641
    case TREC_CHUNK:
642
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
643
        se.info.type = posTypeStep;
644 645
        se.info.next.step = 0;  // entry no.
        break;
646

647
        // cannot appear
648
    case PAP:
649 650
    case AP:
    case AP_STACK:
651
    case TSO:
652
    case STACK:
653
    case IND_STATIC:
654
        // stack objects
655 656
    case UPDATE_FRAME:
    case CATCH_FRAME:
657
    case UNDERFLOW_FRAME:
658 659 660 661
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
662
        // invalid objects
663 664 665
    case IND:
    case INVALID_OBJECT:
    default:
666
        barf("Invalid object *c in push(): %d", get_itbl(c)->type);
667
        return;
668 669
    }

670
    retainActualPush(ts, &se);
671 672 673 674 675 676 677 678 679 680 681 682
}

/* -----------------------------------------------------------------------------
 *  popOff() and popOffReal(): Pop a stackElement off the traverse stack.
 *  Invariants:
 *    stackTop cannot be equal to stackLimit unless the whole stack is
 *    empty, in which case popOff() is not allowed.
 *  Note:
 *    You can think of popOffReal() as a part of popOff() which is
 *    executed at the end of popOff() in necessary. Since popOff() is
 *    likely to be executed quite often while popOffReal() is not, we
 *    separate popOffReal() from popOff(), which is declared as an
683
 *    INLINE function (for the sake of execution speed).  popOffReal()
684 685 686
 *    is called only within popOff() and nowhere else.
 * -------------------------------------------------------------------------- */
static void
687
popOffReal(traverseState *ts)
688 689 690
{
    bdescr *pbd;    // Previous Block Descriptor

Ben Gamari's avatar
Ben Gamari committed
691
#if defined(DEBUG_RETAINER)
692
    debugBelch("pop() to the previous stack.\n");
693 694
#endif

695 696
    ASSERT(ts->stackTop + 1 == ts->stackLimit);
    ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start);
697

698
    if (ts->firstStack == ts->currentStack) {
699
        // The stack is completely empty.
700 701
        ts->stackTop++;
        ASSERT(ts->stackTop == ts->stackLimit);
Ben Gamari's avatar
Ben Gamari committed
702
#if defined(DEBUG_RETAINER)
703 704 705 706
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
        debugBelch("stackSize = %d\n", ts->stackSize);
707
#endif
708
        return;
709 710 711 712
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
713
    ts->currentStack->free = (StgPtr)ts->stackLimit;
714 715

    // find the previous block descriptor
716
    pbd = ts->currentStack->u.back;
717 718
    ASSERT(pbd != NULL);

719
    returnToOldStack(ts, pbd);
720

Ben Gamari's avatar
Ben Gamari committed
721
#if defined(DEBUG_RETAINER)
722 723 724 725
    ts->stackSize--;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
    debugBelch("stackSize = %d\n", ts->stackSize);
726 727 728
#endif
}

729
static INLINE void
730
popOff(traverseState *ts) {
Ben Gamari's avatar
Ben Gamari committed
731
#if defined(DEBUG_RETAINER)
732
    debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
733 734
#endif

735 736
    ASSERT(ts->stackTop != ts->stackLimit);
    ASSERT(!isEmptyRetainerStack(ts));
737 738

    // <= (instead of <) is wrong!
739 740
    if (ts->stackTop + 1 < ts->stackLimit) {
        ts->stackTop++;
Ben Gamari's avatar
Ben Gamari committed
741
#if defined(DEBUG_RETAINER)
742 743 744 745
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
        debugBelch("stackSize = %d\n", ts->stackSize);
746
#endif
747
        return;
748 749
    }

750
    popOffReal(ts);
751 752 753 754 755
}

/* -----------------------------------------------------------------------------
 *  Finds the next object to be considered for retainer profiling and store
 *  its pointer to *c.
756 757 758
 *  If the unprocessed object was stored in the stack (posTypeFresh), the
 *  this object is returned as-is. Otherwise Test if the topmost stack
 *  element indicates that more objects are left,
759 760 761 762 763 764
 *  and if so, retrieve the first object and store its pointer to *c. Also,
 *  set *cp and *r appropriately, both of which are stored in the stack element.
 *  The topmost stack element then is overwritten so as for it to now denote
 *  the next object.
 *  If the topmost stack element indicates no more objects are left, pop
 *  off the stack element until either an object can be retrieved or
Ben Gamari's avatar
Ben Gamari committed
765
 *  the current stack chunk becomes empty, indicated by true returned by
766 767 768 769 770
 *  isOnBoundary(), in which case *c is set to NULL.
 *  Note:
 *    It is okay to call this function even when the current stack chunk
 *    is empty.
 * -------------------------------------------------------------------------- */
771
static INLINE void
772
pop( traverseState *ts, StgClosure **c, StgClosure **cp, retainer *r )
773 774 775
{
    stackElement *se;

Ben Gamari's avatar
Ben Gamari committed
776
#if defined(DEBUG_RETAINER)
777
    debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
778 779 780
#endif

    do {
781
        if (isOnBoundary(ts)) {     // if the current stack chunk is depleted
782 783 784 785
            *c = NULL;
            return;
        }

786
        se = ts->stackTop;
787

788 789
        // If this is a top-level element, you should pop that out.
        if (se->info.type == posTypeFresh) {
790
            *cp = se->parent;
791 792
            *c = se->c;
            *r = se->c_child_r;
793
            popOff(ts);
794 795 796
            return;
        }

797 798 799 800 801 802 803
        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
            *cp = se->c;
            *r = se->c_child_r;
804
            popOff(ts);
805 806 807 808
            return;

            // three children (fixed), no SRT
            // need to push a stackElement
809 810
        case MVAR_CLEAN:
        case MVAR_DIRTY:
811 812 813 814 815 816
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = ((StgMVar *)se->c)->value;
817
                popOff(ts);
818 819 820 821 822 823 824 825 826 827 828 829 830
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
                // no popOff
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
831
                popOff(ts);
832 833 834 835 836 837 838 839 840 841 842 843
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
844 845
            uint32_t entry_no = se->info.next.step >> 2;
            uint32_t field_no = se->info.next.step & 3;
846 847
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
848
                popOff(ts);
849
                break;
850 851 852 853 854 855 856 857 858 859 860 861 862 863
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            *cp = se->c;
            *r = se->c_child_r;
            se->info.next.step++;
            return;
        }
864

865 866
        case TVAR:
        case CONSTR:
867 868 869 870 871 872
        case PRIM:
        case MUT_PRIM:
        case BCO:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
873 874
        case MUT_ARR_PTRS_FROZEN_CLEAN:
        case MUT_ARR_PTRS_FROZEN_DIRTY:
875 876 877 878
        case SMALL_MUT_ARR_PTRS_CLEAN:
        case SMALL_MUT_ARR_PTRS_DIRTY:
        case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
        case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
879 880
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
881
                popOff(ts);
882 883 884 885 886 887 888 889
                break;
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
890
        case FUN_STATIC:
891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
            if (*c != NULL) {
                *cp = se->c;
                *r = se->c_child_r;
                return;
            }
933
            popOff(ts);
934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949
            break;

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
950 951
        case STACK:
        case IND_STATIC:
Simon Marlow's avatar
Simon Marlow committed
952
        case CONSTR_NOCAF:
953
            // stack objects
954
        case UPDATE_FRAME:
955
        case CATCH_FRAME:
956 957
        case UNDERFLOW_FRAME:
        case STOP_FRAME:
958 959 960 961 962 963 964
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case INVALID_OBJECT:
        default:
965
            barf("Invalid object *c in pop(): %d", get_itbl(se->c)->type);
966 967
            return;
        }
Ben Gamari's avatar
Ben Gamari committed
968
    } while (true);
969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987
}

/* -----------------------------------------------------------------------------
 * RETAINER PROFILING ENGINE
 * -------------------------------------------------------------------------- */

void
initRetainerProfiling( void )
{
    initializeAllRetainerSet();
    retainerGeneration = 0;
}

/* -----------------------------------------------------------------------------
 *  This function must be called before f-closing prof_file.
 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
Ben Gamari's avatar
Ben Gamari committed
988
#if defined(SECOND_APPROACH)
989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006
    outputAllRetainerSet(prof_file);
#endif
}

/* -----------------------------------------------------------------------------
 *  Returns the actual pointer to the retainer set of the closure *c.
 *  It may adjust RSET(c) subject to flip.
 *  Side effects:
 *    RSET(c) is initialized to NULL if its current value does not
 *    conform to flip.
 *  Note:
 *    Even though this function has side effects, they CAN be ignored because
 *    subsequent calls to retainerSetOf() always result in the same return value
 *    and retainerSetOf() is the only way to retrieve retainerSet of a given
 *    closure.
 *    We have to perform an XOR (^) operation each time a closure is examined.
 *    The reason is that we do not know when a closure is visited last.
 * -------------------------------------------------------------------------- */
1007
static INLINE void
1008 1009 1010
maybeInitRetainerSet( StgClosure *c )
{
    if (!isRetainerSetFieldValid(c)) {
1011
        setRetainerSetToNull(c);
1012 1013 1014 1015
    }
}

/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
1016
 * Returns true if *c is a retainer.
1017 1018 1019 1020 1021 1022
 * In general the retainers are the objects that may be the roots of the
 * collection. Basically this roots represents programmers threads
 * (TSO) with their stack and thunks.
 *
 * In addition we mark all mutable objects as a retainers, the reason for
 * that decision is lost in time.
1023
 * -------------------------------------------------------------------------- */
Ben Gamari's avatar
Ben Gamari committed
1024
static INLINE bool
1025 1026 1027
isRetainer( StgClosure *c )
{
    switch (get_itbl(c)->type) {
1028 1029 1030 1031
        //
        //  True case
        //
        // TSOs MUST be retainers: they constitute the set of roots.
1032
    case TSO:
1033
    case STACK:
1034

1035
        // mutable objects
1036
    case MUT_PRIM:
1037 1038
    case MVAR_CLEAN:
    case MVAR_DIRTY:
1039
    case TVAR:
1040 1041
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
1042 1043
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
1044 1045 1046
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
    case BLOCKING_QUEUE:
1047

1048
        // thunks are retainers.
1049 1050 1051 1052 1053 1054 1055
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
1056 1057
    case AP:
    case AP_STACK:
1058

1059
        // Static thunks, or CAFS, are obviously retainers.
1060 1061
    case THUNK_STATIC:

1062 1063
        // WEAK objects are roots; there is separate code in which traversing
        // begins from WEAK objects.
1064
    case WEAK:
Ben Gamari's avatar
Ben Gamari committed
1065
        return true;
1066

1067 1068 1069
        //
        // False case
        //
1070

1071
        // constructors
1072
    case CONSTR:
Simon Marlow's avatar
Simon Marlow committed
1073
    case CONSTR_NOCAF:
1074 1075 1076 1077 1078
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
1079
        // functions
1080 1081 1082 1083 1084 1085
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
1086
        // partial applications
1087
    case PAP:
1088
        // indirection
Ian Lynagh's avatar
Ian Lynagh committed
1089 1090 1091 1092
    // IND_STATIC used to be an error, but at the moment it can happen
    // as isAlive doesn't look through IND_STATIC as it ignores static
    // closures. See trac #3956 for a program that hit this error.
    case IND_STATIC:
1093
    case BLACKHOLE:
1094
    case WHITEHOLE:
1095
        // static objects
1096
    case FUN_STATIC:
1097
        // misc
1098
    case PRIM:
1099 1100
    case BCO:
    case ARR_WORDS:
1101
    case COMPACT_NFDATA:
1102
        // STM
1103
    case TREC_CHUNK:
1104
        // immutable arrays
1105 1106 1107 1108
    case MUT_ARR_PTRS_FROZEN_CLEAN:
    case MUT_ARR_PTRS_FROZEN_DIRTY:
    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
Ben Gamari's avatar
Ben Gamari committed
1109
        return false;
1110

1111 1112 1113 1114 1115
        //
        // Error case
        //
        // Stack objects are invalid because they are never treated as
        // legal objects during retainer profiling.
1116 1117
    case UPDATE_FRAME:
    case CATCH_FRAME:
1118 1119
    case CATCH_RETRY_FRAME:
    case CATCH_STM_FRAME:
1120
    case UNDERFLOW_FRAME:
1121
    case ATOMICALLY_FRAME:
1122 1123 1124 1125
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
1126
    case RET_FUN:
1127
        // other cases
1128 1129 1130
    case IND:
    case INVALID_OBJECT:
    default:
1131
        barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
Ben Gamari's avatar
Ben Gamari committed
1132
        return false;
1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144
    }
}

/* -----------------------------------------------------------------------------
 *  Returns the retainer function value for the closure *c, i.e., R(*c).
 *  This function does NOT return the retainer(s) of *c.
 *  Invariants:
 *    *c must be a retainer.
 *  Note:
 *    Depending on the definition of this function, the maintenance of retainer
 *    sets can be made easier. If most retainer sets are likely to be created
 *    again across garbage collections, refreshAllRetainerSet() in
1145
 *    RetainerSet.c can simply do nothing.
1146 1147 1148 1149
 *    If this is not the case, we can free all the retainer sets and
 *    re-initialize the hash table.
 *    See refreshAllRetainerSet() in RetainerSet.c.
 * -------------------------------------------------------------------------- */
1150
static INLINE retainer
1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164
getRetainerFrom( StgClosure *c )
{
    ASSERT(isRetainer(c));

    return c->header.prof.ccs;
}

/* -----------------------------------------------------------------------------
 *  Associates the retainer set *s with the closure *c, that is, *s becomes
 *  the retainer set of *c.
 *  Invariants:
 *    c != NULL
 *    s != NULL
 * -------------------------------------------------------------------------- */
1165
static INLINE void
1166
associate( StgClosure *c, RetainerSet *s )
1167 1168 1169 1170 1171 1172
{
    // StgWord has the same size as pointers, so the following type
    // casting is okay.
    RSET(c) = (RetainerSet *)((StgWord)s | flip);
}

1173
/* -----------------------------------------------------------------------------
1174
   Call retainPushClosure for each of the closures covered by a large bitmap.
1175 1176 1177
   -------------------------------------------------------------------------- */

static void
1178 1179
retain_large_bitmap (traverseState *ts, StgPtr p, StgLargeBitmap *large_bitmap,
                     uint32_t size, StgClosure *c, retainer c_child_r)