RetainerProfile.c 60.1 KB
Newer Older
1 2 3 4 5 6 7 8 9
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2001
 * Author: Sungwoo Park
 *
 * Retainer profiling.
 *
 * ---------------------------------------------------------------------------*/

Ben Gamari's avatar
Ben Gamari committed
10
#if defined(PROFILING)
11

12
// Turn off inlining when debugging - it obfuscates things
Ben Gamari's avatar
Ben Gamari committed
13
#if defined(DEBUG)
14 15 16 17 18
#define INLINE
#else
#define INLINE inline
#endif

Simon Marlow's avatar
Simon Marlow committed
19
#include "PosixSource.h"
20
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
21

22 23 24 25 26 27
#include "RtsUtils.h"
#include "RetainerProfile.h"
#include "RetainerSet.h"
#include "Schedule.h"
#include "Printer.h"
#include "Weak.h"
Simon Marlow's avatar
Simon Marlow committed
28
#include "sm/Sanity.h"
29 30 31
#include "Profiling.h"
#include "Stats.h"
#include "ProfHeap.h"
32
#include "Apply.h"
David Feuer's avatar
David Feuer committed
33 34
#include "StablePtr.h" /* markStablePtrTable */
#include "StableName.h" /* rememberOldStableNameAddresses */
Simon Marlow's avatar
Simon Marlow committed
35
#include "sm/Storage.h" // for END_OF_STATIC_LIST
36

37 38
/* Note [What is a retainer?]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~
39 40
Retainer profiling is a profiling technique that gives information why
objects can't be freed and lists the consumers that hold pointers to
41
the heap objects. It does not list all the objects that keep references
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
to the other, because then we would keep too much information that will
make the report unusable, for example the cons element of the list would keep
all the tail cells. As a result we are keeping only the objects of the
certain types, see 'isRetainer()' function for more discussion.

More formal definition of the retainer can be given the following way.

An object p is a retainer object of the object l, if all requirements
hold:

  1. p can be a retainer (see `isRetainer()`)
  2. l is reachable from p
  3. There are no other retainers on the path from p to l.

Exact algorithm and additional information can be found the historical
document 'docs/storage-mgt/rp.tex'. Details that are related to the
RTS implementation may be out of date, but the general
information about the retainers is still applicable.
60 61 62
*/


63 64 65 66 67 68 69 70 71 72 73 74 75
/*
  Note: what to change in order to plug-in a new retainer profiling scheme?
    (1) type retainer in ../includes/StgRetainerProf.h
    (2) retainer function R(), i.e., getRetainerFrom()
    (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
        in RetainerSet.h, if needed.
    (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
 */

/* -----------------------------------------------------------------------------
 * Declarations...
 * -------------------------------------------------------------------------- */

76
static uint32_t retainerGeneration;  // generation
77

78 79 80
static uint32_t numObjectVisited;    // total number of objects visited
static uint32_t timesAnyObjectVisited;  // number of times any objects are
                                        // visited
81

82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
/** Note [Profiling heap traversal visited bit]
 *
 * If the RTS is compiled with profiling enabled StgProfHeader can be used by
 * profiling code to store per-heap object information.
 *
 * When using the generic heap traversal code we use this field to store
 * profiler specific information. However we reserve the LSB of the *entire*
 * 'trav' union (which will overlap with the other fields) for the generic
 * traversal code. We use the bit to decide whether we've already visited this
 * closure in this pass or not. We do this as the heap may contain cyclic
 * references, it being a graph and all, so we would likely just infinite loop
 * if we didn't.
 *
 * We assume that at least the LSB of the largest field in the corresponding
 * union is insignificant. This is true at least for the word aligned pointers
 * which the retainer profiler currently stores there and should be maintained
 * by new users of the 'trav' union.
 *
 * Now the way the traversal works is that the interpretation of the "visited?"
 * bit depends on the value of the global 'flip' variable. We don't want to have
 * to do another pass over the heap just to reset the bit to zero so instead on
 * each traversal (i.e. each run of the profiling code) we invert the value of
 * the global 'flip' variable. We interpret this as resetting all the "visited?"
 * flags on the heap.
 *
 * There is one exception to this rule, namely: static objects. There we do just
 * go over the heap and reset the bit manually. See
109
 * 'resetStaticObjectForProfiling'.
110
 */
111
StgWord flip = 0;     // flip bit
112 113
                      // must be 0 if DEBUG_RETAINER is on (for static closures)

114 115
#define setTravDataToZero(c) \
  (c)->header.prof.hp.trav.lsb = flip
116 117 118 119 120 121 122 123 124 125 126 127

/* -----------------------------------------------------------------------------
 * Retainer stack - header
 *   Note:
 *     Although the retainer stack implementation could be separated *
 *     from the retainer profiling engine, there does not seem to be
 *     any advantage in doing that; retainer stack is an integral part
 *     of retainer profiling engine and cannot be use elsewhere at
 *     all.
 * -------------------------------------------------------------------------- */

typedef enum {
128 129
    // Object with fixed layout. Keeps an information about that
    // element was processed. (stackPos.next.step)
130
    posTypeStep,
131 132
    // Description of the pointers-first heap object. Keeps information
    // about layout. (stackPos.next.ptrs)
133
    posTypePtrs,
134
    // Keeps SRT bitmap (stackPos.next.srt)
135
    posTypeSRT,
136 137 138
    // Keeps a new object that was not inspected yet. Keeps a parent
    // element (stackPos.next.parent)
    posTypeFresh
139 140 141 142 143 144 145 146
} nextPosType;

typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
147 148 149
        // See StgClosureInfo in InfoTables.h
        StgHalfWord pos;
        StgHalfWord ptrs;
150
        StgPtr payload;
151 152 153 154
    } ptrs;

    // SRT
    struct {
155
        StgClosure *srt;
156 157 158
    } srt;
} nextPos;

159 160 161 162
/**
 * Position pointer into a closure. Determines what the next element to return
 * for a stackElement is.
 */
163 164 165 166 167
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

168 169 170 171 172 173 174
typedef union {
     /**
      * Most recent retainer for the corresponding closure on the stack.
      */
    retainer c_child_r;
} stackData;

175 176 177 178 179 180 181 182 183 184 185
/**
 * An element of the traversal work-stack. Besides the closure itself this also
 * stores it's parent and associated data.
 *
 * When 'info.type == posTypeFresh' a 'stackElement' represents just one
 * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an
 * offset into the children of 'c'. This is to support returning a closure's
 * children one-by-one without pushing one element per child onto the stack. See
 * traversePushChildren() and traversePop().
 *
 */
186 187
typedef struct {
    stackPos info;
188
    StgClosure *c;
189
    StgClosure *cp; // parent of 'c'. Only used when info.type == posTypeFresh.
190
    stackData data;
191 192
} stackElement;

193
typedef struct {
194 195
/*
  Invariants:
196

197
    firstStack points to the first block group.
198

199
    currentStack points to the block group currently being used.
200

201
    currentStack->free == stackLimit.
202

203
    stackTop points to the topmost byte in the stack of currentStack.
204

205 206
    Unless the whole stack is empty, stackTop must point to the topmost
    object (or byte) in the whole stack. Thus, it is only when the whole stack
207 208 209
    is empty that stackTop == stackLimit (not during the execution of
    pushStackElement() and popStackElement()).

210
    stackBottom == currentStack->start.
211

212
    stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
213 214


215
  Note:
216

217 218 219 220
    When a current stack becomes empty, stackTop is set to point to
    the topmost element on the previous block group so as to satisfy
    the invariants described above.
 */
221 222 223
    bdescr *firstStack;
    bdescr *currentStack;
    stackElement *stackBottom, *stackTop, *stackLimit;
224 225 226 227 228 229 230

/*
  currentStackBoundary is used to mark the current stack chunk.
  If stackTop == currentStackBoundary, it means that the current stack chunk
  is empty. It is the responsibility of the user to keep currentStackBoundary
  valid all the time if it is to be employed.
 */
231
    stackElement *currentStackBoundary;
232 233 234 235 236 237 238 239 240 241

/*
  stackSize records the current size of the stack.
  maxStackSize records its high water mark.
  Invariants:
    stackSize <= maxStackSize
  Note:
    stackSize is just an estimate measure of the depth of the graph. The reason
    is that some heap objects have only a single child and may not result
    in a new element being pushed onto the stack. Therefore, at the end of
242
    retainer profiling, maxStackSize is some value no greater
243 244
    than the actual depth of the graph.
 */
245 246 247
    int stackSize, maxStackSize;
} traverseState;

248 249
/**
 * Callback called when heap traversal visits a closure.
250 251 252 253 254 255 256 257 258 259 260 261 262 263
 *
 * Before this callback is called the profiling header of the visited closure
 * 'c' is zero'd with 'setTravDataToZero' if this closure hasn't been visited in
 * this run yet. See Note [Profiling heap traversal visited bit].
 *
 * Return 'true' when this is not the first visit to this element. The generic
 * traversal code will then skip traversing the children.
 */
typedef bool (*visitClosure_cb) (
    const StgClosure *c,
    const StgClosure *cp,
    const stackData data,
    stackData *child_data);

264 265 266
traverseState g_retainerTraverseState;


267 268 269
static void traverseStack(traverseState *, StgClosure *, stackData, StgPtr, StgPtr);
static void traverseClosure(traverseState *, StgClosure *, StgClosure *, retainer);
static void traversePushClosure(traverseState *, StgClosure *, StgClosure *, stackData);
270

271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
#if defined(DEBUG)
unsigned int g_traversalDebugLevel = 0;
static inline void debug(const char *s, ...)
{
    va_list ap;

    if(g_traversalDebugLevel == 0)
        return;

    va_start(ap,s);
    vdebugBelch(s, ap);
    va_end(ap);
}
#else
#define debug(...)
#endif
287 288 289 290 291 292 293 294 295

// number of blocks allocated for one stack
#define BLOCKS_IN_STACK 1

/* -----------------------------------------------------------------------------
 * Add a new block group to the stack.
 * Invariants:
 *  currentStack->link == s.
 * -------------------------------------------------------------------------- */
296
static INLINE void
297
newStackBlock( traverseState *ts, bdescr *bd )
298
{
299 300 301 302 303
    ts->currentStack = bd;
    ts->stackTop     = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    ts->stackBottom  = (stackElement *)bd->start;
    ts->stackLimit   = (stackElement *)ts->stackTop;
    bd->free     = (StgPtr)ts->stackLimit;
304 305 306 307 308 309 310
}

/* -----------------------------------------------------------------------------
 * Return to the previous block group.
 * Invariants:
 *   s->link == currentStack.
 * -------------------------------------------------------------------------- */
311
static INLINE void
312
returnToOldStack( traverseState *ts, bdescr *bd )
313
{
314 315 316 317 318
    ts->currentStack = bd;
    ts->stackTop = (stackElement *)bd->free;
    ts->stackBottom = (stackElement *)bd->start;
    ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    bd->free = (StgPtr)ts->stackLimit;
319 320
}

321 322 323
/**
 *  Initializes the traversal work-stack.
 */
324
static void
325
initializeTraverseStack( traverseState *ts )
326
{
327 328
    if (ts->firstStack != NULL) {
        freeChain(ts->firstStack);
329 330
    }

331 332 333
    ts->firstStack = allocGroup(BLOCKS_IN_STACK);
    ts->firstStack->link = NULL;
    ts->firstStack->u.back = NULL;
334

335
    newStackBlock(ts, ts->firstStack);
336 337
}

338 339 340
/**
 * Frees all the block groups in the traversal works-stack.
 *
341 342
 * Invariants:
 *   firstStack != NULL
343
 */
344
static void
345
closeTraverseStack( traverseState *ts )
346
{
347 348
    freeChain(ts->firstStack);
    ts->firstStack = NULL;
349 350 351
}

/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
352
 * Returns true if the whole stack is empty.
353
 * -------------------------------------------------------------------------- */
Ben Gamari's avatar
Ben Gamari committed
354
static INLINE bool
355
isEmptyWorkStack( traverseState *ts )
356
{
357
    return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit;
358 359
}

sof's avatar
sof committed
360 361 362
/* -----------------------------------------------------------------------------
 * Returns size of stack
 * -------------------------------------------------------------------------- */
363
W_
364
traverseWorkStackBlocks(traverseState *ts)
sof's avatar
sof committed
365 366
{
    bdescr* bd;
367
    W_ res = 0;
sof's avatar
sof committed
368

369
    for (bd = ts->firstStack; bd != NULL; bd = bd->link)
sof's avatar
sof committed
370 371 372 373 374
      res += bd->blocks;

    return res;
}

375 376 377 378 379 380
W_
retainerStackBlocks(void)
{
    return traverseWorkStackBlocks(&g_retainerTraverseState);
}

381
/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
382
 * Returns true if stackTop is at the stack boundary of the current stack,
383 384
 * i.e., if the current stack chunk is empty.
 * -------------------------------------------------------------------------- */
Ben Gamari's avatar
Ben Gamari committed
385
static INLINE bool
386
isOnBoundary( traverseState *ts )
387
{
388
    return ts->stackTop == ts->currentStackBoundary;
389 390 391 392 393 394 395
}

/* -----------------------------------------------------------------------------
 * Initializes *info from ptrs and payload.
 * Invariants:
 *   payload[] begins with ptrs pointers followed by non-pointers.
 * -------------------------------------------------------------------------- */
396
static INLINE void
397
init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
398 399 400 401 402 403 404 405 406 407
{
    info->type              = posTypePtrs;
    info->next.ptrs.pos     = 0;
    info->next.ptrs.ptrs    = ptrs;
    info->next.ptrs.payload = payload;
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
408
static INLINE StgClosure *
409 410 411
find_ptrs( stackPos *info )
{
    if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
412
        return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
413
    } else {
414
        return NULL;
415 416 417 418 419 420
    }
}

/* -----------------------------------------------------------------------------
 *  Initializes *info from SRT information stored in *infoTable.
 * -------------------------------------------------------------------------- */
421
static INLINE void
422
init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
423
{
424 425 426
    info->type = posTypeSRT;
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable);
427
    } else {
428
        info->next.srt.srt = NULL;
429
    }
430 431
}

432
static INLINE void
433
init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
434
{
Ömer Sinan Ağacan's avatar
Ömer Sinan Ağacan committed
435
    info->type = posTypeSRT;
436 437
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_SRT(infoTable);
438
    } else {
439
        info->next.srt.srt = NULL;
440
    }
441 442 443 444 445
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
446
static INLINE StgClosure *
447 448 449
find_srt( stackPos *info )
{
    StgClosure *c;
450
    if (info->type == posTypeSRT) {
451 452 453
        c = info->next.srt.srt;
        info->next.srt.srt = NULL;
        return c;
454 455 456
    }
}

457 458 459 460
/**
 * Push a set of closures, represented by a single 'stackElement', onto the
 * traversal work-stack.
 */
461
static void
462 463
pushStackElement(traverseState *ts, stackElement *se)
{
464
    bdescr *nbd;      // Next Block Descriptor
465
    if (ts->stackTop - 1 < ts->stackBottom) {
466 467
        debug("pushStackElement() to the next stack.\n");

468 469
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
470
        ts->currentStack->free = (StgPtr)ts->stackTop;
471

472
        if (ts->currentStack->link == NULL) {
473 474
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
475 476
            nbd->u.back = ts->currentStack;
            ts->currentStack->link = nbd;
477
        } else
478
            nbd = ts->currentStack->link;
479

480
        newStackBlock(ts, nbd);
481 482 483
    }

    // adjust stackTop (acutal push)
484
    ts->stackTop--;
485 486 487 488
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.
489
    *ts->stackTop = *se;
490

491 492 493
    ts->stackSize++;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
494
    debug("stackSize = %d\n", ts->stackSize);
495 496
}

497 498
/**
 * Push a single closure onto the traversal work-stack.
499
 *
500 501 502
 *  cp   - object's parent
 *  c    - closure
 *  data - data associated with closure.
503 504
 */
static INLINE void
505
traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackData data) {
506 507 508
    stackElement se;

    se.c = c;
509 510
    se.cp = cp;
    se.data = data;
511 512
    se.info.type = posTypeFresh;

513
    pushStackElement(ts, &se);
514 515
};

516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
/**
 * traversePushChildren() extracts the first child of 'c' in 'first_child' and
 * conceptually pushes all remaining children of 'c' onto the traversal stack
 * while associating 'data' with the pushed elements to be returned upon poping.
 *
 * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed
 * onto the stack.
 *
 * If 'c' has only one child, 'first_child' is set to that child and nothing is
 * pushed onto the stack.
 *
 * Invariants:
 *
 *  - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot
 *       be any stack objects.
 *
 * Note: SRTs are considered to be children as well.
 *
 * Note: When pushing onto the stack we only really push one 'stackElement'
 * representing all children onto the stack. See traversePop()
 */
537
static INLINE void
538
traversePushChildren(traverseState *ts, StgClosure *c, stackData data, StgClosure **first_child)
539 540 541 542
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

543
    debug("traversePushChildren(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
544 545

    ASSERT(get_itbl(c)->type != TSO);
546
    ASSERT(get_itbl(c)->type != AP_STACK);
547 548 549 550 551 552

    //
    // fill in se
    //

    se.c = c;
553
    se.data = data;
554
    // Note: se.cp ommitted on purpose, only traversePushClosure uses that.
555 556 557

    // fill in se.info
    switch (get_itbl(c)->type) {
558
        // no child, no SRT
559 560 561
    case CONSTR_0_1:
    case CONSTR_0_2:
    case ARR_WORDS:
gcampax's avatar
gcampax committed
562
    case COMPACT_NFDATA:
563 564
        *first_child = NULL;
        return;
565

566
        // one child (fixed), no SRT
567 568
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
569 570
        *first_child = ((StgMutVar *)c)->var;
        return;
571
    case THUNK_SELECTOR:
572 573
        *first_child = ((StgSelector *)c)->selectee;
        return;
574
    case BLACKHOLE:
575 576
        *first_child = ((StgInd *)c)->indirectee;
        return;
577 578
    case CONSTR_1_0:
    case CONSTR_1_1:
579 580
        *first_child = c->payload[0];
        return;
581

582 583 584
        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;
585

586 587
        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
588
    case CONSTR_2_0:
589
        *first_child = c->payload[0];         // return the first pointer
590 591
        se.info.type = posTypeStep;
        se.info.next.step = 2;            // 2 = second
592
        break;
593

594 595
        // three children (fixed), no SRT
        // need to push a stackElement
596 597
    case MVAR_CLEAN:
    case MVAR_DIRTY:
598 599 600
        // head must be TSO and the head of a linked list of TSOs.
        // Shoule it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
601
        se.info.type = posTypeStep;
602 603 604 605
        se.info.next.step = 2;            // 2 = second
        break;

        // three children (fixed), no SRT
606
    case WEAK:
607
        *first_child = ((StgWeak *)c)->key;
608
        se.info.type = posTypeStep;
609 610
        se.info.next.step = 2;
        break;
611

612
        // layout.payload.ptrs, no SRT
613
    case TVAR:
614
    case CONSTR:
Simon Marlow's avatar
Simon Marlow committed
615
    case CONSTR_NOCAF:
616
    case PRIM:
617
    case MUT_PRIM:
618
    case BCO:
619 620 621 622 623 624 625 626
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;   // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
627 628
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
629 630
    case MUT_ARR_PTRS_FROZEN_CLEAN:
    case MUT_ARR_PTRS_FROZEN_DIRTY:
631 632 633 634 635 636 637 638
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // StgMutArrPtr.ptrs, no SRT
639 640
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
641 642
    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
643 644 645 646 647 648
        init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;
649

650
    // layout.payload.ptrs, SRT
651
    case FUN_STATIC:
652 653
    case FUN:           // *c is a heap object.
    case FUN_2_0:
654 655 656 657 658 659
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;
660

661 662
    case THUNK:
    case THUNK_2_0:
663 664 665 666 667 668 669 670 671
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
672 673
    case FUN_1_0:
    case FUN_1_1:
674 675 676 677
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;
678

679 680
    case THUNK_1_0:
    case THUNK_1_1:
681 682 683 684
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;
685

686
    case FUN_0_1:      // *c is a heap object.
687
    case FUN_0_2:
688 689
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
690 691 692 693
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;
694 695 696

    // SRT only
    case THUNK_STATIC:
697
        ASSERT(get_itbl(c)->srt != 0);
698 699
    case THUNK_0_1:
    case THUNK_0_2:
700 701
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
702 703 704 705 706
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

707
    case TREC_CHUNK:
708
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
709
        se.info.type = posTypeStep;
710 711
        se.info.next.step = 0;  // entry no.
        break;
712

713
        // cannot appear
714
    case PAP:
715 716
    case AP:
    case AP_STACK:
717
    case TSO:
718
    case STACK:
719
    case IND_STATIC:
720
        // stack objects
721 722
    case UPDATE_FRAME:
    case CATCH_FRAME:
723
    case UNDERFLOW_FRAME:
724 725 726 727
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
728
        // invalid objects
729 730 731
    case IND:
    case INVALID_OBJECT:
    default:
732
        barf("Invalid object *c in push(): %d", get_itbl(c)->type);
733
        return;
734 735
    }

736 737 738 739
    // se.cp has to be initialized when type==posTypeFresh. We don't do that
    // here though. So type must be !=posTypeFresh.
    ASSERT(se.info.type != posTypeFresh);

740
    pushStackElement(ts, &se);
741 742
}

743 744 745 746
/**
 *  popStackElement(): Remove a depleted stackElement from the top of the
 *  traversal work-stack.
 *
747 748
 *  Invariants:
 *    stackTop cannot be equal to stackLimit unless the whole stack is
749 750
 *    empty, in which case popStackElement() is not allowed.
 */
751
static void
752
popStackElement(traverseState *ts) {
753
    debug("popStackElement(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
754 755 756 757 758 759 760

    ASSERT(ts->stackTop != ts->stackLimit);
    ASSERT(!isEmptyWorkStack(ts));

    // <= (instead of <) is wrong!
    if (ts->stackTop + 1 < ts->stackLimit) {
        ts->stackTop++;
761

762 763 764
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
765 766
        debug("stackSize = (--) %d\n", ts->stackSize);

767 768 769
        return;
    }

770 771
    bdescr *pbd;    // Previous Block Descriptor

772
    debug("popStackElement() to the previous stack.\n");
773

774 775
    ASSERT(ts->stackTop + 1 == ts->stackLimit);
    ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start);
776

777
    if (ts->firstStack == ts->currentStack) {
778
        // The stack is completely empty.
779 780
        ts->stackTop++;
        ASSERT(ts->stackTop == ts->stackLimit);
781

782 783 784
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
785 786
        debug("stackSize = %d\n", ts->stackSize);

787
        return;
788 789 790 791
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
792
    ts->currentStack->free = (StgPtr)ts->stackLimit;
793 794

    // find the previous block descriptor
795
    pbd = ts->currentStack->u.back;
796 797
    ASSERT(pbd != NULL);

798
    returnToOldStack(ts, pbd);
799

800 801 802
    ts->stackSize--;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
803
    debug("stackSize = %d\n", ts->stackSize);
804 805
}

806
/**
807 808
 *  Finds the next object to be considered for retainer profiling and store
 *  its pointer to *c.
809
 *
810 811 812
 *  If the unprocessed object was stored in the stack (posTypeFresh), the
 *  this object is returned as-is. Otherwise Test if the topmost stack
 *  element indicates that more objects are left,
813
 *  and if so, retrieve the first object and store its pointer to *c. Also,
814 815 816
 *  set *cp and *data appropriately, both of which are stored in the stack
 *  element.  The topmost stack element then is overwritten so as for it to now
 *  denote the next object.
817
 *
818 819
 *  If the topmost stack element indicates no more objects are left, pop
 *  off the stack element until either an object can be retrieved or
Ben Gamari's avatar
Ben Gamari committed
820
 *  the current stack chunk becomes empty, indicated by true returned by
821
 *  isOnBoundary(), in which case *c is set to NULL.
822
 *
823
 *  Note:
824
 *
825 826
 *    It is okay to call this function even when the current stack chunk
 *    is empty.
827
 */
828
static INLINE void
829
traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data)
830 831 832
{
    stackElement *se;

833
    debug("traversePop(): stackTop = 0x%x currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
834

835 836 837 838
    // Is this the last internal element? If so instead of modifying the current
    // stackElement in place we actually remove it from the stack.
    bool last = false;

839
    do {
840
        if (isOnBoundary(ts)) {     // if the current stack chunk is depleted
841 842 843 844
            *c = NULL;
            return;
        }

845
        // Note: Below every `break`, where the loop condition is true, must be
846 847
        // accompanied by a popStackElement() otherwise this is an infinite
        // loop.
848
        se = ts->stackTop;
849

850 851
        // If this is a top-level element, you should pop that out.
        if (se->info.type == posTypeFresh) {
852
            *cp = se->cp;
853
            *c = se->c;
854
            *data = se->data;
855
            popStackElement(ts);
856 857 858
            return;
        }

859 860
        // Note: The first ptr of all of these was already returned as
        // *fist_child in push(), so we always start with the second field.
861 862 863 864 865
        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
866 867
            last = true;
            goto out;
868 869 870

            // three children (fixed), no SRT
            // need to push a stackElement
871 872
        case MVAR_CLEAN:
        case MVAR_DIRTY:
873 874 875
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
876
                // no popStackElement
877 878
            } else {
                *c = ((StgMVar *)se->c)->value;
879
                last = true;
880
            }
881
            goto out;
882 883 884 885 886 887

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
888
                // no popStackElement
889 890
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
891
                last = true;
892
            }
893
            goto out;
894 895 896 897 898 899 900 901

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
902 903
            uint32_t entry_no = se->info.next.step >> 2;
            uint32_t field_no = se->info.next.step & 3;
904 905
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
906
                popStackElement(ts);
907
                break; // this breaks out of the switch not the loop
908 909 910 911 912 913 914 915 916 917
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            se->info.next.step++;
918
            goto out;
919
        }
920

921 922
        case TVAR:
        case CONSTR:
923 924 925 926 927 928
        case PRIM:
        case MUT_PRIM:
        case BCO:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
929 930
        case MUT_ARR_PTRS_FROZEN_CLEAN:
        case MUT_ARR_PTRS_FROZEN_DIRTY:
931 932 933 934
        case SMALL_MUT_ARR_PTRS_CLEAN:
        case SMALL_MUT_ARR_PTRS_DIRTY:
        case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
        case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
935 936
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
937
                popStackElement(ts);
938
                break; // this breaks out of the switch not the loop
939
            }
940
            goto out;
941 942 943

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
944
        case FUN_STATIC:
945 946 947 948
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
949
                    goto out;
950 951 952 953 954 955 956 957 958 959
                }
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
960
                    goto out;
961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977
                }
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
978
            if(*c == NULL) {
979
                popStackElement(ts);