RetainerProfile.c 55.5 KB
Newer Older
1 2 3 4 5 6 7 8 9
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2001
 * Author: Sungwoo Park
 *
 * Retainer profiling.
 *
 * ---------------------------------------------------------------------------*/

Ben Gamari's avatar
Ben Gamari committed
10
#if defined(PROFILING)
11

Simon Marlow's avatar
Simon Marlow committed
12
#include "PosixSource.h"
13
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
14

15 16 17
#include "RtsUtils.h"
#include "RetainerProfile.h"
#include "RetainerSet.h"
18
#include "TraverseHeap.h"
19 20 21
#include "Schedule.h"
#include "Printer.h"
#include "Weak.h"
Simon Marlow's avatar
Simon Marlow committed
22
#include "sm/Sanity.h"
23 24 25
#include "Profiling.h"
#include "Stats.h"
#include "ProfHeap.h"
26
#include "Apply.h"
David Feuer's avatar
David Feuer committed
27 28
#include "StablePtr.h" /* markStablePtrTable */
#include "StableName.h" /* rememberOldStableNameAddresses */
Simon Marlow's avatar
Simon Marlow committed
29
#include "sm/Storage.h" // for END_OF_STATIC_LIST
30

31 32
/* Note [What is a retainer?]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~
33 34
Retainer profiling is a profiling technique that gives information why
objects can't be freed and lists the consumers that hold pointers to
35
the heap objects. It does not list all the objects that keep references
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
to the other, because then we would keep too much information that will
make the report unusable, for example the cons element of the list would keep
all the tail cells. As a result we are keeping only the objects of the
certain types, see 'isRetainer()' function for more discussion.

More formal definition of the retainer can be given the following way.

An object p is a retainer object of the object l, if all requirements
hold:

  1. p can be a retainer (see `isRetainer()`)
  2. l is reachable from p
  3. There are no other retainers on the path from p to l.

Exact algorithm and additional information can be found the historical
document 'docs/storage-mgt/rp.tex'. Details that are related to the
RTS implementation may be out of date, but the general
information about the retainers is still applicable.
54 55 56
*/


57 58 59 60 61 62 63 64 65 66 67 68 69
/*
  Note: what to change in order to plug-in a new retainer profiling scheme?
    (1) type retainer in ../includes/StgRetainerProf.h
    (2) retainer function R(), i.e., getRetainerFrom()
    (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
        in RetainerSet.h, if needed.
    (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
 */

/* -----------------------------------------------------------------------------
 * Declarations...
 * -------------------------------------------------------------------------- */

70
static uint32_t retainerGeneration;  // generation
71

72 73 74
static uint32_t numObjectVisited;    // total number of objects visited
static uint32_t timesAnyObjectVisited;  // number of times any objects are
                                        // visited
75

76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
/** Note [Profiling heap traversal visited bit]
 *
 * If the RTS is compiled with profiling enabled StgProfHeader can be used by
 * profiling code to store per-heap object information.
 *
 * When using the generic heap traversal code we use this field to store
 * profiler specific information. However we reserve the LSB of the *entire*
 * 'trav' union (which will overlap with the other fields) for the generic
 * traversal code. We use the bit to decide whether we've already visited this
 * closure in this pass or not. We do this as the heap may contain cyclic
 * references, it being a graph and all, so we would likely just infinite loop
 * if we didn't.
 *
 * We assume that at least the LSB of the largest field in the corresponding
 * union is insignificant. This is true at least for the word aligned pointers
 * which the retainer profiler currently stores there and should be maintained
 * by new users of the 'trav' union.
 *
 * Now the way the traversal works is that the interpretation of the "visited?"
 * bit depends on the value of the global 'flip' variable. We don't want to have
 * to do another pass over the heap just to reset the bit to zero so instead on
 * each traversal (i.e. each run of the profiling code) we invert the value of
 * the global 'flip' variable. We interpret this as resetting all the "visited?"
 * flags on the heap.
 *
 * There is one exception to this rule, namely: static objects. There we do just
 * go over the heap and reset the bit manually. See
103
 * 'resetStaticObjectForProfiling'.
104
 */
105
StgWord flip = 0;     // flip bit
106 107
                      // must be 0 if DEBUG_RETAINER is on (for static closures)

108 109
#define setTravDataToZero(c) \
  (c)->header.prof.hp.trav.lsb = flip
110 111 112 113 114 115 116 117 118 119 120 121

/* -----------------------------------------------------------------------------
 * Retainer stack - header
 *   Note:
 *     Although the retainer stack implementation could be separated *
 *     from the retainer profiling engine, there does not seem to be
 *     any advantage in doing that; retainer stack is an integral part
 *     of retainer profiling engine and cannot be use elsewhere at
 *     all.
 * -------------------------------------------------------------------------- */

typedef enum {
122 123
    // Object with fixed layout. Keeps an information about that
    // element was processed. (stackPos.next.step)
124
    posTypeStep,
125 126
    // Description of the pointers-first heap object. Keeps information
    // about layout. (stackPos.next.ptrs)
127
    posTypePtrs,
128
    // Keeps SRT bitmap (stackPos.next.srt)
129
    posTypeSRT,
130 131 132
    // Keeps a new object that was not inspected yet. Keeps a parent
    // element (stackPos.next.parent)
    posTypeFresh
133 134 135 136 137 138 139 140
} nextPosType;

typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
141 142 143
        // See StgClosureInfo in InfoTables.h
        StgHalfWord pos;
        StgHalfWord ptrs;
144
        StgPtr payload;
145 146 147 148
    } ptrs;

    // SRT
    struct {
149
        StgClosure *srt;
150 151 152
    } srt;
} nextPos;

153 154 155 156
/**
 * Position pointer into a closure. Determines what the next element to return
 * for a stackElement is.
 */
157 158 159 160 161
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

162 163 164 165 166 167 168 169 170 171 172
/**
 * An element of the traversal work-stack. Besides the closure itself this also
 * stores it's parent and associated data.
 *
 * When 'info.type == posTypeFresh' a 'stackElement' represents just one
 * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an
 * offset into the children of 'c'. This is to support returning a closure's
 * children one-by-one without pushing one element per child onto the stack. See
 * traversePushChildren() and traversePop().
 *
 */
173
typedef struct stackElement_ {
174
    stackPos info;
175
    StgClosure *c;
176
    StgClosure *cp; // parent of 'c'. Only used when info.type == posTypeFresh.
177
    stackData data;
178 179
} stackElement;

180 181 182
traverseState g_retainerTraverseState;


183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
#if defined(DEBUG)
unsigned int g_traversalDebugLevel = 0;
static inline void debug(const char *s, ...)
{
    va_list ap;

    if(g_traversalDebugLevel == 0)
        return;

    va_start(ap,s);
    vdebugBelch(s, ap);
    va_end(ap);
}
#else
#define debug(...)
#endif
199 200 201 202 203 204 205 206 207

// number of blocks allocated for one stack
#define BLOCKS_IN_STACK 1

/* -----------------------------------------------------------------------------
 * Add a new block group to the stack.
 * Invariants:
 *  currentStack->link == s.
 * -------------------------------------------------------------------------- */
208
STATIC_INLINE void
209
newStackBlock( traverseState *ts, bdescr *bd )
210
{
211 212 213 214 215
    ts->currentStack = bd;
    ts->stackTop     = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    ts->stackBottom  = (stackElement *)bd->start;
    ts->stackLimit   = (stackElement *)ts->stackTop;
    bd->free     = (StgPtr)ts->stackLimit;
216 217 218 219 220 221 222
}

/* -----------------------------------------------------------------------------
 * Return to the previous block group.
 * Invariants:
 *   s->link == currentStack.
 * -------------------------------------------------------------------------- */
223
STATIC_INLINE void
224
returnToOldStack( traverseState *ts, bdescr *bd )
225
{
226 227 228 229 230
    ts->currentStack = bd;
    ts->stackTop = (stackElement *)bd->free;
    ts->stackBottom = (stackElement *)bd->start;
    ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    bd->free = (StgPtr)ts->stackLimit;
231 232
}

233 234 235
/**
 *  Initializes the traversal work-stack.
 */
236
void
237
initializeTraverseStack( traverseState *ts )
238
{
239 240
    if (ts->firstStack != NULL) {
        freeChain(ts->firstStack);
241 242
    }

243 244 245
    ts->firstStack = allocGroup(BLOCKS_IN_STACK);
    ts->firstStack->link = NULL;
    ts->firstStack->u.back = NULL;
246

247 248 249
    ts->stackSize = 0;
    ts->maxStackSize = 0;

250
    newStackBlock(ts, ts->firstStack);
251 252
}

253 254 255
/**
 * Frees all the block groups in the traversal works-stack.
 *
256 257
 * Invariants:
 *   firstStack != NULL
258
 */
259
void
260
closeTraverseStack( traverseState *ts )
261
{
262 263
    freeChain(ts->firstStack);
    ts->firstStack = NULL;
264 265
}

266 267 268 269 270 271
int
getTraverseStackMaxSize(traverseState *ts)
{
    return ts->maxStackSize;
}

272
/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
273
 * Returns true if the whole stack is empty.
274
 * -------------------------------------------------------------------------- */
275
STATIC_INLINE bool
276
isEmptyWorkStack( traverseState *ts )
277
{
278
    return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit;
279 280
}

sof's avatar
sof committed
281 282 283
/* -----------------------------------------------------------------------------
 * Returns size of stack
 * -------------------------------------------------------------------------- */
284
W_
285
traverseWorkStackBlocks(traverseState *ts)
sof's avatar
sof committed
286 287
{
    bdescr* bd;
288
    W_ res = 0;
sof's avatar
sof committed
289

290
    for (bd = ts->firstStack; bd != NULL; bd = bd->link)
sof's avatar
sof committed
291 292 293 294 295
      res += bd->blocks;

    return res;
}

296 297 298 299 300 301
W_
retainerStackBlocks(void)
{
    return traverseWorkStackBlocks(&g_retainerTraverseState);
}

302 303 304 305 306
/* -----------------------------------------------------------------------------
 * Initializes *info from ptrs and payload.
 * Invariants:
 *   payload[] begins with ptrs pointers followed by non-pointers.
 * -------------------------------------------------------------------------- */
307
STATIC_INLINE void
308
init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
309 310 311 312 313 314 315 316 317 318
{
    info->type              = posTypePtrs;
    info->next.ptrs.pos     = 0;
    info->next.ptrs.ptrs    = ptrs;
    info->next.ptrs.payload = payload;
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
319
STATIC_INLINE StgClosure *
320 321 322
find_ptrs( stackPos *info )
{
    if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
323
        return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
324
    } else {
325
        return NULL;
326 327 328 329 330 331
    }
}

/* -----------------------------------------------------------------------------
 *  Initializes *info from SRT information stored in *infoTable.
 * -------------------------------------------------------------------------- */
332
STATIC_INLINE void
333
init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
334
{
335 336 337
    info->type = posTypeSRT;
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable);
338
    } else {
339
        info->next.srt.srt = NULL;
340
    }
341 342
}

343
STATIC_INLINE void
344
init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
345
{
Ömer Sinan Ağacan's avatar
Ömer Sinan Ağacan committed
346
    info->type = posTypeSRT;
347 348
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_SRT(infoTable);
349
    } else {
350
        info->next.srt.srt = NULL;
351
    }
352 353 354 355 356
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
357
STATIC_INLINE StgClosure *
358 359 360
find_srt( stackPos *info )
{
    StgClosure *c;
361
    if (info->type == posTypeSRT) {
362 363 364
        c = info->next.srt.srt;
        info->next.srt.srt = NULL;
        return c;
365 366 367
    }
}

368 369 370 371
/**
 * Push a set of closures, represented by a single 'stackElement', onto the
 * traversal work-stack.
 */
372
static void
373 374
pushStackElement(traverseState *ts, stackElement *se)
{
375
    bdescr *nbd;      // Next Block Descriptor
376
    if (ts->stackTop - 1 < ts->stackBottom) {
377 378
        debug("pushStackElement() to the next stack.\n");

379 380
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
381
        ts->currentStack->free = (StgPtr)ts->stackTop;
382

383
        if (ts->currentStack->link == NULL) {
384 385
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
386 387
            nbd->u.back = ts->currentStack;
            ts->currentStack->link = nbd;
388
        } else
389
            nbd = ts->currentStack->link;
390

391
        newStackBlock(ts, nbd);
392 393 394
    }

    // adjust stackTop (acutal push)
395
    ts->stackTop--;
396 397 398 399
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.
400
    *ts->stackTop = *se;
401

402 403 404
    ts->stackSize++;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
405
    debug("stackSize = %d\n", ts->stackSize);
406 407
}

408 409
/**
 * Push a single closure onto the traversal work-stack.
410
 *
411 412 413
 *  cp   - object's parent
 *  c    - closure
 *  data - data associated with closure.
414
 */
415
inline void
416
traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackData data) {
417 418 419
    stackElement se;

    se.c = c;
420 421
    se.cp = cp;
    se.data = data;
422 423
    se.info.type = posTypeFresh;

424
    pushStackElement(ts, &se);
425 426
};

427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
/**
 * traversePushChildren() extracts the first child of 'c' in 'first_child' and
 * conceptually pushes all remaining children of 'c' onto the traversal stack
 * while associating 'data' with the pushed elements to be returned upon poping.
 *
 * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed
 * onto the stack.
 *
 * If 'c' has only one child, 'first_child' is set to that child and nothing is
 * pushed onto the stack.
 *
 * Invariants:
 *
 *  - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot
 *       be any stack objects.
 *
 * Note: SRTs are considered to be children as well.
 *
 * Note: When pushing onto the stack we only really push one 'stackElement'
 * representing all children onto the stack. See traversePop()
 */
448
STATIC_INLINE void
449
traversePushChildren(traverseState *ts, StgClosure *c, stackData data, StgClosure **first_child)
450 451 452 453
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

454
    debug("traversePushChildren(): stackTop = 0x%x\n", ts->stackTop);
455 456

    ASSERT(get_itbl(c)->type != TSO);
457
    ASSERT(get_itbl(c)->type != AP_STACK);
458 459 460 461 462 463

    //
    // fill in se
    //

    se.c = c;
464
    se.data = data;
465
    // Note: se.cp ommitted on purpose, only traversePushClosure uses that.
466 467 468

    // fill in se.info
    switch (get_itbl(c)->type) {
469
        // no child, no SRT
470 471 472
    case CONSTR_0_1:
    case CONSTR_0_2:
    case ARR_WORDS:
gcampax's avatar
gcampax committed
473
    case COMPACT_NFDATA:
474 475
        *first_child = NULL;
        return;
476

477
        // one child (fixed), no SRT
478 479
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
480 481
        *first_child = ((StgMutVar *)c)->var;
        return;
482
    case THUNK_SELECTOR:
483 484
        *first_child = ((StgSelector *)c)->selectee;
        return;
485
    case BLACKHOLE:
486 487
        *first_child = ((StgInd *)c)->indirectee;
        return;
488 489
    case CONSTR_1_0:
    case CONSTR_1_1:
490 491
        *first_child = c->payload[0];
        return;
492

493 494 495
        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;
496

497 498
        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
499
    case CONSTR_2_0:
500
        *first_child = c->payload[0];         // return the first pointer
501 502
        se.info.type = posTypeStep;
        se.info.next.step = 2;            // 2 = second
503
        break;
504

505 506
        // three children (fixed), no SRT
        // need to push a stackElement
507 508
    case MVAR_CLEAN:
    case MVAR_DIRTY:
509 510 511
        // head must be TSO and the head of a linked list of TSOs.
        // Shoule it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
512
        se.info.type = posTypeStep;
513 514 515 516
        se.info.next.step = 2;            // 2 = second
        break;

        // three children (fixed), no SRT
517
    case WEAK:
518
        *first_child = ((StgWeak *)c)->key;
519
        se.info.type = posTypeStep;
520 521
        se.info.next.step = 2;
        break;
522

523
        // layout.payload.ptrs, no SRT
524
    case TVAR:
525
    case CONSTR:
Simon Marlow's avatar
Simon Marlow committed
526
    case CONSTR_NOCAF:
527
    case PRIM:
528
    case MUT_PRIM:
529
    case BCO:
530 531 532 533 534 535 536 537
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;   // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
538 539
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
540 541
    case MUT_ARR_PTRS_FROZEN_CLEAN:
    case MUT_ARR_PTRS_FROZEN_DIRTY:
542 543 544 545 546 547 548 549
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // StgMutArrPtr.ptrs, no SRT
550 551
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
552 553
    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
554 555 556 557 558 559
        init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;
560

561
    // layout.payload.ptrs, SRT
562
    case FUN_STATIC:
563 564
    case FUN:           // *c is a heap object.
    case FUN_2_0:
565 566 567 568 569 570
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;
571

572 573
    case THUNK:
    case THUNK_2_0:
574 575 576 577 578 579 580 581 582
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
583 584
    case FUN_1_0:
    case FUN_1_1:
585 586 587 588
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;
589

590 591
    case THUNK_1_0:
    case THUNK_1_1:
592 593 594 595
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;
596

597
    case FUN_0_1:      // *c is a heap object.
598
    case FUN_0_2:
599 600
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
601 602 603 604
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;
605 606 607

    // SRT only
    case THUNK_STATIC:
608
        ASSERT(get_itbl(c)->srt != 0);
609 610
    case THUNK_0_1:
    case THUNK_0_2:
611 612
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
613 614 615 616 617
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

618
    case TREC_CHUNK:
619
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
620
        se.info.type = posTypeStep;
621 622
        se.info.next.step = 0;  // entry no.
        break;
623

624
        // cannot appear
625
    case PAP:
626 627
    case AP:
    case AP_STACK:
628
    case TSO:
629
    case STACK:
630
    case IND_STATIC:
631
        // stack objects
632 633
    case UPDATE_FRAME:
    case CATCH_FRAME:
634
    case UNDERFLOW_FRAME:
635 636 637 638
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
639
        // invalid objects
640 641 642
    case IND:
    case INVALID_OBJECT:
    default:
643
        barf("Invalid object *c in push(): %d", get_itbl(c)->type);
644
        return;
645 646
    }

647 648 649 650
    // se.cp has to be initialized when type==posTypeFresh. We don't do that
    // here though. So type must be !=posTypeFresh.
    ASSERT(se.info.type != posTypeFresh);

651
    pushStackElement(ts, &se);
652 653
}

654 655 656 657
/**
 *  popStackElement(): Remove a depleted stackElement from the top of the
 *  traversal work-stack.
 *
658 659
 *  Invariants:
 *    stackTop cannot be equal to stackLimit unless the whole stack is
660 661
 *    empty, in which case popStackElement() is not allowed.
 */
662
static void
663
popStackElement(traverseState *ts) {
664
    debug("popStackElement(): stackTop = 0x%x\n", ts->stackTop);
665 666 667 668 669 670 671

    ASSERT(ts->stackTop != ts->stackLimit);
    ASSERT(!isEmptyWorkStack(ts));

    // <= (instead of <) is wrong!
    if (ts->stackTop + 1 < ts->stackLimit) {
        ts->stackTop++;
672

673 674 675
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
676 677
        debug("stackSize = (--) %d\n", ts->stackSize);

678 679 680
        return;
    }

681 682
    bdescr *pbd;    // Previous Block Descriptor

683
    debug("popStackElement() to the previous stack.\n");
684

685 686
    ASSERT(ts->stackTop + 1 == ts->stackLimit);
    ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start);
687

688
    if (ts->firstStack == ts->currentStack) {
689
        // The stack is completely empty.
690 691
        ts->stackTop++;
        ASSERT(ts->stackTop == ts->stackLimit);
692

693 694 695
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
696 697
        debug("stackSize = %d\n", ts->stackSize);

698
        return;
699 700 701 702
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
703
    ts->currentStack->free = (StgPtr)ts->stackLimit;
704 705

    // find the previous block descriptor
706
    pbd = ts->currentStack->u.back;
707 708
    ASSERT(pbd != NULL);

709
    returnToOldStack(ts, pbd);
710

711 712 713
    ts->stackSize--;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
714
    debug("stackSize = %d\n", ts->stackSize);
715 716
}

717
/**
718 719
 *  Finds the next object to be considered for retainer profiling and store
 *  its pointer to *c.
720
 *
721 722 723
 *  If the unprocessed object was stored in the stack (posTypeFresh), the
 *  this object is returned as-is. Otherwise Test if the topmost stack
 *  element indicates that more objects are left,
724
 *  and if so, retrieve the first object and store its pointer to *c. Also,
725 726 727
 *  set *cp and *data appropriately, both of which are stored in the stack
 *  element.  The topmost stack element then is overwritten so as for it to now
 *  denote the next object.
728
 *
729 730
 *  If the topmost stack element indicates no more objects are left, pop
 *  off the stack element until either an object can be retrieved or
731 732
 *  the work-stack becomes empty, indicated by true returned by
 *  isEmptyWorkStack(), in which case *c is set to NULL.
733
 *
734
 *  Note:
735
 *
736
 *    It is okay to call this function even when the work-stack is empty.
737
 */
738
STATIC_INLINE void
739
traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data)
740 741 742
{
    stackElement *se;

743
    debug("traversePop(): stackTop = 0x%x\n", ts->stackTop);
744

745 746 747 748
    // Is this the last internal element? If so instead of modifying the current
    // stackElement in place we actually remove it from the stack.
    bool last = false;

749
    do {
750
        if (isEmptyWorkStack(ts)) {
751 752 753 754
            *c = NULL;
            return;
        }

755
        // Note: Below every `break`, where the loop condition is true, must be
756 757
        // accompanied by a popStackElement() otherwise this is an infinite
        // loop.
758
        se = ts->stackTop;
759

760 761
        // If this is a top-level element, you should pop that out.
        if (se->info.type == posTypeFresh) {
762
            *cp = se->cp;
763
            *c = se->c;
764
            *data = se->data;
765
            popStackElement(ts);
766 767 768
            return;
        }

769 770
        // Note: The first ptr of all of these was already returned as
        // *fist_child in push(), so we always start with the second field.
771 772 773 774 775
        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
776 777
            last = true;
            goto out;
778 779 780

            // three children (fixed), no SRT
            // need to push a stackElement
781 782
        case MVAR_CLEAN:
        case MVAR_DIRTY:
783 784 785
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
786
                // no popStackElement
787 788
            } else {
                *c = ((StgMVar *)se->c)->value;
789
                last = true;
790
            }
791
            goto out;
792 793 794 795 796 797

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
798
                // no popStackElement
799 800
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
801
                last = true;
802
            }
803
            goto out;
804 805 806 807 808 809 810 811

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
812 813
            uint32_t entry_no = se->info.next.step >> 2;
            uint32_t field_no = se->info.next.step & 3;
814 815
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
816
                popStackElement(ts);
817
                break; // this breaks out of the switch not the loop
818 819 820 821 822 823 824 825 826 827
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            se->info.next.step++;
828
            goto out;
829
        }
830

831 832
        case TVAR:
        case CONSTR:
833 834 835 836 837 838
        case PRIM:
        case MUT_PRIM:
        case BCO:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
839 840
        case MUT_ARR_PTRS_FROZEN_CLEAN:
        case MUT_ARR_PTRS_FROZEN_DIRTY:
841 842 843 844
        case SMALL_MUT_ARR_PTRS_CLEAN:
        case SMALL_MUT_ARR_PTRS_DIRTY:
        case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
        case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
845 846
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
847
                popStackElement(ts);
848
                break; // this breaks out of the switch not the loop
849
            }
850
            goto out;
851 852 853

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
854
        case FUN_STATIC:
855 856 857 858
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
859
                    goto out;
860 861 862 863 864 865 866 867 868 869
                }
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
870
                    goto out;
871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887
                }
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
888
            if(*c == NULL) {
889
                popStackElement(ts);
890
                break; // this breaks out of the switch not the loop
891
            }
892
            goto out;
893 894 895 896 897 898 899 900 901 902 903 904 905 906 907

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
908 909
        case STACK:
        case IND_STATIC:
Simon Marlow's avatar
Simon Marlow committed
910
        case CONSTR_NOCAF:
911
            // stack objects
912
        case UPDATE_FRAME:
913
        case CATCH_FRAME:
914 915
        case UNDERFLOW_FRAME:
        case STOP_FRAME:
916 917 918 919 920 921 922
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case INVALID_OBJECT:
        default:
923
            barf("Invalid object *c in traversePop(): %d", get_itbl(se->c)->type);
924 925
            return;
        }
926 927 928 929 930 931 932 933 934 935
    } while (*c == NULL);

out:

    ASSERT(*c != NULL);

    *cp = se->c;
    *data = se->data;

    if(last)
936
        popStackElement(ts);
937 938 939

    return;

940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961
}

/* -----------------------------------------------------------------------------
 * RETAINER PROFILING ENGINE
 * -------------------------------------------------------------------------- */

void
initRetainerProfiling( void )
{
    initializeAllRetainerSet();
    retainerGeneration = 0;
}

/* -----------------------------------------------------------------------------
 *  This function must be called before f-closing prof_file.
 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
    outputAllRetainerSet(prof_file);
}

962 963 964 965 966 967
/**
 * Make sure a closure's profiling data is initialized to zero if it does not
 * conform to the current value of the flip bit.
 *
 * See Note [Profiling heap traversal visited bit].
 */
968
void
969
traverseMaybeInitClosureData(StgClosure *c)
970
{
971 972
    if (!isTravDataValid(c)) {
        setTravDataToZero(c);
973 974 975 976
    }
}

/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
977
 * Returns true if *c is a retainer.
978 979 980 981 982 983
 * In general the retainers are the objects that may be the roots of the
 * collection. Basically this roots represents programmers threads
 * (TSO) with their stack and thunks.
 *
 * In addition we mark all mutable objects as a retainers, the reason for
 * that decision is lost in time.
984
 * -------------------------------------------------------------------------- */
985
STATIC_INLINE bool
986 987 988
isRetainer( StgClosure *c )
{
    switch (get_itbl(c)->type) {
989 990 991 992
        //
        //  True case
        //
        // TSOs MUST be retainers: they constitute the set of roots.
993
    case TSO:
994
    case STACK:
995

996
        // mutable objects
997
    case MUT_PRIM:
998 999
    case MVAR_CLEAN:
    case MVAR_DIRTY:
1000
    case TVAR:
1001 1002
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
1003 1004
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
1005 1006 1007
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
    case BLOCKING_QUEUE:
1008

1009
        // thunks are retainers.
1010 1011 1012 1013 1014 1015 1016
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
1017 1018
    case AP:
    case AP_STACK:
1019

1020
        // Static thunks, or CAFS, are obviously retainers.
1021 1022
    case THUNK_STATIC:

1023 1024
        // WEAK objects are roots; there is separate code in which traversing
        // begins from WEAK objects.
1025
    case WEAK:
Ben Gamari's avatar
Ben Gamari committed
1026
        return true;
1027

1028 1029 1030
        //
        // False case
        //
1031

1032
        // constructors
1033
    case CONSTR:
Simon Marlow's avatar
Simon Marlow committed
1034
    case CONSTR_NOCAF:
1035 1036 1037 1038 1039
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
1040
        // functions
1041 1042 1043 1044 1045 1046
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
1047
        // partial applications
1048
    case PAP:
1049
        // indirection
Ian Lynagh's avatar
Ian Lynagh committed
1050 1051 1052 1053
    // IND_STATIC used to be an error, but at the moment it can happen
    // as isAlive doesn't look through IND_STATIC as it ignores static
    // closures. See trac #3956 for a program that hit this error.
    case IND_STATIC:
1054
    case BLACKHOLE:
1055
    case WHITEHOLE:
1056
        // static objects
1057
    case FUN_STATIC:
1058
        // misc
1059
    case PRIM:
1060 1061
    case BCO:
    case ARR_WORDS:
1062
    case COMPACT_NFDATA:
1063
        // STM
1064
    case TREC_CHUNK:
1065
        // immutable arrays
1066 1067 1068 1069
    case MUT_ARR_PTRS_FROZEN_CLEAN:
    case MUT_ARR_PTRS_FROZEN_DIRTY:
    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
Ben Gamari's avatar
Ben Gamari committed
1070
        return false;
1071

1072 1073 1074 1075 1076
        //
        // Error case
        //
        // Stack objects are invalid because they are never treated as
        // legal objects during retainer profiling.
1077 1078
    case UPDATE_FRAME:
    case CATCH_FRAME:
1079 1080
    case CATCH_RETRY_FRAME:
    case CATCH_STM_FRAME:
1081
    case UNDERFLOW_FRAME:
1082
    case ATOMICALLY_FRAME:
1083 1084 1085 1086
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
1087
    case RET_FUN:
1088
        // other cases
1089 1090 1091
    case IND:
    case INVALID_OBJECT:
    default:
1092
        barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
Ben Gamari's avatar
Ben Gamari committed
1093
        return false;
1094 1095 1096 1097 1098 1099 1100 1101 1102
    }
}

/* -----------------------------------------------------------------------------
 *  Returns the retainer function value for the closure *c, i.e., R(*c).
 *  This function does NOT return the retainer(s) of *c.
 *  Invariants:
 *    *c must be a retainer.
 * -------------------------------------------------------------------------- */
1103
STATIC_INLINE retainer
1104 1105 1106 1107 1108 1109 1110 1111