RetainerProfile.c 56.8 KB
Newer Older
1 2 3 4 5 6 7 8 9
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2001
 * Author: Sungwoo Park
 *
 * Retainer profiling.
 *
 * ---------------------------------------------------------------------------*/

Ben Gamari's avatar
Ben Gamari committed
10
#if defined(PROFILING)
11

Simon Marlow's avatar
Simon Marlow committed
12
#include "PosixSource.h"
13
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
14

15 16 17
#include "RtsUtils.h"
#include "RetainerProfile.h"
#include "RetainerSet.h"
18
#include "TraverseHeap.h"
19 20 21
#include "Schedule.h"
#include "Printer.h"
#include "Weak.h"
Simon Marlow's avatar
Simon Marlow committed
22
#include "sm/Sanity.h"
23 24 25
#include "Profiling.h"
#include "Stats.h"
#include "ProfHeap.h"
26
#include "Apply.h"
David Feuer's avatar
David Feuer committed
27 28
#include "StablePtr.h" /* markStablePtrTable */
#include "StableName.h" /* rememberOldStableNameAddresses */
Simon Marlow's avatar
Simon Marlow committed
29
#include "sm/Storage.h" // for END_OF_STATIC_LIST
30

31 32
/* Note [What is a retainer?]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~
33 34
Retainer profiling is a profiling technique that gives information why
objects can't be freed and lists the consumers that hold pointers to
35
the heap objects. It does not list all the objects that keep references
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
to the other, because then we would keep too much information that will
make the report unusable, for example the cons element of the list would keep
all the tail cells. As a result we are keeping only the objects of the
certain types, see 'isRetainer()' function for more discussion.

More formal definition of the retainer can be given the following way.

An object p is a retainer object of the object l, if all requirements
hold:

  1. p can be a retainer (see `isRetainer()`)
  2. l is reachable from p
  3. There are no other retainers on the path from p to l.

Exact algorithm and additional information can be found the historical
document 'docs/storage-mgt/rp.tex'. Details that are related to the
RTS implementation may be out of date, but the general
information about the retainers is still applicable.
54 55 56
*/


57 58 59 60 61 62 63 64 65 66 67 68 69
/*
  Note: what to change in order to plug-in a new retainer profiling scheme?
    (1) type retainer in ../includes/StgRetainerProf.h
    (2) retainer function R(), i.e., getRetainerFrom()
    (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
        in RetainerSet.h, if needed.
    (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
 */

/* -----------------------------------------------------------------------------
 * Declarations...
 * -------------------------------------------------------------------------- */

70
static uint32_t retainerGeneration;  // generation
71

72 73 74
static uint32_t numObjectVisited;    // total number of objects visited
static uint32_t timesAnyObjectVisited;  // number of times any objects are
                                        // visited
75

76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
/** Note [Profiling heap traversal visited bit]
 *
 * If the RTS is compiled with profiling enabled StgProfHeader can be used by
 * profiling code to store per-heap object information.
 *
 * When using the generic heap traversal code we use this field to store
 * profiler specific information. However we reserve the LSB of the *entire*
 * 'trav' union (which will overlap with the other fields) for the generic
 * traversal code. We use the bit to decide whether we've already visited this
 * closure in this pass or not. We do this as the heap may contain cyclic
 * references, it being a graph and all, so we would likely just infinite loop
 * if we didn't.
 *
 * We assume that at least the LSB of the largest field in the corresponding
 * union is insignificant. This is true at least for the word aligned pointers
 * which the retainer profiler currently stores there and should be maintained
 * by new users of the 'trav' union.
 *
 * Now the way the traversal works is that the interpretation of the "visited?"
 * bit depends on the value of the global 'flip' variable. We don't want to have
 * to do another pass over the heap just to reset the bit to zero so instead on
 * each traversal (i.e. each run of the profiling code) we invert the value of
 * the global 'flip' variable. We interpret this as resetting all the "visited?"
 * flags on the heap.
 *
 * There is one exception to this rule, namely: static objects. There we do just
 * go over the heap and reset the bit manually. See
103
 * 'resetStaticObjectForProfiling'.
104
 */
105
StgWord flip = 0;     // flip bit
106 107
                      // must be 0 if DEBUG_RETAINER is on (for static closures)

108 109
#define setTravDataToZero(c) \
  (c)->header.prof.hp.trav.lsb = flip
110 111 112 113 114 115 116 117 118 119 120 121

/* -----------------------------------------------------------------------------
 * Retainer stack - header
 *   Note:
 *     Although the retainer stack implementation could be separated *
 *     from the retainer profiling engine, there does not seem to be
 *     any advantage in doing that; retainer stack is an integral part
 *     of retainer profiling engine and cannot be use elsewhere at
 *     all.
 * -------------------------------------------------------------------------- */

typedef enum {
122 123
    // Object with fixed layout. Keeps an information about that
    // element was processed. (stackPos.next.step)
124
    posTypeStep,
125 126
    // Description of the pointers-first heap object. Keeps information
    // about layout. (stackPos.next.ptrs)
127
    posTypePtrs,
128
    // Keeps SRT bitmap (stackPos.next.srt)
129
    posTypeSRT,
130 131 132
    // Keeps a new object that was not inspected yet. Keeps a parent
    // element (stackPos.next.parent)
    posTypeFresh
133 134 135 136 137 138 139 140
} nextPosType;

typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
141 142 143
        // See StgClosureInfo in InfoTables.h
        StgHalfWord pos;
        StgHalfWord ptrs;
144
        StgPtr payload;
145 146 147 148
    } ptrs;

    // SRT
    struct {
149
        StgClosure *srt;
150 151 152
    } srt;
} nextPos;

153 154 155 156
/**
 * Position pointer into a closure. Determines what the next element to return
 * for a stackElement is.
 */
157 158 159 160 161
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

162 163 164 165 166 167 168 169 170 171 172
/**
 * An element of the traversal work-stack. Besides the closure itself this also
 * stores it's parent and associated data.
 *
 * When 'info.type == posTypeFresh' a 'stackElement' represents just one
 * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an
 * offset into the children of 'c'. This is to support returning a closure's
 * children one-by-one without pushing one element per child onto the stack. See
 * traversePushChildren() and traversePop().
 *
 */
173
typedef struct stackElement_ {
174
    stackPos info;
175
    StgClosure *c;
176
    StgClosure *cp; // parent of 'c'. Only used when info.type == posTypeFresh.
177
    stackData data;
178 179
} stackElement;

180 181 182
traverseState g_retainerTraverseState;


183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
#if defined(DEBUG)
unsigned int g_traversalDebugLevel = 0;
static inline void debug(const char *s, ...)
{
    va_list ap;

    if(g_traversalDebugLevel == 0)
        return;

    va_start(ap,s);
    vdebugBelch(s, ap);
    va_end(ap);
}
#else
#define debug(...)
#endif
199 200 201 202 203 204 205 206 207

// number of blocks allocated for one stack
#define BLOCKS_IN_STACK 1

/* -----------------------------------------------------------------------------
 * Add a new block group to the stack.
 * Invariants:
 *  currentStack->link == s.
 * -------------------------------------------------------------------------- */
208
STATIC_INLINE void
209
newStackBlock( traverseState *ts, bdescr *bd )
210
{
211 212 213 214 215
    ts->currentStack = bd;
    ts->stackTop     = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    ts->stackBottom  = (stackElement *)bd->start;
    ts->stackLimit   = (stackElement *)ts->stackTop;
    bd->free     = (StgPtr)ts->stackLimit;
216 217 218 219 220 221 222
}

/* -----------------------------------------------------------------------------
 * Return to the previous block group.
 * Invariants:
 *   s->link == currentStack.
 * -------------------------------------------------------------------------- */
223
STATIC_INLINE void
224
returnToOldStack( traverseState *ts, bdescr *bd )
225
{
226 227 228 229 230
    ts->currentStack = bd;
    ts->stackTop = (stackElement *)bd->free;
    ts->stackBottom = (stackElement *)bd->start;
    ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    bd->free = (StgPtr)ts->stackLimit;
231 232
}

233 234 235
/**
 *  Initializes the traversal work-stack.
 */
236
void
237
initializeTraverseStack( traverseState *ts )
238
{
239 240
    if (ts->firstStack != NULL) {
        freeChain(ts->firstStack);
241 242
    }

243 244 245
    ts->firstStack = allocGroup(BLOCKS_IN_STACK);
    ts->firstStack->link = NULL;
    ts->firstStack->u.back = NULL;
246

247 248 249
    ts->stackSize = 0;
    ts->maxStackSize = 0;

250
    newStackBlock(ts, ts->firstStack);
251 252
}

253 254 255
/**
 * Frees all the block groups in the traversal works-stack.
 *
256 257
 * Invariants:
 *   firstStack != NULL
258
 */
259
void
260
closeTraverseStack( traverseState *ts )
261
{
262 263
    freeChain(ts->firstStack);
    ts->firstStack = NULL;
264 265
}

266 267 268 269 270 271
int
getTraverseStackMaxSize(traverseState *ts)
{
    return ts->maxStackSize;
}

272
/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
273
 * Returns true if the whole stack is empty.
274
 * -------------------------------------------------------------------------- */
275
STATIC_INLINE bool
276
isEmptyWorkStack( traverseState *ts )
277
{
278
    return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit;
279 280
}

sof's avatar
sof committed
281 282 283
/* -----------------------------------------------------------------------------
 * Returns size of stack
 * -------------------------------------------------------------------------- */
284
W_
285
traverseWorkStackBlocks(traverseState *ts)
sof's avatar
sof committed
286 287
{
    bdescr* bd;
288
    W_ res = 0;
sof's avatar
sof committed
289

290
    for (bd = ts->firstStack; bd != NULL; bd = bd->link)
sof's avatar
sof committed
291 292 293 294 295
      res += bd->blocks;

    return res;
}

296 297 298 299 300 301
W_
retainerStackBlocks(void)
{
    return traverseWorkStackBlocks(&g_retainerTraverseState);
}

302
/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
303
 * Returns true if stackTop is at the stack boundary of the current stack,
304 305
 * i.e., if the current stack chunk is empty.
 * -------------------------------------------------------------------------- */
306
STATIC_INLINE bool
307
isOnBoundary( traverseState *ts )
308
{
309
    return ts->stackTop == ts->currentStackBoundary;
310 311 312 313 314 315 316
}

/* -----------------------------------------------------------------------------
 * Initializes *info from ptrs and payload.
 * Invariants:
 *   payload[] begins with ptrs pointers followed by non-pointers.
 * -------------------------------------------------------------------------- */
317
STATIC_INLINE void
318
init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
319 320 321 322 323 324 325 326 327 328
{
    info->type              = posTypePtrs;
    info->next.ptrs.pos     = 0;
    info->next.ptrs.ptrs    = ptrs;
    info->next.ptrs.payload = payload;
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
329
STATIC_INLINE StgClosure *
330 331 332
find_ptrs( stackPos *info )
{
    if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
333
        return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
334
    } else {
335
        return NULL;
336 337 338 339 340 341
    }
}

/* -----------------------------------------------------------------------------
 *  Initializes *info from SRT information stored in *infoTable.
 * -------------------------------------------------------------------------- */
342
STATIC_INLINE void
343
init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
344
{
345 346 347
    info->type = posTypeSRT;
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable);
348
    } else {
349
        info->next.srt.srt = NULL;
350
    }
351 352
}

353
STATIC_INLINE void
354
init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
355
{
Ömer Sinan Ağacan's avatar
Ömer Sinan Ağacan committed
356
    info->type = posTypeSRT;
357 358
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_SRT(infoTable);
359
    } else {
360
        info->next.srt.srt = NULL;
361
    }
362 363 364 365 366
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
367
STATIC_INLINE StgClosure *
368 369 370
find_srt( stackPos *info )
{
    StgClosure *c;
371
    if (info->type == posTypeSRT) {
372 373 374
        c = info->next.srt.srt;
        info->next.srt.srt = NULL;
        return c;
375 376 377
    }
}

378 379 380 381
/**
 * Push a set of closures, represented by a single 'stackElement', onto the
 * traversal work-stack.
 */
382
static void
383 384
pushStackElement(traverseState *ts, stackElement *se)
{
385
    bdescr *nbd;      // Next Block Descriptor
386
    if (ts->stackTop - 1 < ts->stackBottom) {
387 388
        debug("pushStackElement() to the next stack.\n");

389 390
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
391
        ts->currentStack->free = (StgPtr)ts->stackTop;
392

393
        if (ts->currentStack->link == NULL) {
394 395
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
396 397
            nbd->u.back = ts->currentStack;
            ts->currentStack->link = nbd;
398
        } else
399
            nbd = ts->currentStack->link;
400

401
        newStackBlock(ts, nbd);
402 403 404
    }

    // adjust stackTop (acutal push)
405
    ts->stackTop--;
406 407 408 409
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.
410
    *ts->stackTop = *se;
411

412 413 414
    ts->stackSize++;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
415
    debug("stackSize = %d\n", ts->stackSize);
416 417
}

418 419
/**
 * Push a single closure onto the traversal work-stack.
420
 *
421 422 423
 *  cp   - object's parent
 *  c    - closure
 *  data - data associated with closure.
424
 */
425
inline void
426
traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackData data) {
427 428 429
    stackElement se;

    se.c = c;
430 431
    se.cp = cp;
    se.data = data;
432 433
    se.info.type = posTypeFresh;

434
    pushStackElement(ts, &se);
435 436
};

437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457
/**
 * traversePushChildren() extracts the first child of 'c' in 'first_child' and
 * conceptually pushes all remaining children of 'c' onto the traversal stack
 * while associating 'data' with the pushed elements to be returned upon poping.
 *
 * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed
 * onto the stack.
 *
 * If 'c' has only one child, 'first_child' is set to that child and nothing is
 * pushed onto the stack.
 *
 * Invariants:
 *
 *  - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot
 *       be any stack objects.
 *
 * Note: SRTs are considered to be children as well.
 *
 * Note: When pushing onto the stack we only really push one 'stackElement'
 * representing all children onto the stack. See traversePop()
 */
458
STATIC_INLINE void
459
traversePushChildren(traverseState *ts, StgClosure *c, stackData data, StgClosure **first_child)
460 461 462 463
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

464
    debug("traversePushChildren(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
465 466

    ASSERT(get_itbl(c)->type != TSO);
467
    ASSERT(get_itbl(c)->type != AP_STACK);
468 469 470 471 472 473

    //
    // fill in se
    //

    se.c = c;
474
    se.data = data;
475
    // Note: se.cp ommitted on purpose, only traversePushClosure uses that.
476 477 478

    // fill in se.info
    switch (get_itbl(c)->type) {
479
        // no child, no SRT
480 481 482
    case CONSTR_0_1:
    case CONSTR_0_2:
    case ARR_WORDS:
gcampax's avatar
gcampax committed
483
    case COMPACT_NFDATA:
484 485
        *first_child = NULL;
        return;
486

487
        // one child (fixed), no SRT
488 489
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
490 491
        *first_child = ((StgMutVar *)c)->var;
        return;
492
    case THUNK_SELECTOR:
493 494
        *first_child = ((StgSelector *)c)->selectee;
        return;
495
    case BLACKHOLE:
496 497
        *first_child = ((StgInd *)c)->indirectee;
        return;
498 499
    case CONSTR_1_0:
    case CONSTR_1_1:
500 501
        *first_child = c->payload[0];
        return;
502

503 504 505
        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;
506

507 508
        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
509
    case CONSTR_2_0:
510
        *first_child = c->payload[0];         // return the first pointer
511 512
        se.info.type = posTypeStep;
        se.info.next.step = 2;            // 2 = second
513
        break;
514

515 516
        // three children (fixed), no SRT
        // need to push a stackElement
517 518
    case MVAR_CLEAN:
    case MVAR_DIRTY:
519 520 521
        // head must be TSO and the head of a linked list of TSOs.
        // Shoule it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
522
        se.info.type = posTypeStep;
523 524 525 526
        se.info.next.step = 2;            // 2 = second
        break;

        // three children (fixed), no SRT
527
    case WEAK:
528
        *first_child = ((StgWeak *)c)->key;
529
        se.info.type = posTypeStep;
530 531
        se.info.next.step = 2;
        break;
532

533
        // layout.payload.ptrs, no SRT
534
    case TVAR:
535
    case CONSTR:
Simon Marlow's avatar
Simon Marlow committed
536
    case CONSTR_NOCAF:
537
    case PRIM:
538
    case MUT_PRIM:
539
    case BCO:
540 541 542 543 544 545 546 547
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;   // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
548 549
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
550 551
    case MUT_ARR_PTRS_FROZEN_CLEAN:
    case MUT_ARR_PTRS_FROZEN_DIRTY:
552 553 554 555 556 557 558 559
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // StgMutArrPtr.ptrs, no SRT
560 561
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
562 563
    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
564 565 566 567 568 569
        init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;
570

571
    // layout.payload.ptrs, SRT
572
    case FUN_STATIC:
573 574
    case FUN:           // *c is a heap object.
    case FUN_2_0:
575 576 577 578 579 580
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;
581

582 583
    case THUNK:
    case THUNK_2_0:
584 585 586 587 588 589 590 591 592
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
593 594
    case FUN_1_0:
    case FUN_1_1:
595 596 597 598
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;
599

600 601
    case THUNK_1_0:
    case THUNK_1_1:
602 603 604 605
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;
606

607
    case FUN_0_1:      // *c is a heap object.
608
    case FUN_0_2:
609 610
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
611 612 613 614
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;
615 616 617

    // SRT only
    case THUNK_STATIC:
618
        ASSERT(get_itbl(c)->srt != 0);
619 620
    case THUNK_0_1:
    case THUNK_0_2:
621 622
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
623 624 625 626 627
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

628
    case TREC_CHUNK:
629
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
630
        se.info.type = posTypeStep;
631 632
        se.info.next.step = 0;  // entry no.
        break;
633

634
        // cannot appear
635
    case PAP:
636 637
    case AP:
    case AP_STACK:
638
    case TSO:
639
    case STACK:
640
    case IND_STATIC:
641
        // stack objects
642 643
    case UPDATE_FRAME:
    case CATCH_FRAME:
644
    case UNDERFLOW_FRAME:
645 646 647 648
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
649
        // invalid objects
650 651 652
    case IND:
    case INVALID_OBJECT:
    default:
653
        barf("Invalid object *c in push(): %d", get_itbl(c)->type);
654
        return;
655 656
    }

657 658 659 660
    // se.cp has to be initialized when type==posTypeFresh. We don't do that
    // here though. So type must be !=posTypeFresh.
    ASSERT(se.info.type != posTypeFresh);

661
    pushStackElement(ts, &se);
662 663
}

664 665 666 667
/**
 *  popStackElement(): Remove a depleted stackElement from the top of the
 *  traversal work-stack.
 *
668 669
 *  Invariants:
 *    stackTop cannot be equal to stackLimit unless the whole stack is
670 671
 *    empty, in which case popStackElement() is not allowed.
 */
672
static void
673
popStackElement(traverseState *ts) {
674
    debug("popStackElement(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
675 676 677 678 679 680 681

    ASSERT(ts->stackTop != ts->stackLimit);
    ASSERT(!isEmptyWorkStack(ts));

    // <= (instead of <) is wrong!
    if (ts->stackTop + 1 < ts->stackLimit) {
        ts->stackTop++;
682

683 684 685
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
686 687
        debug("stackSize = (--) %d\n", ts->stackSize);

688 689 690
        return;
    }

691 692
    bdescr *pbd;    // Previous Block Descriptor

693
    debug("popStackElement() to the previous stack.\n");
694

695 696
    ASSERT(ts->stackTop + 1 == ts->stackLimit);
    ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start);
697

698
    if (ts->firstStack == ts->currentStack) {
699
        // The stack is completely empty.
700 701
        ts->stackTop++;
        ASSERT(ts->stackTop == ts->stackLimit);
702

703 704 705
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
706 707
        debug("stackSize = %d\n", ts->stackSize);

708
        return;
709 710 711 712
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
713
    ts->currentStack->free = (StgPtr)ts->stackLimit;
714 715

    // find the previous block descriptor
716
    pbd = ts->currentStack->u.back;
717 718
    ASSERT(pbd != NULL);

719
    returnToOldStack(ts, pbd);
720

721 722 723
    ts->stackSize--;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
724
    debug("stackSize = %d\n", ts->stackSize);
725 726
}

727
/**
728 729
 *  Finds the next object to be considered for retainer profiling and store
 *  its pointer to *c.
730
 *
731 732 733
 *  If the unprocessed object was stored in the stack (posTypeFresh), the
 *  this object is returned as-is. Otherwise Test if the topmost stack
 *  element indicates that more objects are left,
734
 *  and if so, retrieve the first object and store its pointer to *c. Also,
735 736 737
 *  set *cp and *data appropriately, both of which are stored in the stack
 *  element.  The topmost stack element then is overwritten so as for it to now
 *  denote the next object.
738
 *
739 740
 *  If the topmost stack element indicates no more objects are left, pop
 *  off the stack element until either an object can be retrieved or
Ben Gamari's avatar
Ben Gamari committed
741
 *  the current stack chunk becomes empty, indicated by true returned by
742
 *  isOnBoundary(), in which case *c is set to NULL.
743
 *
744
 *  Note:
745
 *
746 747
 *    It is okay to call this function even when the current stack chunk
 *    is empty.
748
 */
749
STATIC_INLINE void
750
traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data)
751 752 753
{
    stackElement *se;

754
    debug("traversePop(): stackTop = 0x%x currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
755

756 757 758 759
    // Is this the last internal element? If so instead of modifying the current
    // stackElement in place we actually remove it from the stack.
    bool last = false;

760
    do {
761
        if (isOnBoundary(ts)) {     // if the current stack chunk is depleted
762 763 764 765
            *c = NULL;
            return;
        }

766
        // Note: Below every `break`, where the loop condition is true, must be
767 768
        // accompanied by a popStackElement() otherwise this is an infinite
        // loop.
769
        se = ts->stackTop;
770

771 772
        // If this is a top-level element, you should pop that out.
        if (se->info.type == posTypeFresh) {
773
            *cp = se->cp;
774
            *c = se->c;
775
            *data = se->data;
776
            popStackElement(ts);
777 778 779
            return;
        }

780 781
        // Note: The first ptr of all of these was already returned as
        // *fist_child in push(), so we always start with the second field.
782 783 784 785 786
        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
787 788
            last = true;
            goto out;
789 790 791

            // three children (fixed), no SRT
            // need to push a stackElement
792 793
        case MVAR_CLEAN:
        case MVAR_DIRTY:
794 795 796
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
797
                // no popStackElement
798 799
            } else {
                *c = ((StgMVar *)se->c)->value;
800
                last = true;
801
            }
802
            goto out;
803 804 805 806 807 808

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
809
                // no popStackElement
810 811
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
812
                last = true;
813
            }
814
            goto out;
815 816 817 818 819 820 821 822

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
823 824
            uint32_t entry_no = se->info.next.step >> 2;
            uint32_t field_no = se->info.next.step & 3;
825 826
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
827
                popStackElement(ts);
828
                break; // this breaks out of the switch not the loop
829 830 831 832 833 834 835 836 837 838
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            se->info.next.step++;
839
            goto out;
840
        }
841

842 843
        case TVAR:
        case CONSTR:
844 845 846 847 848 849
        case PRIM:
        case MUT_PRIM:
        case BCO:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
850 851
        case MUT_ARR_PTRS_FROZEN_CLEAN:
        case MUT_ARR_PTRS_FROZEN_DIRTY:
852 853 854 855
        case SMALL_MUT_ARR_PTRS_CLEAN:
        case SMALL_MUT_ARR_PTRS_DIRTY:
        case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
        case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
856 857
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
858
                popStackElement(ts);
859
                break; // this breaks out of the switch not the loop
860
            }
861
            goto out;
862 863 864

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
865
        case FUN_STATIC:
866 867 868 869
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
870
                    goto out;
871 872 873 874 875 876 877 878 879 880
                }
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
881
                    goto out;
882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898
                }
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
899
            if(*c == NULL) {
900
                popStackElement(ts);
901
                break; // this breaks out of the switch not the loop
902
            }
903
            goto out;
904 905 906 907 908 909 910 911 912 913 914 915 916 917 918

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
919 920
        case STACK:
        case IND_STATIC:
Simon Marlow's avatar
Simon Marlow committed
921
        case CONSTR_NOCAF:
922
            // stack objects
923
        case UPDATE_FRAME:
924
        case CATCH_FRAME:
925 926
        case UNDERFLOW_FRAME:
        case STOP_FRAME:
927 928 929 930 931 932 933
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case INVALID_OBJECT:
        default:
934
            barf("Invalid object *c in traversePop(): %d", get_itbl(se->c)->type);
935 936
            return;
        }
937 938 939 940 941 942 943 944 945 946
    } while (*c == NULL);

out:

    ASSERT(*c != NULL);

    *cp = se->c;
    *data = se->data;

    if(last)
947
        popStackElement(ts);
948 949 950

    return;

951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972
}

/* -----------------------------------------------------------------------------
 * RETAINER PROFILING ENGINE
 * -------------------------------------------------------------------------- */

void
initRetainerProfiling( void )
{
    initializeAllRetainerSet();
    retainerGeneration = 0;
}

/* -----------------------------------------------------------------------------
 *  This function must be called before f-closing prof_file.
 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
    outputAllRetainerSet(prof_file);
}

973 974 975 976 977 978
/**
 * Make sure a closure's profiling data is initialized to zero if it does not
 * conform to the current value of the flip bit.
 *
 * See Note [Profiling heap traversal visited bit].
 */
979
void
980
traverseMaybeInitClosureData(StgClosure *c)
981
{
982 983
    if (!isTravDataValid(c)) {
        setTravDataToZero(c);
984 985 986 987
    }
}

/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
988
 * Returns true if *c is a retainer.
989 990 991 992 993 994
 * In general the retainers are the objects that may be the roots of the
 * collection. Basically this roots represents programmers threads
 * (TSO) with their stack and thunks.
 *
 * In addition we mark all mutable objects as a retainers, the reason for
 * that decision is lost in time.
995
 * -------------------------------------------------------------------------- */
996
STATIC_INLINE bool
997 998 999
isRetainer( StgClosure *c )
{
    switch (get_itbl(c)->type) {
1000 1001 1002 1003
        //
        //  True case
        //
        // TSOs MUST be retainers: they constitute the set of roots.
1004
    case TSO:
1005
    case STACK:
1006

1007
        // mutable objects
1008
    case MUT_PRIM:
1009 1010
    case MVAR_CLEAN:
    case MVAR_DIRTY:
1011
    case TVAR:
1012 1013
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
1014 1015
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
1016 1017 1018
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
    case BLOCKING_QUEUE:
1019

1020
        // thunks are retainers.
1021 1022 1023 1024 1025 1026 1027
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
1028 1029
    case AP:
    case AP_STACK:
1030

1031
        // Static thunks, or CAFS, are obviously retainers.
1032 1033
    case THUNK_STATIC:

1034 1035
        // WEAK objects are roots; there is separate code in which traversing
        // begins from WEAK objects.
1036
    case WEAK:
Ben Gamari's avatar
Ben Gamari committed
1037
        return true;
1038

1039 1040 1041
        //
        // False case
        //
1042

1043
        // constructors
1044
    case CONSTR:
Simon Marlow's avatar
Simon Marlow committed
1045
    case CONSTR_NOCAF:
1046 1047 1048 1049 1050
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
1051
        // functions
1052 1053 1054 1055 1056 1057
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
1058
        // partial applications
1059
    case PAP:
1060
        // indirection
Ian Lynagh's avatar
Ian Lynagh committed
1061 1062 1063 1064
    // IND_STATIC used to be an error, but at the moment it can happen
    // as isAlive doesn't look through IND_STATIC as it ignores static
    // closures. See trac #3956 for a program that hit this error.
    case IND_STATIC:
1065
    case BLACKHOLE:
1066
    case WHITEHOLE:
1067
        // static objects
1068
    case FUN_STATIC:
1069
        // misc
1070
    case PRIM:
1071 1072
    case BCO:
    case ARR_WORDS:
1073
    case COMPACT_NFDATA:
1074
        // STM
1075
    case TREC_CHUNK:
1076
        // immutable arrays
1077 1078 1079 1080
    case MUT_ARR_PTRS_FROZEN_CLEAN:
    case MUT_ARR_PTRS_FROZEN_DIRTY:
    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
Ben Gamari's avatar
Ben Gamari committed
1081
        return false;
1082

1083 1084 1085 1086 1087
        //
        // Error case
        //
        // Stack objects are invalid because they are never treated as
        // legal objects during retainer profiling.
1088 1089
    case UPDATE_FRAME:
    case CATCH_FRAME:
1090 1091
    case CATCH_RETRY_FRAME:
    case CATCH_STM_FRAME:
1092
    case UNDERFLOW_FRAME:
1093
    case ATOMICALLY_FRAME:
1094 1095 1096 1097
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
1098
    case RET_FUN:
Austin Seipp's avatar