RetainerProfile.c 55.1 KB
Newer Older
1 2 3 4 5 6 7 8 9
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2001
 * Author: Sungwoo Park
 *
 * Retainer profiling.
 *
 * ---------------------------------------------------------------------------*/

Ben Gamari's avatar
Ben Gamari committed
10
#if defined(PROFILING)
11

Simon Marlow's avatar
Simon Marlow committed
12
#include "PosixSource.h"
13
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
14

15 16 17
#include "RtsUtils.h"
#include "RetainerProfile.h"
#include "RetainerSet.h"
18
#include "TraverseHeap.h"
19 20 21
#include "Schedule.h"
#include "Printer.h"
#include "Weak.h"
Simon Marlow's avatar
Simon Marlow committed
22
#include "sm/Sanity.h"
23 24 25
#include "Profiling.h"
#include "Stats.h"
#include "ProfHeap.h"
26
#include "Apply.h"
David Feuer's avatar
David Feuer committed
27 28
#include "StablePtr.h" /* markStablePtrTable */
#include "StableName.h" /* rememberOldStableNameAddresses */
Simon Marlow's avatar
Simon Marlow committed
29
#include "sm/Storage.h" // for END_OF_STATIC_LIST
30

31 32
/* Note [What is a retainer?]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~
33 34
Retainer profiling is a profiling technique that gives information why
objects can't be freed and lists the consumers that hold pointers to
35
the heap objects. It does not list all the objects that keep references
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
to the other, because then we would keep too much information that will
make the report unusable, for example the cons element of the list would keep
all the tail cells. As a result we are keeping only the objects of the
certain types, see 'isRetainer()' function for more discussion.

More formal definition of the retainer can be given the following way.

An object p is a retainer object of the object l, if all requirements
hold:

  1. p can be a retainer (see `isRetainer()`)
  2. l is reachable from p
  3. There are no other retainers on the path from p to l.

Exact algorithm and additional information can be found the historical
document 'docs/storage-mgt/rp.tex'. Details that are related to the
RTS implementation may be out of date, but the general
information about the retainers is still applicable.
54 55 56
*/


57 58 59 60 61 62 63 64 65 66 67 68 69
/*
  Note: what to change in order to plug-in a new retainer profiling scheme?
    (1) type retainer in ../includes/StgRetainerProf.h
    (2) retainer function R(), i.e., getRetainerFrom()
    (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
        in RetainerSet.h, if needed.
    (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
 */

/* -----------------------------------------------------------------------------
 * Declarations...
 * -------------------------------------------------------------------------- */

70
static uint32_t retainerGeneration;  // generation
71

72 73 74
static uint32_t numObjectVisited;    // total number of objects visited
static uint32_t timesAnyObjectVisited;  // number of times any objects are
                                        // visited
75

76 77 78 79 80
/** Note [Profiling heap traversal visited bit]
 *
 * If the RTS is compiled with profiling enabled StgProfHeader can be used by
 * profiling code to store per-heap object information.
 *
81 82 83 84
 * The generic heap traversal code reserves the least significant bit of the
 * largest members of the 'trav' union to decide whether we've already visited a
 * given closure in the current pass or not. The rest of the field is free to be
 * used by the calling profiler.
85
 *
86 87 88 89 90
 * By doing things this way we implicitly assume that the LSB of the largest
 * field in the 'trav' union is insignificant. This is true at least for the
 * word aligned pointers which the retainer profiler currently stores there and
 * should be maintained by new users of the 'trav' union for example by shifting
 * the real data up by one bit.
91
 *
92 93 94 95 96 97 98 99 100
 * Since we don't want to have to scan the entire heap a second time just to
 * reset the per-object visitied bit before/after the real traversal we make the
 * interpretation of this bit dependent on the value of a global variable,
 * 'flip'.
 *
 * When the 'trav' bit is equal to the value of 'flip' the closure data is
 * valid otherwise not (see isTravDataValid). We then invert the value of 'flip'
 * on each heap traversal (see traverseWorkStack), in effect marking all
 * closure's data as invalid at once.
101
 *
102 103
 * There are some complications with this approach, namely: static objects and
 * mutable data. There we do just go over all existing objects to reset the bit
104
 * manually. See 'resetStaticObjectForProfiling' and 'computeRetainerSet'.
105
 */
106
StgWord flip = 0;
107

108 109
#define setTravDataToZero(c) \
  (c)->header.prof.hp.trav.lsb = flip
110 111 112 113 114 115 116 117 118 119 120 121

/* -----------------------------------------------------------------------------
 * Retainer stack - header
 *   Note:
 *     Although the retainer stack implementation could be separated *
 *     from the retainer profiling engine, there does not seem to be
 *     any advantage in doing that; retainer stack is an integral part
 *     of retainer profiling engine and cannot be use elsewhere at
 *     all.
 * -------------------------------------------------------------------------- */

typedef enum {
122 123
    // Object with fixed layout. Keeps an information about that
    // element was processed. (stackPos.next.step)
124
    posTypeStep,
125 126
    // Description of the pointers-first heap object. Keeps information
    // about layout. (stackPos.next.ptrs)
127
    posTypePtrs,
128
    // Keeps SRT bitmap (stackPos.next.srt)
129
    posTypeSRT,
130 131 132
    // Keeps a new object that was not inspected yet. Keeps a parent
    // element (stackPos.next.parent)
    posTypeFresh
133 134 135 136 137 138 139 140
} nextPosType;

typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
141 142 143
        // See StgClosureInfo in InfoTables.h
        StgHalfWord pos;
        StgHalfWord ptrs;
144
        StgPtr payload;
145 146 147 148
    } ptrs;

    // SRT
    struct {
149
        StgClosure *srt;
150 151 152
    } srt;
} nextPos;

153 154 155 156
/**
 * Position pointer into a closure. Determines what the next element to return
 * for a stackElement is.
 */
157 158 159 160 161
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

162 163 164 165 166 167 168 169 170 171 172
/**
 * An element of the traversal work-stack. Besides the closure itself this also
 * stores it's parent and associated data.
 *
 * When 'info.type == posTypeFresh' a 'stackElement' represents just one
 * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an
 * offset into the children of 'c'. This is to support returning a closure's
 * children one-by-one without pushing one element per child onto the stack. See
 * traversePushChildren() and traversePop().
 *
 */
173
typedef struct stackElement_ {
174
    stackPos info;
175
    StgClosure *c;
176
    StgClosure *cp; // parent of 'c'. Only used when info.type == posTypeFresh.
177
    stackData data;
178 179
} stackElement;

180 181 182
traverseState g_retainerTraverseState;


183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
#if defined(DEBUG)
unsigned int g_traversalDebugLevel = 0;
static inline void debug(const char *s, ...)
{
    va_list ap;

    if(g_traversalDebugLevel == 0)
        return;

    va_start(ap,s);
    vdebugBelch(s, ap);
    va_end(ap);
}
#else
#define debug(...)
#endif
199 200 201 202 203 204 205 206 207

// number of blocks allocated for one stack
#define BLOCKS_IN_STACK 1

/* -----------------------------------------------------------------------------
 * Add a new block group to the stack.
 * Invariants:
 *  currentStack->link == s.
 * -------------------------------------------------------------------------- */
208
STATIC_INLINE void
209
newStackBlock( traverseState *ts, bdescr *bd )
210
{
211 212 213 214 215
    ts->currentStack = bd;
    ts->stackTop     = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    ts->stackBottom  = (stackElement *)bd->start;
    ts->stackLimit   = (stackElement *)ts->stackTop;
    bd->free     = (StgPtr)ts->stackLimit;
216 217 218 219 220 221 222
}

/* -----------------------------------------------------------------------------
 * Return to the previous block group.
 * Invariants:
 *   s->link == currentStack.
 * -------------------------------------------------------------------------- */
223
STATIC_INLINE void
224
returnToOldStack( traverseState *ts, bdescr *bd )
225
{
226 227 228 229 230
    ts->currentStack = bd;
    ts->stackTop = (stackElement *)bd->free;
    ts->stackBottom = (stackElement *)bd->start;
    ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    bd->free = (StgPtr)ts->stackLimit;
231 232
}

233 234 235
/**
 *  Initializes the traversal work-stack.
 */
236
void
237
initializeTraverseStack( traverseState *ts )
238
{
239 240
    if (ts->firstStack != NULL) {
        freeChain(ts->firstStack);
241 242
    }

243 244 245
    ts->firstStack = allocGroup(BLOCKS_IN_STACK);
    ts->firstStack->link = NULL;
    ts->firstStack->u.back = NULL;
246

247 248 249
    ts->stackSize = 0;
    ts->maxStackSize = 0;

250
    newStackBlock(ts, ts->firstStack);
251 252
}

253 254 255
/**
 * Frees all the block groups in the traversal works-stack.
 *
256 257
 * Invariants:
 *   firstStack != NULL
258
 */
259
void
260
closeTraverseStack( traverseState *ts )
261
{
262 263
    freeChain(ts->firstStack);
    ts->firstStack = NULL;
264 265
}

266 267 268 269 270 271
int
getTraverseStackMaxSize(traverseState *ts)
{
    return ts->maxStackSize;
}

272
/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
273
 * Returns true if the whole stack is empty.
274
 * -------------------------------------------------------------------------- */
275
STATIC_INLINE bool
276
isEmptyWorkStack( traverseState *ts )
277
{
278
    return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit;
279 280
}

sof's avatar
sof committed
281 282 283
/* -----------------------------------------------------------------------------
 * Returns size of stack
 * -------------------------------------------------------------------------- */
284
W_
285
traverseWorkStackBlocks(traverseState *ts)
sof's avatar
sof committed
286 287
{
    bdescr* bd;
288
    W_ res = 0;
sof's avatar
sof committed
289

290
    for (bd = ts->firstStack; bd != NULL; bd = bd->link)
sof's avatar
sof committed
291 292 293 294 295
      res += bd->blocks;

    return res;
}

296 297 298 299 300 301
W_
retainerStackBlocks(void)
{
    return traverseWorkStackBlocks(&g_retainerTraverseState);
}

302 303 304 305 306
/* -----------------------------------------------------------------------------
 * Initializes *info from ptrs and payload.
 * Invariants:
 *   payload[] begins with ptrs pointers followed by non-pointers.
 * -------------------------------------------------------------------------- */
307
STATIC_INLINE void
308
init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
309 310 311 312 313 314 315 316 317 318
{
    info->type              = posTypePtrs;
    info->next.ptrs.pos     = 0;
    info->next.ptrs.ptrs    = ptrs;
    info->next.ptrs.payload = payload;
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
319
STATIC_INLINE StgClosure *
320 321 322
find_ptrs( stackPos *info )
{
    if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
323
        return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
324
    } else {
325
        return NULL;
326 327 328 329 330 331
    }
}

/* -----------------------------------------------------------------------------
 *  Initializes *info from SRT information stored in *infoTable.
 * -------------------------------------------------------------------------- */
332
STATIC_INLINE void
333
init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
334
{
335 336 337
    info->type = posTypeSRT;
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable);
338
    } else {
339
        info->next.srt.srt = NULL;
340
    }
341 342
}

343
STATIC_INLINE void
344
init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
345
{
Ömer Sinan Ağacan's avatar
Ömer Sinan Ağacan committed
346
    info->type = posTypeSRT;
347 348
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_SRT(infoTable);
349
    } else {
350
        info->next.srt.srt = NULL;
351
    }
352 353 354 355 356
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
357
STATIC_INLINE StgClosure *
358 359 360
find_srt( stackPos *info )
{
    StgClosure *c;
361
    if (info->type == posTypeSRT) {
362 363 364
        c = info->next.srt.srt;
        info->next.srt.srt = NULL;
        return c;
365
    }
366 367

    return NULL;
368 369
}

370 371 372 373
/**
 * Push a set of closures, represented by a single 'stackElement', onto the
 * traversal work-stack.
 */
374
static void
375 376
pushStackElement(traverseState *ts, stackElement *se)
{
377
    bdescr *nbd;      // Next Block Descriptor
378
    if (ts->stackTop - 1 < ts->stackBottom) {
379 380
        debug("pushStackElement() to the next stack.\n");

381 382
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
383
        ts->currentStack->free = (StgPtr)ts->stackTop;
384

385
        if (ts->currentStack->link == NULL) {
386 387
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
388 389
            nbd->u.back = ts->currentStack;
            ts->currentStack->link = nbd;
390
        } else
391
            nbd = ts->currentStack->link;
392

393
        newStackBlock(ts, nbd);
394 395 396
    }

    // adjust stackTop (acutal push)
397
    ts->stackTop--;
398 399 400 401
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.
402
    *ts->stackTop = *se;
403

404 405 406
    ts->stackSize++;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
407
    debug("stackSize = %d\n", ts->stackSize);
408 409
}

410 411
/**
 * Push a single closure onto the traversal work-stack.
412
 *
413 414 415
 *  cp   - object's parent
 *  c    - closure
 *  data - data associated with closure.
416
 */
417
inline void
418
traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackData data) {
419 420 421
    stackElement se;

    se.c = c;
422 423
    se.cp = cp;
    se.data = data;
424 425
    se.info.type = posTypeFresh;

426
    pushStackElement(ts, &se);
427 428
};

429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
/**
 * traversePushChildren() extracts the first child of 'c' in 'first_child' and
 * conceptually pushes all remaining children of 'c' onto the traversal stack
 * while associating 'data' with the pushed elements to be returned upon poping.
 *
 * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed
 * onto the stack.
 *
 * If 'c' has only one child, 'first_child' is set to that child and nothing is
 * pushed onto the stack.
 *
 * Invariants:
 *
 *  - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot
 *       be any stack objects.
 *
 * Note: SRTs are considered to be children as well.
 *
 * Note: When pushing onto the stack we only really push one 'stackElement'
 * representing all children onto the stack. See traversePop()
 */
450
STATIC_INLINE void
451
traversePushChildren(traverseState *ts, StgClosure *c, stackData data, StgClosure **first_child)
452 453 454
{
    stackElement se;

455
    debug("traversePushChildren(): stackTop = 0x%x\n", ts->stackTop);
456 457

    ASSERT(get_itbl(c)->type != TSO);
458
    ASSERT(get_itbl(c)->type != AP_STACK);
459 460 461 462 463 464

    //
    // fill in se
    //

    se.c = c;
465
    se.data = data;
466
    // Note: se.cp ommitted on purpose, only traversePushClosure uses that.
467 468 469

    // fill in se.info
    switch (get_itbl(c)->type) {
470
        // no child, no SRT
471 472 473
    case CONSTR_0_1:
    case CONSTR_0_2:
    case ARR_WORDS:
gcampax's avatar
gcampax committed
474
    case COMPACT_NFDATA:
475 476
        *first_child = NULL;
        return;
477

478
        // one child (fixed), no SRT
479 480
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
481 482
        *first_child = ((StgMutVar *)c)->var;
        return;
483
    case THUNK_SELECTOR:
484 485
        *first_child = ((StgSelector *)c)->selectee;
        return;
486
    case BLACKHOLE:
487 488
        *first_child = ((StgInd *)c)->indirectee;
        return;
489 490
    case CONSTR_1_0:
    case CONSTR_1_1:
491 492
        *first_child = c->payload[0];
        return;
493

494 495 496
        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;
497

498 499
        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
500
    case CONSTR_2_0:
501
        *first_child = c->payload[0];         // return the first pointer
502 503
        se.info.type = posTypeStep;
        se.info.next.step = 2;            // 2 = second
504
        break;
505

506 507
        // three children (fixed), no SRT
        // need to push a stackElement
508 509
    case MVAR_CLEAN:
    case MVAR_DIRTY:
510 511 512
        // head must be TSO and the head of a linked list of TSOs.
        // Shoule it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
513
        se.info.type = posTypeStep;
514 515 516 517
        se.info.next.step = 2;            // 2 = second
        break;

        // three children (fixed), no SRT
518
    case WEAK:
519
        *first_child = ((StgWeak *)c)->key;
520
        se.info.type = posTypeStep;
521 522
        se.info.next.step = 2;
        break;
523

524
        // layout.payload.ptrs, no SRT
525
    case TVAR:
526
    case CONSTR:
Simon Marlow's avatar
Simon Marlow committed
527
    case CONSTR_NOCAF:
528
    case PRIM:
529
    case MUT_PRIM:
530
    case BCO:
531 532 533 534 535 536 537 538
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;   // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
539 540
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
541 542
    case MUT_ARR_PTRS_FROZEN_CLEAN:
    case MUT_ARR_PTRS_FROZEN_DIRTY:
543 544 545 546 547 548 549 550
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // StgMutArrPtr.ptrs, no SRT
551 552
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
553 554
    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
555 556 557 558 559 560
        init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;
561

562
    // layout.payload.ptrs, SRT
563
    case FUN_STATIC:
564 565
    case FUN:           // *c is a heap object.
    case FUN_2_0:
566 567 568 569 570 571
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;
572

573 574
    case THUNK:
    case THUNK_2_0:
575 576 577 578 579 580 581 582 583
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
584 585
    case FUN_1_0:
    case FUN_1_1:
586 587 588 589
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;
590

591 592
    case THUNK_1_0:
    case THUNK_1_1:
593 594 595 596
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;
597

598
    case FUN_0_1:      // *c is a heap object.
599
    case FUN_0_2:
600 601
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
602 603 604 605
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;
606 607 608

    // SRT only
    case THUNK_STATIC:
609
        ASSERT(get_itbl(c)->srt != 0);
610
        /* fall-thru */
611 612
    case THUNK_0_1:
    case THUNK_0_2:
613 614
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
615 616 617 618 619
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

620
    case TREC_CHUNK:
621
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
622
        se.info.type = posTypeStep;
623 624
        se.info.next.step = 0;  // entry no.
        break;
625

626
        // cannot appear
627
    case PAP:
628 629
    case AP:
    case AP_STACK:
630
    case TSO:
631
    case STACK:
632
    case IND_STATIC:
633
        // stack objects
634 635
    case UPDATE_FRAME:
    case CATCH_FRAME:
636
    case UNDERFLOW_FRAME:
637 638 639 640
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
641
        // invalid objects
642 643 644
    case IND:
    case INVALID_OBJECT:
    default:
645
        barf("Invalid object *c in push(): %d", get_itbl(c)->type);
646
        return;
647 648
    }

649 650 651 652
    // se.cp has to be initialized when type==posTypeFresh. We don't do that
    // here though. So type must be !=posTypeFresh.
    ASSERT(se.info.type != posTypeFresh);

653
    pushStackElement(ts, &se);
654 655
}

656 657 658 659
/**
 *  popStackElement(): Remove a depleted stackElement from the top of the
 *  traversal work-stack.
 *
660 661
 *  Invariants:
 *    stackTop cannot be equal to stackLimit unless the whole stack is
662 663
 *    empty, in which case popStackElement() is not allowed.
 */
664
static void
665
popStackElement(traverseState *ts) {
666
    debug("popStackElement(): stackTop = 0x%x\n", ts->stackTop);
667 668 669 670 671 672 673

    ASSERT(ts->stackTop != ts->stackLimit);
    ASSERT(!isEmptyWorkStack(ts));

    // <= (instead of <) is wrong!
    if (ts->stackTop + 1 < ts->stackLimit) {
        ts->stackTop++;
674

675 676 677
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
678 679
        debug("stackSize = (--) %d\n", ts->stackSize);

680 681 682
        return;
    }

683 684
    bdescr *pbd;    // Previous Block Descriptor

685
    debug("popStackElement() to the previous stack.\n");
686

687 688
    ASSERT(ts->stackTop + 1 == ts->stackLimit);
    ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start);
689

690
    if (ts->firstStack == ts->currentStack) {
691
        // The stack is completely empty.
692 693
        ts->stackTop++;
        ASSERT(ts->stackTop == ts->stackLimit);
694

695 696 697
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
698 699
        debug("stackSize = %d\n", ts->stackSize);

700
        return;
701 702 703 704
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
705
    ts->currentStack->free = (StgPtr)ts->stackLimit;
706 707

    // find the previous block descriptor
708
    pbd = ts->currentStack->u.back;
709 710
    ASSERT(pbd != NULL);

711
    returnToOldStack(ts, pbd);
712

713 714 715
    ts->stackSize--;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
716
    debug("stackSize = %d\n", ts->stackSize);
717 718
}

719
/**
720 721
 *  Finds the next object to be considered for retainer profiling and store
 *  its pointer to *c.
722
 *
723 724 725
 *  If the unprocessed object was stored in the stack (posTypeFresh), the
 *  this object is returned as-is. Otherwise Test if the topmost stack
 *  element indicates that more objects are left,
726
 *  and if so, retrieve the first object and store its pointer to *c. Also,
727 728 729
 *  set *cp and *data appropriately, both of which are stored in the stack
 *  element.  The topmost stack element then is overwritten so as for it to now
 *  denote the next object.
730
 *
731 732
 *  If the topmost stack element indicates no more objects are left, pop
 *  off the stack element until either an object can be retrieved or
733 734
 *  the work-stack becomes empty, indicated by true returned by
 *  isEmptyWorkStack(), in which case *c is set to NULL.
735
 *
736
 *  Note:
737
 *
738
 *    It is okay to call this function even when the work-stack is empty.
739
 */
740
STATIC_INLINE void
741
traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data)
742 743 744
{
    stackElement *se;

745
    debug("traversePop(): stackTop = 0x%x\n", ts->stackTop);
746

747 748 749 750
    // Is this the last internal element? If so instead of modifying the current
    // stackElement in place we actually remove it from the stack.
    bool last = false;

751
    do {
752
        if (isEmptyWorkStack(ts)) {
753 754 755 756
            *c = NULL;
            return;
        }

757
        // Note: Below every `break`, where the loop condition is true, must be
758 759
        // accompanied by a popStackElement() otherwise this is an infinite
        // loop.
760
        se = ts->stackTop;
761

762 763
        // If this is a top-level element, you should pop that out.
        if (se->info.type == posTypeFresh) {
764
            *cp = se->cp;
765
            *c = se->c;
766
            *data = se->data;
767
            popStackElement(ts);
768 769 770
            return;
        }

771 772
        // Note: The first ptr of all of these was already returned as
        // *fist_child in push(), so we always start with the second field.
773 774 775 776 777
        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
778 779
            last = true;
            goto out;
780 781 782

            // three children (fixed), no SRT
            // need to push a stackElement
783 784
        case MVAR_CLEAN:
        case MVAR_DIRTY:
785 786 787
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
788
                // no popStackElement
789 790
            } else {
                *c = ((StgMVar *)se->c)->value;
791
                last = true;
792
            }
793
            goto out;
794 795 796 797 798 799

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
800
                // no popStackElement
801 802
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
803
                last = true;
804
            }
805
            goto out;
806 807 808 809 810 811 812 813

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
814 815
            uint32_t entry_no = se->info.next.step >> 2;
            uint32_t field_no = se->info.next.step & 3;
816 817
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
818
                popStackElement(ts);
819
                break; // this breaks out of the switch not the loop
820 821 822 823 824 825 826 827 828 829
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            se->info.next.step++;
830
            goto out;
831
        }
832

833 834
        case TVAR:
        case CONSTR:
835 836 837 838 839 840
        case PRIM:
        case MUT_PRIM:
        case BCO:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
841 842
        case MUT_ARR_PTRS_FROZEN_CLEAN:
        case MUT_ARR_PTRS_FROZEN_DIRTY:
843 844 845 846
        case SMALL_MUT_ARR_PTRS_CLEAN:
        case SMALL_MUT_ARR_PTRS_DIRTY:
        case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
        case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
847 848
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
849
                popStackElement(ts);
850
                break; // this breaks out of the switch not the loop
851
            }
852
            goto out;
853 854 855

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
856
        case FUN_STATIC:
857 858 859 860
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
861
                    goto out;
862 863 864 865 866 867 868 869 870 871
                }
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
872
                    goto out;
873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889
                }
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
890
            if(*c == NULL) {
891
                popStackElement(ts);
892
                break; // this breaks out of the switch not the loop
893
            }
894
            goto out;
895 896 897 898 899 900 901 902 903 904 905 906 907 908 909

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
910 911
        case STACK:
        case IND_STATIC:
Simon Marlow's avatar
Simon Marlow committed
912
        case CONSTR_NOCAF:
913
            // stack objects
914
        case UPDATE_FRAME:
915
        case CATCH_FRAME:
916 917
        case UNDERFLOW_FRAME:
        case STOP_FRAME:
918 919 920 921 922 923 924
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case INVALID_OBJECT:
        default:
925
            barf("Invalid object *c in traversePop(): %d", get_itbl(se->c)->type);
926 927
            return;
        }
928 929 930 931 932 933 934 935 936 937
    } while (*c == NULL);

out:

    ASSERT(*c != NULL);

    *cp = se->c;
    *data = se->data;

    if(last)
938
        popStackElement(ts);
939 940 941

    return;

942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963
}

/* -----------------------------------------------------------------------------
 * RETAINER PROFILING ENGINE
 * -------------------------------------------------------------------------- */

void
initRetainerProfiling( void )
{
    initializeAllRetainerSet();
    retainerGeneration = 0;
}

/* -----------------------------------------------------------------------------
 *  This function must be called before f-closing prof_file.
 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
    outputAllRetainerSet(prof_file);
}

964 965
/**
 * Make sure a closure's profiling data is initialized to zero if it does not
966
 * conform to the current value of the flip bit, returns true in this case.
967 968 969
 *
 * See Note [Profiling heap traversal visited bit].
 */
970
bool
971
traverseMaybeInitClosureData(StgClosure *c)
972
{
973 974
    if (!isTravDataValid(c)) {
        setTravDataToZero(c);
975
        return true;
976
    }
977
    return false;
978 979 980
}

/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
981
 * Returns true if *c is a retainer.
982 983 984 985 986 987
 * In general the retainers are the objects that may be the roots of the
 * collection. Basically this roots represents programmers threads
 * (TSO) with their stack and thunks.
 *
 * In addition we mark all mutable objects as a retainers, the reason for
 * that decision is lost in time.
988
 * -------------------------------------------------------------------------- */
989
STATIC_INLINE bool
990
isRetainer( const StgClosure *c )
991 992
{
    switch (get_itbl(c)->type) {
993 994 995 996
        //
        //  True case
        //
        // TSOs MUST be retainers: they constitute the set of roots.
997
    case TSO:
998
    case STACK:
999

1000
        // mutable objects
1001
    case MUT_PRIM:
1002 1003
    case MVAR_CLEAN:
    case MVAR_DIRTY:
1004
    case TVAR:
1005 1006
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
1007 1008
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
1009 1010 1011
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
    case BLOCKING_QUEUE:
1012

1013
        // thunks are retainers.
1014 1015 1016 1017 1018 1019 1020
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
1021 1022
    case AP:
    case AP_STACK:
1023

1024
        // Static thunks, or CAFS, are obviously retainers.
1025 1026
    case THUNK_STATIC:

1027 1028
        // WEAK objects are roots; there is separate code in which traversing
        // begins from WEAK objects.
1029
    case WEAK:
Ben Gamari's avatar
Ben Gamari committed
1030
        return true;
1031

1032 1033 1034
        //
        // False case
        //
1035

1036
        // constructors
1037
    case CONSTR:
Simon Marlow's avatar
Simon Marlow committed
1038
    case CONSTR_NOCAF:
1039 1040 1041 1042 1043
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
1044
        // functions
1045 1046 1047 1048 1049 1050
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
1051
        // partial applications
1052
    case PAP:
1053
        // indirection
Ian Lynagh's avatar
Ian Lynagh committed
1054 1055 1056 1057
    // IND_STATIC used to be an error, but at the moment it can happen
    // as isAlive doesn't look through IND_STATIC as it ignores static
    // closures. See trac #3956 for a program that hit this error.
    case IND_STATIC:
1058
    case BLACKHOLE:
1059
    case WHITEHOLE:
1060
        // static objects
1061
    case FUN_STATIC:
1062
        // misc
1063
    case PRIM:
1064 1065
    case BCO:
    case ARR_WORDS:
1066
    case COMPACT_NFDATA:
1067
        // STM
1068
    case TREC_CHUNK:
1069
        // immutable arrays
1070 1071 1072 1073
    case MUT_ARR_PTRS_FROZEN_CLEAN:
    case MUT_ARR_PTRS_FROZEN_DIRTY:
    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
Ben Gamari's avatar
Ben Gamari committed
1074
        return false;
1075

1076 1077 1078 1079 1080
        //
        // Error case
        //
        // Stack objects are invalid because they are never treated as
        // legal objects during retainer profiling.
1081 1082
    case UPDATE_FRAME:
    case CATCH_FRAME: