RetainerProfile.c 57.3 KB
Newer Older
1 2 3 4 5 6 7 8 9
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2001
 * Author: Sungwoo Park
 *
 * Retainer profiling.
 *
 * ---------------------------------------------------------------------------*/

Ben Gamari's avatar
Ben Gamari committed
10
#if defined(PROFILING)
11

Simon Marlow's avatar
Simon Marlow committed
12
#include "PosixSource.h"
13
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
14

15 16 17
#include "RtsUtils.h"
#include "RetainerProfile.h"
#include "RetainerSet.h"
18
#include "TraverseHeap.h"
19 20 21
#include "Schedule.h"
#include "Printer.h"
#include "Weak.h"
Simon Marlow's avatar
Simon Marlow committed
22
#include "sm/Sanity.h"
23 24 25
#include "Profiling.h"
#include "Stats.h"
#include "ProfHeap.h"
26
#include "Apply.h"
David Feuer's avatar
David Feuer committed
27 28
#include "StablePtr.h" /* markStablePtrTable */
#include "StableName.h" /* rememberOldStableNameAddresses */
Simon Marlow's avatar
Simon Marlow committed
29
#include "sm/Storage.h" // for END_OF_STATIC_LIST
30

31 32
/* Note [What is a retainer?]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~
33 34
Retainer profiling is a profiling technique that gives information why
objects can't be freed and lists the consumers that hold pointers to
35
the heap objects. It does not list all the objects that keep references
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
to the other, because then we would keep too much information that will
make the report unusable, for example the cons element of the list would keep
all the tail cells. As a result we are keeping only the objects of the
certain types, see 'isRetainer()' function for more discussion.

More formal definition of the retainer can be given the following way.

An object p is a retainer object of the object l, if all requirements
hold:

  1. p can be a retainer (see `isRetainer()`)
  2. l is reachable from p
  3. There are no other retainers on the path from p to l.

Exact algorithm and additional information can be found the historical
document 'docs/storage-mgt/rp.tex'. Details that are related to the
RTS implementation may be out of date, but the general
information about the retainers is still applicable.
54 55 56
*/


57 58 59 60 61 62 63 64 65 66 67 68 69
/*
  Note: what to change in order to plug-in a new retainer profiling scheme?
    (1) type retainer in ../includes/StgRetainerProf.h
    (2) retainer function R(), i.e., getRetainerFrom()
    (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
        in RetainerSet.h, if needed.
    (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
 */

/* -----------------------------------------------------------------------------
 * Declarations...
 * -------------------------------------------------------------------------- */

70
static uint32_t retainerGeneration;  // generation
71

72 73 74
static uint32_t numObjectVisited;    // total number of objects visited
static uint32_t timesAnyObjectVisited;  // number of times any objects are
                                        // visited
75

76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
/** Note [Profiling heap traversal visited bit]
 *
 * If the RTS is compiled with profiling enabled StgProfHeader can be used by
 * profiling code to store per-heap object information.
 *
 * When using the generic heap traversal code we use this field to store
 * profiler specific information. However we reserve the LSB of the *entire*
 * 'trav' union (which will overlap with the other fields) for the generic
 * traversal code. We use the bit to decide whether we've already visited this
 * closure in this pass or not. We do this as the heap may contain cyclic
 * references, it being a graph and all, so we would likely just infinite loop
 * if we didn't.
 *
 * We assume that at least the LSB of the largest field in the corresponding
 * union is insignificant. This is true at least for the word aligned pointers
 * which the retainer profiler currently stores there and should be maintained
 * by new users of the 'trav' union.
 *
 * Now the way the traversal works is that the interpretation of the "visited?"
 * bit depends on the value of the global 'flip' variable. We don't want to have
 * to do another pass over the heap just to reset the bit to zero so instead on
 * each traversal (i.e. each run of the profiling code) we invert the value of
 * the global 'flip' variable. We interpret this as resetting all the "visited?"
 * flags on the heap.
 *
 * There is one exception to this rule, namely: static objects. There we do just
 * go over the heap and reset the bit manually. See
103
 * 'resetStaticObjectForProfiling'.
104
 */
105
StgWord flip = 0;     // flip bit
106 107
                      // must be 0 if DEBUG_RETAINER is on (for static closures)

108 109
#define setTravDataToZero(c) \
  (c)->header.prof.hp.trav.lsb = flip
110 111 112 113 114 115 116 117 118 119 120 121

/* -----------------------------------------------------------------------------
 * Retainer stack - header
 *   Note:
 *     Although the retainer stack implementation could be separated *
 *     from the retainer profiling engine, there does not seem to be
 *     any advantage in doing that; retainer stack is an integral part
 *     of retainer profiling engine and cannot be use elsewhere at
 *     all.
 * -------------------------------------------------------------------------- */

typedef enum {
122 123
    // Object with fixed layout. Keeps an information about that
    // element was processed. (stackPos.next.step)
124
    posTypeStep,
125 126
    // Description of the pointers-first heap object. Keeps information
    // about layout. (stackPos.next.ptrs)
127
    posTypePtrs,
128
    // Keeps SRT bitmap (stackPos.next.srt)
129
    posTypeSRT,
130 131 132
    // Keeps a new object that was not inspected yet. Keeps a parent
    // element (stackPos.next.parent)
    posTypeFresh
133 134 135 136 137 138 139 140
} nextPosType;

typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
141 142 143
        // See StgClosureInfo in InfoTables.h
        StgHalfWord pos;
        StgHalfWord ptrs;
144
        StgPtr payload;
145 146 147 148
    } ptrs;

    // SRT
    struct {
149
        StgClosure *srt;
150 151 152
    } srt;
} nextPos;

153 154 155 156
/**
 * Position pointer into a closure. Determines what the next element to return
 * for a stackElement is.
 */
157 158 159 160 161
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

162 163 164 165 166 167 168 169 170 171 172
/**
 * An element of the traversal work-stack. Besides the closure itself this also
 * stores it's parent and associated data.
 *
 * When 'info.type == posTypeFresh' a 'stackElement' represents just one
 * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an
 * offset into the children of 'c'. This is to support returning a closure's
 * children one-by-one without pushing one element per child onto the stack. See
 * traversePushChildren() and traversePop().
 *
 */
173
typedef struct stackElement_ {
174
    stackPos info;
175
    StgClosure *c;
176
    StgClosure *cp; // parent of 'c'. Only used when info.type == posTypeFresh.
177
    stackData data;
178 179
} stackElement;

180 181 182
traverseState g_retainerTraverseState;


183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
#if defined(DEBUG)
unsigned int g_traversalDebugLevel = 0;
static inline void debug(const char *s, ...)
{
    va_list ap;

    if(g_traversalDebugLevel == 0)
        return;

    va_start(ap,s);
    vdebugBelch(s, ap);
    va_end(ap);
}
#else
#define debug(...)
#endif
199 200 201 202 203 204 205 206 207

// number of blocks allocated for one stack
#define BLOCKS_IN_STACK 1

/* -----------------------------------------------------------------------------
 * Add a new block group to the stack.
 * Invariants:
 *  currentStack->link == s.
 * -------------------------------------------------------------------------- */
208
STATIC_INLINE void
209
newStackBlock( traverseState *ts, bdescr *bd )
210
{
211 212 213 214 215
    ts->currentStack = bd;
    ts->stackTop     = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    ts->stackBottom  = (stackElement *)bd->start;
    ts->stackLimit   = (stackElement *)ts->stackTop;
    bd->free     = (StgPtr)ts->stackLimit;
216 217 218 219 220 221 222
}

/* -----------------------------------------------------------------------------
 * Return to the previous block group.
 * Invariants:
 *   s->link == currentStack.
 * -------------------------------------------------------------------------- */
223
STATIC_INLINE void
224
returnToOldStack( traverseState *ts, bdescr *bd )
225
{
226 227 228 229 230
    ts->currentStack = bd;
    ts->stackTop = (stackElement *)bd->free;
    ts->stackBottom = (stackElement *)bd->start;
    ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    bd->free = (StgPtr)ts->stackLimit;
231 232
}

233 234 235
/**
 *  Initializes the traversal work-stack.
 */
236
void
237
initializeTraverseStack( traverseState *ts )
238
{
239 240
    if (ts->firstStack != NULL) {
        freeChain(ts->firstStack);
241 242
    }

243 244 245
    ts->firstStack = allocGroup(BLOCKS_IN_STACK);
    ts->firstStack->link = NULL;
    ts->firstStack->u.back = NULL;
246

247
    newStackBlock(ts, ts->firstStack);
248 249
}

250 251 252
/**
 * Frees all the block groups in the traversal works-stack.
 *
253 254
 * Invariants:
 *   firstStack != NULL
255
 */
256
void
257
closeTraverseStack( traverseState *ts )
258
{
259 260
    freeChain(ts->firstStack);
    ts->firstStack = NULL;
261 262 263
}

/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
264
 * Returns true if the whole stack is empty.
265
 * -------------------------------------------------------------------------- */
266
STATIC_INLINE bool
267
isEmptyWorkStack( traverseState *ts )
268
{
269
    return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit;
270 271
}

sof's avatar
sof committed
272 273 274
/* -----------------------------------------------------------------------------
 * Returns size of stack
 * -------------------------------------------------------------------------- */
275
W_
276
traverseWorkStackBlocks(traverseState *ts)
sof's avatar
sof committed
277 278
{
    bdescr* bd;
279
    W_ res = 0;
sof's avatar
sof committed
280

281
    for (bd = ts->firstStack; bd != NULL; bd = bd->link)
sof's avatar
sof committed
282 283 284 285 286
      res += bd->blocks;

    return res;
}

287 288 289 290 291 292
W_
retainerStackBlocks(void)
{
    return traverseWorkStackBlocks(&g_retainerTraverseState);
}

293
/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
294
 * Returns true if stackTop is at the stack boundary of the current stack,
295 296
 * i.e., if the current stack chunk is empty.
 * -------------------------------------------------------------------------- */
297
STATIC_INLINE bool
298
isOnBoundary( traverseState *ts )
299
{
300
    return ts->stackTop == ts->currentStackBoundary;
301 302 303 304 305 306 307
}

/* -----------------------------------------------------------------------------
 * Initializes *info from ptrs and payload.
 * Invariants:
 *   payload[] begins with ptrs pointers followed by non-pointers.
 * -------------------------------------------------------------------------- */
308
STATIC_INLINE void
309
init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
310 311 312 313 314 315 316 317 318 319
{
    info->type              = posTypePtrs;
    info->next.ptrs.pos     = 0;
    info->next.ptrs.ptrs    = ptrs;
    info->next.ptrs.payload = payload;
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
320
STATIC_INLINE StgClosure *
321 322 323
find_ptrs( stackPos *info )
{
    if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
324
        return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
325
    } else {
326
        return NULL;
327 328 329 330 331 332
    }
}

/* -----------------------------------------------------------------------------
 *  Initializes *info from SRT information stored in *infoTable.
 * -------------------------------------------------------------------------- */
333
STATIC_INLINE void
334
init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
335
{
336 337 338
    info->type = posTypeSRT;
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable);
339
    } else {
340
        info->next.srt.srt = NULL;
341
    }
342 343
}

344
STATIC_INLINE void
345
init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
346
{
Ömer Sinan Ağacan's avatar
Ömer Sinan Ağacan committed
347
    info->type = posTypeSRT;
348 349
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_SRT(infoTable);
350
    } else {
351
        info->next.srt.srt = NULL;
352
    }
353 354 355 356 357
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
358
STATIC_INLINE StgClosure *
359 360 361
find_srt( stackPos *info )
{
    StgClosure *c;
362
    if (info->type == posTypeSRT) {
363 364 365
        c = info->next.srt.srt;
        info->next.srt.srt = NULL;
        return c;
366 367 368
    }
}

369 370 371 372
/**
 * Push a set of closures, represented by a single 'stackElement', onto the
 * traversal work-stack.
 */
373
static void
374 375
pushStackElement(traverseState *ts, stackElement *se)
{
376
    bdescr *nbd;      // Next Block Descriptor
377
    if (ts->stackTop - 1 < ts->stackBottom) {
378 379
        debug("pushStackElement() to the next stack.\n");

380 381
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
382
        ts->currentStack->free = (StgPtr)ts->stackTop;
383

384
        if (ts->currentStack->link == NULL) {
385 386
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
387 388
            nbd->u.back = ts->currentStack;
            ts->currentStack->link = nbd;
389
        } else
390
            nbd = ts->currentStack->link;
391

392
        newStackBlock(ts, nbd);
393 394 395
    }

    // adjust stackTop (acutal push)
396
    ts->stackTop--;
397 398 399 400
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.
401
    *ts->stackTop = *se;
402

403 404 405
    ts->stackSize++;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
406
    debug("stackSize = %d\n", ts->stackSize);
407 408
}

409 410
/**
 * Push a single closure onto the traversal work-stack.
411
 *
412 413 414
 *  cp   - object's parent
 *  c    - closure
 *  data - data associated with closure.
415
 */
416
inline void
417
traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackData data) {
418 419 420
    stackElement se;

    se.c = c;
421 422
    se.cp = cp;
    se.data = data;
423 424
    se.info.type = posTypeFresh;

425
    pushStackElement(ts, &se);
426 427
};

428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448
/**
 * traversePushChildren() extracts the first child of 'c' in 'first_child' and
 * conceptually pushes all remaining children of 'c' onto the traversal stack
 * while associating 'data' with the pushed elements to be returned upon poping.
 *
 * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed
 * onto the stack.
 *
 * If 'c' has only one child, 'first_child' is set to that child and nothing is
 * pushed onto the stack.
 *
 * Invariants:
 *
 *  - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot
 *       be any stack objects.
 *
 * Note: SRTs are considered to be children as well.
 *
 * Note: When pushing onto the stack we only really push one 'stackElement'
 * representing all children onto the stack. See traversePop()
 */
449
STATIC_INLINE void
450
traversePushChildren(traverseState *ts, StgClosure *c, stackData data, StgClosure **first_child)
451 452 453 454
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

455
    debug("traversePushChildren(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
456 457

    ASSERT(get_itbl(c)->type != TSO);
458
    ASSERT(get_itbl(c)->type != AP_STACK);
459 460 461 462 463 464

    //
    // fill in se
    //

    se.c = c;
465
    se.data = data;
466
    // Note: se.cp ommitted on purpose, only traversePushClosure uses that.
467 468 469

    // fill in se.info
    switch (get_itbl(c)->type) {
470
        // no child, no SRT
471 472 473
    case CONSTR_0_1:
    case CONSTR_0_2:
    case ARR_WORDS:
gcampax's avatar
gcampax committed
474
    case COMPACT_NFDATA:
475 476
        *first_child = NULL;
        return;
477

478
        // one child (fixed), no SRT
479 480
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
481 482
        *first_child = ((StgMutVar *)c)->var;
        return;
483
    case THUNK_SELECTOR:
484 485
        *first_child = ((StgSelector *)c)->selectee;
        return;
486
    case BLACKHOLE:
487 488
        *first_child = ((StgInd *)c)->indirectee;
        return;
489 490
    case CONSTR_1_0:
    case CONSTR_1_1:
491 492
        *first_child = c->payload[0];
        return;
493

494 495 496
        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;
497

498 499
        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
500
    case CONSTR_2_0:
501
        *first_child = c->payload[0];         // return the first pointer
502 503
        se.info.type = posTypeStep;
        se.info.next.step = 2;            // 2 = second
504
        break;
505

506 507
        // three children (fixed), no SRT
        // need to push a stackElement
508 509
    case MVAR_CLEAN:
    case MVAR_DIRTY:
510 511 512
        // head must be TSO and the head of a linked list of TSOs.
        // Shoule it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
513
        se.info.type = posTypeStep;
514 515 516 517
        se.info.next.step = 2;            // 2 = second
        break;

        // three children (fixed), no SRT
518
    case WEAK:
519
        *first_child = ((StgWeak *)c)->key;
520
        se.info.type = posTypeStep;
521 522
        se.info.next.step = 2;
        break;
523

524
        // layout.payload.ptrs, no SRT
525
    case TVAR:
526
    case CONSTR:
Simon Marlow's avatar
Simon Marlow committed
527
    case CONSTR_NOCAF:
528
    case PRIM:
529
    case MUT_PRIM:
530
    case BCO:
531 532 533 534 535 536 537 538
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;   // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
539 540
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
541 542
    case MUT_ARR_PTRS_FROZEN_CLEAN:
    case MUT_ARR_PTRS_FROZEN_DIRTY:
543 544 545 546 547 548 549 550
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // StgMutArrPtr.ptrs, no SRT
551 552
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
553 554
    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
555 556 557 558 559 560
        init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;
561

562
    // layout.payload.ptrs, SRT
563
    case FUN_STATIC:
564 565
    case FUN:           // *c is a heap object.
    case FUN_2_0:
566 567 568 569 570 571
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;
572

573 574
    case THUNK:
    case THUNK_2_0:
575 576 577 578 579 580 581 582 583
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
584 585
    case FUN_1_0:
    case FUN_1_1:
586 587 588 589
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;
590

591 592
    case THUNK_1_0:
    case THUNK_1_1:
593 594 595 596
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;
597

598
    case FUN_0_1:      // *c is a heap object.
599
    case FUN_0_2:
600 601
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
602 603 604 605
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;
606 607 608

    // SRT only
    case THUNK_STATIC:
609
        ASSERT(get_itbl(c)->srt != 0);
610 611
    case THUNK_0_1:
    case THUNK_0_2:
612 613
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
614 615 616 617 618
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

619
    case TREC_CHUNK:
620
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
621
        se.info.type = posTypeStep;
622 623
        se.info.next.step = 0;  // entry no.
        break;
624

625
        // cannot appear
626
    case PAP:
627 628
    case AP:
    case AP_STACK:
629
    case TSO:
630
    case STACK:
631
    case IND_STATIC:
632
        // stack objects
633 634
    case UPDATE_FRAME:
    case CATCH_FRAME:
635
    case UNDERFLOW_FRAME:
636 637 638 639
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
640
        // invalid objects
641 642 643
    case IND:
    case INVALID_OBJECT:
    default:
644
        barf("Invalid object *c in push(): %d", get_itbl(c)->type);
645
        return;
646 647
    }

648 649 650 651
    // se.cp has to be initialized when type==posTypeFresh. We don't do that
    // here though. So type must be !=posTypeFresh.
    ASSERT(se.info.type != posTypeFresh);

652
    pushStackElement(ts, &se);
653 654
}

655 656 657 658
/**
 *  popStackElement(): Remove a depleted stackElement from the top of the
 *  traversal work-stack.
 *
659 660
 *  Invariants:
 *    stackTop cannot be equal to stackLimit unless the whole stack is
661 662
 *    empty, in which case popStackElement() is not allowed.
 */
663
static void
664
popStackElement(traverseState *ts) {
665
    debug("popStackElement(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
666 667 668 669 670 671 672

    ASSERT(ts->stackTop != ts->stackLimit);
    ASSERT(!isEmptyWorkStack(ts));

    // <= (instead of <) is wrong!
    if (ts->stackTop + 1 < ts->stackLimit) {
        ts->stackTop++;
673

674 675 676
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
677 678
        debug("stackSize = (--) %d\n", ts->stackSize);

679 680 681
        return;
    }

682 683
    bdescr *pbd;    // Previous Block Descriptor

684
    debug("popStackElement() to the previous stack.\n");
685

686 687
    ASSERT(ts->stackTop + 1 == ts->stackLimit);
    ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start);
688

689
    if (ts->firstStack == ts->currentStack) {
690
        // The stack is completely empty.
691 692
        ts->stackTop++;
        ASSERT(ts->stackTop == ts->stackLimit);
693

694 695 696
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
697 698
        debug("stackSize = %d\n", ts->stackSize);

699
        return;
700 701 702 703
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
704
    ts->currentStack->free = (StgPtr)ts->stackLimit;
705 706

    // find the previous block descriptor
707
    pbd = ts->currentStack->u.back;
708 709
    ASSERT(pbd != NULL);

710
    returnToOldStack(ts, pbd);
711

712 713 714
    ts->stackSize--;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
715
    debug("stackSize = %d\n", ts->stackSize);
716 717
}

718
/**
719 720
 *  Finds the next object to be considered for retainer profiling and store
 *  its pointer to *c.
721
 *
722 723 724
 *  If the unprocessed object was stored in the stack (posTypeFresh), the
 *  this object is returned as-is. Otherwise Test if the topmost stack
 *  element indicates that more objects are left,
725
 *  and if so, retrieve the first object and store its pointer to *c. Also,
726 727 728
 *  set *cp and *data appropriately, both of which are stored in the stack
 *  element.  The topmost stack element then is overwritten so as for it to now
 *  denote the next object.
729
 *
730 731
 *  If the topmost stack element indicates no more objects are left, pop
 *  off the stack element until either an object can be retrieved or
Ben Gamari's avatar
Ben Gamari committed
732
 *  the current stack chunk becomes empty, indicated by true returned by
733
 *  isOnBoundary(), in which case *c is set to NULL.
734
 *
735
 *  Note:
736
 *
737 738
 *    It is okay to call this function even when the current stack chunk
 *    is empty.
739
 */
740
STATIC_INLINE void
741
traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data)
742 743 744
{
    stackElement *se;

745
    debug("traversePop(): stackTop = 0x%x currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
746

747 748 749 750
    // Is this the last internal element? If so instead of modifying the current
    // stackElement in place we actually remove it from the stack.
    bool last = false;

751
    do {
752
        if (isOnBoundary(ts)) {     // if the current stack chunk is depleted
753 754 755 756
            *c = NULL;
            return;
        }

757
        // Note: Below every `break`, where the loop condition is true, must be
758 759
        // accompanied by a popStackElement() otherwise this is an infinite
        // loop.
760
        se = ts->stackTop;
761

762 763
        // If this is a top-level element, you should pop that out.
        if (se->info.type == posTypeFresh) {
764
            *cp = se->cp;
765
            *c = se->c;
766
            *data = se->data;
767
            popStackElement(ts);
768 769 770
            return;
        }

771 772
        // Note: The first ptr of all of these was already returned as
        // *fist_child in push(), so we always start with the second field.
773 774 775 776 777
        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
778 779
            last = true;
            goto out;
780 781 782

            // three children (fixed), no SRT
            // need to push a stackElement
783 784
        case MVAR_CLEAN:
        case MVAR_DIRTY:
785 786 787
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
788
                // no popStackElement
789 790
            } else {
                *c = ((StgMVar *)se->c)->value;
791
                last = true;
792
            }
793
            goto out;
794 795 796 797 798 799

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
800
                // no popStackElement
801 802
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
803
                last = true;
804
            }
805
            goto out;
806 807 808 809 810 811 812 813

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
814 815
            uint32_t entry_no = se->info.next.step >> 2;
            uint32_t field_no = se->info.next.step & 3;
816 817
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
818
                popStackElement(ts);
819
                break; // this breaks out of the switch not the loop
820 821 822 823 824 825 826 827 828 829
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            se->info.next.step++;
830
            goto out;
831
        }
832

833 834
        case TVAR:
        case CONSTR:
835 836 837 838 839 840
        case PRIM:
        case MUT_PRIM:
        case BCO:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
841 842
        case MUT_ARR_PTRS_FROZEN_CLEAN:
        case MUT_ARR_PTRS_FROZEN_DIRTY:
843 844 845 846
        case SMALL_MUT_ARR_PTRS_CLEAN:
        case SMALL_MUT_ARR_PTRS_DIRTY:
        case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
        case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
847 848
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
849
                popStackElement(ts);
850
                break; // this breaks out of the switch not the loop
851
            }
852
            goto out;
853 854 855

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
856
        case FUN_STATIC:
857 858 859 860
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
861
                    goto out;
862 863 864 865 866 867 868 869 870 871
                }
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
872
                    goto out;
873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889
                }
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
890
            if(*c == NULL) {
891
                popStackElement(ts);
892
                break; // this breaks out of the switch not the loop
893
            }
894
            goto out;
895 896 897 898 899 900 901 902 903 904 905 906 907 908 909

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
910 911
        case STACK:
        case IND_STATIC:
Simon Marlow's avatar
Simon Marlow committed
912
        case CONSTR_NOCAF:
913
            // stack objects
914
        case UPDATE_FRAME:
915
        case CATCH_FRAME:
916 917
        case UNDERFLOW_FRAME:
        case STOP_FRAME:
918 919 920 921 922 923 924
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case INVALID_OBJECT:
        default:
925
            barf("Invalid object *c in traversePop(): %d", get_itbl(se->c)->type);
926 927
            return;
        }
928 929 930 931 932 933 934 935 936 937
    } while (*c == NULL);

out:

    ASSERT(*c != NULL);

    *cp = se->c;
    *data = se->data;

    if(last)
938
        popStackElement(ts);
939 940 941

    return;

942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977
}

/* -----------------------------------------------------------------------------
 * RETAINER PROFILING ENGINE
 * -------------------------------------------------------------------------- */

void
initRetainerProfiling( void )
{
    initializeAllRetainerSet();
    retainerGeneration = 0;
}

/* -----------------------------------------------------------------------------
 *  This function must be called before f-closing prof_file.
 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
    outputAllRetainerSet(prof_file);
}

/* -----------------------------------------------------------------------------
 *  Returns the actual pointer to the retainer set of the closure *c.
 *  It may adjust RSET(c) subject to flip.
 *  Side effects:
 *    RSET(c) is initialized to NULL if its current value does not
 *    conform to flip.
 *  Note:
 *    Even though this function has side effects, they CAN be ignored because
 *    subsequent calls to retainerSetOf() always result in the same return value
 *    and retainerSetOf() is the only way to retrieve retainerSet of a given
 *    closure.
 *    We have to perform an XOR (^) operation each time a closure is examined.
 *    The reason is that we do not know when a closure is visited last.
 * -------------------------------------------------------------------------- */
978
void
979
traverseMaybeInitClosureData(StgClosure *c)
980
{
981 982
    if (!isTravDataValid(c)) {
        setTravDataToZero(c);
983 984 985 986
    }
}

/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
987
 * Returns true if *c is a retainer.
988 989 990 991 992 993
 * In general the retainers are the objects that may be the roots of the
 * collection. Basically this roots represents programmers threads
 * (TSO) with their stack and thunks.
 *
 * In addition we mark all mutable objects as a retainers, the reason for
 * that decision is lost in time.
994
 * -------------------------------------------------------------------------- */
995
STATIC_INLINE bool
996 997 998
isRetainer( StgClosure *c )
{
    switch (get_itbl(c)->type) {
999 1000 1001 1002
        //
        //  True case
        //
        // TSOs MUST be retainers: they constitute the set of roots.
1003
    case TSO:
1004
    case STACK:
1005

1006
        // mutable objects
1007
    case MUT_PRIM:
1008 1009
    case MVAR_CLEAN:
    case MVAR_DIRTY:
1010
    case TVAR:
1011 1012
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
1013 1014
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
1015 1016 1017
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
    case BLOCKING_QUEUE:
1018

1019
        // thunks are retainers.
1020 1021 1022 1023 1024 1025 1026
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
1027 1028
    case AP:
    case AP_STACK:
1029

1030
        // Static thunks, or CAFS, are obviously retainers.
1031 1032
    case THUNK_STATIC:

1033 1034
        // WEAK objects are roots; there is separate code in which traversing
        // begins from WEAK objects.
1035
    case WEAK:
Ben Gamari's avatar
Ben Gamari committed
1036
        return true;
1037

1038 1039 1040
        //
        // False case
        //
1041

1042
        // constructors
1043
    case CONSTR:
Simon Marlow's avatar
Simon Marlow committed
1044
    case CONSTR_NOCAF:
1045 1046 1047 1048 1049
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
1050
        // functions
1051 1052 1053 1054 1055 1056
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
1057
        // partial applications
1058
    case PAP:
1059
        // indirection
Ian Lynagh's avatar
Ian Lynagh committed
1060 1061 1062 1063
    // IND_STATIC used to be an error, but at the moment it can happen
    // as isAlive doesn't look through IND_STATIC as it ignores static
    // closures. See trac #3956 for a program that hit this error.
    case IND_STATIC:
1064
    case BLACKHOLE:
1065
    case WHITEHOLE:
1066
        // static objects
1067
    case FUN_STATIC:
1068
        // misc
1069
    case PRIM:
1070 1071
    case BCO:
    case ARR_WORDS:
1072
    case COMPACT_NFDATA:
1073
        // STM
1074
    case TREC_CHUNK:
1075
        // immutable arrays
1076 1077 1078 1079
    case MUT_ARR_PTRS_FROZEN_CLEAN:
    case MUT_ARR_PTRS_FROZEN_DIRTY:
    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
Ben Gamari's avatar
Ben Gamari committed
1080
        return false;
1081

1082 1083 1084 1085 1086
        //
        // Error case
        //
        // Stack objects are invalid because they are never treated as
        // legal objects during retainer profiling.
1087 1088
    case UPDATE_FRAME:
    case CATCH_FRAME:
1089 1090
    case CATCH_RETRY_FRAME:
    case CATCH_STM_FRAME:
1091
    case UNDERFLOW_FRAME:
1092
    case ATOMICALLY_FRAME:
1093 1094 1095 1096
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
1097
    case RET_FUN:
1098
        // other cases
1099 1100 1101
    case IND:
    case INVALID_OBJECT:
    default:
1102
        barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
Ben Gamari's avatar
Ben Gamari committed
1103
        return false;