RetainerProfile.c 60 KB
Newer Older
1 2 3 4 5 6 7 8 9
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2001
 * Author: Sungwoo Park
 *
 * Retainer profiling.
 *
 * ---------------------------------------------------------------------------*/

Ben Gamari's avatar
Ben Gamari committed
10
#if defined(PROFILING)
11

Simon Marlow's avatar
Simon Marlow committed
12
#include "PosixSource.h"
13
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
14

15 16 17 18 19 20
#include "RtsUtils.h"
#include "RetainerProfile.h"
#include "RetainerSet.h"
#include "Schedule.h"
#include "Printer.h"
#include "Weak.h"
Simon Marlow's avatar
Simon Marlow committed
21
#include "sm/Sanity.h"
22 23 24
#include "Profiling.h"
#include "Stats.h"
#include "ProfHeap.h"
25
#include "Apply.h"
David Feuer's avatar
David Feuer committed
26 27
#include "StablePtr.h" /* markStablePtrTable */
#include "StableName.h" /* rememberOldStableNameAddresses */
Simon Marlow's avatar
Simon Marlow committed
28
#include "sm/Storage.h" // for END_OF_STATIC_LIST
29

30 31
/* Note [What is a retainer?]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~
32 33
Retainer profiling is a profiling technique that gives information why
objects can't be freed and lists the consumers that hold pointers to
34
the heap objects. It does not list all the objects that keep references
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
to the other, because then we would keep too much information that will
make the report unusable, for example the cons element of the list would keep
all the tail cells. As a result we are keeping only the objects of the
certain types, see 'isRetainer()' function for more discussion.

More formal definition of the retainer can be given the following way.

An object p is a retainer object of the object l, if all requirements
hold:

  1. p can be a retainer (see `isRetainer()`)
  2. l is reachable from p
  3. There are no other retainers on the path from p to l.

Exact algorithm and additional information can be found the historical
document 'docs/storage-mgt/rp.tex'. Details that are related to the
RTS implementation may be out of date, but the general
information about the retainers is still applicable.
53 54 55
*/


56 57 58 59 60 61 62 63 64 65 66 67 68
/*
  Note: what to change in order to plug-in a new retainer profiling scheme?
    (1) type retainer in ../includes/StgRetainerProf.h
    (2) retainer function R(), i.e., getRetainerFrom()
    (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
        in RetainerSet.h, if needed.
    (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
 */

/* -----------------------------------------------------------------------------
 * Declarations...
 * -------------------------------------------------------------------------- */

69
static uint32_t retainerGeneration;  // generation
70

71 72 73
static uint32_t numObjectVisited;    // total number of objects visited
static uint32_t timesAnyObjectVisited;  // number of times any objects are
                                        // visited
74

75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
/** Note [Profiling heap traversal visited bit]
 *
 * If the RTS is compiled with profiling enabled StgProfHeader can be used by
 * profiling code to store per-heap object information.
 *
 * When using the generic heap traversal code we use this field to store
 * profiler specific information. However we reserve the LSB of the *entire*
 * 'trav' union (which will overlap with the other fields) for the generic
 * traversal code. We use the bit to decide whether we've already visited this
 * closure in this pass or not. We do this as the heap may contain cyclic
 * references, it being a graph and all, so we would likely just infinite loop
 * if we didn't.
 *
 * We assume that at least the LSB of the largest field in the corresponding
 * union is insignificant. This is true at least for the word aligned pointers
 * which the retainer profiler currently stores there and should be maintained
 * by new users of the 'trav' union.
 *
 * Now the way the traversal works is that the interpretation of the "visited?"
 * bit depends on the value of the global 'flip' variable. We don't want to have
 * to do another pass over the heap just to reset the bit to zero so instead on
 * each traversal (i.e. each run of the profiling code) we invert the value of
 * the global 'flip' variable. We interpret this as resetting all the "visited?"
 * flags on the heap.
 *
 * There is one exception to this rule, namely: static objects. There we do just
 * go over the heap and reset the bit manually. See
102
 * 'resetStaticObjectForProfiling'.
103
 */
104
StgWord flip = 0;     // flip bit
105 106
                      // must be 0 if DEBUG_RETAINER is on (for static closures)

107 108
#define setTravDataToZero(c) \
  (c)->header.prof.hp.trav.lsb = flip
109 110 111 112 113 114 115 116 117 118 119 120

/* -----------------------------------------------------------------------------
 * Retainer stack - header
 *   Note:
 *     Although the retainer stack implementation could be separated *
 *     from the retainer profiling engine, there does not seem to be
 *     any advantage in doing that; retainer stack is an integral part
 *     of retainer profiling engine and cannot be use elsewhere at
 *     all.
 * -------------------------------------------------------------------------- */

typedef enum {
121 122
    // Object with fixed layout. Keeps an information about that
    // element was processed. (stackPos.next.step)
123
    posTypeStep,
124 125
    // Description of the pointers-first heap object. Keeps information
    // about layout. (stackPos.next.ptrs)
126
    posTypePtrs,
127
    // Keeps SRT bitmap (stackPos.next.srt)
128
    posTypeSRT,
129 130 131
    // Keeps a new object that was not inspected yet. Keeps a parent
    // element (stackPos.next.parent)
    posTypeFresh
132 133 134 135 136 137 138 139
} nextPosType;

typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
140 141 142
        // See StgClosureInfo in InfoTables.h
        StgHalfWord pos;
        StgHalfWord ptrs;
143
        StgPtr payload;
144 145 146 147
    } ptrs;

    // SRT
    struct {
148
        StgClosure *srt;
149 150 151
    } srt;
} nextPos;

152 153 154 155
/**
 * Position pointer into a closure. Determines what the next element to return
 * for a stackElement is.
 */
156 157 158 159 160
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

161 162 163 164 165 166 167
typedef union {
     /**
      * Most recent retainer for the corresponding closure on the stack.
      */
    retainer c_child_r;
} stackData;

168 169 170 171 172 173 174 175 176 177 178
/**
 * An element of the traversal work-stack. Besides the closure itself this also
 * stores it's parent and associated data.
 *
 * When 'info.type == posTypeFresh' a 'stackElement' represents just one
 * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an
 * offset into the children of 'c'. This is to support returning a closure's
 * children one-by-one without pushing one element per child onto the stack. See
 * traversePushChildren() and traversePop().
 *
 */
179 180
typedef struct {
    stackPos info;
181
    StgClosure *c;
182
    StgClosure *cp; // parent of 'c'. Only used when info.type == posTypeFresh.
183
    stackData data;
184 185
} stackElement;

186
typedef struct {
187 188
/*
  Invariants:
189

190
    firstStack points to the first block group.
191

192
    currentStack points to the block group currently being used.
193

194
    currentStack->free == stackLimit.
195

196
    stackTop points to the topmost byte in the stack of currentStack.
197

198 199
    Unless the whole stack is empty, stackTop must point to the topmost
    object (or byte) in the whole stack. Thus, it is only when the whole stack
200 201 202
    is empty that stackTop == stackLimit (not during the execution of
    pushStackElement() and popStackElement()).

203
    stackBottom == currentStack->start.
204

205
    stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
206 207


208
  Note:
209

210 211 212 213
    When a current stack becomes empty, stackTop is set to point to
    the topmost element on the previous block group so as to satisfy
    the invariants described above.
 */
214 215 216
    bdescr *firstStack;
    bdescr *currentStack;
    stackElement *stackBottom, *stackTop, *stackLimit;
217 218 219 220 221 222 223

/*
  currentStackBoundary is used to mark the current stack chunk.
  If stackTop == currentStackBoundary, it means that the current stack chunk
  is empty. It is the responsibility of the user to keep currentStackBoundary
  valid all the time if it is to be employed.
 */
224
    stackElement *currentStackBoundary;
225 226 227 228 229 230 231 232 233 234

/*
  stackSize records the current size of the stack.
  maxStackSize records its high water mark.
  Invariants:
    stackSize <= maxStackSize
  Note:
    stackSize is just an estimate measure of the depth of the graph. The reason
    is that some heap objects have only a single child and may not result
    in a new element being pushed onto the stack. Therefore, at the end of
235
    retainer profiling, maxStackSize is some value no greater
236 237
    than the actual depth of the graph.
 */
238 239 240
    int stackSize, maxStackSize;
} traverseState;

241 242
/**
 * Callback called when heap traversal visits a closure.
243 244 245 246 247 248 249 250 251 252 253 254 255 256
 *
 * Before this callback is called the profiling header of the visited closure
 * 'c' is zero'd with 'setTravDataToZero' if this closure hasn't been visited in
 * this run yet. See Note [Profiling heap traversal visited bit].
 *
 * Return 'true' when this is not the first visit to this element. The generic
 * traversal code will then skip traversing the children.
 */
typedef bool (*visitClosure_cb) (
    const StgClosure *c,
    const StgClosure *cp,
    const stackData data,
    stackData *child_data);

257 258 259
traverseState g_retainerTraverseState;


260 261 262
static void traverseStack(traverseState *, StgClosure *, stackData, StgPtr, StgPtr);
static void traverseClosure(traverseState *, StgClosure *, StgClosure *, retainer);
static void traversePushClosure(traverseState *, StgClosure *, StgClosure *, stackData);
263

264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
#if defined(DEBUG)
unsigned int g_traversalDebugLevel = 0;
static inline void debug(const char *s, ...)
{
    va_list ap;

    if(g_traversalDebugLevel == 0)
        return;

    va_start(ap,s);
    vdebugBelch(s, ap);
    va_end(ap);
}
#else
#define debug(...)
#endif
280 281 282 283 284 285 286 287 288

// number of blocks allocated for one stack
#define BLOCKS_IN_STACK 1

/* -----------------------------------------------------------------------------
 * Add a new block group to the stack.
 * Invariants:
 *  currentStack->link == s.
 * -------------------------------------------------------------------------- */
289
STATIC_INLINE void
290
newStackBlock( traverseState *ts, bdescr *bd )
291
{
292 293 294 295 296
    ts->currentStack = bd;
    ts->stackTop     = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    ts->stackBottom  = (stackElement *)bd->start;
    ts->stackLimit   = (stackElement *)ts->stackTop;
    bd->free     = (StgPtr)ts->stackLimit;
297 298 299 300 301 302 303
}

/* -----------------------------------------------------------------------------
 * Return to the previous block group.
 * Invariants:
 *   s->link == currentStack.
 * -------------------------------------------------------------------------- */
304
STATIC_INLINE void
305
returnToOldStack( traverseState *ts, bdescr *bd )
306
{
307 308 309 310 311
    ts->currentStack = bd;
    ts->stackTop = (stackElement *)bd->free;
    ts->stackBottom = (stackElement *)bd->start;
    ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
    bd->free = (StgPtr)ts->stackLimit;
312 313
}

314 315 316
/**
 *  Initializes the traversal work-stack.
 */
317
static void
318
initializeTraverseStack( traverseState *ts )
319
{
320 321
    if (ts->firstStack != NULL) {
        freeChain(ts->firstStack);
322 323
    }

324 325 326
    ts->firstStack = allocGroup(BLOCKS_IN_STACK);
    ts->firstStack->link = NULL;
    ts->firstStack->u.back = NULL;
327

328
    newStackBlock(ts, ts->firstStack);
329 330
}

331 332 333
/**
 * Frees all the block groups in the traversal works-stack.
 *
334 335
 * Invariants:
 *   firstStack != NULL
336
 */
337
static void
338
closeTraverseStack( traverseState *ts )
339
{
340 341
    freeChain(ts->firstStack);
    ts->firstStack = NULL;
342 343 344
}

/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
345
 * Returns true if the whole stack is empty.
346
 * -------------------------------------------------------------------------- */
347
STATIC_INLINE bool
348
isEmptyWorkStack( traverseState *ts )
349
{
350
    return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit;
351 352
}

sof's avatar
sof committed
353 354 355
/* -----------------------------------------------------------------------------
 * Returns size of stack
 * -------------------------------------------------------------------------- */
356
W_
357
traverseWorkStackBlocks(traverseState *ts)
sof's avatar
sof committed
358 359
{
    bdescr* bd;
360
    W_ res = 0;
sof's avatar
sof committed
361

362
    for (bd = ts->firstStack; bd != NULL; bd = bd->link)
sof's avatar
sof committed
363 364 365 366 367
      res += bd->blocks;

    return res;
}

368 369 370 371 372 373
W_
retainerStackBlocks(void)
{
    return traverseWorkStackBlocks(&g_retainerTraverseState);
}

374
/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
375
 * Returns true if stackTop is at the stack boundary of the current stack,
376 377
 * i.e., if the current stack chunk is empty.
 * -------------------------------------------------------------------------- */
378
STATIC_INLINE bool
379
isOnBoundary( traverseState *ts )
380
{
381
    return ts->stackTop == ts->currentStackBoundary;
382 383 384 385 386 387 388
}

/* -----------------------------------------------------------------------------
 * Initializes *info from ptrs and payload.
 * Invariants:
 *   payload[] begins with ptrs pointers followed by non-pointers.
 * -------------------------------------------------------------------------- */
389
STATIC_INLINE void
390
init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
391 392 393 394 395 396 397 398 399 400
{
    info->type              = posTypePtrs;
    info->next.ptrs.pos     = 0;
    info->next.ptrs.ptrs    = ptrs;
    info->next.ptrs.payload = payload;
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
401
STATIC_INLINE StgClosure *
402 403 404
find_ptrs( stackPos *info )
{
    if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
405
        return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
406
    } else {
407
        return NULL;
408 409 410 411 412 413
    }
}

/* -----------------------------------------------------------------------------
 *  Initializes *info from SRT information stored in *infoTable.
 * -------------------------------------------------------------------------- */
414
STATIC_INLINE void
415
init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
416
{
417 418 419
    info->type = posTypeSRT;
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable);
420
    } else {
421
        info->next.srt.srt = NULL;
422
    }
423 424
}

425
STATIC_INLINE void
426
init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
427
{
Ömer Sinan Ağacan's avatar
Ömer Sinan Ağacan committed
428
    info->type = posTypeSRT;
429 430
    if (infoTable->i.srt) {
        info->next.srt.srt = (StgClosure*)GET_SRT(infoTable);
431
    } else {
432
        info->next.srt.srt = NULL;
433
    }
434 435 436 437 438
}

/* -----------------------------------------------------------------------------
 * Find the next object from *info.
 * -------------------------------------------------------------------------- */
439
STATIC_INLINE StgClosure *
440 441 442
find_srt( stackPos *info )
{
    StgClosure *c;
443
    if (info->type == posTypeSRT) {
444 445 446
        c = info->next.srt.srt;
        info->next.srt.srt = NULL;
        return c;
447 448 449
    }
}

450 451 452 453
/**
 * Push a set of closures, represented by a single 'stackElement', onto the
 * traversal work-stack.
 */
454
static void
455 456
pushStackElement(traverseState *ts, stackElement *se)
{
457
    bdescr *nbd;      // Next Block Descriptor
458
    if (ts->stackTop - 1 < ts->stackBottom) {
459 460
        debug("pushStackElement() to the next stack.\n");

461 462
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
463
        ts->currentStack->free = (StgPtr)ts->stackTop;
464

465
        if (ts->currentStack->link == NULL) {
466 467
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
468 469
            nbd->u.back = ts->currentStack;
            ts->currentStack->link = nbd;
470
        } else
471
            nbd = ts->currentStack->link;
472

473
        newStackBlock(ts, nbd);
474 475 476
    }

    // adjust stackTop (acutal push)
477
    ts->stackTop--;
478 479 480 481
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.
482
    *ts->stackTop = *se;
483

484 485 486
    ts->stackSize++;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
487
    debug("stackSize = %d\n", ts->stackSize);
488 489
}

490 491
/**
 * Push a single closure onto the traversal work-stack.
492
 *
493 494 495
 *  cp   - object's parent
 *  c    - closure
 *  data - data associated with closure.
496
 */
497
STATIC_INLINE void
498
traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackData data) {
499 500 501
    stackElement se;

    se.c = c;
502 503
    se.cp = cp;
    se.data = data;
504 505
    se.info.type = posTypeFresh;

506
    pushStackElement(ts, &se);
507 508
};

509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529
/**
 * traversePushChildren() extracts the first child of 'c' in 'first_child' and
 * conceptually pushes all remaining children of 'c' onto the traversal stack
 * while associating 'data' with the pushed elements to be returned upon poping.
 *
 * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed
 * onto the stack.
 *
 * If 'c' has only one child, 'first_child' is set to that child and nothing is
 * pushed onto the stack.
 *
 * Invariants:
 *
 *  - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot
 *       be any stack objects.
 *
 * Note: SRTs are considered to be children as well.
 *
 * Note: When pushing onto the stack we only really push one 'stackElement'
 * representing all children onto the stack. See traversePop()
 */
530
STATIC_INLINE void
531
traversePushChildren(traverseState *ts, StgClosure *c, stackData data, StgClosure **first_child)
532 533 534 535
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

536
    debug("traversePushChildren(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
537 538

    ASSERT(get_itbl(c)->type != TSO);
539
    ASSERT(get_itbl(c)->type != AP_STACK);
540 541 542 543 544 545

    //
    // fill in se
    //

    se.c = c;
546
    se.data = data;
547
    // Note: se.cp ommitted on purpose, only traversePushClosure uses that.
548 549 550

    // fill in se.info
    switch (get_itbl(c)->type) {
551
        // no child, no SRT
552 553 554
    case CONSTR_0_1:
    case CONSTR_0_2:
    case ARR_WORDS:
gcampax's avatar
gcampax committed
555
    case COMPACT_NFDATA:
556 557
        *first_child = NULL;
        return;
558

559
        // one child (fixed), no SRT
560 561
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
562 563
        *first_child = ((StgMutVar *)c)->var;
        return;
564
    case THUNK_SELECTOR:
565 566
        *first_child = ((StgSelector *)c)->selectee;
        return;
567
    case BLACKHOLE:
568 569
        *first_child = ((StgInd *)c)->indirectee;
        return;
570 571
    case CONSTR_1_0:
    case CONSTR_1_1:
572 573
        *first_child = c->payload[0];
        return;
574

575 576 577
        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;
578

579 580
        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
581
    case CONSTR_2_0:
582
        *first_child = c->payload[0];         // return the first pointer
583 584
        se.info.type = posTypeStep;
        se.info.next.step = 2;            // 2 = second
585
        break;
586

587 588
        // three children (fixed), no SRT
        // need to push a stackElement
589 590
    case MVAR_CLEAN:
    case MVAR_DIRTY:
591 592 593
        // head must be TSO and the head of a linked list of TSOs.
        // Shoule it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
594
        se.info.type = posTypeStep;
595 596 597 598
        se.info.next.step = 2;            // 2 = second
        break;

        // three children (fixed), no SRT
599
    case WEAK:
600
        *first_child = ((StgWeak *)c)->key;
601
        se.info.type = posTypeStep;
602 603
        se.info.next.step = 2;
        break;
604

605
        // layout.payload.ptrs, no SRT
606
    case TVAR:
607
    case CONSTR:
Simon Marlow's avatar
Simon Marlow committed
608
    case CONSTR_NOCAF:
609
    case PRIM:
610
    case MUT_PRIM:
611
    case BCO:
612 613 614 615 616 617 618 619
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;   // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
620 621
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
622 623
    case MUT_ARR_PTRS_FROZEN_CLEAN:
    case MUT_ARR_PTRS_FROZEN_DIRTY:
624 625 626 627 628 629 630 631
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // StgMutArrPtr.ptrs, no SRT
632 633
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
634 635
    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
636 637 638 639 640 641
        init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;
642

643
    // layout.payload.ptrs, SRT
644
    case FUN_STATIC:
645 646
    case FUN:           // *c is a heap object.
    case FUN_2_0:
647 648 649 650 651 652
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;
653

654 655
    case THUNK:
    case THUNK_2_0:
656 657 658 659 660 661 662 663 664
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
665 666
    case FUN_1_0:
    case FUN_1_1:
667 668 669 670
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;
671

672 673
    case THUNK_1_0:
    case THUNK_1_1:
674 675 676 677
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;
678

679
    case FUN_0_1:      // *c is a heap object.
680
    case FUN_0_2:
681 682
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
683 684 685 686
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;
687 688 689

    // SRT only
    case THUNK_STATIC:
690
        ASSERT(get_itbl(c)->srt != 0);
691 692
    case THUNK_0_1:
    case THUNK_0_2:
693 694
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
695 696 697 698 699
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

700
    case TREC_CHUNK:
701
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
702
        se.info.type = posTypeStep;
703 704
        se.info.next.step = 0;  // entry no.
        break;
705

706
        // cannot appear
707
    case PAP:
708 709
    case AP:
    case AP_STACK:
710
    case TSO:
711
    case STACK:
712
    case IND_STATIC:
713
        // stack objects
714 715
    case UPDATE_FRAME:
    case CATCH_FRAME:
716
    case UNDERFLOW_FRAME:
717 718 719 720
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
721
        // invalid objects
722 723 724
    case IND:
    case INVALID_OBJECT:
    default:
725
        barf("Invalid object *c in push(): %d", get_itbl(c)->type);
726
        return;
727 728
    }

729 730 731 732
    // se.cp has to be initialized when type==posTypeFresh. We don't do that
    // here though. So type must be !=posTypeFresh.
    ASSERT(se.info.type != posTypeFresh);

733
    pushStackElement(ts, &se);
734 735
}

736 737 738 739
/**
 *  popStackElement(): Remove a depleted stackElement from the top of the
 *  traversal work-stack.
 *
740 741
 *  Invariants:
 *    stackTop cannot be equal to stackLimit unless the whole stack is
742 743
 *    empty, in which case popStackElement() is not allowed.
 */
744
static void
745
popStackElement(traverseState *ts) {
746
    debug("popStackElement(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
747 748 749 750 751 752 753

    ASSERT(ts->stackTop != ts->stackLimit);
    ASSERT(!isEmptyWorkStack(ts));

    // <= (instead of <) is wrong!
    if (ts->stackTop + 1 < ts->stackLimit) {
        ts->stackTop++;
754

755 756 757
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
758 759
        debug("stackSize = (--) %d\n", ts->stackSize);

760 761 762
        return;
    }

763 764
    bdescr *pbd;    // Previous Block Descriptor

765
    debug("popStackElement() to the previous stack.\n");
766

767 768
    ASSERT(ts->stackTop + 1 == ts->stackLimit);
    ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start);
769

770
    if (ts->firstStack == ts->currentStack) {
771
        // The stack is completely empty.
772 773
        ts->stackTop++;
        ASSERT(ts->stackTop == ts->stackLimit);
774

775 776 777
        ts->stackSize--;
        if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
        ASSERT(ts->stackSize >= 0);
778 779
        debug("stackSize = %d\n", ts->stackSize);

780
        return;
781 782 783 784
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
785
    ts->currentStack->free = (StgPtr)ts->stackLimit;
786 787

    // find the previous block descriptor
788
    pbd = ts->currentStack->u.back;
789 790
    ASSERT(pbd != NULL);

791
    returnToOldStack(ts, pbd);
792

793 794 795
    ts->stackSize--;
    if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
    ASSERT(ts->stackSize >= 0);
796
    debug("stackSize = %d\n", ts->stackSize);
797 798
}

799
/**
800 801
 *  Finds the next object to be considered for retainer profiling and store
 *  its pointer to *c.
802
 *
803 804 805
 *  If the unprocessed object was stored in the stack (posTypeFresh), the
 *  this object is returned as-is. Otherwise Test if the topmost stack
 *  element indicates that more objects are left,
806
 *  and if so, retrieve the first object and store its pointer to *c. Also,
807 808 809
 *  set *cp and *data appropriately, both of which are stored in the stack
 *  element.  The topmost stack element then is overwritten so as for it to now
 *  denote the next object.
810
 *
811 812
 *  If the topmost stack element indicates no more objects are left, pop
 *  off the stack element until either an object can be retrieved or
Ben Gamari's avatar
Ben Gamari committed
813
 *  the current stack chunk becomes empty, indicated by true returned by
814
 *  isOnBoundary(), in which case *c is set to NULL.
815
 *
816
 *  Note:
817
 *
818 819
 *    It is okay to call this function even when the current stack chunk
 *    is empty.
820
 */
821
STATIC_INLINE void
822
traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data)
823 824 825
{
    stackElement *se;

826
    debug("traversePop(): stackTop = 0x%x currentStackBoundary = 0x%x\n", ts->stackTop, ts->currentStackBoundary);
827

828 829 830 831
    // Is this the last internal element? If so instead of modifying the current
    // stackElement in place we actually remove it from the stack.
    bool last = false;

832
    do {
833
        if (isOnBoundary(ts)) {     // if the current stack chunk is depleted
834 835 836 837
            *c = NULL;
            return;
        }

838
        // Note: Below every `break`, where the loop condition is true, must be
839 840
        // accompanied by a popStackElement() otherwise this is an infinite
        // loop.
841
        se = ts->stackTop;
842

843 844
        // If this is a top-level element, you should pop that out.
        if (se->info.type == posTypeFresh) {
845
            *cp = se->cp;
846
            *c = se->c;
847
            *data = se->data;
848
            popStackElement(ts);
849 850 851
            return;
        }

852 853
        // Note: The first ptr of all of these was already returned as
        // *fist_child in push(), so we always start with the second field.
854 855 856 857 858
        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
859 860
            last = true;
            goto out;
861 862 863

            // three children (fixed), no SRT
            // need to push a stackElement
864 865
        case MVAR_CLEAN:
        case MVAR_DIRTY:
866 867 868
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
869
                // no popStackElement
870 871
            } else {
                *c = ((StgMVar *)se->c)->value;
872
                last = true;
873
            }
874
            goto out;
875 876 877 878 879 880

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
881
                // no popStackElement
882 883
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
884
                last = true;
885
            }
886
            goto out;
887 888 889 890 891 892 893 894

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
895 896
            uint32_t entry_no = se->info.next.step >> 2;
            uint32_t field_no = se->info.next.step & 3;
897 898
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
899
                popStackElement(ts);
900
                break; // this breaks out of the switch not the loop
901 902 903 904 905 906 907 908 909 910
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            se->info.next.step++;
911
            goto out;
912
        }
913

914 915
        case TVAR:
        case CONSTR:
916 917 918 919 920 921
        case PRIM:
        case MUT_PRIM:
        case BCO:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
922 923
        case MUT_ARR_PTRS_FROZEN_CLEAN:
        case MUT_ARR_PTRS_FROZEN_DIRTY:
924 925 926 927
        case SMALL_MUT_ARR_PTRS_CLEAN:
        case SMALL_MUT_ARR_PTRS_DIRTY:
        case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
        case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
928 929
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
930
                popStackElement(ts);
931
                break; // this breaks out of the switch not the loop
932
            }
933
            goto out;
934 935 936

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
937
        case FUN_STATIC:
938 939 940 941
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
942
                    goto out;
943 944 945 946 947 948 949 950 951 952
                }
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
953
                    goto out;
954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970
                }
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
971
            if(*c == NULL) {
972
                popStackElement(ts);
973
                break; // this breaks out of the switch not the loop
974
            }
975
            goto out;
976 977 978 979 980 981 982 983 984 985 986 987 988 989 990

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
991 992
        case STACK:
        case IND_STATIC:
Simon Marlow's avatar
Simon Marlow committed
993
        case CONSTR_NOCAF:
994
            // stack objects
995
        case UPDATE_FRAME:
996
        case CATCH_FRAME:
997 998
        case UNDERFLOW_FRAME:
        case STOP_FRAME:
999 1000 1001 1002 1003 1004 1005
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case INVALID_OBJECT:
        default:
1006
            barf("Invalid object *c in traversePop(): %d", get_itbl(se->c)->type);
1007 1008
            return;
        }
1009 1010 1011 1012 1013 1014 1015 1016 1017 1018
    } while (*c == NULL);

out:

    ASSERT(*c != NULL);

    *cp = se->c;
    *data = se->data;

    if(last)
1019
        popStackElement(ts);
1020 1021 1022

    return;

1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058
}

/* -----------------------------------------------------------------------------
 * RETAINER PROFILING ENGINE
 * -------------------------------------------------------------------------- */

void
initRetainerProfiling( void )
{
    initializeAllRetainerSet();
    retainerGeneration = 0;
}

/* -----------------------------------------------------------------------------
 *  This function must be called before f-closing prof_file.
 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
    outputAllRetainerSet(prof_file);
}

/* -----------------------------------------------------------------------------
 *  Returns the actual pointer to the retainer set of the closure *c.
 *  It may adjust RSET(c) subject to flip.
 *  Side effects:
 *    RSET(c) is initialized to NULL if its current value does not
 *    conform to flip.
 *  Note:
 *    Even though this function has side effects, they CAN be ignored because
 *    subsequent calls to retainerSetOf() always result in the same return value
 *    and retainerSetOf() is the only way to retrieve retainerSet of a given
 *    closure.
 *    We have to perform an XOR (^) operation each time a closure is examined.
 *    The reason is that we do not know when a closure is visited last.
 * -------------------------------------------------------------------------- */
1059
STATIC_INLINE void
1060
traverseMaybeInitClosureData(StgClosure *c)
1061
{
1062 1063
    if (!isTravDataValid(c)) {
        setTravDataToZero(c);
1064 1065 1066 1067
    }
}

/* -----------------------------------------------------------------------------
Ben Gamari's avatar
Ben Gamari committed
1068
 * Returns true if *c is a retainer.
1069 1070 1071 1072 1073 1074
 * In general the retainers are the objects that may be the roots of the
 * collection. Basically this roots represents programmers threads
 * (TSO) with their stack and thunks.
 *
 * In addition we mark all mutable objects as a retainers, the reason for
 * that decision is lost in time.
1075
 * -------------------------------------------------------------------------- */
1076
STATIC_INLINE bool
1077 1078 1079
isRetainer( StgClosure *c )
{
    switch (get_itbl(c)->type) {
1080 1081 1082 1083
        //
        //  True case
        //
        // TSOs MUST be retainers: they constitute the set of roots.
1084
    case TSO:
1085
    case STACK:
1086

1087
        // mutable objects
1088
    case MUT_PRIM:
1089 1090
    case MVAR_CLEAN:
    case MVAR_DIRTY:
1091
    case TVAR:
1092 1093
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
1094 1095
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
1096 1097 1098
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
    case BLOCKING_QUEUE:
1099

1100
        // thunks are retainers.
1101 1102 1103 1104 1105 1106 1107
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
1108 1109
    case AP:
    case AP_STACK:
1110

1111
        // Static thunks, or CAFS, are obviously retainers.
1112 1113
    case THUNK_STATIC:

1114 1115
        // WEAK objects are roots; there is separate code in which traversing
        // begins from WEAK objects.
1116
    case WEAK:
Ben Gamari's avatar
Ben Gamari committed
1117
        return true;
1118

1119 1120 1121
        //
        // False case
        //
1122

1123
        // constructors