ProfHeap.c 37.9 KB
Newer Older
1
/* ----------------------------------------------------------------------------
2
 *
3
 * (c) The GHC Team, 1998-2003
4 5 6
 *
 * Support for heap profiling
 *
7
 * --------------------------------------------------------------------------*/
8

9
#include "PosixSource.h"
10
#include "Rts.h"
11

Simon Marlow's avatar
Simon Marlow committed
12
#include "Capability.h"
13
#include "RtsFlags.h"
14
#include "RtsUtils.h"
15
#include "Profiling.h"
16 17
#include "ProfHeap.h"
#include "Stats.h"
18
#include "Hash.h"
19
#include "RetainerProfile.h"
Simon Marlow's avatar
Simon Marlow committed
20
#include "LdvProfile.h"
21
#include "Arena.h"
22
#include "Printer.h"
23
#include "Trace.h"
24
#include "sm/GCThread.h"
25

26
#include <fs_rts.h>
27 28
#include <string.h>

29 30 31
FILE *hp_file;
static char *hp_filename; /* heap profile (hp2ps style) log file */

32
/* -----------------------------------------------------------------------------
33 34
 * era stores the current time period.  It is the same as the
 * number of censuses that have been performed.
35
 *
36 37 38
 * RESTRICTION:
 *   era must be no longer than LDV_SHIFT (15 or 30) bits.
 * Invariants:
39
 *   era is initialized to 1 in initHeapProfiling().
40
 *
41 42 43
 * max_era is initialized to 2^LDV_SHIFT in initHeapProfiling().
 * When era reaches max_era, the profiling stops because a closure can
 * store only up to (max_era - 1) as its creation or last use time.
44
 * -------------------------------------------------------------------------- */
45
unsigned int era;
46
static uint32_t max_era;
47

48
/* -----------------------------------------------------------------------------
49 50 51 52 53 54 55
 * Counters
 *
 * For most heap profiles each closure identity gets a simple count
 * of live words in the heap at each census.  However, if we're
 * selecting by biography, then we have to keep the various
 * lag/drag/void counters for each identity.
 * -------------------------------------------------------------------------- */
56
typedef struct _counter {
57
    const void *identity;
58
    union {
59
        ssize_t resid;
60
        struct {
61 62 63 64 65 66
            // Total sizes of:
            ssize_t prim;     // 'inherently used' closures
            ssize_t not_used; // 'never used' closures
            ssize_t used;     // 'used at least once' closures
            ssize_t void_total;  // 'destroyed without being used' closures
            ssize_t drag_total;  // 'used at least once and waiting to die'
67
        } ldv;
68 69 70
    } c;
    struct _counter *next;
} counter;
71

72
STATIC_INLINE void
73 74 75 76 77 78 79 80 81
initLDVCtr( counter *ctr )
{
    ctr->c.ldv.prim = 0;
    ctr->c.ldv.not_used = 0;
    ctr->c.ldv.used = 0;
    ctr->c.ldv.void_total = 0;
    ctr->c.ldv.drag_total = 0;
}

82 83
typedef struct {
    double      time;    // the time in MUT time when the census is made
84 85 86 87
    StgWord64   rtime;   // The eventlog time the census was made. This is used
                         // for the LDV profiling events because they are all
                         // emitted at the end of compilation so we need to know
                         // when the sample actually took place.
88 89 90 91 92
    HashTable * hash;
    counter   * ctrs;
    Arena     * arena;

    // for LDV profiling, when just displaying by LDV
93 94 95 96 97
    ssize_t    prim;
    ssize_t    not_used;
    ssize_t    used;
    ssize_t    void_total;
    ssize_t    drag_total;
98 99
} Census;

100
static Census *censuses = NULL;
101
static uint32_t n_censuses = 0;
102

103
#if defined(PROFILING)
104 105 106 107 108
static void aggregateCensusInfo( void );
#endif

static void dumpCensus( Census *census );

Ben Gamari's avatar
Ben Gamari committed
109
static bool closureSatisfiesConstraints( const StgClosure* p );
110

111
/* ----------------------------------------------------------------------------
Edward Z. Yang's avatar
Edward Z. Yang committed
112
 * Find the "closure identity", which is a unique pointer representing
113 114 115
 * the band to which this closure's heap space is attributed in the
 * heap profile.
 * ------------------------------------------------------------------------- */
116
static const void *
117
closureIdentity( const StgClosure *p )
118 119 120
{
    switch (RtsFlags.ProfFlags.doHeapProfile) {

121
#if defined(PROFILING)
122
    case HEAP_BY_CCS:
123
        return p->header.prof.ccs;
124
    case HEAP_BY_MOD:
125
        return p->header.prof.ccs->cc->module;
126
    case HEAP_BY_DESCR:
127
        return GET_PROF_DESC(get_itbl(p));
128
    case HEAP_BY_TYPE:
129
        return GET_PROF_TYPE(get_itbl(p));
130
    case HEAP_BY_RETAINER:
131 132
        // AFAIK, the only closures in the heap which might not have a
        // valid retainer set are DEAD_WEAK closures.
133
        if (isTravDataValid(p))
134 135 136
            return retainerSetOf(p);
        else
            return NULL;
137
#endif
138

139
    case HEAP_BY_CLOSURE_TYPE:
140
    {
141
        const StgInfoTable *info;
142 143 144 145 146 147 148 149
        info = get_itbl(p);
        switch (info->type) {
        case CONSTR:
        case CONSTR_1_0:
        case CONSTR_0_1:
        case CONSTR_2_0:
        case CONSTR_1_1:
        case CONSTR_0_2:
Simon Marlow's avatar
Simon Marlow committed
150
        case CONSTR_NOCAF:
151 152
            return GET_CON_DESC(itbl_to_con_itbl(info));
        default:
153
            return closure_type_names[info->type];
154 155
        }
    }
156

157
    default:
158
        barf("closureIdentity");
159 160 161
    }
}

162 163 164
/* --------------------------------------------------------------------------
 * Profiling type predicates
 * ----------------------------------------------------------------------- */
165
#if defined(PROFILING)
Ben Gamari's avatar
Ben Gamari committed
166
STATIC_INLINE bool
167
doingLDVProfiling( void )
168
{
169 170
    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
            || RtsFlags.ProfFlags.bioSelector != NULL);
171 172
}

Ben Gamari's avatar
Ben Gamari committed
173
bool
174
doingRetainerProfiling( void )
175
{
176
    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER
177
            || RtsFlags.ProfFlags.retainerSelector != NULL);
178
}
179
#endif /* PROFILING */
180

181
// Processes a closure 'c' being destroyed whose size is 'size'.
182 183
// Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
// such as TSO; they should not be involved in computing dragNew or voidNew.
184 185 186 187
//
// Even though era is checked in both LdvCensusForDead() and
// LdvCensusKillAll(), we still need to make sure that era is > 0 because
// LDV_recordDead() may be called from elsewhere in the runtime system. E.g.,
188
// when a thunk is replaced by an indirection object.
189

190
#if defined(PROFILING)
191
void
192
LDV_recordDead( const StgClosure *c, uint32_t size )
193
{
194
    const void *id;
195
    uint32_t t;
196 197
    counter *ctr;

198
    if (era > 0 && closureSatisfiesConstraints(c)) {
199 200 201 202 203 204
        size -= sizeofW(StgProfHeader);
        ASSERT(LDVW(c) != 0);
        if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
            t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
            if (t < era) {
                if (RtsFlags.ProfFlags.bioSelector == NULL) {
205 206
                    censuses[t].void_total   += size;
                    censuses[era].void_total -= size;
207
                    ASSERT(censuses[t].void_total <= censuses[t].not_used);
208 209 210
                } else {
                    id = closureIdentity(c);
                    ctr = lookupHashTable(censuses[t].hash, (StgWord)id);
211 212 213
                    if (ctr == NULL)
                        barf("LDV_recordDead: Failed to find counter for closure %p", c);

214
                    ctr->c.ldv.void_total += size;
215 216 217 218 219 220 221 222 223
                    ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
                    if (ctr == NULL) {
                        ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
                        initLDVCtr(ctr);
                        insertHashTable(censuses[era].hash, (StgWord)id, ctr);
                        ctr->identity = id;
                        ctr->next = censuses[era].ctrs;
                        censuses[era].ctrs = ctr;
                    }
224
                    ctr->c.ldv.void_total -= size;
225 226 227 228 229 230 231 232 233
                }
            }
        } else {
            t = LDVW((c)) & LDV_LAST_MASK;
            if (t + 1 < era) {
                if (RtsFlags.ProfFlags.bioSelector == NULL) {
                    censuses[t+1].drag_total += size;
                    censuses[era].drag_total -= size;
                } else {
234
                    const void *id;
235 236 237
                    id = closureIdentity(c);
                    ctr = lookupHashTable(censuses[t+1].hash, (StgWord)id);
                    ASSERT( ctr != NULL );
238
                    ctr->c.ldv.drag_total += size;
239 240 241 242 243 244 245 246 247
                    ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
                    if (ctr == NULL) {
                        ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
                        initLDVCtr(ctr);
                        insertHashTable(censuses[era].hash, (StgWord)id, ctr);
                        ctr->identity = id;
                        ctr->next = censuses[era].ctrs;
                        censuses[era].ctrs = ctr;
                    }
248
                    ctr->c.ldv.drag_total -= size;
249 250 251
                }
            }
        }
252 253
    }
}
254
#endif
255

256 257 258
/* --------------------------------------------------------------------------
 * Initialize censuses[era];
 * ----------------------------------------------------------------------- */
259

260
STATIC_INLINE void
261
initEra(Census *census)
262
{
263 264 265 266 267 268 269 270 271
    census->hash  = allocHashTable();
    census->ctrs  = NULL;
    census->arena = newArena();

    census->not_used   = 0;
    census->used       = 0;
    census->prim       = 0;
    census->void_total = 0;
    census->drag_total = 0;
272 273
}

274 275 276
STATIC_INLINE void
freeEra(Census *census)
{
277 278
    arenaFree(census->arena);
    freeHashTable(census->hash, NULL);
279 280
}

281 282 283 284
/* --------------------------------------------------------------------------
 * Increases era by 1 and initialize census[era].
 * Reallocates gi[] and increases its size if needed.
 * ----------------------------------------------------------------------- */
285

286
static void
287
nextEra( void )
288
{
289
#if defined(PROFILING)
290 291 292 293
    if (doingLDVProfiling()) {
        era++;

        if (era == max_era) {
294
            errorBelch("Maximum number of censuses reached.");
Ben Gamari's avatar
Ben Gamari committed
295
            if (rtsConfig.rts_opts_suggestions == true) {
296 297 298 299 300 301
                if (rtsConfig.rts_opts_enabled == RtsOptsAll)  {
                    errorBelch("Use `+RTS -i' to reduce censuses.");
                } else  {
                    errorBelch("Relink with -rtsopts and "
                               "use `+RTS -i' to reduce censuses.");
                }
302
            }
303 304 305 306 307 308 309 310
            stg_exit(EXIT_FAILURE);
        }

        if (era == n_censuses) {
            n_censuses *= 2;
            censuses = stgReallocBytes(censuses, sizeof(Census) * n_censuses,
                                       "nextEra");
        }
311
    }
312
#endif /* PROFILING */
313

314
    initEra( &censuses[era] );
315 316
}

317 318 319
/* ----------------------------------------------------------------------------
 * Heap profiling by info table
 * ------------------------------------------------------------------------- */
320

321 322 323 324 325 326 327 328 329 330 331 332
static void
printEscapedString(const char* string)
{
    for (const char* p = string; *p != '\0'; ++p) {
        if (*p == '\"') {
            // Escape every " as ""
            fputc('"', hp_file);
        }
        fputc(*p, hp_file);
    }
}

333
static void
Ben Gamari's avatar
Ben Gamari committed
334
printSample(bool beginSample, StgDouble sampleValue)
335
{
336
    fprintf(hp_file, "%s %f\n",
337
            (beginSample ? "BEGIN_SAMPLE" : "END_SAMPLE"),
338
            sampleValue);
339 340 341
    if (!beginSample) {
        fflush(hp_file);
    }
342 343
}

344

345 346 347 348
void freeHeapProfiling (void)
{
}

349 350 351
/* --------------------------------------------------------------------------
 * Initialize the heap profilier
 * ----------------------------------------------------------------------- */
352
void
353 354 355
initHeapProfiling(void)
{
    if (! RtsFlags.ProfFlags.doHeapProfile) {
356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
        return;
    }

    char *prog;

    prog = stgMallocBytes(strlen(prog_name) + 1, "initHeapProfiling");
    strcpy(prog, prog_name);
#if defined(mingw32_HOST_OS)
    // on Windows, drop the .exe suffix if there is one
    {
        char *suff;
        suff = strrchr(prog,'.');
        if (suff != NULL && !strcmp(suff,".exe")) {
            *suff = '\0';
        }
    }
#endif

  if (RtsFlags.ProfFlags.doHeapProfile) {
    /* Initialise the log file name */
    hp_filename = stgMallocBytes(strlen(prog) + 6, "hpFileName");
    sprintf(hp_filename, "%s.hp", prog);

    /* open the log file */
380
    if ((hp_file = __rts_fopen(hp_filename, "w+")) == NULL) {
381 382 383 384 385
      debugBelch("Can't open profiling report file %s\n",
              hp_filename);
      RtsFlags.ProfFlags.doHeapProfile = 0;
      stgFree(prog);
      return;
386
    }
387 388 389
  }

  stgFree(prog);
390

391
#if defined(PROFILING)
392
    if (doingLDVProfiling() && doingRetainerProfiling()) {
393 394
        errorBelch("cannot mix -hb and -hr");
        stg_exit(EXIT_FAILURE);
395
    }
396
#if defined(THREADED_RTS)
397
    // See #12019.
398 399 400 401 402
    if (doingLDVProfiling() && RtsFlags.ParFlags.nCapabilities > 1) {
        errorBelch("-hb cannot be used with multiple capabilities");
        stg_exit(EXIT_FAILURE);
    }
#endif
403 404
#endif

405 406
    // we only count eras if we're doing LDV profiling.  Otherwise era
    // is fixed at zero.
407
#if defined(PROFILING)
408
    if (doingLDVProfiling()) {
409
        era = 1;
410 411 412
    } else
#endif
    {
413
        era = 0;
414 415
    }

Ian Lynagh's avatar
Ian Lynagh committed
416
    // max_era = 2^LDV_SHIFT
417
    max_era = 1 << LDV_SHIFT;
418 419 420 421

    n_censuses = 32;
    censuses = stgMallocBytes(sizeof(Census) * n_censuses, "initHeapProfiling");

422 423
    initEra( &censuses[era] );

424
    /* initProfilingLogFile(); */
425 426
    fprintf(hp_file, "JOB \"");
    printEscapedString(prog_name);
427

428
#if defined(PROFILING)
429 430 431 432 433 434 435 436
    for (int i = 1; i < prog_argc; ++i) {
        fputc(' ', hp_file);
        printEscapedString(prog_argv[i]);
    }
    fprintf(hp_file, " +RTS");
    for (int i = 0; i < rts_argc; ++i) {
        fputc(' ', hp_file);
        printEscapedString(rts_argv[i]);
437
    }
438
#endif /* PROFILING */
439 440 441

    fprintf(hp_file, "\"\n" );

442
    fprintf(hp_file, "DATE \"%s\"\n", time_str());
443

444 445
    fprintf(hp_file, "SAMPLE_UNIT \"seconds\"\n");
    fprintf(hp_file, "VALUE_UNIT \"bytes\"\n");
446

Ben Gamari's avatar
Ben Gamari committed
447 448
    printSample(true, 0);
    printSample(false, 0);
449

450
#if defined(PROFILING)
451
    if (doingRetainerProfiling()) {
452
        initRetainerProfiling();
453
    }
454 455
#endif

456
    traceHeapProfBegin(0);
457 458 459 460 461 462 463 464 465 466 467
}

void
endHeapProfiling(void)
{
    StgDouble seconds;

    if (! RtsFlags.ProfFlags.doHeapProfile) {
        return;
    }

468
#if defined(PROFILING)
469
    if (doingRetainerProfiling()) {
470
        endRetainerProfiling();
471 472 473
    }
#endif

474
#if defined(PROFILING)
475
    if (doingLDVProfiling()) {
476
        uint32_t t;
477 478 479 480 481
        LdvCensusKillAll();
        aggregateCensusInfo();
        for (t = 1; t < era; t++) {
            dumpCensus( &censuses[t] );
        }
482
    }
483
#endif
484

485
#if defined(PROFILING)
486
    if (doingLDVProfiling()) {
487
        uint32_t t;
488 489 490 491 492 493
        if (RtsFlags.ProfFlags.bioSelector != NULL) {
            for (t = 1; t <= era; t++) {
                freeEra( &censuses[t] );
            }
        } else {
            freeEra( &censuses[era] );
494
        }
495 496
    } else {
        freeEra( &censuses[0] );
497
    }
498 499 500 501
#else
    freeEra( &censuses[0] );
#endif

502 503
    stgFree(censuses);

504
    seconds = mut_user_time();
Ben Gamari's avatar
Ben Gamari committed
505 506
    printSample(true, seconds);
    printSample(false, seconds);
507
    fclose(hp_file);
508 509 510 511
}



512
#if defined(PROFILING)
513 514 515 516 517 518
static size_t
buf_append(char *p, const char *q, char *end)
{
    int m;

    for (m = 0; p < end; p++, q++, m++) {
519 520
        *p = *q;
        if (*q == '\0') { break; }
521 522 523 524
    }
    return m;
}

525
static void
526
fprint_ccs(FILE *fp, CostCentreStack *ccs, uint32_t max_length)
527
{
528
    char buf[max_length+1], *p, *buf_end;
529 530 531

    // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN.
    if (ccs == CCS_MAIN) {
532 533
        fprintf(fp, "MAIN");
        return;
534
    }
535

thomie's avatar
thomie committed
536
    fprintf(fp, "(%" FMT_Int ")", ccs->ccsID);
537

538 539 540
    p = buf;
    buf_end = buf + max_length + 1;

541 542 543
    // keep printing components of the stack until we run out of space
    // in the buffer.  If we run out of space, end with "...".
    for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) {
544

545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560
        // CAF cost centres print as M.CAF, but we leave the module
        // name out of all the others to save space.
        if (!strcmp(ccs->cc->label,"CAF")) {
            p += buf_append(p, ccs->cc->module, buf_end);
            p += buf_append(p, ".CAF", buf_end);
        } else {
            p += buf_append(p, ccs->cc->label, buf_end);
            if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
                p += buf_append(p, "/", buf_end);
            }
        }

        if (p >= buf_end) {
            sprintf(buf+max_length-4, "...");
            break;
        }
561 562
    }
    fprintf(fp, "%s", buf);
563 564
}

Ben Gamari's avatar
Ben Gamari committed
565
bool
Ben Gamari's avatar
Ben Gamari committed
566
strMatchesSelector( const char* str, const char* sel )
567
{
Ben Gamari's avatar
Ben Gamari committed
568
   const char* p;
569
   // debugBelch("str_matches_selector %s %s\n", str, sel);
570
   while (1) {
571 572 573
       // Compare str against wherever we've got to in sel.
       p = str;
       while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
574
           p++; sel++;
575 576 577
       }
       // Match if all of str used and have reached the end of a sel fragment.
       if (*p == '\0' && (*sel == ',' || *sel == '\0'))
Ben Gamari's avatar
Ben Gamari committed
578
           return true;
579

580 581 582
       // No match.  Advance sel to the start of the next elem.
       while (*sel != ',' && *sel != '\0') sel++;
       if (*sel == ',') sel++;
583

584
       /* Run out of sel ?? */
Ben Gamari's avatar
Ben Gamari committed
585
       if (*sel == '\0') return false;
586 587 588
   }
}

589 590
#endif /* PROFILING */

591 592 593 594
/* -----------------------------------------------------------------------------
 * Figure out whether a closure should be counted in this census, by
 * testing against all the specified constraints.
 * -------------------------------------------------------------------------- */
Ben Gamari's avatar
Ben Gamari committed
595
static bool
596
closureSatisfiesConstraints( const StgClosure* p )
597
{
598
#if !defined(PROFILING)
599
    (void)p;   /* keep gcc -Wall happy */
Ben Gamari's avatar
Ben Gamari committed
600
    return true;
601
#else
Ben Gamari's avatar
Ben Gamari committed
602
   bool b;
603 604 605 606 607

   // The CCS has a selected field to indicate whether this closure is
   // deselected by not being mentioned in the module, CC, or CCS
   // selectors.
   if (!p->header.prof.ccs->selected) {
Ben Gamari's avatar
Ben Gamari committed
608
       return false;
609
   }
610

611
   if (RtsFlags.ProfFlags.descrSelector) {
Simon Marlow's avatar
Simon Marlow committed
612
       b = strMatchesSelector( (GET_PROF_DESC(get_itbl((StgClosure *)p))),
613
                                 RtsFlags.ProfFlags.descrSelector );
Ben Gamari's avatar
Ben Gamari committed
614
       if (!b) return false;
615 616
   }
   if (RtsFlags.ProfFlags.typeSelector) {
Simon Marlow's avatar
Simon Marlow committed
617
       b = strMatchesSelector( (GET_PROF_TYPE(get_itbl((StgClosure *)p))),
618
                                RtsFlags.ProfFlags.typeSelector );
Ben Gamari's avatar
Ben Gamari committed
619
       if (!b) return false;
620
   }
621 622
   if (RtsFlags.ProfFlags.retainerSelector) {
       RetainerSet *rs;
623
       uint32_t i;
624 625 626 627
       // We must check that the retainer set is valid here.  One
       // reason it might not be valid is if this closure is a
       // a newly deceased weak pointer (i.e. a DEAD_WEAK), since
       // these aren't reached by the retainer profiler's traversal.
628
       if (isTravDataValid((StgClosure *)p)) {
629 630 631 632 633
           rs = retainerSetOf((StgClosure *)p);
           if (rs != NULL) {
               for (i = 0; i < rs->num; i++) {
                   b = strMatchesSelector( rs->element[i]->cc->label,
                                           RtsFlags.ProfFlags.retainerSelector );
Ben Gamari's avatar
Ben Gamari committed
634
                   if (b) return true;
635 636
               }
           }
637
       }
Ben Gamari's avatar
Ben Gamari committed
638
       return false;
639
   }
Ben Gamari's avatar
Ben Gamari committed
640
   return true;
641
#endif /* PROFILING */
642
}
643

644 645 646
/* -----------------------------------------------------------------------------
 * Aggregate the heap census info for biographical profiling
 * -------------------------------------------------------------------------- */
647
#if defined(PROFILING)
648 649 650 651
static void
aggregateCensusInfo( void )
{
    HashTable *acc;
652
    uint32_t t;
653 654 655 656 657 658 659
    counter *c, *d, *ctrs;
    Arena *arena;

    if (!doingLDVProfiling()) return;

    // Aggregate the LDV counters when displaying by biography.
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
660
        long void_total, drag_total;
661

662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690
        // Now we compute void_total and drag_total for each census
        // After the program has finished, the void_total field of
        // each census contains the count of words that were *created*
        // in this era and were eventually void.  Conversely, if a
        // void closure was destroyed in this era, it will be
        // represented by a negative count of words in void_total.
        //
        // To get the count of live words that are void at each
        // census, just propagate the void_total count forwards:

        void_total = 0;
        drag_total = 0;
        for (t = 1; t < era; t++) { // note: start at 1, not 0
            void_total += censuses[t].void_total;
            drag_total += censuses[t].drag_total;
            censuses[t].void_total = void_total;
            censuses[t].drag_total = drag_total;

            ASSERT( censuses[t].void_total <= censuses[t].not_used );
            // should be true because: void_total is the count of
            // live words that are void at this census, which *must*
            // be less than the number of live words that have not
            // been used yet.

            ASSERT( censuses[t].drag_total <= censuses[t].used );
            // similar reasoning as above.
        }

        return;
691 692 693 694 695 696 697 698 699 700 701 702 703
    }

    // otherwise... we're doing a heap profile that is restricted to
    // some combination of lag, drag, void or use.  We've kept all the
    // census info for all censuses so far, but we still need to
    // aggregate the counters forwards.

    arena = newArena();
    acc = allocHashTable();
    ctrs = NULL;

    for (t = 1; t < era; t++) {

704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745
        // first look through all the counters we're aggregating
        for (c = ctrs; c != NULL; c = c->next) {
            // if one of the totals is non-zero, then this closure
            // type must be present in the heap at this census time...
            d = lookupHashTable(censuses[t].hash, (StgWord)c->identity);

            if (d == NULL) {
                // if this closure identity isn't present in the
                // census for this time period, then our running
                // totals *must* be zero.
                ASSERT(c->c.ldv.void_total == 0 && c->c.ldv.drag_total == 0);

                // debugCCS(c->identity);
                // debugBelch(" census=%d void_total=%d drag_total=%d\n",
                //         t, c->c.ldv.void_total, c->c.ldv.drag_total);
            } else {
                d->c.ldv.void_total += c->c.ldv.void_total;
                d->c.ldv.drag_total += c->c.ldv.drag_total;
                c->c.ldv.void_total =  d->c.ldv.void_total;
                c->c.ldv.drag_total =  d->c.ldv.drag_total;

                ASSERT( c->c.ldv.void_total >= 0 );
                ASSERT( c->c.ldv.drag_total >= 0 );
            }
        }

        // now look through the counters in this census to find new ones
        for (c = censuses[t].ctrs; c != NULL; c = c->next) {
            d = lookupHashTable(acc, (StgWord)c->identity);
            if (d == NULL) {
                d = arenaAlloc( arena, sizeof(counter) );
                initLDVCtr(d);
                insertHashTable( acc, (StgWord)c->identity, d );
                d->identity = c->identity;
                d->next = ctrs;
                ctrs = d;
                d->c.ldv.void_total = c->c.ldv.void_total;
                d->c.ldv.drag_total = c->c.ldv.drag_total;
            }
            ASSERT( c->c.ldv.void_total >= 0 );
            ASSERT( c->c.ldv.drag_total >= 0 );
        }
746 747 748 749 750 751 752
    }

    freeHashTable(acc, NULL);
    arenaFree(arena);
}
#endif

753 754 755 756 757 758 759
/* -----------------------------------------------------------------------------
 * Print out the results of a heap census.
 * -------------------------------------------------------------------------- */
static void
dumpCensus( Census *census )
{
    counter *ctr;
760
    ssize_t count;
761

Ben Gamari's avatar
Ben Gamari committed
762
    printSample(true, census->time);
763 764 765 766 767 768 769 770 771


    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
      traceHeapBioProfSampleBegin(era, census->rtime);
    } else {
      traceHeapProfSampleBegin(era);
    }


772

773
#if defined(PROFILING)
774

775 776
    /* change typecast to uint64_t to remove
     * print formatting warning. See #12636 */
777
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
778 779 780 781 782 783 784 785 786 787 788 789 790
        fprintf(hp_file, "VOID\t%" FMT_Word64 "\n",
                (uint64_t)(census->void_total *
                                     sizeof(W_)));
        fprintf(hp_file, "LAG\t%" FMT_Word64 "\n",
                (uint64_t)((census->not_used - census->void_total) *
                                     sizeof(W_)));
        fprintf(hp_file, "USE\t%" FMT_Word64 "\n",
                (uint64_t)((census->used - census->drag_total) *
                                     sizeof(W_)));
        fprintf(hp_file, "INHERENT_USE\t%" FMT_Word64 "\n",
                (uint64_t)(census->prim * sizeof(W_)));
        fprintf(hp_file, "DRAG\t%" FMT_Word64 "\n",
                (uint64_t)(census->drag_total * sizeof(W_)));
791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807


        // Eventlog
        traceHeapProfSampleString(0, "VOID",
                (census->void_total * sizeof(W_)));
        traceHeapProfSampleString(0, "LAG",
                ((census->not_used - census->void_total) *
                                     sizeof(W_)));
        traceHeapProfSampleString(0, "USE",
                ((census->used - census->drag_total) *
                                     sizeof(W_)));
        traceHeapProfSampleString(0, "INHERENT_USE",
                (census->prim * sizeof(W_)));
        traceHeapProfSampleString(0, "DRAG",
                (census->drag_total * sizeof(W_)));

        traceHeapProfSampleEnd(era);
Ben Gamari's avatar
Ben Gamari committed
808
        printSample(false, census->time);
809
        return;
810 811 812
    }
#endif

813
    for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
814

815
#if defined(PROFILING)
816 817 818 819 820 821 822 823 824 825 826
        if (RtsFlags.ProfFlags.bioSelector != NULL) {
            count = 0;
            if (strMatchesSelector("lag", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.not_used - ctr->c.ldv.void_total;
            if (strMatchesSelector("drag", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.drag_total;
            if (strMatchesSelector("void", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.void_total;
            if (strMatchesSelector("use", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.used - ctr->c.ldv.drag_total;
        } else
827
#endif
828 829 830
        {
            count = ctr->c.resid;
        }
831

832
        ASSERT( count >= 0 );
833

834
        if (count == 0) continue;
835

836 837 838
        switch (RtsFlags.ProfFlags.doHeapProfile) {
        case HEAP_BY_CLOSURE_TYPE:
            fprintf(hp_file, "%s", (char *)ctr->identity);
839 840
            traceHeapProfSampleString(0, (char *)ctr->identity,
                                      count * sizeof(W_));
841
            break;
842
#if defined(PROFILING)
843
        case HEAP_BY_CCS:
844 845 846 847
            fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
                       RtsFlags.ProfFlags.ccsLength);
            traceHeapProfSampleCostCentre(0, (CostCentreStack *)ctr->identity,
                                          count * sizeof(W_));
848 849 850 851 852
            break;
        case HEAP_BY_MOD:
        case HEAP_BY_DESCR:
        case HEAP_BY_TYPE:
            fprintf(hp_file, "%s", (char *)ctr->identity);
853 854
            traceHeapProfSampleString(0, (char *)ctr->identity,
                                      count * sizeof(W_));
855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875
            break;
        case HEAP_BY_RETAINER:
        {
            RetainerSet *rs = (RetainerSet *)ctr->identity;

            // it might be the distinguished retainer set rs_MANY:
            if (rs == &rs_MANY) {
                fprintf(hp_file, "MANY");
                break;
            }

            // Mark this retainer set by negating its id, because it
            // has appeared in at least one census.  We print the
            // values of all such retainer sets into the log file at
            // the end.  A retainer set may exist but not feature in
            // any censuses if it arose as the intermediate retainer
            // set for some closure during retainer set calculation.
            if (rs->id > 0)
                rs->id = -(rs->id);

            // report in the unit of bytes: * sizeof(StgWord)
876 877
            printRetainerSetShort(hp_file, rs, (W_)count * sizeof(W_)
                                             , RtsFlags.ProfFlags.ccsLength);
878 879
            break;
        }
880
#endif
881 882 883
        default:
            barf("dumpCensus; doHeapProfile");
        }
884

885
        fprintf(hp_file, "\t%" FMT_Word "\n", (W_)count * sizeof(W_));
886 887
    }

888
    traceHeapProfSampleEnd(era);
Ben Gamari's avatar
Ben Gamari committed
889
    printSample(false, census->time);
890 891
}

892

893
static void heapProfObject(Census *census, StgClosure *p, size_t size,
Ben Gamari's avatar
Ben Gamari committed
894
                           bool prim
895
#if !defined(PROFILING)
896 897 898 899
                           STG_UNUSED
#endif
                           )
{
900
    const void *identity;
901
    size_t real_size;
902 903 904 905
    counter *ctr;

            identity = NULL;

906
#if defined(PROFILING)
907 908
            // subtract the profiling overhead
            real_size = size - sizeofW(StgProfHeader);
909
#else
910
            real_size = size;
911 912
#endif

913
            if (closureSatisfiesConstraints((StgClosure*)p)) {
914
#if defined(PROFILING)
915 916 917 918 919 920 921 922
                if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
                    if (prim)
                        census->prim += real_size;
                    else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
                        census->not_used += real_size;
                    else
                        census->used += real_size;
                } else
923
#endif
924 925
                {
                    identity = closureIdentity((StgClosure *)p);
926

927
                    if (identity != NULL) {
928
                        ctr = lookupHashTable(census->hash, (StgWord)identity);
929
                        if (ctr != NULL) {
930
#if defined(PROFILING)
931 932 933 934 935 936 937 938
                            if (RtsFlags.ProfFlags.bioSelector != NULL) {
                                if (prim)
                                    ctr->c.ldv.prim += real_size;
                                else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
                                    ctr->c.ldv.not_used += real_size;
                                else
                                    ctr->c.ldv.used += real_size;
                            } else
939
#endif
940 941 942 943 944 945 946 947 948 949
                            {
                                ctr->c.resid += real_size;
                            }
                        } else {
                            ctr = arenaAlloc( census->arena, sizeof(counter) );
                            initLDVCtr(ctr);
                            insertHashTable( census->hash, (StgWord)identity, ctr );
                            ctr->identity = identity;
                            ctr->next = census->ctrs;
                            census->ctrs = ctr;
950

951
#if defined(PROFILING)
952 953 954 955 956 957 958 959
                            if (RtsFlags.ProfFlags.bioSelector != NULL) {
                                if (prim)
                                    ctr->c.ldv.prim = real_size;
                                else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
                                    ctr->c.ldv.not_used = real_size;
                                else
                                    ctr->c.ldv.used = real_size;
                            } else
960
#endif
961 962 963 964 965 966 967
                            {
                                ctr->c.resid = real_size;
                            }
                        }
                    }
                }
            }
968 969
}

gcampax's avatar
gcampax committed
970 971 972 973 974 975 976 977 978 979 980 981 982 983
// Compact objects require special handling code because they
// are not stored consecutively in memory (rather, each object
// is a list of objects), and that would break the while loop
// below. But we know that each block holds at most one object
// so we don't need the loop.
//
// See Note [Compact Normal Forms] for details.
static void
heapCensusCompactList(Census *census, bdescr *bd)
{
    for (; bd != NULL; bd = bd->link) {
        StgCompactNFDataBlock *block = (StgCompactNFDataBlock*)bd->start;
        StgCompactNFData *str = block->owner;
        heapProfObject(census, (StgClosure*)str,
Ben Gamari's avatar
Ben Gamari committed
984
                       compact_nfdata_full_sizeW(str), true);
gcampax's avatar
gcampax committed
985 986 987
    }
}

988 989 990 991
/* -----------------------------------------------------------------------------
 * Code to perform a heap census.
 * -------------------------------------------------------------------------- */
static void
992
heapCensusChain( Census *census, bdescr *bd )
993 994
{
    StgPtr p;
995
    const StgInfoTable *info;
996
    size_t size;
Ben Gamari's avatar
Ben Gamari committed
997
    bool prim;
998

999
    for (; bd != NULL; bd = bd->link) {
1000

1001
        // HACK: pretend a pinned block is just one big ARR_WORDS
1002
        // owned by CCS_PINNED.  These blocks can be full of holes due
1003 1004 1005 1006
        // to alignment constraints so we can't traverse the memory
        // and do a proper census.
        if (bd->flags & BF_PINNED) {
            StgClosure arr;
1007
            SET_HDR(&arr, &stg_ARR_WORDS_info, CCS_PINNED);
Ben Gamari's avatar
Ben Gamari committed
1008
            heapProfObject(census, &arr, bd->blocks * BLOCK_SIZE_W, true);
1009 1010
            continue;
        }
1011

1012
        p = bd->start;
1013

1014
        while (p < bd->free) {
1015
            info = get_itbl((const StgClosure *)p);
Ben Gamari's avatar
Ben Gamari committed
1016
            prim = false;
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043

            switch (info->type) {

            case THUNK:
                size = thunk_sizeW_fromITBL(info);
                break;

            case THUNK_1_1:
            case THUNK_0_2:
            case THUNK_2_0:
                size = sizeofW(StgThunkHeader) + 2;
                break;

            case THUNK_1_0:
            case THUNK_0_1:
            case THUNK_SELECTOR:
                size = sizeofW(StgThunkHeader) + 1;
                break;

            case FUN:
            case BLACKHOLE:
            case BLOCKING_QUEUE:
            case FUN_1_0:
            case FUN_0_1:
            case FUN_1_1:
            case FUN_0_2:
            case FUN_2_0:
Simon Marlow's avatar
Simon Marlow committed
1044 1045
            case CONSTR:
            case CONSTR_NOCAF:
1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065
            case CONSTR_1_0:
            case CONSTR_0_1:
            case CONSTR_1_1:
            case CONSTR_0_2:
            case CONSTR_2_0:
                size = sizeW_fromITBL(info);
                break;

            case IND:
                // Special case/Delicate Hack: INDs don't normally
                // appear, since we're doing this heap census right
                // after GC.  However, GarbageCollect() also does
                // resurrectThreads(), which can update some
                // blackholes when it calls raiseAsync() on the
                // resurrected threads.  So we know that any IND will
                // be the size of a BLACKHOLE.
                size = BLACKHOLE_sizeW();
                break;

            case BCO:
Ben Gamari's avatar
Ben Gamari committed
1066
                prim = true;
1067 1068
                size = bco_sizeW((StgBCO *)p);
                break;
1069

1070 1071
            case MVAR_CLEAN:
            case MVAR_DIRTY:
1072 1073
            case TVAR:
            case WEAK:
1074 1075 1076 1077
            case PRIM:
            case MUT_PRIM:
            case MUT_VAR_CLEAN:
            case MUT_VAR_DIRTY:
Ben Gamari's avatar
Ben Gamari committed
1078
                prim = true;
1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094
                size = sizeW_fromITBL(info);
                break;

            case AP:
                size = ap_sizeW((StgAP *)p);
                break;

            case PAP:
                size = pap_sizeW((StgPAP *)p);
                break;

            case AP_STACK:
                size = ap_stack_sizeW((StgAP_STACK *)p);
                break;

            case ARR_WORDS:
Ben Gamari's avatar
Ben Gamari committed
1095
                prim = true;
1096
                size = arr_words_sizeW((StgArrBytes*)p);
1097 1098 1099 1100
                break;

            case MUT_ARR_PTRS_CLEAN:
            case MUT_ARR_PTRS_DIRTY:
1101 1102
            case MUT_ARR_PTRS_FROZEN_CLEAN:
            case MUT_ARR_PTRS_FROZEN_DIRTY:
Ben Gamari's avatar
Ben Gamari committed
1103
                prim = true;
1104 1105 1106 1107 1108
                size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
                break;

            case SMALL_MUT_ARR_PTRS_CLEAN:
            case SMALL_MUT_ARR_PTRS_DIRTY:
1109 1110
            case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
            case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
Ben Gamari's avatar
Ben Gamari committed
1111
                prim = true;
1112 1113 1114 1115
                size = small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs *)p);
                break;

            case TSO:
Ben Gamari's avatar
Ben Gamari committed
1116
                prim = true;
1117
#if defined(PROFILING)
1118
                if (RtsFlags.ProfFlags.includeTSOs) {
1119
                    size = sizeofW(StgTSO);
1120 1121 1122
                    break;
                } else {
                    // Skip this TSO and move on to the next object
1123
                    p += sizeofW(StgTSO);
1124 1125
                    continue;
                }
1126
#else
1127
                size = sizeofW(StgTSO);
1128
                break;
1129 1130
#endif

1131
            case STACK:
Ben Gamari's avatar
Ben Gamari committed
1132
                prim = true;
1133
#if defined(PROFILING)
1134
                if (RtsFlags.ProfFlags.includeTSOs) {
1135 1136
                    size = stack_sizeW((StgStack*)p);
                    break;
1137 1138
                } else {
                    // Skip this TSO and move on to the next object
1139
                    p += stack_sizeW((StgStack*)p);
1140 1141
                    continue;
                }
1142 1143
#else
                size = stack_sizeW((StgStack*)p);
1144
                break;
1145 1146 1147
#endif

            case TREC_CHUNK:
Ben Gamari's avatar
Ben Gamari committed
1148
                prim = true;
1149 1150 1151
                size = sizeofW(StgTRecChunk);
                break;

gcampax's avatar
gcampax committed
1152 1153 1154 1155
            case COMPACT_NFDATA:
                barf("heapCensus, found compact object in the wrong list");
                break;

1156 1157 1158 1159
            default:
                barf("heapCensus, unknown object: %d", info->type);
            }

1160
            heapProfObject(census,(StgClosure*)p,size,prim);
1161

1162
            p += size;
1163 1164
            /* skip over slop */
            while (p < bd->free && !*p) p++; // skip slop
1165
        }
1166 1167
    }
}
1168

Simon Marlow's avatar
Simon Marlow committed
1169
void heapCensus (Time t)
1170
{
1171
  uint32_t g, n;
1172
  Census *census;
1173
  gen_workspace *ws;
1174 1175

  census = &censuses[era];
Ian Lynagh's avatar
Ian Lynagh committed
1176
  census->time  = mut_user_time_until(t);
1177 1178
  census->rtime = TimeToNS(stat_getElapsedTime());

1179

1180
  // calculate retainer sets if necessary
1181
#if defined(PROFILING)
1182 1183
  if (doingRetainerProfiling()) {
      retainerProfile();
1184 1185 1186
  }
#endif

1187
#if defined(PROFILING)
1188
  stat_startHeapCensus();
1189
#endif
1190

1191
  // Traverse the heap, collecting the census info
1192 1193 1194 1195 1196
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      heapCensusChain( census, generations[g].blocks );
      // Are we interested in large objects?  might be
      // confusing to include the stack in a heap profile.
      heapCensusChain( census, generations[g].large_objects );
gcampax's avatar
gcampax committed
1197
      heapCensusCompactList ( census, generations[g].compact_objects );
1198 1199 1200 1201 1202 1203 1204

      for (n = 0; n < n_capabilities; n++) {
          ws = &gc_threads[n]->gens[g];
          heapCensusChain(census, ws->todo_bd);
          heapCensusChain(census, ws->part_list);
          heapCensusChain(census, ws->scavd_list);
      }
1205 1206
  }

1207
  // dump out the census info
1208
#if defined(PROFILING)
1209 1210 1211
    // We can't generate any info for LDV profiling until
    // the end of the run...
    if (!doingLDVProfiling())
1212
        dumpCensus( census );
1213 1214 1215
#else
    dumpCensus( census );
#endif
1216

1217 1218 1219

  // free our storage, unless we're keeping all the census info for
  // future restriction by biography.
1220
#if defined(PROFILING)
1221 1222
  if (RtsFlags.ProfFlags.bioSelector == NULL)
  {
1223
      freeEra(census);
1224 1225 1226
      census->hash = NULL;
      census->arena = NULL;
  }
1227
#endif
1228 1229 1230

  // we're into the next time period now
  nextEra();
1231

1232
#if defined(PROFILING)
1233
  stat_endHeapCensus();
1234
#endif
1235
}