ProfHeap.c 28.4 KB
Newer Older
1
/* ----------------------------------------------------------------------------
2
 *
3
 * (c) The GHC Team, 1998-2003
4 5 6
 *
 * Support for heap profiling
 *
7
 * --------------------------------------------------------------------------*/
8

9
#include "PosixSource.h"
10
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
11

12
#include "RtsUtils.h"
13
#include "Profiling.h"
14 15
#include "ProfHeap.h"
#include "Stats.h"
16
#include "Hash.h"
17
#include "RetainerProfile.h"
Simon Marlow's avatar
Simon Marlow committed
18
#include "LdvProfile.h"
19
#include "Arena.h"
20 21
#include "Printer.h"

22 23
#include <string.h>

24
/* -----------------------------------------------------------------------------
25 26
 * era stores the current time period.  It is the same as the
 * number of censuses that have been performed.
27
 *
28 29 30
 * RESTRICTION:
 *   era must be no longer than LDV_SHIFT (15 or 30) bits.
 * Invariants:
31
 *   era is initialized to 1 in initHeapProfiling().
32
 *
33 34 35
 * max_era is initialized to 2^LDV_SHIFT in initHeapProfiling().
 * When era reaches max_era, the profiling stops because a closure can
 * store only up to (max_era - 1) as its creation or last use time.
36
 * -------------------------------------------------------------------------- */
37
unsigned int era;
38
static nat max_era;
39

40
/* -----------------------------------------------------------------------------
41 42 43 44 45 46 47
 * Counters
 *
 * For most heap profiles each closure identity gets a simple count
 * of live words in the heap at each census.  However, if we're
 * selecting by biography, then we have to keep the various
 * lag/drag/void counters for each identity.
 * -------------------------------------------------------------------------- */
48 49 50 51 52 53
typedef struct _counter {
    void *identity;
    union {
	nat resid;
	struct {
	    int prim;     // total size of 'inherently used' closures
54
	    int not_used; // total size of 'never used' closures
55
	    int used;     // total size of 'used at least once' closures
56 57
	    int void_total;  // current total size of 'destroyed without being used' closures
	    int drag_total;  // current total size of 'used at least once and waiting to die'
58 59 60 61
	} ldv;
    } c;
    struct _counter *next;
} counter;
62

sof's avatar
sof committed
63
STATIC_INLINE void
64 65 66 67 68 69 70 71 72
initLDVCtr( counter *ctr )
{
    ctr->c.ldv.prim = 0;
    ctr->c.ldv.not_used = 0;
    ctr->c.ldv.used = 0;
    ctr->c.ldv.void_total = 0;
    ctr->c.ldv.drag_total = 0;
}

73 74 75 76 77 78 79 80 81 82 83 84 85 86
typedef struct {
    double      time;    // the time in MUT time when the census is made
    HashTable * hash;
    counter   * ctrs;
    Arena     * arena;

    // for LDV profiling, when just displaying by LDV
    int       prim;
    int       not_used;
    int       used;
    int       void_total;
    int       drag_total;
} Census;

87 88
static Census *censuses = NULL;
static nat n_censuses = 0;
89

90 91 92 93 94 95
#ifdef PROFILING
static void aggregateCensusInfo( void );
#endif

static void dumpCensus( Census *census );

Simon Marlow's avatar
Simon Marlow committed
96 97
static rtsBool closureSatisfiesConstraints( StgClosure* p );

98
/* ----------------------------------------------------------------------------
99 100 101 102
 * Find the "closure identity", which is a unique pointer reresenting
 * the band to which this closure's heap space is attributed in the
 * heap profile.
 * ------------------------------------------------------------------------- */
Simon Marlow's avatar
Simon Marlow committed
103
static void *
104 105 106 107 108 109
closureIdentity( StgClosure *p )
{
    switch (RtsFlags.ProfFlags.doHeapProfile) {

#ifdef PROFILING
    case HEAP_BY_CCS:
110
	return p->header.prof.ccs;
111
    case HEAP_BY_MOD:
112
	return p->header.prof.ccs->cc->module;
113
    case HEAP_BY_DESCR:
Simon Marlow's avatar
Simon Marlow committed
114
	return GET_PROF_DESC(get_itbl(p));
115
    case HEAP_BY_TYPE:
Simon Marlow's avatar
Simon Marlow committed
116
	return GET_PROF_TYPE(get_itbl(p));
117
    case HEAP_BY_RETAINER:
118 119 120 121 122 123 124
	// AFAIK, the only closures in the heap which might not have a
	// valid retainer set are DEAD_WEAK closures.
	if (isRetainerSetFieldValid(p))
	    return retainerSetOf(p);
	else
	    return NULL;

125
#else
126
    case HEAP_BY_CLOSURE_TYPE:
127 128 129 130 131 132 133 134 135 136 137 138 139 140
    {
        StgInfoTable *info;
        info = get_itbl(p);
        switch (info->type) {
        case CONSTR:
        case CONSTR_1_0:
        case CONSTR_0_1:
        case CONSTR_2_0:
        case CONSTR_1_1:
        case CONSTR_0_2:
        case CONSTR_STATIC:
        case CONSTR_NOCAF_STATIC:
            return GET_CON_DESC(itbl_to_con_itbl(info));
        default:
141
            return closure_type_names[info->type];
142 143
        }
    }
144

145 146 147 148 149 150
#endif
    default:
	barf("closureIdentity");
    }
}

151 152 153 154
/* --------------------------------------------------------------------------
 * Profiling type predicates
 * ----------------------------------------------------------------------- */
#ifdef PROFILING
sof's avatar
sof committed
155
STATIC_INLINE rtsBool
156
doingLDVProfiling( void )
157
{
158 159
    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV 
	    || RtsFlags.ProfFlags.bioSelector != NULL);
160 161
}

sof's avatar
sof committed
162
STATIC_INLINE rtsBool
163
doingRetainerProfiling( void )
164
{
165 166 167
    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER
	    || RtsFlags.ProfFlags.retainerSelector != NULL);
}
168
#endif /* PROFILING */
169

170 171 172 173 174 175 176 177
// Precesses a closure 'c' being destroyed whose size is 'size'.
// Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
// such as TSO; they should not be involved in computing dragNew or voidNew.
// 
// Even though era is checked in both LdvCensusForDead() and 
// LdvCensusKillAll(), we still need to make sure that era is > 0 because 
// LDV_recordDead() may be called from elsewhere in the runtime system. E.g., 
// when a thunk is replaced by an indirection object.
178

179 180 181 182
#ifdef PROFILING
void
LDV_recordDead( StgClosure *c, nat size )
{
183 184 185 186
    void *id;
    nat t;
    counter *ctr;

187 188
    if (era > 0 && closureSatisfiesConstraints(c)) {
	size -= sizeofW(StgProfHeader);
189
	ASSERT(LDVW(c) != 0);
190 191 192
	if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
	    t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
	    if (t < era) {
193 194 195
		if (RtsFlags.ProfFlags.bioSelector == NULL) {
		    censuses[t].void_total   += (int)size;
		    censuses[era].void_total -= (int)size;
196
		    ASSERT(censuses[t].void_total < censuses[t].not_used);
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
		} else {
		    id = closureIdentity(c);
		    ctr = lookupHashTable(censuses[t].hash, (StgWord)id);
		    ASSERT( ctr != NULL );
		    ctr->c.ldv.void_total += (int)size;
		    ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
		    if (ctr == NULL) {
			ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
			initLDVCtr(ctr);
			insertHashTable(censuses[era].hash, (StgWord)id, ctr);
			ctr->identity = id;
			ctr->next = censuses[era].ctrs;
			censuses[era].ctrs = ctr;
		    }
		    ctr->c.ldv.void_total -= (int)size;
		}
213 214
	    }
	} else {
215 216
	    t = LDVW((c)) & LDV_LAST_MASK;
	    if (t + 1 < era) {
217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
		if (RtsFlags.ProfFlags.bioSelector == NULL) {
		    censuses[t+1].drag_total += size;
		    censuses[era].drag_total -= size;
		} else {
		    void *id;
		    id = closureIdentity(c);
		    ctr = lookupHashTable(censuses[t+1].hash, (StgWord)id);
		    ASSERT( ctr != NULL );
		    ctr->c.ldv.drag_total += (int)size;
		    ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
		    if (ctr == NULL) {
			ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
			initLDVCtr(ctr);
			insertHashTable(censuses[era].hash, (StgWord)id, ctr);
			ctr->identity = id;
			ctr->next = censuses[era].ctrs;
			censuses[era].ctrs = ctr;
		    }
		    ctr->c.ldv.drag_total -= (int)size;
		}
237
	    }
238 239 240
	}
    }
}
241
#endif
242

243 244 245
/* --------------------------------------------------------------------------
 * Initialize censuses[era];
 * ----------------------------------------------------------------------- */
246

sof's avatar
sof committed
247
STATIC_INLINE void
248
initEra(Census *census)
249
{
250 251 252 253 254 255 256 257 258
    census->hash  = allocHashTable();
    census->ctrs  = NULL;
    census->arena = newArena();

    census->not_used   = 0;
    census->used       = 0;
    census->prim       = 0;
    census->void_total = 0;
    census->drag_total = 0;
259 260
}

261 262 263
STATIC_INLINE void
freeEra(Census *census)
{
264 265 266 267 268 269
    if (RtsFlags.ProfFlags.bioSelector != NULL)
        // when bioSelector==NULL, these are freed in heapCensus()
    {
        arenaFree(census->arena);
        freeHashTable(census->hash, NULL);
    }
270 271
}

272 273 274 275
/* --------------------------------------------------------------------------
 * Increases era by 1 and initialize census[era].
 * Reallocates gi[] and increases its size if needed.
 * ----------------------------------------------------------------------- */
276

277
static void
278
nextEra( void )
279
{
280 281 282 283 284
#ifdef PROFILING
    if (doingLDVProfiling()) { 
	era++;

	if (era == max_era) {
285
	    errorBelch("maximum number of censuses reached; use +RTS -i to reduce");
286
	    stg_exit(EXIT_FAILURE);
287 288 289 290 291 292
	}
	
	if (era == n_censuses) {
	    n_censuses *= 2;
	    censuses = stgReallocBytes(censuses, sizeof(Census) * n_censuses,
				       "nextEra");
293 294
	}
    }
295
#endif /* PROFILING */
296

297
    initEra( &censuses[era] );
298 299
}

300 301 302
/* ----------------------------------------------------------------------------
 * Heap profiling by info table
 * ------------------------------------------------------------------------- */
303

Simon Marlow's avatar
Simon Marlow committed
304
#if !defined(PROFILING)
305
FILE *hp_file;
306
static char *hp_filename;
307

308
void initProfiling1 (void)
309 310 311
{
}

312
void freeProfiling1 (void)
313 314 315
{
}

316
void initProfiling2 (void)
317
{
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
    char *prog;

    prog = stgMallocBytes(strlen(prog_name) + 1, "initProfiling2");
    strcpy(prog, prog_name);
#ifdef mingw32_HOST_OS
    // on Windows, drop the .exe suffix if there is one
    {
        char *suff;
        suff = strrchr(prog,'.');
        if (suff != NULL && !strcmp(suff,".exe")) {
            *suff = '\0';
        }
    }
#endif

333 334
  if (RtsFlags.ProfFlags.doHeapProfile) {
    /* Initialise the log file name */
335 336
    hp_filename = stgMallocBytes(strlen(prog) + 6, "hpFileName");
    sprintf(hp_filename, "%s.hp", prog);
337 338 339
    
    /* open the log file */
    if ((hp_file = fopen(hp_filename, "w")) == NULL) {
340
      debugBelch("Can't open profiling report file %s\n", 
341 342 343 344 345 346
	      hp_filename);
      RtsFlags.ProfFlags.doHeapProfile = 0;
      return;
    }
  }
  
347 348
  stgFree(prog);

349 350 351 352 353 354 355
  initHeapProfiling();
}

void endProfiling( void )
{
  endHeapProfiling();
}
356
#endif /* !PROFILING */
357

358 359 360 361 362
static void
printSample(rtsBool beginSample, StgDouble sampleValue)
{
    StgDouble fractionalPart, integralPart;
    fractionalPart = modf(sampleValue, &integralPart);
363
    fprintf(hp_file, "%s %" FMT_Word64 ".%02" FMT_Word64 "\n",
364
            (beginSample ? "BEGIN_SAMPLE" : "END_SAMPLE"),
365
            (StgWord64)integralPart, (StgWord64)(fractionalPart * 100));
366 367
}

368 369 370
/* --------------------------------------------------------------------------
 * Initialize the heap profilier
 * ----------------------------------------------------------------------- */
371 372 373 374 375 376 377
nat
initHeapProfiling(void)
{
    if (! RtsFlags.ProfFlags.doHeapProfile) {
        return 0;
    }

378 379
#ifdef PROFILING
    if (doingLDVProfiling() && doingRetainerProfiling()) {
380
	errorBelch("cannot mix -hb and -hr");
381
	stg_exit(EXIT_FAILURE);
382 383 384
    }
#endif

385 386 387 388 389 390 391 392 393 394 395
    // we only count eras if we're doing LDV profiling.  Otherwise era
    // is fixed at zero.
#ifdef PROFILING
    if (doingLDVProfiling()) {
	era = 1;
    } else
#endif
    {
	era = 0;
    }

Ian Lynagh's avatar
Ian Lynagh committed
396 397
    // max_era = 2^LDV_SHIFT
	max_era = 1 << LDV_SHIFT;
398 399 400 401

    n_censuses = 32;
    censuses = stgMallocBytes(sizeof(Census) * n_censuses, "initHeapProfiling");

402 403
    initEra( &censuses[era] );

404
    /* initProfilingLogFile(); */
sof's avatar
sof committed
405
    fprintf(hp_file, "JOB \"%s", prog_name);
406

407 408 409 410 411
#ifdef PROFILING
    {
	int count;
	for(count = 1; count < prog_argc; count++)
	    fprintf(hp_file, " %s", prog_argv[count]);
412
	fprintf(hp_file, " +RTS");
413
	for(count = 0; count < rts_argc; count++)
414
	    fprintf(hp_file, " %s", rts_argv[count]);
415
    }
416
#endif /* PROFILING */
417 418 419

    fprintf(hp_file, "\"\n" );

420
    fprintf(hp_file, "DATE \"%s\"\n", time_str());
421

422 423
    fprintf(hp_file, "SAMPLE_UNIT \"seconds\"\n");
    fprintf(hp_file, "VALUE_UNIT \"bytes\"\n");
424

425 426
    printSample(rtsTrue, 0);
    printSample(rtsFalse, 0);
427

428
#ifdef PROFILING
429 430 431
    if (doingRetainerProfiling()) {
	initRetainerProfiling();
    }
432 433
#endif

434 435 436 437 438 439 440 441 442 443 444 445
    return 0;
}

void
endHeapProfiling(void)
{
    StgDouble seconds;

    if (! RtsFlags.ProfFlags.doHeapProfile) {
        return;
    }

446
#ifdef PROFILING
447
    if (doingRetainerProfiling()) {
448 449 450 451
	endRetainerProfiling();
    }
#endif

452
#ifdef PROFILING
453
    if (doingLDVProfiling()) {
454
	nat t;
455
	LdvCensusKillAll();
456 457 458
	aggregateCensusInfo();
	for (t = 1; t < era; t++) {
	    dumpCensus( &censuses[t] );
459 460
	}
    }
461
#endif
462

463
#ifdef PROFILING
464
    if (doingLDVProfiling()) {
465
        nat t;
466
        for (t = 1; t <= era; t++) {
467 468
            freeEra( &censuses[t] );
        }
469 470
    } else {
        freeEra( &censuses[0] );
471
    }
472 473 474 475
#else
    freeEra( &censuses[0] );
#endif

476 477
    stgFree(censuses);

478
    seconds = mut_user_time();
479 480
    printSample(rtsTrue, seconds);
    printSample(rtsFalse, seconds);
481
    fclose(hp_file);
482 483 484 485 486
}



#ifdef PROFILING
487 488 489 490 491 492 493 494 495 496 497 498
static size_t
buf_append(char *p, const char *q, char *end)
{
    int m;

    for (m = 0; p < end; p++, q++, m++) {
	*p = *q;
	if (*q == '\0') { break; }
    }
    return m;
}

499
static void
500
fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
501
{
502
    char buf[max_length+1], *p, *buf_end;
503 504 505 506 507 508

    // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN.
    if (ccs == CCS_MAIN) {
	fprintf(fp, "MAIN");
	return;
    }
509

510
    fprintf(fp, "(%ld)", ccs->ccsID);
511

512 513 514
    p = buf;
    buf_end = buf + max_length + 1;

515 516 517
    // keep printing components of the stack until we run out of space
    // in the buffer.  If we run out of space, end with "...".
    for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) {
518

519 520 521
	// CAF cost centres print as M.CAF, but we leave the module
	// name out of all the others to save space.
	if (!strcmp(ccs->cc->label,"CAF")) {
522 523
	    p += buf_append(p, ccs->cc->module, buf_end);
	    p += buf_append(p, ".CAF", buf_end);
524
	} else {
ravi@bluespec.com's avatar
ravi@bluespec.com committed
525
	    p += buf_append(p, ccs->cc->label, buf_end);
526
	    if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
527
		p += buf_append(p, "/", buf_end);
528 529
	    }
	}
530 531
	
	if (p >= buf_end) {
532 533 534 535 536
	    sprintf(buf+max_length-4, "...");
	    break;
	}
    }
    fprintf(fp, "%s", buf);
537 538
}

539 540
rtsBool
strMatchesSelector( char* str, char* sel )
541 542
{
   char* p;
543
   // debugBelch("str_matches_selector %s %s\n", str, sel);
544
   while (1) {
545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
       // Compare str against wherever we've got to in sel.
       p = str;
       while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
	   p++; sel++;
       }
       // Match if all of str used and have reached the end of a sel fragment.
       if (*p == '\0' && (*sel == ',' || *sel == '\0'))
	   return rtsTrue;
       
       // No match.  Advance sel to the start of the next elem.
       while (*sel != ',' && *sel != '\0') sel++;
       if (*sel == ',') sel++;
       
       /* Run out of sel ?? */
       if (*sel == '\0') return rtsFalse;
560 561 562
   }
}

Simon Marlow's avatar
Simon Marlow committed
563 564
#endif /* PROFILING */

565 566 567 568
/* -----------------------------------------------------------------------------
 * Figure out whether a closure should be counted in this census, by
 * testing against all the specified constraints.
 * -------------------------------------------------------------------------- */
Simon Marlow's avatar
Simon Marlow committed
569
static rtsBool
570
closureSatisfiesConstraints( StgClosure* p )
571
{
572
#if !defined(PROFILING)
573
    (void)p;   /* keep gcc -Wall happy */
574 575
    return rtsTrue;
#else
576
   rtsBool b;
577 578 579 580 581 582

   // The CCS has a selected field to indicate whether this closure is
   // deselected by not being mentioned in the module, CC, or CCS
   // selectors.
   if (!p->header.prof.ccs->selected) {
       return rtsFalse;
583
   }
584

585
   if (RtsFlags.ProfFlags.descrSelector) {
Simon Marlow's avatar
Simon Marlow committed
586
       b = strMatchesSelector( (GET_PROF_DESC(get_itbl((StgClosure *)p))),
587 588
				 RtsFlags.ProfFlags.descrSelector );
       if (!b) return rtsFalse;
589 590
   }
   if (RtsFlags.ProfFlags.typeSelector) {
Simon Marlow's avatar
Simon Marlow committed
591
       b = strMatchesSelector( (GET_PROF_TYPE(get_itbl((StgClosure *)p))),
592
                                RtsFlags.ProfFlags.typeSelector );
593
       if (!b) return rtsFalse;
594
   }
595 596 597
   if (RtsFlags.ProfFlags.retainerSelector) {
       RetainerSet *rs;
       nat i;
598 599 600 601 602 603 604 605 606 607 608 609
       // We must check that the retainer set is valid here.  One
       // reason it might not be valid is if this closure is a
       // a newly deceased weak pointer (i.e. a DEAD_WEAK), since
       // these aren't reached by the retainer profiler's traversal.
       if (isRetainerSetFieldValid((StgClosure *)p)) {
	   rs = retainerSetOf((StgClosure *)p);
	   if (rs != NULL) {
	       for (i = 0; i < rs->num; i++) {
		   b = strMatchesSelector( rs->element[i]->cc->label,
					   RtsFlags.ProfFlags.retainerSelector );
		   if (b) return rtsTrue;
	       }
610 611 612 613
	   }
       }
       return rtsFalse;
   }
614 615
   return rtsTrue;
#endif /* PROFILING */
616
}
617

618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
/* -----------------------------------------------------------------------------
 * Aggregate the heap census info for biographical profiling
 * -------------------------------------------------------------------------- */
#ifdef PROFILING
static void
aggregateCensusInfo( void )
{
    HashTable *acc;
    nat t;
    counter *c, *d, *ctrs;
    Arena *arena;

    if (!doingLDVProfiling()) return;

    // Aggregate the LDV counters when displaying by biography.
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
	int void_total, drag_total;

	// Now we compute void_total and drag_total for each census
637 638 639 640 641 642 643 644 645
	// After the program has finished, the void_total field of
	// each census contains the count of words that were *created*
	// in this era and were eventually void.  Conversely, if a
	// void closure was destroyed in this era, it will be
	// represented by a negative count of words in void_total.
	//
	// To get the count of live words that are void at each
	// census, just propagate the void_total count forwards:

646 647 648 649 650 651 652
	void_total = 0;
	drag_total = 0;
	for (t = 1; t < era; t++) { // note: start at 1, not 0
	    void_total += censuses[t].void_total;
	    drag_total += censuses[t].drag_total;
	    censuses[t].void_total = void_total;
	    censuses[t].drag_total = drag_total;
653

654
	    ASSERT( censuses[t].void_total <= censuses[t].not_used );
655 656 657 658 659
	    // should be true because: void_total is the count of
	    // live words that are void at this census, which *must*
	    // be less than the number of live words that have not
	    // been used yet.

660
	    ASSERT( censuses[t].drag_total <= censuses[t].used );
661
	    // similar reasoning as above.
662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689
	}
	
	return;
    }

    // otherwise... we're doing a heap profile that is restricted to
    // some combination of lag, drag, void or use.  We've kept all the
    // census info for all censuses so far, but we still need to
    // aggregate the counters forwards.

    arena = newArena();
    acc = allocHashTable();
    ctrs = NULL;

    for (t = 1; t < era; t++) {

	// first look through all the counters we're aggregating
	for (c = ctrs; c != NULL; c = c->next) {
	    // if one of the totals is non-zero, then this closure
	    // type must be present in the heap at this census time...
	    d = lookupHashTable(censuses[t].hash, (StgWord)c->identity);

	    if (d == NULL) {
		// if this closure identity isn't present in the
		// census for this time period, then our running
		// totals *must* be zero.
		ASSERT(c->c.ldv.void_total == 0 && c->c.ldv.drag_total == 0);

690 691
		// debugCCS(c->identity);
		// debugBelch(" census=%d void_total=%d drag_total=%d\n",
692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726
		//         t, c->c.ldv.void_total, c->c.ldv.drag_total);
	    } else {
		d->c.ldv.void_total += c->c.ldv.void_total;
		d->c.ldv.drag_total += c->c.ldv.drag_total;
		c->c.ldv.void_total =  d->c.ldv.void_total;
		c->c.ldv.drag_total =  d->c.ldv.drag_total;

		ASSERT( c->c.ldv.void_total >= 0 );
		ASSERT( c->c.ldv.drag_total >= 0 );
	    }
	}

	// now look through the counters in this census to find new ones
	for (c = censuses[t].ctrs; c != NULL; c = c->next) {
	    d = lookupHashTable(acc, (StgWord)c->identity);
	    if (d == NULL) {
		d = arenaAlloc( arena, sizeof(counter) );
		initLDVCtr(d);
		insertHashTable( acc, (StgWord)c->identity, d );
		d->identity = c->identity;
		d->next = ctrs;
		ctrs = d;
		d->c.ldv.void_total = c->c.ldv.void_total;
		d->c.ldv.drag_total = c->c.ldv.drag_total;
	    }
	    ASSERT( c->c.ldv.void_total >= 0 );
	    ASSERT( c->c.ldv.drag_total >= 0 );
	}
    }

    freeHashTable(acc, NULL);
    arenaFree(arena);
}
#endif

727 728 729 730 731 732 733
/* -----------------------------------------------------------------------------
 * Print out the results of a heap census.
 * -------------------------------------------------------------------------- */
static void
dumpCensus( Census *census )
{
    counter *ctr;
734 735
    int count;

736
    printSample(rtsTrue, census->time);
737

738 739
#ifdef PROFILING
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
740
      fprintf(hp_file, "VOID\t%lu\n", (unsigned long)(census->void_total) * sizeof(W_));
741
	fprintf(hp_file, "LAG\t%lu\n", 
742
		(unsigned long)(census->not_used - census->void_total) * sizeof(W_));
743
	fprintf(hp_file, "USE\t%lu\n", 
744
		(unsigned long)(census->used - census->drag_total) * sizeof(W_));
745
	fprintf(hp_file, "INHERENT_USE\t%lu\n", 
746 747 748
		(unsigned long)(census->prim) * sizeof(W_));
	fprintf(hp_file, "DRAG\t%lu\n",
		(unsigned long)(census->drag_total) * sizeof(W_));
749
	printSample(rtsFalse, census->time);
750 751 752 753
	return;
    }
#endif

754
    for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
755 756

#ifdef PROFILING
757 758
	if (RtsFlags.ProfFlags.bioSelector != NULL) {
	    count = 0;
759
	    if (strMatchesSelector("lag", RtsFlags.ProfFlags.bioSelector))
760
		count += ctr->c.ldv.not_used - ctr->c.ldv.void_total;
761
	    if (strMatchesSelector("drag", RtsFlags.ProfFlags.bioSelector))
762
		count += ctr->c.ldv.drag_total;
763
	    if (strMatchesSelector("void", RtsFlags.ProfFlags.bioSelector))
764
		count += ctr->c.ldv.void_total;
765
	    if (strMatchesSelector("use", RtsFlags.ProfFlags.bioSelector))
766 767
		count += ctr->c.ldv.used - ctr->c.ldv.drag_total;
	} else
768
#endif
769 770 771
	{
	    count = ctr->c.resid;
	}
772

773
	ASSERT( count >= 0 );
774

775
	if (count == 0) continue;
776

777
#if !defined(PROFILING)
778 779
	switch (RtsFlags.ProfFlags.doHeapProfile) {
	case HEAP_BY_CLOSURE_TYPE:
780
	    fprintf(hp_file, "%s", (char *)ctr->identity);
781 782 783 784 785 786 787
	    break;
	}
#endif
	
#ifdef PROFILING
	switch (RtsFlags.ProfFlags.doHeapProfile) {
	case HEAP_BY_CCS:
ravi@bluespec.com's avatar
ravi@bluespec.com committed
788
	    fprint_ccs(hp_file, (CostCentreStack *)ctr->identity, RtsFlags.ProfFlags.ccsLength);
789 790 791 792 793 794 795 796 797 798
	    break;
	case HEAP_BY_MOD:
	case HEAP_BY_DESCR:
	case HEAP_BY_TYPE:
	    fprintf(hp_file, "%s", (char *)ctr->identity);
	    break;
	case HEAP_BY_RETAINER:
	{
	    RetainerSet *rs = (RetainerSet *)ctr->identity;

799 800 801 802 803 804
	    // it might be the distinguished retainer set rs_MANY:
	    if (rs == &rs_MANY) {
		fprintf(hp_file, "MANY");
		break;
	    }

805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822
	    // Mark this retainer set by negating its id, because it
	    // has appeared in at least one census.  We print the
	    // values of all such retainer sets into the log file at
	    // the end.  A retainer set may exist but not feature in
	    // any censuses if it arose as the intermediate retainer
	    // set for some closure during retainer set calculation.
	    if (rs->id > 0)
		rs->id = -(rs->id);

	    // report in the unit of bytes: * sizeof(StgWord)
	    printRetainerSetShort(hp_file, rs);
	    break;
	}
	default:
	    barf("dumpCensus; doHeapProfile");
	}
#endif

823
	fprintf(hp_file, "\t%lu\n", (unsigned long)count * sizeof(W_));
824 825
    }

826
    printSample(rtsFalse, census->time);
827 828
}

829 830 831 832
/* -----------------------------------------------------------------------------
 * Code to perform a heap census.
 * -------------------------------------------------------------------------- */
static void
833
heapCensusChain( Census *census, bdescr *bd )
834 835 836
{
    StgPtr p;
    StgInfoTable *info;
837
    void *identity;
838
    nat size;
839
    counter *ctr;
840
    nat real_size;
841
    rtsBool prim;
842

843
    for (; bd != NULL; bd = bd->link) {
844 845 846 847 848 849 850 851 852

	// HACK: ignore pinned blocks, because they contain gaps.
	// It's not clear exactly what we'd like to do here, since we
	// can't tell which objects in the block are actually alive.
	// Perhaps the whole block should be counted as SYSTEM memory.
	if (bd->flags & BF_PINNED) {
	    continue;
	}

853 854 855
	p = bd->start;
	while (p < bd->free) {
	    info = get_itbl((StgClosure *)p);
856
	    prim = rtsFalse;
857 858 859
	    
	    switch (info->type) {

860 861 862 863 864 865 866
	    case THUNK:
		size = thunk_sizeW_fromITBL(info);
		break;

	    case THUNK_1_1:
	    case THUNK_0_2:
	    case THUNK_2_0:
867
		size = sizeofW(StgThunkHeader) + 2;
868 869 870 871 872
		break;

	    case THUNK_1_0:
	    case THUNK_0_1:
	    case THUNK_SELECTOR:
873
		size = sizeofW(StgThunkHeader) + 1;
874 875
		break;

876 877 878 879
	    case CONSTR:
	    case FUN:
	    case IND_PERM:
	    case BLACKHOLE:
880
	    case BLOCKING_QUEUE:
881 882 883 884 885 886 887 888 889 890 891 892
	    case FUN_1_0:
	    case FUN_0_1:
	    case FUN_1_1:
	    case FUN_0_2:
	    case FUN_2_0:
	    case CONSTR_1_0:
	    case CONSTR_0_1:
	    case CONSTR_1_1:
	    case CONSTR_0_2:
	    case CONSTR_2_0:
		size = sizeW_fromITBL(info);
		break;
893

894 895 896 897 898 899 900 901 902 903 904
	    case IND:
		// Special case/Delicate Hack: INDs don't normally
		// appear, since we're doing this heap census right
		// after GC.  However, GarbageCollect() also does
		// resurrectThreads(), which can update some
		// blackholes when it calls raiseAsync() on the
		// resurrected threads.  So we know that any IND will
		// be the size of a BLACKHOLE.
		size = BLACKHOLE_sizeW();
		break;

905
	    case BCO:
906 907 908 909
		prim = rtsTrue;
		size = bco_sizeW((StgBCO *)p);
		break;

910 911
            case MVAR_CLEAN:
            case MVAR_DIRTY:
912
	    case WEAK:
913 914
	    case PRIM:
	    case MUT_PRIM:
915 916
	    case MUT_VAR_CLEAN:
	    case MUT_VAR_DIRTY:
917 918 919 920
		prim = rtsTrue;
		size = sizeW_fromITBL(info);
		break;

921 922
	    case AP:
		size = ap_sizeW((StgAP *)p);
923 924 925 926 927
		break;

	    case PAP:
		size = pap_sizeW((StgPAP *)p);
		break;
928 929 930 931

	    case AP_STACK:
		size = ap_stack_sizeW((StgAP_STACK *)p);
		break;
932 933
		
	    case ARR_WORDS:
934
		prim = rtsTrue;
Simon Marlow's avatar
Simon Marlow committed
935
		size = arr_words_sizeW((StgArrWords*)p);
936 937
		break;
		
938 939
	    case MUT_ARR_PTRS_CLEAN:
	    case MUT_ARR_PTRS_DIRTY:
940
	    case MUT_ARR_PTRS_FROZEN:
941
	    case MUT_ARR_PTRS_FROZEN0:
942
		prim = rtsTrue;
943 944 945 946
		size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
		break;
		
	    case TSO:
947
		prim = rtsTrue;
948
#ifdef PROFILING
949 950 951 952 953 954 955 956
		if (RtsFlags.ProfFlags.includeTSOs) {
		    size = tso_sizeW((StgTSO *)p);
		    break;
		} else {
		    // Skip this TSO and move on to the next object
		    p += tso_sizeW((StgTSO *)p);
		    continue;
		}
957 958 959
#else
		size = tso_sizeW((StgTSO *)p);
		break;
960 961
#endif

962 963 964 965 966
	    case TREC_CHUNK:
		prim = rtsTrue;
		size = sizeofW(StgTRecChunk);
		break;

967
	    default:
968
		barf("heapCensus, unknown object: %d", info->type);
969 970
	    }
	    
971 972
	    identity = NULL;

973
#ifdef PROFILING
974 975
	    // subtract the profiling overhead
	    real_size = size - sizeofW(StgProfHeader);
976 977
#else
	    real_size = size;
978
#endif
979 980

	    if (closureSatisfiesConstraints((StgClosure*)p)) {
981 982
#ifdef PROFILING
		if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
983 984 985 986 987 988
		    if (prim)
			census->prim += real_size;
		    else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
			census->not_used += real_size;
		    else
			census->used += real_size;
989
		} else
990
#endif
991 992
		{
		    identity = closureIdentity((StgClosure *)p);
993

994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032
		    if (identity != NULL) {
			ctr = lookupHashTable( census->hash, (StgWord)identity );
			if (ctr != NULL) {
#ifdef PROFILING
			    if (RtsFlags.ProfFlags.bioSelector != NULL) {
				if (prim)
				    ctr->c.ldv.prim += real_size;
				else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
				    ctr->c.ldv.not_used += real_size;
				else
				    ctr->c.ldv.used += real_size;
			    } else
#endif
			    {
				ctr->c.resid += real_size;
			    }
			} else {
			    ctr = arenaAlloc( census->arena, sizeof(counter) );
			    initLDVCtr(ctr);
			    insertHashTable( census->hash, (StgWord)identity, ctr );
			    ctr->identity = identity;
			    ctr->next = census->ctrs;
			    census->ctrs = ctr;

#ifdef PROFILING
			    if (RtsFlags.ProfFlags.bioSelector != NULL) {
				if (prim)
				    ctr->c.ldv.prim = real_size;
				else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
				    ctr->c.ldv.not_used = real_size;
				else
				    ctr->c.ldv.used = real_size;
			    } else
#endif
			    {
				ctr->c.resid = real_size;
			    }
			}
		    }
1033 1034 1035
		}
	    }

1036 1037 1038 1039
	    p += size;
	}
    }
}
1040

1041
void
1042
heapCensus( void )
1043
{
1044
  nat g;
1045 1046 1047 1048 1049 1050
  Census *census;

  census = &censuses[era];
  census->time  = mut_user_time();
    
  // calculate retainer sets if necessary
1051
#ifdef PROFILING
1052 1053
  if (doingRetainerProfiling()) {
      retainerProfile();
1054 1055 1056
  }
#endif

1057
#ifdef PROFILING
1058
  stat_startHeapCensus();
1059
#endif
1060

1061
  // Traverse the heap, collecting the census info
1062 1063 1064 1065 1066
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      heapCensusChain( census, generations[g].blocks );
      // Are we interested in large objects?  might be
      // confusing to include the stack in a heap profile.
      heapCensusChain( census, generations[g].large_objects );
1067 1068
  }

1069
  // dump out the census info
1070 1071 1072 1073 1074 1075 1076 1077
#ifdef PROFILING
    // We can't generate any info for LDV profiling until
    // the end of the run...
    if (!doingLDVProfiling())
	dumpCensus( census );
#else
    dumpCensus( census );
#endif
1078

1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089

  // free our storage, unless we're keeping all the census info for
  // future restriction by biography.
#ifdef PROFILING
  if (RtsFlags.ProfFlags.bioSelector == NULL)
  {
      freeHashTable( census->hash, NULL/* don't free the elements */ );
      arenaFree( census->arena );
      census->hash = NULL;
      census->arena = NULL;
  }
1090
#endif
1091 1092 1093

  // we're into the next time period now
  nextEra();
1094

1095
#ifdef PROFILING
1096
  stat_endHeapCensus();
1097
#endif
1098 1099
}