Stats.c 32.2 KB
Newer Older
1
/* -----------------------------------------------------------------------------
2
 *
3
 * (c) The GHC Team, 1998-2005
4 5 6 7 8
 *
 * Statistics and timing-related functions.
 *
 * ---------------------------------------------------------------------------*/

Simon Marlow's avatar
Simon Marlow committed
9
#include "PosixSource.h"
10
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
11

12
#include "RtsUtils.h"
13
#include "Schedule.h"
14
#include "Stats.h"
15
#include "Profiling.h"
16
#include "GetTime.h"
Simon Marlow's avatar
Simon Marlow committed
17
#include "sm/Storage.h"
18
#include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
Simon Marlow's avatar
Simon Marlow committed
19 20
#include "sm/GCThread.h"
#include "sm/BlockAlloc.h"
21

mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
22 23 24 25
#if USE_PAPI
#include "Papi.h"
#endif

26 27 28
/* huh? */
#define BIG_STRING_LEN              512

Simon Marlow's avatar
Simon Marlow committed
29
#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
30

Simon Marlow's avatar
Simon Marlow committed
31
static Time
Simon Marlow's avatar
Simon Marlow committed
32 33 34 35
    start_init_cpu, start_init_elapsed,
    end_init_cpu,   end_init_elapsed,
    start_exit_cpu, start_exit_elapsed,
    end_exit_cpu,   end_exit_elapsed;
36

Simon Marlow's avatar
Simon Marlow committed
37
static Time GC_tot_cpu  = 0;
38

Simon Marlow's avatar
Simon Marlow committed
39 40
static StgWord64 GC_tot_alloc      = 0;
static StgWord64 GC_tot_copied     = 0;
41

Ian Lynagh's avatar
Ian Lynagh committed
42
static StgWord64 GC_par_max_copied = 0;
43
static StgWord64 GC_par_tot_copied = 0;
44

sof's avatar
sof committed
45
#ifdef PROFILING
Simon Marlow's avatar
Simon Marlow committed
46 47
static Time RP_start_time  = 0, RP_tot_time  = 0;  // retainer prof user time
static Time RPe_start_time = 0, RPe_tot_time = 0;  // retainer prof elap time
sof's avatar
sof committed
48

Simon Marlow's avatar
Simon Marlow committed
49 50
static Time HC_start_time, HC_tot_time = 0;     // heap census prof user time
static Time HCe_start_time, HCe_tot_time = 0;   // heap census prof elap time
sof's avatar
sof committed
51
#endif
52

sof's avatar
sof committed
53
#ifdef PROFILING
54
#define PROF_VAL(x)   (x)
sof's avatar
sof committed
55 56 57
#else
#define PROF_VAL(x)   0
#endif
58

59 60 61
// current = current as of last GC
static lnat current_residency = 0; // in words; for stats only
static lnat max_residency     = 0;
62
static lnat cumulative_residency = 0;
Simon Marlow's avatar
Simon Marlow committed
63
static lnat residency_samples = 0; // for stats only
64
static lnat current_slop      = 0;
Simon Marlow's avatar
Simon Marlow committed
65
static lnat max_slop          = 0;
66

Simon Marlow's avatar
Simon Marlow committed
67
static lnat GC_end_faults = 0;
68

Simon Marlow's avatar
Simon Marlow committed
69 70 71
static Time *GC_coll_cpu = NULL;
static Time *GC_coll_elapsed = NULL;
static Time *GC_coll_max_pause = NULL;
72

73 74 75
static void statsFlush( void );
static void statsClose( void );

Simon Marlow's avatar
Simon Marlow committed
76 77 78
/* -----------------------------------------------------------------------------
   Current elapsed time
   ------------------------------------------------------------------------- */
sof's avatar
sof committed
79

Simon Marlow's avatar
Simon Marlow committed
80
Time stat_getElapsedTime(void)
Simon Marlow's avatar
Simon Marlow committed
81
{
Simon Marlow's avatar
Simon Marlow committed
82
    return getProcessElapsedTime() - start_init_elapsed;
Simon Marlow's avatar
Simon Marlow committed
83 84
}

Simon Marlow's avatar
Simon Marlow committed
85 86 87
/* ---------------------------------------------------------------------------
   Measure the current MUT time, for profiling
   ------------------------------------------------------------------------ */
88

Ian Lynagh's avatar
Ian Lynagh committed
89
double
Simon Marlow's avatar
Simon Marlow committed
90
mut_user_time_until( Time t )
Ian Lynagh's avatar
Ian Lynagh committed
91
{
Simon Marlow's avatar
Simon Marlow committed
92
    return TimeToSecondsDbl(t - GC_tot_cpu);
93 94
    // heapCensus() time is included in GC_tot_cpu, so we don't need
    // to subtract it here.
Ian Lynagh's avatar
Ian Lynagh committed
95 96
}

97
double
98
mut_user_time( void )
99
{
Simon Marlow's avatar
Simon Marlow committed
100
    Time cpu;
Simon Marlow's avatar
Simon Marlow committed
101
    cpu = getProcessCPUTime();
Ian Lynagh's avatar
Ian Lynagh committed
102
    return mut_user_time_until(cpu);
103 104
}

105
#ifdef PROFILING
106
/*
Simon Marlow's avatar
Simon Marlow committed
107
  mut_user_time_during_RP() returns the MUT time during retainer profiling.
108
  The same is for mut_user_time_during_HC();
109 110 111 112
 */
double
mut_user_time_during_RP( void )
{
Simon Marlow's avatar
Simon Marlow committed
113
    return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time);
114 115 116
}

double
117
mut_user_time_during_heap_census( void )
118
{
Simon Marlow's avatar
Simon Marlow committed
119
    return TimeToSecondsDbl(HC_start_time - GC_tot_cpu - RP_tot_time);
120
}
sof's avatar
sof committed
121
#endif /* PROFILING */
122

Simon Marlow's avatar
Simon Marlow committed
123 124 125 126
/* ---------------------------------------------------------------------------
   initStats0() has no dependencies, it can be called right at the beginning
   ------------------------------------------------------------------------ */

127
void
128 129
initStats0(void)
{
Simon Marlow's avatar
Simon Marlow committed
130 131 132 133
    start_init_cpu    = 0;
    start_init_elapsed = 0;
    end_init_cpu     = 0;
    end_init_elapsed  = 0;
134

Simon Marlow's avatar
Simon Marlow committed
135 136 137 138
    start_exit_cpu    = 0;
    start_exit_elapsed = 0;
    end_exit_cpu     = 0;
    end_exit_elapsed  = 0;
139 140 141 142

    GC_tot_alloc     = 0;
    GC_tot_copied    = 0;
    GC_par_max_copied = 0;
143
    GC_par_tot_copied = 0;
Simon Marlow's avatar
Simon Marlow committed
144
    GC_tot_cpu  = 0;
145 146 147 148 149 150 151 152 153 154 155 156 157

#ifdef PROFILING
    RP_start_time  = 0;
    RP_tot_time  = 0;
    RPe_start_time = 0;
    RPe_tot_time = 0;

    HC_start_time = 0;
    HC_tot_time = 0;
    HCe_start_time = 0;
    HCe_tot_time = 0;
#endif

Simon Marlow's avatar
Simon Marlow committed
158
    max_residency = 0;
159
    cumulative_residency = 0;
Simon Marlow's avatar
Simon Marlow committed
160 161
    residency_samples = 0;
    max_slop = 0;
162 163 164 165

    GC_end_faults = 0;
}    

Simon Marlow's avatar
Simon Marlow committed
166 167 168 169
/* ---------------------------------------------------------------------------
   initStats1() can be called after setupRtsFlags()
   ------------------------------------------------------------------------ */

170 171
void
initStats1 (void)
172 173 174 175
{
    nat i;
  
    if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
176
	statsPrintf("    Alloc    Copied     Live    GC    GC     TOT     TOT  Page Flts\n");
177
	statsPrintf("    bytes     bytes     bytes  user  elap    user    elap\n");
178
    }
Simon Marlow's avatar
Simon Marlow committed
179
    GC_coll_cpu = 
Simon Marlow's avatar
Simon Marlow committed
180 181
	(Time *)stgMallocBytes(
            sizeof(Time)*RtsFlags.GcFlags.generations,
Simon Marlow's avatar
Simon Marlow committed
182 183
	    "initStats");
    GC_coll_elapsed = 
Simon Marlow's avatar
Simon Marlow committed
184 185
	(Time *)stgMallocBytes(
	    sizeof(Time)*RtsFlags.GcFlags.generations,
186
	    "initStats");
Simon Marlow's avatar
Simon Marlow committed
187
    GC_coll_max_pause =
Simon Marlow's avatar
Simon Marlow committed
188 189
	(Time *)stgMallocBytes(
	    sizeof(Time)*RtsFlags.GcFlags.generations,
190
	    "initStats");
191
    for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
Simon Marlow's avatar
Simon Marlow committed
192 193 194
	GC_coll_cpu[i] = 0;
        GC_coll_elapsed[i] = 0;
        GC_coll_max_pause[i] = 0;
195
    }
196
}
197 198 199 200

/* -----------------------------------------------------------------------------
   Initialisation time...
   -------------------------------------------------------------------------- */
201 202

void
203
stat_startInit(void)
204
{
Simon Marlow's avatar
Simon Marlow committed
205
    getProcessTimes(&start_init_cpu, &start_init_elapsed);
206 207 208
}

void 
209
stat_endInit(void)
210
{
Simon Marlow's avatar
Simon Marlow committed
211
    getProcessTimes(&end_init_cpu, &end_init_elapsed);
212

mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
213 214 215 216 217 218 219 220 221 222
#if USE_PAPI
    /* We start counting events for the mutator
     * when garbage collection starts
     * we switch to the GC event set. */
    papi_start_mutator_count();

    /* This flag is needed to avoid counting the last GC */
    papi_is_reporting = 1;

#endif
223 224
}

225 226 227 228 229 230
/* -----------------------------------------------------------------------------
   stat_startExit and stat_endExit
   
   These two measure the time taken in shutdownHaskell().
   -------------------------------------------------------------------------- */

231 232 233
void
stat_startExit(void)
{
Simon Marlow's avatar
Simon Marlow committed
234
    getProcessTimes(&start_exit_cpu, &start_exit_elapsed);
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
235 236 237 238 239 240 241 242 243

#if USE_PAPI
    /* We stop counting mutator events
     * GC events are not being counted at this point */
    papi_stop_mutator_count();

    /* This flag is needed, because GC is run once more after this function */
    papi_is_reporting = 0;
#endif
244 245 246 247 248
}

void
stat_endExit(void)
{
Simon Marlow's avatar
Simon Marlow committed
249
    getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
250 251
}

252 253 254 255 256 257 258
/* -----------------------------------------------------------------------------
   Called at the beginning of each GC
   -------------------------------------------------------------------------- */

static nat rub_bell = 0;

void
259
stat_startGC (Capability *cap, gc_thread *gct)
260 261 262 263 264
{
    nat bell = RtsFlags.GcFlags.ringBell;

    if (bell) {
	if (bell > 1) {
265
	    debugBelch(" GC ");
266 267
	    rub_bell = 1;
	} else {
268
	    debugBelch("\007");
269 270 271
	}
    }

mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
272 273 274 275 276 277 278 279
#if USE_PAPI
    if(papi_is_reporting) {
      /* Switch to counting GC events */
      papi_stop_mutator_count();
      papi_start_gc_count();
    }
#endif

Simon Marlow's avatar
Simon Marlow committed
280
    getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
281 282 283 284 285 286 287 288 289 290

    // Post EVENT_GC_START with the same timestamp as used for stats
    // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
    // Here, as opposed to other places, the event is emitted on the cap
    // that initiates the GC and external tools expect it to have the same
    // timestamp as used in +RTS -s calculcations.
    traceEventGcStartAtT(cap,
                         TimeToNS(gct->gc_start_elapsed - start_init_elapsed));

    gct->gc_start_thread_cpu = getThreadCPUTime();
Simon Marlow's avatar
Simon Marlow committed
291 292 293 294 295 296 297 298

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
    {
        gct->gc_start_faults = getPageFaults();
    }
}

void
299
stat_gcWorkerThreadStart (gc_thread *gct STG_UNUSED)
Simon Marlow's avatar
Simon Marlow committed
300
{
301 302 303 304 305
#if 0
    /*
     * We dont' collect per-thread GC stats any more, but this code
     * could be used to do that if we want to in the future:
     */
Simon Marlow's avatar
Simon Marlow committed
306 307 308 309 310
    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
    {
        getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
        gct->gc_start_thread_cpu  = getThreadCPUTime();
    }
311
#endif
Simon Marlow's avatar
Simon Marlow committed
312 313 314
}

void
315
stat_gcWorkerThreadDone (gc_thread *gct STG_UNUSED)
Simon Marlow's avatar
Simon Marlow committed
316
{
317 318 319 320 321
#if 0
    /*
     * We dont' collect per-thread GC stats any more, but this code
     * could be used to do that if we want to in the future:
     */
Simon Marlow's avatar
Simon Marlow committed
322
    Time thread_cpu, elapsed, gc_cpu, gc_elapsed;
Simon Marlow's avatar
Simon Marlow committed
323 324 325 326 327 328 329 330 331 332 333

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
    {
        elapsed    = getProcessElapsedTime();
        thread_cpu = getThreadCPUTime();

        gc_cpu     = thread_cpu - gct->gc_start_thread_cpu;
        gc_elapsed = elapsed    - gct->gc_start_elapsed;
    
        taskDoneGC(gct->cap->running_task, gc_cpu, gc_elapsed);
    }
334
#endif
335 336 337 338 339 340 341
}

/* -----------------------------------------------------------------------------
   Called at the end of each GC
   -------------------------------------------------------------------------- */

void
342 343 344
stat_endGC (Capability *cap, gc_thread *gct,
            lnat alloc, lnat live, lnat copied, lnat slop, nat gen,
            nat par_n_threads, lnat par_max_copied, lnat par_tot_copied)
345
{
346 347 348 349
    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
        RtsFlags.ProfFlags.doHeapProfile)
        // heap profiling needs GC_tot_time
    {
350
        Time cpu, elapsed, gc_cpu, gc_elapsed;
351 352 353 354 355 356 357 358 359 360 361 362

        traceEventGcStats(cap,
                          CAPSET_HEAP_DEFAULT,
                          gen,
                          copied * sizeof(W_),
                          slop   * sizeof(W_),
                          /* current loss due to fragmentation */
                          (mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks)
                                 * BLOCK_SIZE_W * sizeof(W_),
                          par_n_threads,
                          par_max_copied * sizeof(W_),
                          par_tot_copied * sizeof(W_));
363
	
Simon Marlow's avatar
Simon Marlow committed
364 365
        getProcessTimes(&cpu, &elapsed);

366 367 368 369 370 371 372 373
        // Post EVENT_GC_END with the same timestamp as used for stats
        // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
        // Here, as opposed to other places, the event is emitted on the cap
        // that initiates the GC and external tools expect it to have the same
        // timestamp as used in +RTS -s calculcations.
        traceEventGcEndAtT(cap, TimeToNS(elapsed - start_init_elapsed));

        gc_elapsed = elapsed - gct->gc_start_elapsed;
Simon Marlow's avatar
Simon Marlow committed
374 375 376
        gc_cpu = cpu - gct->gc_start_cpu;

        if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
377
	    nat faults = getPageFaults();
378
	    
379
	    statsPrintf("%9ld %9ld %9ld",
380
		    alloc*sizeof(W_), copied*sizeof(W_), 
381
			live*sizeof(W_));
Simon Marlow's avatar
Simon Marlow committed
382
            statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld  (Gen: %2d)\n",
Simon Marlow's avatar
Simon Marlow committed
383 384 385 386
                    TimeToSecondsDbl(gc_cpu),
		    TimeToSecondsDbl(gc_elapsed),
		    TimeToSecondsDbl(cpu),
		    TimeToSecondsDbl(elapsed - start_init_elapsed),
Simon Marlow's avatar
Simon Marlow committed
387 388 389 390 391
		    faults - gct->gc_start_faults,
                        gct->gc_start_faults - GC_end_faults,
                    gen);

            GC_end_faults = faults;
392
	    statsFlush();
393 394
	}

Simon Marlow's avatar
Simon Marlow committed
395 396 397 398 399
        GC_coll_cpu[gen] += gc_cpu;
        GC_coll_elapsed[gen] += gc_elapsed;
        if (GC_coll_max_pause[gen] < gc_elapsed) {
            GC_coll_max_pause[gen] = gc_elapsed;
        }
400

Ian Lynagh's avatar
Ian Lynagh committed
401 402
	GC_tot_copied += (StgWord64) copied;
	GC_tot_alloc  += (StgWord64) alloc;
403 404
        GC_par_max_copied += (StgWord64) par_max_copied;
        GC_par_tot_copied += (StgWord64) par_tot_copied;
Simon Marlow's avatar
Simon Marlow committed
405
	GC_tot_cpu   += gc_cpu;
406 407 408 409 410 411 412 413 414 415 416 417 418
        
        /* For the moment we calculate both per-HEC and total allocation.
	 * There is thus redundancy here, but for the moment we will calculate
	 * it both the old and new way and assert they're the same.
	 * When we're sure it's working OK then we can simplify things.
	 * TODO: simplify calcAllocated and clearNurseries so they don't have
	 *       to calculate the total
	 */
        {
            lnat tot_alloc = 0;
            lnat n;
            for (n = 0; n < n_capabilities; n++) {
                tot_alloc += capabilities[n].total_allocated;
419 420 421
                traceEventHeapAllocated(&capabilities[n],
                                        CAPSET_HEAP_DEFAULT,
                                        capabilities[n].total_allocated * sizeof(W_));
422 423 424
            }
            ASSERT(GC_tot_alloc == tot_alloc);
        }
425 426 427
        traceEventHeapSize(cap,
	                   CAPSET_HEAP_DEFAULT,
			   mblocks_allocated * MBLOCK_SIZE_W * sizeof(W_));
428

429
	if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
Simon Marlow's avatar
Simon Marlow committed
430 431
	    if (live > max_residency) {
		max_residency = live;
432
	    }
433
            current_residency = live;
Simon Marlow's avatar
Simon Marlow committed
434
	    residency_samples++;
435
	    cumulative_residency += live;
436 437 438
	    traceEventHeapLive(cap, 
    	    	    	       CAPSET_HEAP_DEFAULT,
	                       live * sizeof(W_));
439
	}
440

Simon Marlow's avatar
Simon Marlow committed
441
        if (slop > max_slop) max_slop = slop;
442 443 444
    }

    if (rub_bell) {
445
	debugBelch("\b\b\b  \b\b\b");
446 447
	rub_bell = 0;
    }
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
448 449 450 451

#if USE_PAPI
    if(papi_is_reporting) {
      /* Switch to counting mutator events */
452 453 454 455 456
      if (gen == 0) {
          papi_stop_gc0_count();
      } else {
          papi_stop_gc1_count();
      }
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
457 458 459
      papi_start_mutator_count();
    }
#endif
460 461
}

462 463 464
/* -----------------------------------------------------------------------------
   Called at the beginning of each Retainer Profiliing
   -------------------------------------------------------------------------- */
465 466 467
#ifdef PROFILING
void
stat_startRP(void)
468
{
Simon Marlow's avatar
Simon Marlow committed
469
    Time user, elapsed;
470
    getProcessTimes( &user, &elapsed );
471 472 473

    RP_start_time = user;
    RPe_start_time = elapsed;
474
}
sof's avatar
sof committed
475
#endif /* PROFILING */
476 477 478 479

/* -----------------------------------------------------------------------------
   Called at the end of each Retainer Profiliing
   -------------------------------------------------------------------------- */
480 481 482 483

#ifdef PROFILING
void
stat_endRP(
484 485 486 487 488
  nat retainerGeneration,
#ifdef DEBUG_RETAINER
  nat maxCStackSize,
  int maxStackSize,
#endif
489
  double averageNumVisit)
490
{
Simon Marlow's avatar
Simon Marlow committed
491
    Time user, elapsed;
492
    getProcessTimes( &user, &elapsed );
493 494 495

    RP_tot_time += user - RP_start_time;
    RPe_tot_time += elapsed - RPe_start_time;
496 497 498 499 500 501 502 503 504

  fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n", 
    retainerGeneration, mut_user_time_during_RP());
#ifdef DEBUG_RETAINER
  fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
  fprintf(prof_file, "\tMax auxiliary stack size = %u\n", maxStackSize);
#endif
  fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
}
sof's avatar
sof committed
505
#endif /* PROFILING */
506 507

/* -----------------------------------------------------------------------------
508
   Called at the beginning of each heap census
509
   -------------------------------------------------------------------------- */
510 511
#ifdef PROFILING
void
512
stat_startHeapCensus(void)
513
{
Simon Marlow's avatar
Simon Marlow committed
514
    Time user, elapsed;
515
    getProcessTimes( &user, &elapsed );
516 517 518

    HC_start_time = user;
    HCe_start_time = elapsed;
519
}
sof's avatar
sof committed
520
#endif /* PROFILING */
521 522

/* -----------------------------------------------------------------------------
523
   Called at the end of each heap census
524
   -------------------------------------------------------------------------- */
525 526
#ifdef PROFILING
void
527
stat_endHeapCensus(void) 
528
{
Simon Marlow's avatar
Simon Marlow committed
529
    Time user, elapsed;
530
    getProcessTimes( &user, &elapsed );
531 532 533

    HC_tot_time += user - HC_start_time;
    HCe_tot_time += elapsed - HCe_start_time;
534
}
sof's avatar
sof committed
535
#endif /* PROFILING */
536

537 538 539 540 541 542 543 544
/* -----------------------------------------------------------------------------
   Called at the end of execution

   NOTE: number of allocations is not entirely accurate: it doesn't
   take into account the few bytes at the end of the heap that
   were left unused when the heap-check failed.
   -------------------------------------------------------------------------- */

Simon Marlow's avatar
Simon Marlow committed
545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
#ifdef DEBUG
#define TICK_VAR_INI(arity) \
  StgInt SLOW_CALLS_##arity = 1; \
  StgInt RIGHT_ARITY_##arity = 1; \
  StgInt TAGGED_PTR_##arity = 0;

TICK_VAR_INI(1)
TICK_VAR_INI(2)

StgInt TOTAL_CALLS=1;
#endif

/* Report the value of a counter */
#define REPORT(counter) \
  { \
Ian Lynagh's avatar
Ian Lynagh committed
560
    showStgWord64(counter,temp,rtsTrue/*commas*/); \
Simon Marlow's avatar
Simon Marlow committed
561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579
    statsPrintf("  (" #counter ")  : %s\n",temp);				\
  }

/* Report the value of a counter as a percentage of another counter */
#define REPORT_PCT(counter,countertot) \
  statsPrintf("  (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
	      counter*100.0/countertot)

#define TICK_PRINT(arity) \
  REPORT(SLOW_CALLS_##arity); \
  REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
  REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
  REPORT(RIGHT_ARITY_##arity); \
  REPORT(TAGGED_PTR_##arity)

#define TICK_PRINT_TOT(arity) \
  statsPrintf("  (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
	      SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)

Simon Marlow's avatar
Simon Marlow committed
580 581
static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; }
static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; }
582

583 584 585
void
stat_exit(int alloc)
{
Simon Marlow's avatar
Simon Marlow committed
586
    generation *gen;
Simon Marlow's avatar
Simon Marlow committed
587 588 589 590 591 592 593 594
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time init_cpu = 0;
    Time init_elapsed = 0;
    Time mut_cpu = 0;
    Time mut_elapsed = 0;
    Time exit_cpu = 0;
    Time exit_elapsed = 0;
Simon Marlow's avatar
Simon Marlow committed
595

596
    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
597 598

	char temp[BIG_STRING_LEN];
Simon Marlow's avatar
Simon Marlow committed
599 600
	Time tot_cpu;
	Time tot_elapsed;
Simon Marlow's avatar
Simon Marlow committed
601
	nat i, g, total_collections = 0;
602

Simon Marlow's avatar
Simon Marlow committed
603 604
	getProcessTimes( &tot_cpu, &tot_elapsed );
	tot_elapsed -= start_init_elapsed;
605 606

	GC_tot_alloc += alloc;
607

608 609 610 611 612 613
        for (i = 0; i < n_capabilities; i++) {
            traceEventHeapAllocated(&capabilities[i],
                                    CAPSET_HEAP_DEFAULT,
                                    capabilities[i].total_allocated * sizeof(W_));
        }

614 615 616
	/* Count total garbage collections */
	for (g = 0; g < RtsFlags.GcFlags.generations; g++)
	    total_collections += generations[g].collections;
617

Simon Marlow's avatar
Simon Marlow committed
618 619 620
	/* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
	if (tot_cpu  == 0.0)  tot_cpu = 1;
	if (tot_elapsed == 0.0) tot_elapsed = 1;
621
	
622 623 624
	if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
	    statsPrintf("%9ld %9.9s %9.9s", (lnat)alloc*sizeof(W_), "", "");
	    statsPrintf(" %5.2f %5.2f\n\n", 0.0, 0.0);
625 626
	}

Simon Marlow's avatar
Simon Marlow committed
627 628 629 630 631
        for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
            gc_cpu     += GC_coll_cpu[i];
            gc_elapsed += GC_coll_elapsed[i];
        }

632 633
        // heapCensus() is called by the GC, so RP and HC time are
        // included in the GC stats.  We therefore subtract them to
Ian Lynagh's avatar
Ian Lynagh committed
634
        // obtain the actual GC cpu time.
635 636
        gc_cpu     -= PROF_VAL(RP_tot_time + HC_tot_time);
        gc_elapsed -= PROF_VAL(RPe_tot_time + HCe_tot_time);
637

638 639
        init_cpu     = get_init_cpu();
        init_elapsed = get_init_elapsed();
dmp's avatar
dmp committed
640 641 642 643 644 645 646 647 648 649

        exit_cpu     = end_exit_cpu - start_exit_cpu;
        exit_elapsed = end_exit_elapsed - start_exit_elapsed;

        mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;

        mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
            - PROF_VAL(RP_tot_time + HC_tot_time);
        if (mut_cpu < 0) { mut_cpu = 0; }

650
	if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
Ian Lynagh's avatar
Ian Lynagh committed
651
	    showStgWord64(GC_tot_alloc*sizeof(W_), 
652
				 temp, rtsTrue/*commas*/);
653
	    statsPrintf("%16s bytes allocated in the heap\n", temp);
654

Ian Lynagh's avatar
Ian Lynagh committed
655
	    showStgWord64(GC_tot_copied*sizeof(W_), 
656
				 temp, rtsTrue/*commas*/);
657
	    statsPrintf("%16s bytes copied during GC\n", temp);
658

Simon Marlow's avatar
Simon Marlow committed
659 660
            if ( residency_samples > 0 ) {
		showStgWord64(max_residency*sizeof(W_), 
661
				     temp, rtsTrue/*commas*/);
662
		statsPrintf("%16s bytes maximum residency (%ld sample(s))\n",
Simon Marlow's avatar
Simon Marlow committed
663
			temp, residency_samples);
664
	    }
665

Simon Marlow's avatar
Simon Marlow committed
666
	    showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
667 668
	    statsPrintf("%16s bytes maximum slop\n", temp);

669
	    statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n", 
670
                        peak_mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
671
                        (peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
672 673

	    /* Print garbage collections in each gen */
Simon Marlow's avatar
Simon Marlow committed
674 675 676 677 678 679 680
            statsPrintf("                                    Tot time (elapsed)  Avg pause  Max pause\n");
            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                gen = &generations[g];
                statsPrintf("  Gen %2d     %5d colls, %5d par   %5.2fs   %5.2fs     %3.4fs    %3.4fs\n",
                            gen->no,
                            gen->collections,
                            gen->par_collections,
Simon Marlow's avatar
Simon Marlow committed
681 682 683 684
                            TimeToSecondsDbl(GC_coll_cpu[g]),
                            TimeToSecondsDbl(GC_coll_elapsed[g]),
                            gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections),
                            TimeToSecondsDbl(GC_coll_max_pause[g]));
Simon Marlow's avatar
Simon Marlow committed
685
            }
686

687
#if defined(THREADED_RTS)
688 689 690 691
            if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) {
                statsPrintf("\n  Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n", 
                            100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1)
                                / (n_capabilities - 1)
692 693 694
                    );
            }
#endif
Simon Marlow's avatar
Simon Marlow committed
695
            statsPrintf("\n");
696

697
#if defined(THREADED_RTS)
698 699 700 701
            statsPrintf("  TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
                        taskCount, taskCount - workerCount,
                        peakWorkerCount, workerCount,
                        n_capabilities);
702

703 704
	    statsPrintf("\n");

705 706
            {
                nat i;
707
                SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
708
                for (i = 0; i < n_capabilities; i++) {
709 710
                    sparks.created   += capabilities[i].spark_stats.created;
                    sparks.dud       += capabilities[i].spark_stats.dud;
711
                    sparks.overflowed+= capabilities[i].spark_stats.overflowed;
712 713 714
                    sparks.converted += capabilities[i].spark_stats.converted;
                    sparks.gcd       += capabilities[i].spark_stats.gcd;
                    sparks.fizzled   += capabilities[i].spark_stats.fizzled;
715 716
                }

717 718 719
                statsPrintf("  SPARKS: %ld (%ld converted, %ld overflowed, %ld dud, %ld GC'd, %ld fizzled)\n\n",
                            sparks.created + sparks.dud + sparks.overflowed,
                            sparks.converted, sparks.overflowed, sparks.dud,
720
                            sparks.gcd, sparks.fizzled);
721
            }
722 723
#endif

Simon Marlow's avatar
Simon Marlow committed
724
	    statsPrintf("  INIT    time  %6.2fs  (%6.2fs elapsed)\n",
Simon Marlow's avatar
Simon Marlow committed
725
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed));
Simon Marlow's avatar
Simon Marlow committed
726 727

            statsPrintf("  MUT     time  %6.2fs  (%6.2fs elapsed)\n",
Simon Marlow's avatar
Simon Marlow committed
728
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed));
Simon Marlow's avatar
Simon Marlow committed
729
            statsPrintf("  GC      time  %6.2fs  (%6.2fs elapsed)\n",
Simon Marlow's avatar
Simon Marlow committed
730
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
Simon Marlow's avatar
Simon Marlow committed
731

732
#ifdef PROFILING
Simon Marlow's avatar
Simon Marlow committed
733
	    statsPrintf("  RP      time  %6.2fs  (%6.2fs elapsed)\n",
Simon Marlow's avatar
Simon Marlow committed
734
		    TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time));
Simon Marlow's avatar
Simon Marlow committed
735
	    statsPrintf("  PROF    time  %6.2fs  (%6.2fs elapsed)\n",
Simon Marlow's avatar
Simon Marlow committed
736
		    TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time));
737
#endif 
Simon Marlow's avatar
Simon Marlow committed
738
	    statsPrintf("  EXIT    time  %6.2fs  (%6.2fs elapsed)\n",
Simon Marlow's avatar
Simon Marlow committed
739
		    TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed));
Simon Marlow's avatar
Simon Marlow committed
740
	    statsPrintf("  Total   time  %6.2fs  (%6.2fs elapsed)\n\n",
Simon Marlow's avatar
Simon Marlow committed
741
		    TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed));
Simon Marlow's avatar
Simon Marlow committed
742 743
#ifndef THREADED_RTS
	    statsPrintf("  %%GC     time     %5.1f%%  (%.1f%% elapsed)\n\n",
Simon Marlow's avatar
Simon Marlow committed
744 745
		    TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu),
		    TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed));
Simon Marlow's avatar
Simon Marlow committed
746 747
#endif

Simon Marlow's avatar
Simon Marlow committed
748
            if (mut_cpu == 0) {
Ian Lynagh's avatar
Ian Lynagh committed
749
		showStgWord64(0, temp, rtsTrue/*commas*/);
Simon Marlow's avatar
Simon Marlow committed
750
            } else {
Ian Lynagh's avatar
Ian Lynagh committed
751
		showStgWord64(
Simon Marlow's avatar
Simon Marlow committed
752 753 754 755
                    (StgWord64)((GC_tot_alloc*sizeof(W_)) / TimeToSecondsDbl(mut_cpu)),
                    temp, rtsTrue/*commas*/);
            }

756
	    statsPrintf("  Alloc rate    %s bytes per MUT second\n\n", temp);
757
	
758
	    statsPrintf("  Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
Simon Marlow's avatar
Simon Marlow committed
759
                    TimeToSecondsDbl(tot_cpu - gc_cpu -
Simon Marlow's avatar
Simon Marlow committed
760
				PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100 
Simon Marlow's avatar
Simon Marlow committed
761
		    / TimeToSecondsDbl(tot_cpu), 
Simon Marlow's avatar
Simon Marlow committed
762 763 764
                    TimeToSecondsDbl(tot_cpu - gc_cpu -
                                PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                    / TimeToSecondsDbl(tot_elapsed));
Simon Marlow's avatar
Simon Marlow committed
765 766 767 768 769 770 771 772 773

            /*
            TICK_PRINT(1);
            TICK_PRINT(2);
	    REPORT(TOTAL_CALLS);
            TICK_PRINT_TOT(1);
            TICK_PRINT_TOT(2);
            */

mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
774
#if USE_PAPI
775
            papi_stats_report();
776 777 778
#endif
#if defined(THREADED_RTS) && defined(PROF_SPIN)
            {
Simon Marlow's avatar
Simon Marlow committed
779
                nat g;
780 781
                
                statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
782
                statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
783
                for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
Simon Marlow's avatar
Simon Marlow committed
784
                    statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
785 786
                }
            }
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
787
#endif
788 789
	}

790
	if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
Ian Lynagh's avatar
Ian Lynagh committed
791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810
      char *fmt1, *fmt2;
      if (RtsFlags.MiscFlags.machineReadable) {
          fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
          fmt2 = " ,(\"num_GCs\", \"%d\")\n"
                 " ,(\"average_bytes_used\", \"%ld\")\n"
                 " ,(\"max_bytes_used\", \"%ld\")\n"
                 " ,(\"num_byte_usage_samples\", \"%ld\")\n"
                 " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
                 " ,(\"init_cpu_seconds\", \"%.2f\")\n"
                 " ,(\"init_wall_seconds\", \"%.2f\")\n"
                 " ,(\"mutator_cpu_seconds\", \"%.2f\")\n"
                 " ,(\"mutator_wall_seconds\", \"%.2f\")\n"
                 " ,(\"GC_cpu_seconds\", \"%.2f\")\n"
                 " ,(\"GC_wall_seconds\", \"%.2f\")\n"
                 " ]\n";
      }
      else {
          fmt1 = "<<ghc: %llu bytes, ";
          fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n";
      }
rrt's avatar
rrt committed
811
	  /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
Ian Lynagh's avatar
Ian Lynagh committed
812
	  statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
Ian Lynagh's avatar
Ian Lynagh committed
813
	  statsPrintf(fmt2,
rrt's avatar
rrt committed
814
		    total_collections,
Simon Marlow's avatar
Simon Marlow committed
815
		    residency_samples == 0 ? 0 : 
816
		        cumulative_residency*sizeof(W_)/residency_samples, 
Simon Marlow's avatar
Simon Marlow committed
817 818
		    max_residency*sizeof(W_), 
		    residency_samples,
819
		    (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
Simon Marlow's avatar
Simon Marlow committed
820 821 822
		    TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
		    TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
		    TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
823
	}
824

825 826
	statsFlush();
	statsClose();
827
    }
simonmarhaskell@gmail.com's avatar
simonmarhaskell@gmail.com committed
828

Simon Marlow's avatar
Simon Marlow committed
829
    if (GC_coll_cpu) {
Simon Marlow's avatar
Simon Marlow committed
830
      stgFree(GC_coll_cpu);
Simon Marlow's avatar
Simon Marlow committed
831 832 833
      GC_coll_cpu = NULL;
    }
    if (GC_coll_elapsed) {
Simon Marlow's avatar
Simon Marlow committed
834
      stgFree(GC_coll_elapsed);
Simon Marlow's avatar
Simon Marlow committed
835 836 837 838 839 840
      GC_coll_elapsed = NULL;
    }
    if (GC_coll_max_pause) {
      stgFree(GC_coll_max_pause);
      GC_coll_max_pause = NULL;
    }
841
}
842 843 844 845 846 847 848

/* -----------------------------------------------------------------------------
   stat_describe_gens

   Produce some detailed info on the state of the generational GC.
   -------------------------------------------------------------------------- */
void
849
statDescribeGens(void)
850
{
851
  nat g, mut, lge, i;
Simon Marlow's avatar
Simon Marlow committed
852
  lnat gen_slop;
853
  lnat tot_live, tot_slop;
Simon Marlow's avatar
Simon Marlow committed
854
  lnat gen_live, gen_blocks;
855
  bdescr *bd;
Simon Marlow's avatar
Simon Marlow committed
856 857
  generation *gen;
  
858
  debugBelch(
Simon Marlow's avatar
Simon Marlow committed
859 860 861 862
"----------------------------------------------------------\n"
"  Gen     Max  Mut-list  Blocks    Large     Live     Slop\n"
"       Blocks     Bytes          Objects                  \n"
"----------------------------------------------------------\n");
863

864 865
  tot_live = 0;
  tot_slop = 0;
Simon Marlow's avatar
Simon Marlow committed
866

867
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
Simon Marlow's avatar
Simon Marlow committed
868 869 870 871 872 873 874 875 876
      gen = &generations[g];

      for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
          lge++;
      }

      gen_live   = genLiveWords(gen);
      gen_blocks = genLiveBlocks(gen);

877
      mut = 0;
878 879
      for (i = 0; i < n_capabilities; i++) {
          mut += countOccupied(capabilities[i].mut_lists[g]);
880 881 882 883 884 885 886 887

          // Add the pinned object block.
          bd = capabilities[i].pinned_object_block;
          if (bd != NULL) {
              gen_live   += bd->free - bd->start;
              gen_blocks += bd->blocks;
          }

Simon Marlow's avatar
Simon Marlow committed
888 889
          gen_live   += gcThreadLiveWords(i,g);
          gen_blocks += gcThreadLiveBlocks(i,g);
890 891
      }

892
      debugBelch("%5d %7ld %9d", g, (lnat)gen->max_blocks, mut);
Simon Marlow's avatar
Simon Marlow committed
893

Simon Marlow's avatar
Simon Marlow committed
894 895 896 897 898 899
      gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;

      debugBelch("%8ld %8d %8ld %8ld\n", gen_blocks, lge,
                 gen_live*sizeof(W_), gen_slop*sizeof(W_));
      tot_live += gen_live;
      tot_slop += gen_slop;
900
  }
Simon Marlow's avatar
Simon Marlow committed
901 902 903
  debugBelch("----------------------------------------------------------\n");
  debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));
  debugBelch("----------------------------------------------------------\n");
904
  debugBelch("\n");
905
}
906 907 908 909 910 911

/* -----------------------------------------------------------------------------
   Stats available via a programmatic interface, so eg. GHCi can time
   each compilation and expression evaluation.
   -------------------------------------------------------------------------- */

912
extern HsInt64 getAllocations( void ) 
913
{ return (HsInt64)GC_tot_alloc * sizeof(W_); }
914

915 916 917 918 919 920
/* EZY: I'm not convinced I got all the casting right. */

extern void getGCStats( GCStats *s )
{
    nat total_collections = 0;
    nat g;
Simon Marlow's avatar
Simon Marlow committed
921 922 923 924
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time current_elapsed = 0;
    Time current_cpu = 0;
925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947

    getProcessTimes(&current_cpu, &current_elapsed);

    /* EZY: static inline'ify these */
    for (g = 0; g < RtsFlags.GcFlags.generations; g++)
        total_collections += generations[g].collections;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gc_cpu     += GC_coll_cpu[g];
        gc_elapsed += GC_coll_elapsed[g];
    }

    s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_);
    s->num_gcs = total_collections;
    s->num_byte_usage_samples = residency_samples;
    s->max_bytes_used = max_residency*sizeof(W_);
    s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_);
    s->peak_megabytes_allocated = (StgWord64)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L));
    s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_);
    s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_);
    s->current_bytes_used = current_residency*(StgWord64)sizeof(W_);
    s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_);
    /*
Simon Marlow's avatar
Simon Marlow committed
948 949
    s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu());
    s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed());
950
    */
Simon Marlow's avatar
Simon Marlow committed
951 952 953 954
    s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
    s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed- end_init_elapsed - gc_elapsed);
    s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu);
    s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed);
955
    /* EZY: Being consistent with incremental output, but maybe should also discount init */
Simon Marlow's avatar
Simon Marlow committed
956 957
    s->cpu_seconds = TimeToSecondsDbl(current_cpu);
    s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed);
958
    s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_);
959 960 961 962 963