Profiling.c 23.4 KB
Newer Older
1
/* -----------------------------------------------------------------------------
sof's avatar
sof committed
2
 * $Id: Profiling.c,v 1.37 2003/08/22 22:24:13 sof Exp $
3
 *
4
 * (c) The GHC Team, 1998-2000
5
6
7
8
9
10
11
 *
 * Support for profiling
 *
 * ---------------------------------------------------------------------------*/

#ifdef PROFILING

12
#include "PosixSource.h"
13
14
15
#include "Rts.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
16
#include "Profiling.h"
17
18
#include "Storage.h"
#include "Proftimer.h"
sof's avatar
sof committed
19
#include "Timer.h"
20
#include "ProfHeap.h"
21
#include "Arena.h"
22
23
#include "RetainerProfile.h"
#include "LdvProfile.h"
24

25
26
#include <string.h>

27
28
29
30
/*
 * Profiling allocation arena.
 */
Arena *prof_arena;
31
32
33
34
35
36
37
38
39
40
41
42

/*
 * Global variables used to assign unique IDs to cc's, ccs's, and 
 * closure_cats
 */

unsigned int CC_ID;
unsigned int CCS_ID;
unsigned int HP_ID;

/* figures for the profiling report.
 */
43
44
static ullong total_alloc;
static lnat   total_prof_ticks;
45

46
/* Globals for opening the profiling log file(s)
47
48
 */
static char *prof_filename; /* prof report file name = <program>.prof */
49
FILE *prof_file;
50

51
52
53
static char *hp_filename;	/* heap profile (hp2ps style) log file */
FILE *hp_file;

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/* The Current Cost Centre Stack (for attributing costs)
 */
CostCentreStack *CCCS;

/* Linked lists to keep track of cc's and ccs's that haven't
 * been declared in the log file yet
 */
CostCentre *CC_LIST;
CostCentreStack *CCS_LIST;

/*
 * Built-in cost centres and cost-centre stacks:
 *
 *    MAIN   is the root of the cost-centre stack tree.  If there are
 *           no _scc_s in the program, all costs will be attributed
 *           to MAIN.
 *
 *    SYSTEM is the RTS in general (scheduler, etc.).  All costs for
 *           RTS operations apart from garbage collection are attributed
 *           to SYSTEM.
 *
 *    GC     is the storage manager / garbage collector.
 *
 *    OVERHEAD gets all costs generated by the profiling system
 *           itself.  These are costs that would not be incurred
 *           during non-profiled execution of the program.
 *
 *    SUBSUMED is the one-and-only CCS placed on top-level functions. 
 *           It indicates that all costs are to be attributed to the
 *           enclosing cost centre stack.  SUBSUMED never accumulates
84
85
 *           any costs.  The is_caf flag is set on the subsumed cost
 *           centre.
86
87
88
89
90
 *
 *    DONT_CARE is a placeholder cost-centre we assign to static
 *           constructors.  It should *never* accumulate any costs.
 */

91
92
93
94
95
96
CC_DECLARE(CC_MAIN,      "MAIN", 	"MAIN",      CC_IS_BORING, );
CC_DECLARE(CC_SYSTEM,    "SYSTEM",   	"MAIN",      CC_IS_BORING, );
CC_DECLARE(CC_GC,        "GC",   	"GC",        CC_IS_BORING, );
CC_DECLARE(CC_OVERHEAD,  "OVERHEAD_of", "PROFILING", CC_IS_CAF,    );
CC_DECLARE(CC_SUBSUMED,  "SUBSUMED",    "MAIN",      CC_IS_CAF,    );
CC_DECLARE(CC_DONT_CARE, "DONT_CARE",   "MAIN",      CC_IS_BORING, );
97

98
99
100
101
102
103
CCS_DECLARE(CCS_MAIN, 	    CC_MAIN,       );
CCS_DECLARE(CCS_SYSTEM,	    CC_SYSTEM,     );
CCS_DECLARE(CCS_GC,         CC_GC,         );
CCS_DECLARE(CCS_OVERHEAD,   CC_OVERHEAD,   );
CCS_DECLARE(CCS_SUBSUMED,   CC_SUBSUMED,   );
CCS_DECLARE(CCS_DONT_CARE,  CC_DONT_CARE, );
104

105
106
107
108
109
110
111
112
113
114
/* 
 * Uniques for the XML log-file format
 */
#define CC_UQ         1
#define CCS_UQ        2
#define TC_UQ         3
#define HEAP_OBJ_UQ   4
#define TIME_UPD_UQ   5
#define HEAP_UPD_UQ   6

115
116
117
118
/* 
 * Static Functions
 */

119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
static  CostCentreStack * ActualPush_     ( CostCentreStack *ccs, CostCentre *cc, 
					    CostCentreStack *new_ccs );
static  rtsBool           ccs_to_ignore   ( CostCentreStack *ccs );
static  void              count_ticks     ( CostCentreStack *ccs );
static  void              inherit_costs   ( CostCentreStack *ccs );
static  void              reportCCS       ( CostCentreStack *ccs, nat indent );
static  void              DecCCS          ( CostCentreStack *ccs );
static  void              DecBackEdge     ( CostCentreStack *ccs, 
					    CostCentreStack *oldccs );
static  CostCentreStack * CheckLoop       ( CostCentreStack *ccs, CostCentre *cc );
static  CostCentreStack * pruneCCSTree    ( CostCentreStack *ccs );
static  CostCentreStack * ActualPush      ( CostCentreStack *, CostCentre * );
static  CostCentreStack * IsInIndexTable  ( IndexTable *, CostCentre * );
static  IndexTable *      AddToIndexTable ( IndexTable *, CostCentreStack *, 
					    CostCentre *, unsigned int );
134
static  void              ccsSetSelected  ( CostCentreStack *ccs );
135

136
137
static  void              initTimeProfiling   ( void );
static  void              initProfilingLogFile( void );
andy's avatar
andy committed
138

139
static  void              reportCCS_XML       ( CostCentreStack *ccs );
140
141
142
143
144
145

/* -----------------------------------------------------------------------------
   Initialise the profiling environment
   -------------------------------------------------------------------------- */

void
146
initProfiling1 (void)
147
{
148
149
150
  // initialise our arena
  prof_arena = newArena();

151
152
  /* for the benefit of allocate()... */
  CCCS = CCS_SYSTEM;
153
  
154
  /* Initialize counters for IDs */
155
156
157
  CC_ID  = 1;
  CCS_ID = 1;
  HP_ID  = 1;
158
159
160
161
162
163
164
165
166
167
168
169
170
  
  /* Initialize Declaration lists to NULL */
  CC_LIST  = NULL;
  CCS_LIST = NULL;

  /* Register all the cost centres / stacks in the program 
   * CC_MAIN gets link = 0, all others have non-zero link.
   */
  REGISTER_CC(CC_MAIN);
  REGISTER_CC(CC_SYSTEM);
  REGISTER_CC(CC_GC);
  REGISTER_CC(CC_OVERHEAD);
  REGISTER_CC(CC_SUBSUMED);
171
  REGISTER_CC(CC_DONT_CARE);
172
173
174
175
176
  REGISTER_CCS(CCS_MAIN);
  REGISTER_CCS(CCS_SYSTEM);
  REGISTER_CCS(CCS_GC);
  REGISTER_CCS(CCS_OVERHEAD);
  REGISTER_CCS(CCS_SUBSUMED);
177
  REGISTER_CCS(CCS_DONT_CARE);
178
179

  CCCS = CCS_OVERHEAD;
180
181
182
183
184
185
186
187
188
189
190

  /* cost centres are registered by the per-module 
   * initialisation code now... 
   */
}

void
initProfiling2 (void)
{
  CostCentreStack *ccs, *next;

191
192
  CCCS = CCS_SYSTEM;

193
  /* Set up the log file, and dump the header and cost centre
194
   * information into it.  */
195
196
  initProfilingLogFile();

197
198
199
200
  /* find all the "special" cost centre stacks, and make them children
   * of CCS_MAIN.
   */
  ASSERT(CCS_MAIN->prevStack == 0);
201
  CCS_MAIN->root = CC_MAIN;
202
  ccsSetSelected(CCS_MAIN);
203
  DecCCS(CCS_MAIN);
204

205
206
207
208
  for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
    next = ccs->prevStack;
    ccs->prevStack = 0;
    ActualPush_(CCS_MAIN,ccs->cc,ccs);
209
    ccs->root = ccs->cc;
210
211
212
    ccs = next;
  }
  
213
214
215
216
217
218
219
220
  if (RtsFlags.CcFlags.doCostCentres) {
    initTimeProfiling();
  }

  if (RtsFlags.ProfFlags.doHeapProfile) {
    initHeapProfiling();
  }
}
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259

// Decide whether closures with this CCS should contribute to the heap
// profile.
static void 
ccsSetSelected( CostCentreStack *ccs )
{
    if (RtsFlags.ProfFlags.modSelector) {
	if (! strMatchesSelector( ccs->cc->module,
				  RtsFlags.ProfFlags.modSelector ) ) {
	    ccs->selected = 0;
	    return;
	}
    }
    if (RtsFlags.ProfFlags.ccSelector) {
	if (! strMatchesSelector( ccs->cc->label,
				  RtsFlags.ProfFlags.ccSelector ) ) {
	    ccs->selected = 0;
	    return;
	}
    }
    if (RtsFlags.ProfFlags.ccsSelector) {
	CostCentreStack *c;
	for (c = ccs; c != NULL; c = c->prevStack) {
	    if ( strMatchesSelector( c->cc->label,
				     RtsFlags.ProfFlags.ccsSelector )) {
		break; 
	    }
	}
	if (c == NULL) {
	    ccs->selected = 0;
	    return;
	}
    }

    ccs->selected = 1;
    return;
}


260
261
static void
initProfilingLogFile(void)
262
{
263
    /* Initialise the log file name */
sof's avatar
sof committed
264
265
    prof_filename = arenaAlloc(prof_arena, strlen(prog_name) + 6);
    sprintf(prof_filename, "%s.prof", prog_name);
266
267
268
269
270

    /* open the log file */
    if ((prof_file = fopen(prof_filename, "w")) == NULL) {
	fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
	RtsFlags.CcFlags.doCostCentres = 0;
271
272
        // The following line was added by Sung; retainer/LDV profiling may need
        // two output files, i.e., <program>.prof/hp.
273
        if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
274
            RtsFlags.ProfFlags.doHeapProfile = 0;
275
276
	return;
    }
277

278
279
280
281
282
283
284
285
286
287
288
289
290
291
    if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
	/* dump the time, and the profiling interval */
	fprintf(prof_file, "\"%s\"\n", time_str());
	fprintf(prof_file, "\"%d ms\"\n", TICK_MILLISECS);
	
	/* declare all the cost centres */
	{
	    CostCentre *cc;
	    for (cc = CC_LIST; cc != NULL; cc = cc->link) {
		fprintf(prof_file, "%d %d \"%s\" \"%s\"\n",
			CC_UQ, cc->ccID, cc->label, cc->module);
	    }
	}
    }
292
    
293
294
    if (RtsFlags.ProfFlags.doHeapProfile) {
	/* Initialise the log file name */
sof's avatar
sof committed
295
296
	hp_filename = arenaAlloc(prof_arena, strlen(prog_name) + 6);
	sprintf(hp_filename, "%s.hp", prog_name);
297
298
299
300
301
302
303
304
	
	/* open the log file */
	if ((hp_file = fopen(hp_filename, "w")) == NULL) {
	    fprintf(stderr, "Can't open profiling report file %s\n", 
		    hp_filename);
	    RtsFlags.ProfFlags.doHeapProfile = 0;
	    return;
	}
305
306
307
308
309
310
    }
}

void
initTimeProfiling(void)
{
311
  /* Start ticking */
312
313
314
315
316
317
  startProfTimer();
};

void 
endProfiling ( void )
{
318
319
320
321
322
323
  if (RtsFlags.CcFlags.doCostCentres) {
    stopProfTimer();
  }
  if (RtsFlags.ProfFlags.doHeapProfile) {
    endHeapProfiling();
  }
324
325
326
}

/* -----------------------------------------------------------------------------
327
   Set cost centre stack when entering a function.
328
329
330
331
332
333
334
335
336
337
338
339
   -------------------------------------------------------------------------- */
rtsBool entering_PAP;

CostCentreStack *
EnterFunCCS ( CostCentreStack *cccs, CostCentreStack *ccsfn )
{
  /* PAP_entry has already set CCCS for us */
  if (entering_PAP) {
    entering_PAP = rtsFalse;
    return CCCS;
  }

340
  if (ccsfn->root->is_caf == CC_IS_CAF) {
341
    return AppendCCS(cccs,ccsfn);
342
343
  } else {
    return ccsfn;
344
345
346
  }
}

347
348
349
350
/* -----------------------------------------------------------------------------
   Cost-centre stack manipulation
   -------------------------------------------------------------------------- */

351
352
353
354
355
356
357
358
#ifdef DEBUG
CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
CostCentreStack *
PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
#define PushCostCentre _PushCostCentre
{
  IF_DEBUG(prof, 
	   fprintf(stderr,"Pushing %s on ", cc->label);
359
	   fprintCCS(stderr,ccs);
360
361
362
363
364
	   fprintf(stderr,"\n"));
  return PushCostCentre(ccs,cc);
}
#endif

365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
CostCentreStack *
PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
{
  CostCentreStack *temp_ccs;
  
  if (ccs == EMPTY_STACK)
    return ActualPush(ccs,cc);
  else {
    if (ccs->cc == cc)
      return ccs;
    else {
      /* check if we've already memoized this stack */
      temp_ccs = IsInIndexTable(ccs->indexTable,cc);
      
      if (temp_ccs != EMPTY_STACK)
	return temp_ccs;
      else {
382
383
384
385
386
387
388
389
	temp_ccs = CheckLoop(ccs,cc);
	if (temp_ccs != NULL) {
	  /* we have recursed to an older CCS.  Mark this in
	   * the index table, and emit a "back edge" into the
	   * log file.
	   */
	  ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
	  DecBackEdge(temp_ccs,ccs);
390
	  return temp_ccs;
391
	} else {
392
	  return ActualPush(ccs,cc);
393
	}
394
395
396
397
398
      }
    }
  }
}

399
400
401
402
403
404
405
406
407
408
409
static CostCentreStack *
CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
{
  while (ccs != EMPTY_STACK) {
    if (ccs->cc == cc)
      return ccs;
    ccs = ccs->prevStack;
  }
  return NULL;
}

410
411
412
413
414
415
416
417
418
/* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */

#ifdef DEBUG
CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
CostCentreStack *
AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
#define AppendCCS _AppendCCS
{
  IF_DEBUG(prof, 
419
420
	   if (ccs1 != ccs2) {
	     fprintf(stderr,"Appending ");
421
	     fprintCCS(stderr,ccs1);
422
	     fprintf(stderr," to ");
423
	     fprintCCS(stderr,ccs2);
424
	     fprintf(stderr,"\n");});
425
426
427
428
429
430
431
  return AppendCCS(ccs1,ccs2);
}
#endif

CostCentreStack *
AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
{
432
  CostCentreStack *ccs = NULL;
433
434
435
436
437

  if (ccs1 == ccs2) {
    return ccs1;
  }

438
  if (ccs2->cc->is_caf == CC_IS_CAF) {
439
440
441
    return ccs1;
  }
  
442
443
444
445
  if (ccs2->prevStack != NULL) {
    ccs = AppendCCS(ccs1, ccs2->prevStack);
  }

446
447
  return PushCostCentre(ccs,ccs2->cc);
}
448

449
static CostCentreStack *
450
451
452
453
454
ActualPush ( CostCentreStack *ccs, CostCentre *cc )
{
  CostCentreStack *new_ccs;
  
  /* allocate space for a new CostCentreStack */
455
  new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
  
  return ActualPush_(ccs, cc, new_ccs);
}

static CostCentreStack *
ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
{
  /* assign values to each member of the structure */
  ASSIGN_CCS_ID(new_ccs->ccsID);
  
  new_ccs->cc = cc;
  new_ccs->prevStack = ccs;
  
  new_ccs->indexTable = EMPTY_TABLE;
  
  /* Initialise the various _scc_ counters to zero
   */
  new_ccs->scc_count        = 0;
  
  /* Initialize all other stats here.  There should be a quick way
   * that's easily used elsewhere too 
   */
  new_ccs->time_ticks = 0;
  new_ccs->mem_alloc = 0;
480
481
  new_ccs->inherited_ticks = 0;
  new_ccs->inherited_alloc = 0;
482
  
483
484
  new_ccs->root = ccs->root;

485
486
487
  // Set the selected field.
  ccsSetSelected(new_ccs);

488
489
  /* update the memoization table for the parent stack */
  if (ccs != EMPTY_STACK)
490
491
    ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc, 
				      0/*not a back edge*/);
492
  
493
  /* make sure this CC is declared at the next heap/time sample */
494
495
496
497
498
499
500
  DecCCS(new_ccs);
  
  /* return a pointer to the new stack */
  return new_ccs;
}


501
static CostCentreStack *
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
IsInIndexTable(IndexTable *it, CostCentre *cc)
{
  while (it!=EMPTY_TABLE)
    {
      if (it->cc==cc)
	return it->ccs;
      else
	it = it->next;
    }
  
  /* otherwise we never found it so return EMPTY_TABLE */
  return EMPTY_TABLE;
}


517
518
519
static IndexTable *
AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs, 
		CostCentre *cc, unsigned int back_edge)
520
521
522
{
  IndexTable *new_it;
  
523
  new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
524
525
526
527
  
  new_it->cc = cc;
  new_it->ccs = new_ccs;
  new_it->next = it;
528
  new_it->back_edge = back_edge;
529
530
531
532
533
534
535
  return new_it;
}


static void
DecCCS(CostCentreStack *ccs)
{
536
537
538
539
540
541
542
543
  if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
    if (ccs->prevStack == EMPTY_STACK)
      fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ, 
	      ccs->ccsID, ccs->cc->ccID);
    else
      fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ, 
	      ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
  }
544
545
}

546
547
548
549
550
551
552
553
554
555
556
557
558
static void
DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
{
  if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
    if (ccs->prevStack == EMPTY_STACK)
      fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ, 
	      ccs->ccsID, ccs->cc->ccID);
    else
      fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ, 
	      ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
  }
}

559
560
561
562
/* -----------------------------------------------------------------------------
   Generating a time & allocation profiling report.
   -------------------------------------------------------------------------- */

563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
/* We omit certain system-related CCs and CCSs from the default
 * reports, so as not to cause confusion.
 */
static rtsBool
cc_to_ignore (CostCentre *cc)
{
    if (    cc == CC_OVERHEAD 
	 || cc == CC_DONT_CARE
	 || cc == CC_GC 
	 || cc == CC_SYSTEM) {
	return rtsTrue;
    } else {
	return rtsFalse;
    }
}

static rtsBool
ccs_to_ignore (CostCentreStack *ccs)
{
    if (    ccs == CCS_OVERHEAD 
	 || ccs == CCS_DONT_CARE
	 || ccs == CCS_GC 
	 || ccs == CCS_SYSTEM) {
	return rtsTrue;
    } else {
	return rtsFalse;
    }
}

592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
/* -----------------------------------------------------------------------------
   Generating the aggregated per-cost-centre time/alloc report.
   -------------------------------------------------------------------------- */

static CostCentre *sorted_cc_list;

static void
aggregate_cc_costs( CostCentreStack *ccs )
{
  IndexTable *i;

  ccs->cc->mem_alloc += ccs->mem_alloc;
  ccs->cc->time_ticks += ccs->time_ticks;

  for (i = ccs->indexTable; i != 0; i = i->next) {
607
608
609
    if (!i->back_edge) {
      aggregate_cc_costs(i->ccs);
    }
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
  }
}

static void
insert_cc_in_sorted_list( CostCentre *new_cc )
{
  CostCentre **prev, *cc;

  prev = &sorted_cc_list;
  for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
    if (new_cc->time_ticks > cc->time_ticks) {
      new_cc->link = cc;
      *prev = new_cc;
      return;
    } else {
      prev = &(cc->link);
    }
  }
  new_cc->link = NULL;
  *prev = new_cc;
}

static void
report_per_cc_costs( void )
{
  CostCentre *cc, *next;

  aggregate_cc_costs(CCS_MAIN);
  sorted_cc_list = NULL;

  for (cc = CC_LIST; cc != NULL; cc = next) {
    next = cc->link;
    if (cc->time_ticks > total_prof_ticks/100
andy's avatar
andy committed
643
644
	|| cc->mem_alloc > total_alloc/100
	|| RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
645
646
647
648
      insert_cc_in_sorted_list(cc);
    }
  }
  
649
  fprintf(prof_file, "%-30s %-20s", "COST CENTRE", "MODULE");  
650
651
652
653
654
655
656
  fprintf(prof_file, "%6s %6s", "%time", "%alloc");
  if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
    fprintf(prof_file, "  %5s %9s", "ticks", "bytes");
  }
  fprintf(prof_file, "\n\n");

  for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
657
658
659
      if (cc_to_ignore(cc)) {
	  continue;
      }
660
      fprintf(prof_file, "%-30s %-20s", cc->label, cc->module);
661
662
663
664
665
666
667
668
669
670
      fprintf(prof_file, "%6.1f %6.1f",
	      total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
	      total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
					total_alloc * 100)
	  );
      
      if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
	  fprintf(prof_file, "  %5ld %9lld", cc->time_ticks, cc->mem_alloc);
      }
      fprintf(prof_file, "\n");
671
672
673
674
675
676
677
678
679
680
681
682
  }

  fprintf(prof_file,"\n\n");
}

/* -----------------------------------------------------------------------------
   Generate the cost-centre-stack time/alloc report
   -------------------------------------------------------------------------- */

static void 
fprint_header( void )
{
683
  fprintf(prof_file, "%-24s %-10s                                                            individual    inherited\n", "", "");
684

685
686
  fprintf(prof_file, "%-24s %-50s", "COST CENTRE", "MODULE");  
  fprintf(prof_file, "%6s %10s  %5s %5s   %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
687
688
689
690
691
692
693
694
695
696
697
698

  if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
    fprintf(prof_file, "  %5s %9s", "ticks", "bytes");
#if defined(PROFILING_DETAIL_COUNTS)
    fprintf(prof_file, "  %8s %8s %8s %8s %8s %8s %8s",
	    "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
#endif
  }

  fprintf(prof_file, "\n\n");
}

699
void
700
reportCCSProfiling( void )
701
702
703
704
705
706
{
    nat count;
    char temp[128]; /* sigh: magic constant */

    stopProfTimer();

707
    total_prof_ticks = 0;
708
709
710
    total_alloc = 0;
    count_ticks(CCS_MAIN);
    
711
712
713
714
715
716
717
    switch (RtsFlags.CcFlags.doCostCentres) {
    case 0:
      return;
    case COST_CENTRES_XML:
      gen_XML_logfile();
      return;
    default:
718
      break;
719
    }
720

721
722
723
724
    fprintf(prof_file, "\t%s Time and Allocation Profiling Report  (%s)\n", 
	    time_str(), "Final");

    fprintf(prof_file, "\n\t  ");
sof's avatar
sof committed
725
    fprintf(prof_file, " %s", prog_name);
726
727
728
729
730
731
732
733
734
    fprintf(prof_file, " +RTS");
    for (count = 0; rts_argv[count]; count++)
	fprintf(prof_file, " %s", rts_argv[count]);
    fprintf(prof_file, " -RTS");
    for (count = 1; prog_argv[count]; count++)
	fprintf(prof_file, " %s", prog_argv[count]);
    fprintf(prof_file, "\n\n");

    fprintf(prof_file, "\ttotal time  = %11.2f secs   (%lu ticks @ %d ms)\n",
735
736
	    total_prof_ticks / (StgFloat) TICK_FREQUENCY, 
	    total_prof_ticks, TICK_MILLISECS);
737
738

    fprintf(prof_file, "\ttotal alloc = %11s bytes",
739
	    ullong_format_string(total_alloc * sizeof(W_),
740
741
742
743
744
745
746
				 temp, rtsTrue/*commas*/));

#if defined(PROFILING_DETAIL_COUNTS)
    fprintf(prof_file, "  (%lu closures)", total_allocs);
#endif
    fprintf(prof_file, "  (excludes profiling overheads)\n\n");

747
    report_per_cc_costs();
748

749
750
    inherit_costs(CCS_MAIN);

751
    fprint_header();
752
    reportCCS(pruneCCSTree(CCS_MAIN), 0);
753
754
755
756
757
758
759
760
761
762
763
764
}

static void 
reportCCS(CostCentreStack *ccs, nat indent)
{
  CostCentre *cc;
  IndexTable *i;

  cc = ccs->cc;
  
  /* Only print cost centres with non 0 data ! */
  
765
766
767
768
769
  if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
       ! ccs_to_ignore(ccs))
	/* force printing of *all* cost centres if -P -P */ 
    {

770
    fprintf(prof_file, "%-*s%-*s %-50s", 
771
	    indent, "", 24-indent, cc->label, cc->module);
772

773
774
775
776
777
778
    fprintf(prof_file, "%6d %11.0f %5.1f  %5.1f   %5.1f  %5.1f",
	    ccs->ccsID, (double) ccs->scc_count, 
	    total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
	    total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
	    total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
	    total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
779
780
	    );

781
    if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
782
      fprintf(prof_file, "  %5ld %9lld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
783
784
785
786
787
788
789
790
791
792
793
794
#if defined(PROFILING_DETAIL_COUNTS)
      fprintf(prof_file, "  %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
	      ccs->mem_allocs, ccs->thunk_count,
	      ccs->function_count, ccs->pap_count,
	      ccs->subsumed_fun_count,	ccs->subsumed_caf_count,
	      ccs->caffun_subsumed);
#endif
    }
    fprintf(prof_file, "\n");
  }

  for (i = ccs->indexTable; i != 0; i = i->next) {
795
796
797
    if (!i->back_edge) {
      reportCCS(i->ccs, indent+1);
    }
798
799
800
  }
}

andy's avatar
andy committed
801

802
803
804
805
806
807
808
809
810
811
/* Traverse the cost centre stack tree and accumulate
 * ticks/allocations.
 */
static void
count_ticks(CostCentreStack *ccs)
{
  IndexTable *i;
  
  if (!ccs_to_ignore(ccs)) {
    total_alloc += ccs->mem_alloc;
812
    total_prof_ticks += ccs->time_ticks;
813
814
  }
  for (i = ccs->indexTable; i != NULL; i = i->next)
815
816
817
    if (!i->back_edge) {
      count_ticks(i->ccs);
    }
818
819
}

820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
/* Traverse the cost centre stack tree and inherit ticks & allocs.
 */
static void
inherit_costs(CostCentreStack *ccs)
{
  IndexTable *i;

  if (ccs_to_ignore(ccs)) { return; }

  ccs->inherited_ticks += ccs->time_ticks;
  ccs->inherited_alloc += ccs->mem_alloc;

  for (i = ccs->indexTable; i != NULL; i = i->next)
      if (!i->back_edge) {
	  inherit_costs(i->ccs);
	  ccs->inherited_ticks += i->ccs->inherited_ticks;
	  ccs->inherited_alloc += i->ccs->inherited_alloc;
      }
  
  return;
}

842
843
844
845
846
847
848
849
static CostCentreStack *
pruneCCSTree( CostCentreStack *ccs )
{
  CostCentreStack *ccs1;
  IndexTable *i, **prev;
  
  prev = &ccs->indexTable;
  for (i = ccs->indexTable; i != 0; i = i->next) {
850
851
    if (i->back_edge) { continue; }

852
853
854
855
856
857
858
859
860
861
862
863
    ccs1 = pruneCCSTree(i->ccs);
    if (ccs1 == NULL) {
      *prev = i->next;
    } else {
      prev = &(i->next);
    }
  }

  if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
	/* force printing of *all* cost centres if -P -P */ )
       
       || ( ccs->indexTable != 0 )
864
865
866
       || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
      ) {
      return ccs;
867
  } else {
868
      return NULL;
869
870
871
  }
}

872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
/* -----------------------------------------------------------------------------
   Generate the XML time/allocation profile
   -------------------------------------------------------------------------- */

void
gen_XML_logfile( void )
{
  fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);

  reportCCS_XML(pruneCCSTree(CCS_MAIN));

  fprintf(prof_file, " 0\n");

  fclose(prof_file);
}

static void 
reportCCS_XML(CostCentreStack *ccs)
{
  CostCentre *cc;
  IndexTable *i;

894
895
  if (ccs_to_ignore(ccs)) { return; }

896
897
  cc = ccs->cc;
  
898
  fprintf(prof_file, " 1 %d %llu %lu %llu", 
899
900
901
	  ccs->ccsID, ccs->scc_count, ccs->time_ticks, ccs->mem_alloc);

  for (i = ccs->indexTable; i != 0; i = i->next) {
902
903
904
    if (!i->back_edge) {
      reportCCS_XML(i->ccs);
    }
905
906
907
  }
}

908
void
909
fprintCCS( FILE *f, CostCentreStack *ccs )
910
{
911
912
913
914
915
916
  fprintf(f,"<");
  for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
      fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
      if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
	  fprintf(f,",");
      }
917
  }
918
  fprintf(f,">");
919
920
}

921
#endif /* PROFILING */