Commit 8db56c86 authored by simonmar@microsoft.com's avatar simonmar@microsoft.com
Browse files

improvements to PAPI support

- major (multithreaded) GC is measured separately from minor GC
- events to measure can now be specified on the command line, e.g.
     prog +RTS -a+PAPI_TOT_CYC
parent 510d1738
......@@ -304,11 +304,13 @@ struct TRACE_FLAGS {
rtsBool timestamp; /* add timestamps to traces */
};
/* Put them together: */
#ifdef USE_PAPI
#define MAX_PAPI_USER_EVENTS 8
/* PAPI measurement options selected through the RTS command-line flags. */
struct PAPI_FLAGS {
nat eventType; /* The type of events to count: 0 (the default), one of the
                  PAPI_FLAG_* values, or PAPI_USER_EVENTS */
nat numUserEvents; /* Number of entries of userEvents[] that are in use */
/* Names of the events requested by the user; each name is later resolved
   with PAPI_event_name_to_code().  The pointers refer into the RTS
   argument strings and are not copied. */
char * userEvents[MAX_PAPI_USER_EVENTS];
};
#define PAPI_FLAG_CACHE_L1 1
......@@ -316,9 +318,12 @@ struct PAPI_FLAGS {
#define PAPI_FLAG_BRANCH 3
#define PAPI_FLAG_STALLS 4
#define PAPI_FLAG_CB_EVENTS 5
#define PAPI_USER_EVENTS 6
#endif
/* Put them together: */
typedef struct _RTS_FLAGS {
/* The first portion of RTS_FLAGS is invariant. */
struct GC_FLAGS GcFlags;
......
......@@ -30,7 +30,7 @@ static Mutex papi_counter_mutex;
struct _papi_events {
int event_code;
char * event_name;
const char * event_name;
};
/* Beware, these counters are Opteron specific
......@@ -75,45 +75,54 @@ int papi_error;
/* Arbitrary, to avoid using malloc */
#define MAX_PAPI_EVENTS 10
int n_papi_events = 0;
static nat n_papi_events = 0;
/* Events counted during GC and Mutator execution */
/* There's a trailing comma, do all C compilers accept that? */
static struct _papi_events papi_events[MAX_PAPI_EVENTS];
long_long MutatorCounters[MAX_PAPI_EVENTS];
long_long GCCounters[MAX_PAPI_EVENTS];
long_long GC0Counters[MAX_PAPI_EVENTS];
long_long GC1Counters[MAX_PAPI_EVENTS];
long_long start_mutator_cycles;
long_long start_gc_cycles;
long_long mutator_cycles;
long_long gc_cycles;
long_long start_gc_cycles;
long_long gc0_cycles;
long_long gc1_cycles;
static long_long papi_counter(long_long values[],int event);
static void papi_add_events(int EventSet);
static nat max_hardware_counters = 2;
/* If you want to add events to count, extend the
* init_countable_events and the papi_report functions.
* Be aware that your processor can count only a limited number
* of events simultaneously; you can turn on multiplexing
* to increase that number, though.
*/
static void papi_add_event(const char *name, int code)
{
if (n_papi_events >= max_hardware_counters) {
errorBelch("too many PAPI events for this CPU (max: %d)",
max_hardware_counters);
stg_exit(EXIT_FAILURE);
}
papi_events[n_papi_events].event_code = code;
papi_events[n_papi_events].event_name = name;
n_papi_events++;
}
static void
init_countable_events(void)
{
#define PAPI_ADD_EVENT(EVENT) \
{ \
if (n_papi_events >= MAX_PAPI_EVENTS) { \
barf("too many PAPI events"); \
} \
papi_events[n_papi_events].event_code = EVENT; \
papi_events[n_papi_events].event_name = #EVENT; \
n_papi_events++; \
}
max_hardware_counters = PAPI_num_counters();
#define PAPI_ADD_EVENT(EVENT) papi_add_event(#EVENT,EVENT)
PAPI_ADD_EVENT(PAPI_TOT_INS);
if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
PAPI_ADD_EVENT(FR_BR);
PAPI_ADD_EVENT(FR_BR_MIS);
......@@ -133,8 +142,35 @@ init_countable_events(void)
PAPI_ADD_EVENT(DC_L2_REFILL_MOES);
PAPI_ADD_EVENT(DC_SYS_REFILL_MOES);
PAPI_ADD_EVENT(FR_BR_MIS);
} else if (RtsFlags.PapiFlags.eventType==PAPI_USER_EVENTS) {
nat i;
char *name;
int code;
for (i = 0; i < RtsFlags.PapiFlags.numUserEvents; i++) {
name = RtsFlags.PapiFlags.userEvents[i];
PAPI_CHECK(PAPI_event_name_to_code(name, &code))
papi_add_event(name, code);
}
} else {
PAPI_ADD_EVENT(PAPI_STL_ICY);
// PAPI_ADD_EVENT(PAPI_L1_DCA); // L1 data cache accesses
// PAPI_ADD_EVENT(PAPI_L1_ICR); // L1 instruction cache reads
// PAPI_ADD_EVENT(PAPI_L1_ICM); // L1 instruction cache misses
// PAPI_ADD_EVENT(PAPI_L1_STM); // L1 store misses
// PAPI_ADD_EVENT(PAPI_L1_DCM); // L1 data cache misses
// PAPI_ADD_EVENT(PAPI_L1_LDM); // L1 load misses
// PAPI_ADD_EVENT(PAPI_L2_TCM); // L2 cache misses
// PAPI_ADD_EVENT(PAPI_L2_STM); // L2 store misses
// PAPI_ADD_EVENT(PAPI_L2_DCW); // L2 data cache writes
// PAPI_ADD_EVENT(PAPI_L2_DCR); // L2 data cache reads
// PAPI_ADD_EVENT(PAPI_L2_TCW); // L2 cache writes
// PAPI_ADD_EVENT(PAPI_L2_TCR); // L2 cache reads
// PAPI_ADD_EVENT(PAPI_CA_CLN); // exclusive access to clean cache line
// PAPI_ADD_EVENT(PAPI_TLB_DM); // TLB misses
PAPI_ADD_EVENT(PAPI_TOT_INS); // Total instructions
PAPI_ADD_EVENT(PAPI_TOT_CYC); // Total cycles
// PAPI_ADD_EVENT(PAPI_CA_SHR); // exclusive access to shared cache line
// PAPI_ADD_EVENT(PAPI_RES_STL); // Cycles stalled on any resource
}
// We might also consider:
......@@ -143,90 +179,58 @@ init_countable_events(void)
};
static char temp[BIG_STRING_LEN];
static void
papi_mut_cycles(void)
{
ullong_format_string(mutator_cycles,temp,rtsTrue/*commas*/);
statsPrintf(" (MUT_CYCLES) : %s\n",temp);
}
static void
papi_gc_cycles(void)
papi_report_event(const char *name, ullong value)
{
ullong_format_string(gc_cycles,temp,rtsTrue/*commas*/);
statsPrintf(" (GC_CYCLES) : %s\n",temp);
static char temp[BIG_STRING_LEN];
ullong_format_string(value,temp,rtsTrue/*commas*/);
statsPrintf(" %15s %15s\n", name, temp);
}
/* This function reports counters for GC and mutator */
static void
papi_report(long_long PapiCounters[])
papi_report(long_long counters[])
{
/* Report the value of a counter */
#define PAPI_REPORT(EVENTSET,EVENT) \
{ \
ullong_format_string(papi_counter(EVENTSET,EVENT),temp,rtsTrue/*commas*/); \
statsPrintf(" (" #EVENT ") : %s\n",temp); \
}
nat i;
/* Report the value of a counter as a percentage of another counter */
#define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT) \
statsPrintf(" (" #EVENT ") %% of (" #EVENTTOT ") : %.1f%%\n", \
papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT))
statsPrintf(" " #EVENT " %% of " #EVENTTOT " : %.1f%%\n", \
papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT))
/* I need to improve formatting aesthetics */
PAPI_REPORT(PapiCounters,PAPI_TOT_INS);
if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
PAPI_REPORT(PapiCounters,FR_BR);
PAPI_REPORT(PapiCounters,FR_BR_MIS);
PAPI_REPORT_PCT(PapiCounters,FR_BR_MIS,FR_BR);
PAPI_REPORT_PCT(PapiCounters,FR_BR_MISCOMPARE,FR_BR);
for (i = 0; i < n_papi_events; i++)
{
papi_report_event(papi_events[i].event_name, counters[i]);
}
else if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) {
PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS);
PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_BR);
//PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_BR,PAPI_TOT_CYC);
PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS);
//PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS,PAPI_TOT_CYC);
if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
PAPI_REPORT_PCT(counters,FR_BR_MIS,FR_BR);
PAPI_REPORT_PCT(counters,FR_BR_MISCOMPARE,FR_BR);
}
else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) {
PAPI_REPORT(PapiCounters,PAPI_L1_DCA);
PAPI_REPORT(PapiCounters,PAPI_L1_DCM);
PAPI_REPORT_PCT(PapiCounters,PAPI_L1_DCM,PAPI_L1_DCA);
PAPI_REPORT_PCT(counters,PAPI_L1_DCM,PAPI_L1_DCA);
}
else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) {
PAPI_REPORT(PapiCounters,PAPI_L2_DCA);
PAPI_REPORT(PapiCounters,PAPI_L2_DCM);
PAPI_REPORT_PCT(PapiCounters,PAPI_L2_DCM,PAPI_L2_DCA);
PAPI_REPORT_PCT(counters,PAPI_L2_DCM,PAPI_L2_DCA);
}
else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) {
PAPI_REPORT(PapiCounters,DC_L2_REFILL_MOES);
PAPI_REPORT(PapiCounters,DC_SYS_REFILL_MOES);
PAPI_REPORT(PapiCounters,FR_BR_MIS);
}
else {
PAPI_REPORT(PapiCounters,PAPI_STL_ICY);
}
}
void
papi_stats_report (void)
{
statsPrintf(" -- CPU Mutator counters --\n");
papi_mut_cycles();
statsPrintf(" Mutator CPU counters\n");
papi_report_event("CYCLES", mutator_cycles);
papi_report(MutatorCounters);
statsPrintf("\n -- CPU GC counters --\n");
papi_gc_cycles();
papi_report(GCCounters);
statsPrintf("\n GC(0) CPU counters\n");
papi_report_event("CYCLES", gc0_cycles);
papi_report(GC0Counters);
statsPrintf("\n GC(1) CPU counters\n");
papi_report_event("CYCLES", gc1_cycles);
papi_report(GC1Counters);
}
void
......@@ -273,7 +277,7 @@ papi_init (void)
static long_long
papi_counter(long_long values[],int event)
{
int i;
nat i;
for(i=0;i<n_papi_events;i++) {
if(papi_events[i].event_code==event) {
return values[i];
......@@ -288,7 +292,7 @@ papi_counter(long_long values[],int event)
static void
papi_add_events(int EventSet)
{
int i;
nat i;
for(i=0;i<n_papi_events;i++) {
if((papi_error=PAPI_add_event(EventSet,
papi_events[i].event_code))
......@@ -334,18 +338,29 @@ papi_start_gc_count(void)
}
void
papi_stop_gc_count(void)
papi_stop_gc0_count(void)
{
ACQUIRE_LOCK(&papi_counter_mutex);
PAPI_CHECK( PAPI_accum(GCEvents,GC0Counters));
PAPI_CHECK( PAPI_stop(GCEvents,NULL));
gc0_cycles += PAPI_cycles() - start_gc_cycles;
RELEASE_LOCK(&papi_counter_mutex);
}
void
papi_stop_gc1_count(void)
{
ACQUIRE_LOCK(&papi_counter_mutex);
PAPI_CHECK( PAPI_accum(GCEvents,GCCounters));
PAPI_CHECK( PAPI_accum(GCEvents,GC1Counters));
PAPI_CHECK( PAPI_stop(GCEvents,NULL));
gc_cycles += PAPI_cycles() - start_gc_cycles;
gc1_cycles += PAPI_cycles() - start_gc_cycles;
RELEASE_LOCK(&papi_counter_mutex);
}
void
papi_thread_start_gc_count(int event_set)
papi_thread_start_gc1_count(int event_set)
{
ACQUIRE_LOCK(&papi_counter_mutex);
PAPI_CHECK( PAPI_start(event_set));
......@@ -353,10 +368,10 @@ papi_thread_start_gc_count(int event_set)
}
void
papi_thread_stop_gc_count(int event_set)
papi_thread_stop_gc1_count(int event_set)
{
ACQUIRE_LOCK(&papi_counter_mutex);
PAPI_CHECK( PAPI_accum(event_set,GCCounters));
PAPI_CHECK( PAPI_accum(event_set,GC1Counters));
PAPI_CHECK( PAPI_stop(event_set,NULL));
RELEASE_LOCK(&papi_counter_mutex);
}
......
......@@ -16,10 +16,12 @@ void papi_init_eventset(int * event_set);
void papi_init(void);
void papi_start_mutator_count(void);
void papi_stop_mutator_count(void);
void papi_start_gc_count(void);
void papi_stop_gc_count(void);
void papi_stop_gc0_count(void);
void papi_stop_gc1_count(void);
// for multithreaded GC, each sub-thread uses these functions to count
// events and aggregate them into the main GC counters.
void papi_thread_start_gc_count(int event_set);
void papi_thread_stop_gc_count(int event_set);
void papi_thread_start_gc1_count(int event_set);
void papi_thread_stop_gc1_count(int event_set);
......@@ -321,6 +321,7 @@ void initRtsFlagsDefaults(void)
#ifdef USE_PAPI
/* By default no special measurements taken */
RtsFlags.PapiFlags.eventType = 0;
RtsFlags.PapiFlags.numUserEvents = 0;
#endif
}
......@@ -721,6 +722,14 @@ error = rtsTrue;
case 'e':
RtsFlags.PapiFlags.eventType = PAPI_FLAG_CB_EVENTS;
break;
case '+':
if (RtsFlags.PapiFlags.numUserEvents >= MAX_PAPI_USER_EVENTS) {
errorBelch("maximum number of PAPI events reached");
stg_exit(EXIT_FAILURE);
}
RtsFlags.PapiFlags.eventType = PAPI_USER_EVENTS;
RtsFlags.PapiFlags.userEvents[RtsFlags.PapiFlags.numUserEvents++] = rts_argv[arg] + 3;
break;
default:
bad_option( rts_argv[arg] );
}
......
......@@ -167,10 +167,6 @@ hs_init(int *argc, char **argv[])
argv++; argc--;
#endif
#ifdef USE_PAPI
papi_init();
#endif
/* Set the RTS flags to default values. */
initRtsFlagsDefaults();
......@@ -185,6 +181,10 @@ hs_init(int *argc, char **argv[])
setProgArgv(*argc,*argv);
}
#ifdef USE_PAPI
papi_init();
#endif
/* initTracing must be after setupRtsFlags() */
initTracing();
......
......@@ -347,7 +347,11 @@ stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen)
#if USE_PAPI
if(papi_is_reporting) {
/* Switch to counting mutator events */
papi_stop_gc_count();
if (gen == 0) {
papi_stop_gc0_count();
} else {
papi_stop_gc1_count();
}
papi_start_mutator_count();
}
#endif
......
......@@ -1024,14 +1024,14 @@ gc_thread_mainloop (void)
if (gct->papi_events == -1) {
papi_init_eventset(&gct->papi_events);
}
papi_thread_start_gc_count(gct->papi_events);
papi_thread_start_gc1_count(gct->papi_events);
#endif
gc_thread_work();
#ifdef USE_PAPI
// count events in this thread towards the GC totals
papi_thread_stop_gc_count(gct->papi_events);
papi_thread_stop_gc1_count(gct->papi_events);
#endif
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment