Papi.c 11.8 KB
Newer Older
1 2
/* -----------------------------------------------------------------------------
 * (c) The GHC Team 2006
 *
 * Initialization and use of the PAPI performance monitoring library
 *
 *
 * To add events, or to add your processor's counters, modify
 *
 *   init_countable_events
 *   papi_report
 *
 * ---------------------------------------------------------------------------*/


15
#ifdef USE_PAPI /* ugly */
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
16

17
#include <papi.h>
18 19 20 21 22
/* The posix symbols get defined in a header included from papi.h.
 * Undefine them here to allow redefinition in PosixSource.h */
#undef _POSIX_SOURCE
#undef _POSIX_C_SOURCE
#undef _XOPEN_SOURCE
23

Simon Marlow's avatar
Simon Marlow committed
24
#include "PosixSource.h"
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
25
#include "Rts.h"
Simon Marlow's avatar
Simon Marlow committed
26

mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
27 28
#include "RtsUtils.h"
#include "Stats.h"
Simon Marlow's avatar
Simon Marlow committed
29
#include "Papi.h"
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
30

31 32 33 34
// used to protect the aggregated counters
#ifdef THREADED_RTS
static Mutex papi_counter_mutex;
#endif
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
35 36 37

/* One countable event: the code handed to PAPI together with the name
 * that is printed next to its value in the statistics report. */
struct _papi_events {
  int event_code;           /* passed to PAPI_add_event() */
  const char * event_name;  /* label used when reporting the counter */
};

41 42 43 44 45 46 47 48 49 50 51
/* Beware, these counters are Opteron specific.
 * I obtained the numbers using the papi_avail
 * and papi_native_avail utilities.
 * This is certainly not the official PAPI way
 * of doing things.
 */
#define FR_BR 0x40000040
#define FR_BR_MIS 0x40000041
#define FR_BR_MISCOMPARE 0x40000048
#define DC_ACCESS 0x40000019
#define DC_MISS 0x4000001a
#define FR_DISPATCH_STALLS 0x40000054
#define FR_DISPATCH_STALLS_BR 0x40000055
#define FR_DISPATCH_STALLS_FULL_REORDER 0x40000058
#define FR_DISPATCH_STALLS_FULL_RESERVATION 0x40000059
#define FR_DISPATCH_STALLS_FULL_LS 0x4000005b
#define DC_L2_REFILL_MOES 0x40001e1b
#define DC_SYS_REFILL_MOES 0x40001e1c
59

mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
60 61 62
/* This is bad, it should be in a header.
 * Size of the scratch buffer in which papi_report_event formats a
 * counter value before printing it. */
#define BIG_STRING_LEN 512

63

Austin Seipp's avatar
Austin Seipp committed
64 65 66 67
/* Evaluate a PAPI call, store its result in the global papi_error and,
 * when the result is not PAPI_OK, print a debug message naming the file
 * and line of the call.  Execution always continues afterwards; callers
 * that must react to a failure inspect papi_error themselves.
 *
 * The expansion is a complete if/else statement, so an `else` written
 * after an invocation can never silently bind to the macro's own `if`.
 * Like the previous form it may be used with or without a trailing
 * semicolon.
 */
#define PAPI_CHECK(CALL)                                                \
  if((papi_error=(CALL)) == PAPI_OK) {                                  \
    /* success: nothing to report */                                    \
  } else {                                                              \
    debugBelch("PAPI function failed in module %s at line %d "          \
               "with error code %d\n",                                  \
              __FILE__,__LINE__,papi_error);                            \
  }

mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
71 72 73 74 75 76 77 78 79 80
/* While PAPI reporting is going on this flag is on */
int papi_is_reporting;

/* Event sets and counter arrays for GC and mutator */

/* Both event sets start out as PAPI_NULL; papi_init() passes them to
 * papi_init_eventset() to be created and populated. */
int MutatorEvents = PAPI_NULL;
int GCEvents = PAPI_NULL;

/* Result of the most recent PAPI call made through PAPI_CHECK or by
 * papi_add_events(). */
int papi_error;

81 82
/* Arbitrary, to avoid using malloc */
#define MAX_PAPI_EVENTS 10
83
static char papiNativeEventNames[MAX_PAPI_EVENTS][PAPI_MAX_STR_LEN];
84

85
static nat n_papi_events = 0;
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
86 87 88 89


/* Events counted during GC and Mutator execution */
/* There's a trailing comma, do all C compilers accept that? */
90
static struct _papi_events papi_events[MAX_PAPI_EVENTS];
91
long_long MutatorCounters[MAX_PAPI_EVENTS];
92 93
long_long GC0Counters[MAX_PAPI_EVENTS];
long_long GC1Counters[MAX_PAPI_EVENTS];
94

95
long_long start_mutator_cycles;
96
long_long mutator_cycles = 0;
97
long_long start_gc_cycles;
98 99
long_long gc0_cycles = 0;
long_long gc1_cycles = 0;
100 101 102



103 104 105
static long_long papi_counter(long_long values[],int event);
static void papi_add_events(int EventSet);

106 107
static nat max_hardware_counters = 2;

108 109 110 111 112 113
/* If you want to add events to count, extend the
 * init_countable_events and the papi_report function.
 * Be aware that your processor can count a limited number
 * of events simultaneously, you can turn on multiplexing
 * to increase that number, though.
 */
114 115 116
static void papi_add_event(const char *name, int code)
{
    if (n_papi_events >= max_hardware_counters) {
117
        errorBelch("too many PAPI events for this CPU (max: %d)",
118 119 120 121 122 123
                   max_hardware_counters);
        stg_exit(EXIT_FAILURE);
    }
    papi_events[n_papi_events].event_code = code;
    papi_events[n_papi_events].event_name = name;
    n_papi_events++;
124
}
125

126
static void
127
init_countable_events(void)
128
{
129 130 131
    max_hardware_counters = PAPI_num_counters();

#define PAPI_ADD_EVENT(EVENT) papi_add_event(#EVENT,EVENT)
132 133

    if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
134 135
        PAPI_ADD_EVENT(FR_BR);
        PAPI_ADD_EVENT(FR_BR_MIS);
Austin Seipp's avatar
Austin Seipp committed
136 137
        // Docs are wrong? Opteron does not count indirect branch
        // misses exclusively
138
        PAPI_ADD_EVENT(FR_BR_MISCOMPARE);
139
    } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) {
140 141 142
        PAPI_ADD_EVENT(FR_DISPATCH_STALLS);
        PAPI_ADD_EVENT(FR_DISPATCH_STALLS_BR);
        PAPI_ADD_EVENT(FR_DISPATCH_STALLS_FULL_LS);
143
    } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) {
144 145
        PAPI_ADD_EVENT(PAPI_L1_DCA);
        PAPI_ADD_EVENT(PAPI_L1_DCM);
146
    } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) {
147 148
        PAPI_ADD_EVENT(PAPI_L2_DCA);
        PAPI_ADD_EVENT(PAPI_L2_DCM);
149
    } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) {
150 151 152
        PAPI_ADD_EVENT(DC_L2_REFILL_MOES);
        PAPI_ADD_EVENT(DC_SYS_REFILL_MOES);
        PAPI_ADD_EVENT(FR_BR_MIS);
153 154 155
    } else if (RtsFlags.PapiFlags.eventType==PAPI_USER_EVENTS) {
        nat i;
        char *name;
156
        char *asciiEventCode;
157 158
        int code;
        for (i = 0; i < RtsFlags.PapiFlags.numUserEvents; i++) {
159
          if(RtsFlags.PapiFlags.userEventsKind[i] == PAPI_PRESET_EVENT_KIND) {
160 161
            name = RtsFlags.PapiFlags.userEvents[i];
            PAPI_CHECK(PAPI_event_name_to_code(name, &code))
162 163 164 165 166 167 168 169
          }
          else { // PAPI_NATIVE_EVENT_KIND
            asciiEventCode = RtsFlags.PapiFlags.userEvents[i];
            name = papiNativeEventNames[i];
            code = strtol(asciiEventCode, NULL, 16 /* hex number expected */);
            PAPI_CHECK(PAPI_event_code_to_name(code, name))
          }
          papi_add_event(name, code);
170
        }
171
    } else {
172 173 174 175 176 177 178 179 180 181 182 183 184 185
        // PAPI_ADD_EVENT(PAPI_L1_DCA); // L1 data cache accesses
        // PAPI_ADD_EVENT(PAPI_L1_ICR); // L1 instruction cache reads
        // PAPI_ADD_EVENT(PAPI_L1_ICM); // L1 instruction cache misses
        // PAPI_ADD_EVENT(PAPI_L1_STM); // L1 store misses
        // PAPI_ADD_EVENT(PAPI_L1_DCM); // L1 data cache misses
        // PAPI_ADD_EVENT(PAPI_L1_LDM); // L1 load misses
        // PAPI_ADD_EVENT(PAPI_L2_TCM); // L2 cache misses
        // PAPI_ADD_EVENT(PAPI_L2_STM); // L2 store misses
        // PAPI_ADD_EVENT(PAPI_L2_DCW); // L2 data cache writes
        // PAPI_ADD_EVENT(PAPI_L2_DCR); // L2 data cache reads
        // PAPI_ADD_EVENT(PAPI_L2_TCW); // L2 cache writes
        // PAPI_ADD_EVENT(PAPI_L2_TCR); // L2 cache reads
        // PAPI_ADD_EVENT(PAPI_CA_CLN); // exclusive access to clean cache line
        // PAPI_ADD_EVENT(PAPI_TLB_DM); // TLB misses
186 187
        PAPI_ADD_EVENT(PAPI_TOT_INS); // Total instructions
        PAPI_ADD_EVENT(PAPI_TOT_CYC); // Total instructions
188 189 190
        // PAPI_ADD_EVENT(PAPI_CA_SHR); // exclusive access to shared cache line
        // PAPI_ADD_EVENT(PAPI_RES_STL); // Cycles stalled on any resource

191
    }
192 193 194 195

    // We might also consider:
    //  PAPI_BR_MSP     Conditional branch instructions mispredicted
    //  PAPI_RES_STL    Cycles stalled on any resource
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
196 197
};

198

Simon Marlow's avatar
Simon Marlow committed
199
static void
Ian Lynagh's avatar
Ian Lynagh committed
200
papi_report_event(const char *name, StgWord64 value)
201
{
202
    static char temp[BIG_STRING_LEN];
203
    showStgWord64(value,temp,rtsTrue/*commas*/);
204
    statsPrintf("  %15s  %15s\n", name, temp);
205 206
}

mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
207
/* This function reports counters for GC and mutator */
208
static void
209
papi_report(long_long counters[])
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
210
{
211
    nat i;
212 213 214

/* Report the value of a counter as a percentage of another counter */
#define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT) \
215 216
    statsPrintf("   " #EVENT " %% of " #EVENTTOT " : %.1f%%\n",      \
         papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT))
217

218 219 220
    for (i = 0; i < n_papi_events; i++)
    {
        papi_report_event(papi_events[i].event_name, counters[i]);
221 222
    }

223
    if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
224 225
        PAPI_REPORT_PCT(counters,FR_BR_MIS,FR_BR);
        PAPI_REPORT_PCT(counters,FR_BR_MISCOMPARE,FR_BR);
226 227
    }

228
    else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) {
229
        PAPI_REPORT_PCT(counters,PAPI_L1_DCM,PAPI_L1_DCA);
230 231
    }

232
    else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) {
233
        PAPI_REPORT_PCT(counters,PAPI_L2_DCM,PAPI_L2_DCA);
234
    }
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
235 236
}

237 238 239
void
papi_stats_report (void)
{
240 241
    statsPrintf("  Mutator CPU counters\n");
    papi_report_event("CYCLES", mutator_cycles);
242
    papi_report(MutatorCounters);
243

244 245 246 247 248 249 250
    statsPrintf("\n  GC(0) CPU counters\n");
    papi_report_event("CYCLES", gc0_cycles);
    papi_report(GC0Counters);

    statsPrintf("\n  GC(1) CPU counters\n");
    papi_report_event("CYCLES", gc1_cycles);
    papi_report(GC1Counters);
251
}
252

253 254 255 256 257 258 259
void
papi_init_eventset (int *event_set)
{
    PAPI_register_thread();
    PAPI_CHECK( PAPI_create_eventset(event_set));
    papi_add_events(*event_set);
}
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
260 261

void
262
papi_init (void)
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
263
{
264 265 266 267 268 269 270 271 272 273 274
    /* Initialise the performance tracking library */
    int ver;
    if ((ver = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) {
        if (ver > 0) {
            errorBelch("PAPI_library_init: wrong version: %x", ver);
            stg_exit(EXIT_FAILURE);
        } else {
            sysErrorBelch("PAPI_library_init");
            stg_exit(EXIT_FAILURE);
        }
    }
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
275

276 277 278 279 280 281
#ifdef THREADED_RTS
    {
        int err;
        if ((err = PAPI_thread_init(osThreadId)) < 0) {
            barf("PAPI_thread_init: %d",err);
        }
282

283 284 285
        initMutex(&papi_counter_mutex);
    }
#endif
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
286

287
    init_countable_events();
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
288

289 290
    papi_init_eventset(&MutatorEvents);
    papi_init_eventset(&GCEvents);
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
291 292
}

293
/* Extract the value corresponding to an event */
294
static long_long
295 296
papi_counter(long_long values[],int event)
{
297
  nat i;
298
  for(i=0;i<n_papi_events;i++) {
299 300 301 302 303 304 305 306 307 308
    if(papi_events[i].event_code==event) {
      return values[i];
    }
  }
  /* Passed a wrong event? */
  debugBelch("Event %d is not part of event set\n",event);
  return 0;
}

/* Add the events of papi_events into an event set */
309
static void
310 311
papi_add_events(int EventSet)
{
312
  nat i;
313
  for(i=0;i<n_papi_events;i++) {
314
    if((papi_error=PAPI_add_event(EventSet,
315
                                  papi_events[i].event_code))
316 317
       != PAPI_OK)
      debugBelch("Failed adding %s to event set with error code %d\n",
318
                 papi_events[i].event_name,papi_error);
319 320 321
  }
}

322 323 324 325 326 327 328
/* We should be using elapsed cycles
 * to be consistent with time metric chosen in Stats.c (Elapsed time).
 * This is an approximation to the cycles that the program spends.
 * Note that the counters, in contrast, are virtual and user space.
 *
 * PAPI_cycles is the cycle source sampled by the papi_start_* and
 * papi_stop_* functions in this file to maintain the *_cycles totals.
 */
#define PAPI_cycles PAPI_get_virt_cyc

mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
329 330 331
void
papi_start_mutator_count(void)
{
332
    ACQUIRE_LOCK(&papi_counter_mutex);
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
333
    PAPI_CHECK( PAPI_start(MutatorEvents));
334
    start_mutator_cycles = PAPI_cycles();
335
    RELEASE_LOCK(&papi_counter_mutex);
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
336 337 338 339 340
}

void
papi_stop_mutator_count(void)
{
341
    ACQUIRE_LOCK(&papi_counter_mutex);
342
    mutator_cycles += PAPI_cycles() - start_mutator_cycles;
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
343 344
    PAPI_CHECK( PAPI_accum(MutatorEvents,MutatorCounters));
    PAPI_CHECK( PAPI_stop(MutatorEvents,NULL));
345
    RELEASE_LOCK(&papi_counter_mutex);
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
346 347 348 349 350
}

/* Start counting on the GC event set and remember the current cycle
 * count in start_gc_cycles.  Runs between ACQUIRE_LOCK and RELEASE_LOCK
 * on papi_counter_mutex. */
void
papi_start_gc_count(void)
{
    ACQUIRE_LOCK(&papi_counter_mutex);
    PAPI_CHECK( PAPI_start(GCEvents));
    start_gc_cycles = PAPI_cycles();
    RELEASE_LOCK(&papi_counter_mutex);
}

void
358 359 360 361 362 363 364 365 366 367 368 369
papi_stop_gc0_count(void)
{
    ACQUIRE_LOCK(&papi_counter_mutex);
    PAPI_CHECK( PAPI_accum(GCEvents,GC0Counters));
    PAPI_CHECK( PAPI_stop(GCEvents,NULL));
    gc0_cycles += PAPI_cycles() - start_gc_cycles;
    RELEASE_LOCK(&papi_counter_mutex);
}


void
papi_stop_gc1_count(void)
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
370
{
371
    ACQUIRE_LOCK(&papi_counter_mutex);
372
    PAPI_CHECK( PAPI_accum(GCEvents,GC1Counters));
373
    PAPI_CHECK( PAPI_stop(GCEvents,NULL));
374
    gc1_cycles += PAPI_cycles() - start_gc_cycles;
375
    RELEASE_LOCK(&papi_counter_mutex);
mrchebas@gmail.com's avatar
mrchebas@gmail.com committed
376
}
377 378


379
void
380
papi_thread_start_gc1_count(int event_set)
381 382 383 384 385 386 387
{
    ACQUIRE_LOCK(&papi_counter_mutex);
    PAPI_CHECK( PAPI_start(event_set));
    RELEASE_LOCK(&papi_counter_mutex);
}

void
388
papi_thread_stop_gc1_count(int event_set)
389 390
{
    ACQUIRE_LOCK(&papi_counter_mutex);
391
    PAPI_CHECK( PAPI_accum(event_set,GC1Counters));
392 393 394 395
    PAPI_CHECK( PAPI_stop(event_set,NULL));
    RELEASE_LOCK(&papi_counter_mutex);
}

396
#endif /* USE_PAPI */
397 398 399 400 401 402 403 404

// Local Variables:
// mode: C
// fill-column: 80
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// End: