Commit fe07f054 authored by mrchebas@gmail.com's avatar mrchebas@gmail.com

Addition of PAPI to RTS

To build with PAPI support, the USE_PAPI define must still be
added by hand; this patch does not set it. In addition, programs
must be compiled and linked with the appropriate library flags
for PAPI.
parent 74a87d70
#include "Papi.h"
#include "Rts.h"
#include "RtsUtils.h"
#include "Stats.h"
/* Compile-time switches (0 = off, 1 = on) selecting which groups of
 * events are placed in papi_events and printed by papi_report.
 * Probably it is better to count one set of events at a time.
 * The reason is that processors have limited counters and
 * multiplexing is not enabled (yet).
 */
/* Branch events; this group uses the Opteron-specific FR_BR* codes. */
#define PAPI_COUNT_BRANCHES 0
/* Dispatch-stall events; this group is Opteron specific as well
 * (FR_DISPATCH_STALLS_* codes).
 */
#define PAPI_COUNT_STALLS 0
/* Level-1 data cache events (PAPI_L1_DCA / PAPI_L1_DCM). */
#define PAPI_COUNT_DCACHE1_MISSES 1
/* Level-2 data cache events (PAPI_L2_DCA / PAPI_L2_DCM). */
#define PAPI_COUNT_DCACHE2_MISSES 0
/* One entry of the papi_events table: an event code together with the
 * printable name that is shown when adding the event fails.
 * The name always points at a string literal produced by PAPI_ADD_EVENT,
 * so it is declared const to keep anyone from writing through it.
 */
struct _papi_events {
    int event_code;          /* code handed to PAPI_add_event */
    const char *event_name;  /* spelling of the event macro, for messages */
};
/* Build one struct _papi_events initializer from an event macro:
 * the event code itself plus, via the # operator, the spelling of the
 * macro argument as a string (e.g. "PAPI_TOT_CYC"). */
#define PAPI_ADD_EVENT(EVENT) { EVENT, #EVENT }
/* Beware, these counters are Opteron specific.
 * They are raw event codes that reach PAPI_add_event through the
 * papi_events table when the matching PAPI_COUNT_* switch is enabled.
 * NOTE(review): the meaning of each code is inferred from its name only;
 * confirm against the processor/PAPI native event list. */
/* Used by the PAPI_COUNT_BRANCHES group */
#define FR_BR 0x40000040
#define FR_BR_MIS 0x40000041
#define FR_BR_MISCOMPARE 0x40000048
/* Not referenced anywhere in the visible part of this file */
#define DC_ACCESS 0x40000019
#define DC_MISS 0x4000001a
/* Used by the PAPI_COUNT_STALLS group */
#define FR_DISPATCH_STALLS_BR 0x40000055
#define FR_DISPATCH_STALLS_FULL_LS 0x4000005b
/* Report the value of a counter.
 *
 * EVENTSET is the counter array to read from and EVENT the event macro
 * whose value is printed; the event's spelling is used as the label.
 * The value is formatted (with commas) into a character buffer named
 * `temp`, which must be in scope at the point of use.
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves
 * as a single statement: `if (c) PAPI_REPORT(a,b); else ...` parses as
 * intended instead of leaving a stray `;` after a bare block.
 */
#define PAPI_REPORT(EVENTSET,EVENT) \
    do { \
        ullong_format_string(papi_counter(EVENTSET,EVENT),temp,rtsTrue/*commas*/); \
        statsPrintf(" (" #EVENT ") : %s\n",temp); \
    } while (0)
/* Print EVENT as a percentage of EVENTTOT, both read from the counter
 * array EVENTSET. The two event spellings are used as the label and the
 * ratio is shown with one decimal place.
 * Expands to a single statsPrintf call expression. */
#define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT)                      \
    statsPrintf(" (" #EVENT ") %% of (" #EVENTTOT ") : %.1f%%\n",     \
                (100.0 * papi_counter(EVENTSET,EVENT))                \
                    / papi_counter(EVENTSET,EVENTTOT))
/* Number of counted events, computed from size of papi_events.
 * Only usable where the papi_events array definition is visible,
 * since it relies on sizeof applied to the array itself. */
#define N_PAPI_EVENTS ((int)(sizeof(papi_events)/sizeof(struct _papi_events)))
/* Size of the scratch buffer papi_report uses to format counter values.
 * This is bad, it should be in a header: the same name and value are
 * also defined by the statistics code. */
#define BIG_STRING_LEN 512
/* While PAPI reporting is going on this flag is on.
 * The statistics code consults it before switching between the mutator
 * and GC event sets. */
int papi_is_reporting;
/* Event set handles for the mutator and the GC; both start out as
 * PAPI_NULL and are filled in by papi_init_eventsets. */
int MutatorEvents = PAPI_NULL;
int GCEvents = PAPI_NULL;
/* Result of the most recent PAPI call made through PAPI_CHECK or
 * papi_add_events. */
int papi_error;
/* If you want to add events to count, extend the
* papi_events array and the papi_report function.
*/
/* Events counted during GC and Mutator execution.
 * PAPI_TOT_CYC is always present; the remaining groups are selected by
 * the PAPI_COUNT_* switches and must stay in step with papi_report.
 * The position of an entry here is the index papi_counter uses to pick
 * the matching value out of a counter array, so order matters. */
/* The trailing comma after the last initializer is fine: ISO C has
 * allowed it in initializer lists since C89. */
static struct _papi_events papi_events[] = {
PAPI_ADD_EVENT(PAPI_TOT_CYC),
#if PAPI_COUNT_BRANCHES
PAPI_ADD_EVENT(FR_BR),
PAPI_ADD_EVENT(FR_BR_MIS),
/* Docs are wrong? Opteron does not count indirect branch misses apparently */
PAPI_ADD_EVENT(FR_BR_MISCOMPARE),
#endif
#if PAPI_COUNT_STALLS
PAPI_ADD_EVENT(FR_DISPATCH_STALLS_BR),
PAPI_ADD_EVENT(FR_DISPATCH_STALLS_FULL_LS),
#endif
#if PAPI_COUNT_DCACHE1_MISSES
PAPI_ADD_EVENT(PAPI_L1_DCA),
PAPI_ADD_EVENT(PAPI_L1_DCM),
#endif
#if PAPI_COUNT_DCACHE2_MISSES
PAPI_ADD_EVENT(PAPI_L2_DCA),
PAPI_ADD_EVENT(PAPI_L2_DCM),
#endif
};
/* Accumulated counter values, one slot per entry of papi_events.
 * MutatorCounters is filled by papi_stop_mutator_count and GCCounters by
 * papi_stop_gc_count; both are read back through papi_counter. */
long_long MutatorCounters[N_PAPI_EVENTS];
long_long GCCounters[N_PAPI_EVENTS];
/* Look up the value recorded for one event.
 *
 * values - counter array laid out in papi_events order
 * event  - event code to search for in papi_events
 *
 * Returns the slot of `values` at the index where `event` appears in
 * papi_events. If the event is not in the table, a message is written
 * with debugBelch and 0 is returned.
 */
long_long
papi_counter(long_long values[],int event)
{
    int slot = 0;

    while (slot < N_PAPI_EVENTS) {
        if (event == papi_events[slot].event_code) {
            return values[slot];
        }
        slot++;
    }

    /* Fell off the end of the table: the caller asked for an event
     * that is not being counted. */
    debugBelch("Event %d is not part of event set\n",event);
    return 0;
}
/* This function reports counters for GC and mutator.
 *
 * PapiCounters is a counter array in papi_events order (values are
 * looked up through papi_counter). One line is printed per raw counter
 * and per percentage via the PAPI_REPORT / PAPI_REPORT_PCT macros.
 * Which groups are printed is fixed at compile time by the PAPI_COUNT_*
 * switches, matching the groups present in papi_events.
 */
void
papi_report(long_long PapiCounters[])
{
/* Scratch buffer that PAPI_REPORT formats each value into; the macro
 * refers to it by the name `temp`. */
char temp[BIG_STRING_LEN];
/* I need to improve formatting aesthetics */
/* Total cycles are always reported */
PAPI_REPORT(PapiCounters,PAPI_TOT_CYC);
#if PAPI_COUNT_BRANCHES
/* Branches, mispredictions, and both miss kinds relative to branches */
PAPI_REPORT(PapiCounters,FR_BR);
PAPI_REPORT(PapiCounters,FR_BR_MIS);
PAPI_REPORT_PCT(PapiCounters,FR_BR_MIS,FR_BR);
PAPI_REPORT_PCT(PapiCounters,FR_BR_MISCOMPARE,FR_BR);
#endif
#if PAPI_COUNT_STALLS
/* Each stall counter, raw and relative to total cycles */
PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_BR);
PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_BR,PAPI_TOT_CYC);
PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS);
PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS,PAPI_TOT_CYC);
#endif
#if PAPI_COUNT_DCACHE1_MISSES
/* L1 data cache accesses, misses, and misses relative to accesses */
PAPI_REPORT(PapiCounters,PAPI_L1_DCA);
PAPI_REPORT(PapiCounters,PAPI_L1_DCM);
PAPI_REPORT_PCT(PapiCounters,PAPI_L1_DCM,PAPI_L1_DCA);
#endif
#if PAPI_COUNT_DCACHE2_MISSES
/* L2 data cache accesses, misses, and misses relative to accesses */
PAPI_REPORT(PapiCounters,PAPI_L2_DCA);
PAPI_REPORT(PapiCounters,PAPI_L2_DCM);
PAPI_REPORT_PCT(PapiCounters,PAPI_L2_DCM,PAPI_L2_DCA);
#endif
}
/* Add the events of papi_events into an event set */
void
papi_add_events(int EventSet)
{
int i;
for(i=0;i<N_PAPI_EVENTS;i++) {
if((papi_error=PAPI_add_event(EventSet,
papi_events[i].event_code))
!= PAPI_OK)
debugBelch("Failed adding %s to event set with error code %d\n",
papi_events[i].event_name,papi_error);
}
}
/* Create the mutator and GC event sets and populate both with the
 * events listed in papi_events.
 * Failures are only reported (through PAPI_CHECK / papi_add_events);
 * nothing is returned to the caller. */
void
papi_init_eventsets(void)
{
/* One event set for the mutator and another for the GC */
PAPI_CHECK( PAPI_create_eventset(&MutatorEvents));
PAPI_CHECK( PAPI_create_eventset(&GCEvents));
/* Both sets contain the same events */
papi_add_events(MutatorEvents);
papi_add_events(GCEvents);
}
/* Start counting on the mutator event set.
 * A PAPI failure is reported by PAPI_CHECK and otherwise ignored. */
void
papi_start_mutator_count(void)
{
PAPI_CHECK( PAPI_start(MutatorEvents));
}
/* Fold the current mutator counts into MutatorCounters, then stop the
 * mutator event set.
 * PAPI failures are reported by PAPI_CHECK and otherwise ignored. */
void
papi_stop_mutator_count(void)
{
/* Accumulate first so the totals in MutatorCounters grow across runs
 * (per the PAPI documentation, PAPI_accum adds into the array). */
PAPI_CHECK( PAPI_accum(MutatorEvents,MutatorCounters));
/* NULL: the final values are not wanted, they were just accumulated */
PAPI_CHECK( PAPI_stop(MutatorEvents,NULL));
}
/* Start counting on the GC event set.
 * A PAPI failure is reported by PAPI_CHECK and otherwise ignored. */
void
papi_start_gc_count(void)
{
PAPI_CHECK( PAPI_start(GCEvents));
}
/* Fold the current GC counts into GCCounters, then stop the GC event
 * set.
 * PAPI failures are reported by PAPI_CHECK and otherwise ignored. */
void
papi_stop_gc_count(void)
{
/* Accumulate first so the totals in GCCounters grow across collections
 * (per the PAPI documentation, PAPI_accum adds into the array). */
PAPI_CHECK( PAPI_accum(GCEvents,GCCounters));
/* NULL: the final values are not wanted, they were just accumulated */
PAPI_CHECK( PAPI_stop(GCEvents,NULL));
}
#include <papi.h>
/* Check the error value of a PAPI call, reporting an error, if needed.
 *
 * CALL is evaluated exactly once and its result is stored in the global
 * papi_error. When the result differs from PAPI_OK a message naming the
 * source file, line and error code is written with debugBelch; execution
 * then continues normally.
 *
 * The body is wrapped in do { ... } while (0) so the macro acts as one
 * statement. With a bare `if`, a use such as
 *     if (cond) PAPI_CHECK(f()); else g();
 * would either fail to compile or bind the `else` to the macro's own
 * `if`.
 */
#define PAPI_CHECK(CALL) \
    do { \
        if((papi_error=(CALL)) != PAPI_OK) { \
            debugBelch("PAPI function failed in module %s at line %d with error code %d\n", \
                       __FILE__,__LINE__,papi_error); \
        } \
    } while (0)
/* Result of the most recent PAPI call checked by PAPI_CHECK */
extern int papi_error;
/* While PAPI reporting is going on this flag is on */
extern int papi_is_reporting;
/* Event sets and counter arrays for GC and mutator */
extern int MutatorEvents;
extern int GCEvents;
extern long_long MutatorCounters[];
extern long_long GCCounters[];
/* Return the entry of `values` that corresponds to `event`, or 0 (after
 * a debug message) when the event is not being counted. */
long_long papi_counter(long_long values[],int event);
/* Print the counters held in PapiCounters via statsPrintf. */
void papi_report(long_long PapiCounters[]);
/* Add every configured event to the event set EventSet. */
void papi_add_events(int EventSet);
/* Create and populate the mutator and GC event sets. */
void papi_init_eventsets(void);
/* Start the mutator event set / accumulate into MutatorCounters and
 * stop it. */
void papi_start_mutator_count(void);
void papi_stop_mutator_count(void);
/* Start the GC event set / accumulate into GCCounters and stop it. */
void papi_start_gc_count(void);
void papi_stop_gc_count(void);
......@@ -6,7 +6,9 @@
*
* ---------------------------------------------------------------------------*/
#include "PosixSource.h"
// PAPI uses caddr_t, which is not POSIX
// #include "PosixSource.h"
#include "Rts.h"
#include "RtsAPI.h"
#include "RtsUtils.h"
......@@ -67,6 +69,10 @@
#include <signal.h>
#endif
#if USE_PAPI
#include "Papi.h"
#endif
// Count of how many outstanding hs_init()s there have been.
static int hs_init_count = 0;
......@@ -152,7 +158,23 @@ hs_init(int *argc, char **argv[])
argv++; argc--;
#endif
/* Initialise the performance tracking library */
#ifdef USE_PAPI
/* Must fix to abort gracefully */
if(PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT)
exit(1);
#ifdef THREADED_RTS
{
int err;
if ((err = PAPI_thread_init(osThreadId)) < 0) {
barf("PAPI_thread_init: %d",err);
}
}
#endif
#endif
/* Set the RTS flags to default values. */
initRtsFlagsDefaults();
/* Call the user hook to reset defaults, if present */
......
......@@ -17,6 +17,10 @@
#include "Profiling.h"
#include "GetTime.h"
#if USE_PAPI
#include "Papi.h"
#endif
/* huh? */
#define BIG_STRING_LEN 512
......@@ -64,9 +68,6 @@ static lnat GC_start_faults = 0, GC_end_faults = 0;
static Ticks *GC_coll_times;
static void statsPrintf( char *s, ... )
GNUC3_ATTRIBUTE(format (printf, 1, 2));
static void statsFlush( void );
static void statsClose( void );
......@@ -170,6 +171,18 @@ stat_endInit(void)
} else {
InitElapsedTime = elapsed - ElapsedTimeStart;
}
#if USE_PAPI
papi_init_eventsets();
/* We start counting events for the mutator
* when garbage collection starts
* we switch to the GC event set. */
papi_start_mutator_count();
/* This flag is needed to avoid counting the last GC */
papi_is_reporting = 1;
#endif
}
/* -----------------------------------------------------------------------------
......@@ -192,6 +205,16 @@ stat_startExit(void)
MutUserTime = user - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime;
if (MutUserTime < 0) { MutUserTime = 0; }
#if USE_PAPI
/* We stop counting mutator events
* GC events are not being counted at this point */
papi_stop_mutator_count();
/* This flag is needed, because GC is run once more after this function */
papi_is_reporting = 0;
#endif
}
void
......@@ -249,6 +272,15 @@ stat_startGC(void)
GC_start_faults = getPageFaults();
}
}
#if USE_PAPI
if(papi_is_reporting) {
/* Switch to counting GC events */
papi_stop_mutator_count();
papi_start_gc_count();
}
#endif
}
/* -----------------------------------------------------------------------------
......@@ -316,6 +348,14 @@ stat_endGC (lnat alloc, lnat live, lnat copied,
debugBelch("\b\b\b \b\b\b");
rub_bell = 0;
}
#if USE_PAPI
if(papi_is_reporting) {
/* Switch to counting mutator events */
papi_stop_gc_count();
papi_start_mutator_count();
}
#endif
}
/* -----------------------------------------------------------------------------
......@@ -517,6 +557,16 @@ stat_exit(int alloc)
TICK_TO_DBL(time - GC_tot_time -
PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime) * 100
/ TICK_TO_DBL(etime));
#if USE_PAPI
/* PAPI reporting, should put somewhere else?
* Note that the cycles are counted _after_ the initialization of the RTS -- AR */
statsPrintf(" -- CPU Mutator counters --\n");
papi_report(MutatorCounters);
statsPrintf("\n -- CPU GC counters --\n");
papi_report(GCCounters);
#endif
}
if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
......@@ -606,7 +656,7 @@ extern HsInt64 getAllocations( void )
Dumping stuff in the stats file, or via the debug message interface
-------------------------------------------------------------------------- */
static void
void
statsPrintf( char *s, ... )
{
FILE *sf = RtsFlags.GcFlags.statsFile;
......
......@@ -54,4 +54,9 @@ HsInt64 getAllocations( void );
Ticks stat_getElapsedGCTime(void);
Ticks stat_getElapsedTime(void);
/* Only exported for Papi.c */
void statsPrintf( char *s, ... )
GNUC3_ATTRIBUTE(format (printf, 1, 2));
#endif /* STATS_H */
......@@ -53,6 +53,9 @@ extra-libraries: "m" /* for ldexp() */
,"mingwex"
# endif
#endif
#if USE_PAPI
, "papi"
#endif
#ifdef INSTALLING
include-dirs: INCLUDE_DIR
......
......@@ -32,6 +32,10 @@
# include <sys/times.h>
#endif
#ifdef USE_PAPI
# include <papi.h>
#endif
#if ! ((defined(HAVE_GETRUSAGE) && !irix_HOST_OS) || defined(HAVE_TIMES))
#error No implementation for getProcessCPUTime() available.
#endif
......@@ -68,9 +72,17 @@ void getProcessTimes(Ticks *user, Ticks *elapsed)
/* Return the CPU time consumed so far, in Ticks.
 *
 * In a non-threaded build with USE_PAPI enabled the value comes from
 * PAPI_get_virt_usec(), converted from microseconds to ticks; a negative
 * result is passed to barf. Otherwise the user time reported by
 * getProcessTimes() is returned.
 */
Ticks getProcessCPUTime(void)
{
#if !defined(THREADED_RTS) && USE_PAPI
    long long virt_usec = PAPI_get_virt_usec();

    if (virt_usec < 0) {
        barf("PAPI_get_virt_usec: %lld", virt_usec);
    }
    /* microseconds -> ticks */
    return ((virt_usec * TICKS_PER_SECOND) / 1000000);
#else
    Ticks user_ticks, elapsed_ticks;

    /* Only the user-time component is of interest here */
    getProcessTimes(&user_ticks,&elapsed_ticks);
    return user_ticks;
#endif
}
Ticks getProcessElapsedTime(void)
......@@ -115,7 +127,14 @@ void getProcessTimes(Ticks *user, Ticks *elapsed)
Ticks getThreadCPUTime(void)
{
#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_THREAD_CPUTIME_ID)
#if USE_PAPI
long long usec;
if ((usec = PAPI_get_virt_usec()) < 0) {
barf("PAPI_get_virt_usec: %lld", usec);
}
return ((usec * TICKS_PER_SECOND) / 1000000);
#elif defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_THREAD_CPUTIME_ID)
// clock_gettime() gives us per-thread CPU time. It isn't
// reliable on Linux, but it's the best we have.
struct timespec ts;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment