Commit cd930da1 authored by Duncan Coutts

Change the presentation of parallel GC work balance in +RTS -s

Also rename internal variables to make the names match what they hold.
The parallel GC work balance is calculated using the total amount of
memory copied by all GC threads, and the maximum copied by any
individual thread. You have serial GC when the max is the same as the
total copied, and perfectly balanced GC when total/max == n_caps.

Previously we presented this as the ratio total/max and told users
that the serial value was 1 and the ideal value N, for N caps, e.g.

  Parallel GC work balance: 1.05 (4045071 / 3846774, ideal 2)

The downside of this is that the user always has to keep in mind the
number of cores being used. Our new presentation uses a normalised
scale of 0--1, computed as (total/max - 1) / (n_caps - 1) and shown as
a percentage. 0% means completely serial and 100% is perfect balance,
e.g.

  Parallel GC work balance: 4.56% (serial 0%, perfect 100%)
parent 8536f09c
......@@ -192,7 +192,7 @@ typedef struct _GCStats {
StgWord64 current_bytes_slop;
StgWord64 max_bytes_slop;
StgWord64 peak_megabytes_allocated;
StgWord64 par_avg_bytes_copied;
StgWord64 par_tot_bytes_copied;
StgWord64 par_max_bytes_copied;
StgDouble mutator_cpu_seconds;
StgDouble mutator_wall_seconds;
......@@ -208,7 +208,7 @@ void getGCStats (GCStats *s);
// StgDouble init_wall_seconds;
typedef struct _ParGCStats {
StgWord64 avg_copied;
StgWord64 tot_copied;
StgWord64 max_copied;
} ParGCStats;
void getParGCStats (ParGCStats *s);
......
......@@ -40,7 +40,7 @@ static StgWord64 GC_tot_alloc = 0;
static StgWord64 GC_tot_copied = 0;
static StgWord64 GC_par_max_copied = 0;
static StgWord64 GC_par_avg_copied = 0;
static StgWord64 GC_par_tot_copied = 0;
#ifdef PROFILING
static Time RP_start_time = 0, RP_tot_time = 0; // retainer prof user time
......@@ -140,7 +140,7 @@ initStats0(void)
GC_tot_alloc = 0;
GC_tot_copied = 0;
GC_par_max_copied = 0;
GC_par_avg_copied = 0;
GC_par_tot_copied = 0;
GC_tot_cpu = 0;
#ifdef PROFILING
......@@ -332,7 +332,7 @@ stat_gcWorkerThreadDone (gc_thread *gct STG_UNUSED)
void
stat_endGC (gc_thread *gct,
lnat alloc, lnat live, lnat copied, nat gen,
lnat max_copied, lnat avg_copied, lnat slop)
lnat par_max_copied, lnat par_tot_copied, lnat slop)
{
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
RtsFlags.ProfFlags.doHeapProfile)
......@@ -372,8 +372,8 @@ stat_endGC (gc_thread *gct,
GC_tot_copied += (StgWord64) copied;
GC_tot_alloc += (StgWord64) alloc;
GC_par_max_copied += (StgWord64) max_copied;
GC_par_avg_copied += (StgWord64) avg_copied;
GC_par_max_copied += (StgWord64) par_max_copied;
GC_par_tot_copied += (StgWord64) par_tot_copied;
GC_tot_cpu += gc_cpu;
/* For the moment we calculate both per-HEC and total allocation.
......@@ -642,11 +642,10 @@ stat_exit(int alloc)
}
#if defined(THREADED_RTS)
if (RtsFlags.ParFlags.parGcEnabled) {
statsPrintf("\n Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n",
(double)GC_par_avg_copied / (double)GC_par_max_copied,
(lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
n_capabilities
if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) {
statsPrintf("\n Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1)
/ (n_capabilities - 1)
);
}
#endif
......@@ -913,7 +912,7 @@ extern void getGCStats( GCStats *s )
/* EZY: Being consistent with incremental output, but maybe should also discount init */
s->cpu_seconds = TimeToSecondsDbl(current_cpu);
s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed);
s->par_avg_bytes_copied = GC_par_avg_copied*(StgWord64)sizeof(W_);
s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_);
s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
}
// extern void getTaskStats( TaskStats **s ) {}
......
......@@ -178,7 +178,7 @@ GarbageCollect (rtsBool force_major_gc,
{
bdescr *bd;
generation *gen;
lnat live_blocks, live_words, allocated, max_copied, avg_copied;
lnat live_blocks, live_words, allocated, par_max_copied, par_tot_copied;
#if defined(THREADED_RTS)
gc_thread *saved_gct;
#endif
......@@ -443,8 +443,8 @@ GarbageCollect (rtsBool force_major_gc,
}
copied = 0;
max_copied = 0;
avg_copied = 0;
par_max_copied = 0;
par_tot_copied = 0;
{
nat i;
for (i=0; i < n_gc_threads; i++) {
......@@ -457,13 +457,12 @@ GarbageCollect (rtsBool force_major_gc,
debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
}
copied += gc_threads[i]->copied;
max_copied = stg_max(gc_threads[i]->copied, max_copied);
par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
}
par_tot_copied = copied;
if (n_gc_threads == 1) {
max_copied = 0;
avg_copied = 0;
} else {
avg_copied = copied;
par_max_copied = 0;
par_tot_copied = 0;
}
}
......@@ -740,7 +739,7 @@ GarbageCollect (rtsBool force_major_gc,
// ok, GC over: tell the stats department what happened.
stat_endGC(gct, allocated, live_words,
copied, N, max_copied, avg_copied,
copied, N, par_max_copied, par_tot_copied,
live_blocks * BLOCK_SIZE_W - live_words /* slop */);
// Guess which generation we'll collect *next* time
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment