improvements to +RTS -s output

- count and report number of parallel collections
- calculate bytes scanned in addition to bytes copied per thread
- calculate "work balance factor"
- tidy up the formatting a bit
parent 96195367
......@@ -113,6 +113,7 @@ typedef struct generation_ {
// stats information
unsigned int collections;
unsigned int par_collections;
unsigned int failed_promotions;
// temporary use during GC:
......
......@@ -45,6 +45,9 @@ static Ticks ExitElapsedTime = 0;
static ullong GC_tot_alloc = 0;
static ullong GC_tot_copied = 0;
static ullong GC_par_max_copied = 0;
static ullong GC_par_avg_copied = 0;
static Ticks GC_start_time = 0, GC_tot_time = 0; /* User GC Time */
static Ticks GCe_start_time = 0, GCe_tot_time = 0; /* Elapsed GC time */
......@@ -294,7 +297,8 @@ stat_startGC(void)
-------------------------------------------------------------------------- */
void
stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen)
stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen,
lnat max_copied, lnat avg_copied)
{
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
Ticks time, etime, gc_time, gc_etime;
......@@ -327,6 +331,8 @@ stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen)
GC_tot_copied += (ullong) copied;
GC_tot_alloc += (ullong) alloc;
GC_par_max_copied += (ullong) max_copied;
GC_par_avg_copied += (ullong) avg_copied;
GC_tot_time += gc_time;
GCe_tot_time += gc_etime;
......@@ -527,30 +533,41 @@ stat_exit(int alloc)
if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
ullong_format_string(GC_tot_alloc*sizeof(W_),
temp, rtsTrue/*commas*/);
statsPrintf("%11s bytes allocated in the heap\n", temp);
statsPrintf("%16s bytes allocated in the heap\n", temp);
ullong_format_string(GC_tot_copied*sizeof(W_),
temp, rtsTrue/*commas*/);
statsPrintf("%11s bytes copied during GC\n", temp);
statsPrintf("%16s bytes copied during GC\n", temp);
if ( ResidencySamples > 0 ) {
ullong_format_string(MaxResidency*sizeof(W_),
temp, rtsTrue/*commas*/);
statsPrintf("%11s bytes maximum residency (%ld sample(s))\n",
statsPrintf("%16s bytes maximum residency (%ld sample(s))\n",
temp, ResidencySamples);
}
statsPrintf("\n");
statsPrintf("%16ld MB total memory in use\n\n",
mblocks_allocated * MBLOCK_SIZE / (1024 * 1024));
/* Print garbage collections in each gen */
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
statsPrintf("%11d collections in generation %d, %6.2fs, %6.2fs elapsed\n",
generations[g].collections, g,
statsPrintf(" Generation %d: %5d collections, %5d parallel, %5.2fs, %5.2fs elapsed\n",
g, generations[g].collections,
generations[g].par_collections,
TICK_TO_DBL(GC_coll_times[g]),
TICK_TO_DBL(GC_coll_etimes[g]));
}
statsPrintf("\n%11ld MB total memory in use\n\n",
mblocks_allocated * MBLOCK_SIZE / (1024 * 1024));
#if defined(THREADED_RTS)
if (RtsFlags.ParFlags.gcThreads > 1) {
statsPrintf("\n Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n",
(double)GC_par_avg_copied / (double)GC_par_max_copied,
(lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
RtsFlags.ParFlags.gcThreads
);
}
#endif
statsPrintf("\n");
#if defined(THREADED_RTS)
{
......
......@@ -16,7 +16,8 @@ void stat_endInit(void);
void stat_startGC(void);
void stat_endGC (lnat alloc, lnat live,
lnat copied, lnat gen);
lnat copied, lnat gen,
lnat max_copied, lnat avg_copied);
#ifdef PROFILING
void stat_startRP(void);
......
......@@ -181,7 +181,7 @@ GarbageCollect ( rtsBool force_major_gc )
{
bdescr *bd;
step *stp;
lnat live, allocated;
lnat live, allocated, max_copied, avg_copied;
lnat oldgen_saved_blocks = 0;
gc_thread *saved_gct;
nat g, s, t, n;
......@@ -471,24 +471,35 @@ GarbageCollect ( rtsBool force_major_gc )
/* run through all the generations/steps and tidy up
*/
copied = 0;
max_copied = 0;
avg_copied = 0;
{
nat i;
for (i=0; i < n_gc_threads; i++) {
if (n_gc_threads > 1) {
trace(TRACE_gc,"thread %d:", i);
trace(TRACE_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
trace(TRACE_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
trace(TRACE_gc," any_work %ld", gc_threads[i]->any_work);
trace(TRACE_gc," no_work %ld", gc_threads[i]->no_work);
trace(TRACE_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
}
copied += gc_threads[i]->copied;
max_copied = stg_max(gc_threads[i]->copied, max_copied);
}
if (n_gc_threads == 1) {
max_copied = 0;
avg_copied = 0;
} else {
avg_copied = copied;
}
}
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
if (g <= N) {
if (g == N) {
generations[g].collections++; // for stats
if (n_gc_threads > 1) generations[g].par_collections++;
}
// Count the mutable list as bytes "copied" for the purposes of
......@@ -698,7 +709,7 @@ GarbageCollect ( rtsBool force_major_gc )
#endif
// ok, GC over: tell the stats department what happened.
stat_endGC(allocated, live, copied, N);
stat_endGC(allocated, live, copied, N, max_copied, avg_copied);
#if defined(RTS_USER_SIGNALS)
if (RtsFlags.MiscFlags.install_signal_handlers) {
......@@ -1399,10 +1410,6 @@ init_uncollected_gen (nat g, nat threads)
// from the current end point.
ws->scan_bd = ws->todo_bd;
ws->scan_bd->u.scan = ws->scan_bd->free;
// subtract the contents of this block from the stats,
// because we'll count the whole block later.
copied -= ws->scan_bd->free - ws->scan_bd->start;
}
else
{
......@@ -1440,10 +1447,10 @@ init_gc_thread (gc_thread *t)
t->eager_promotion = rtsTrue;
t->thunk_selector_depth = 0;
t->copied = 0;
t->scanned = 0;
t->any_work = 0;
t->no_work = 0;
t->scav_find_work = 0;
}
/* -----------------------------------------------------------------------------
......
......@@ -126,7 +126,7 @@ typedef struct gc_thread_ {
StgClosure* static_objects; // live static objects
StgClosure* scavenged_static_objects; // static objects scavenged so far
lnat gc_count; // number of gc's this thread has done
lnat gc_count; // number of GCs this thread has done
// --------------------
// evacuate flags
......@@ -157,6 +157,7 @@ typedef struct gc_thread_ {
// stats
lnat copied;
lnat scanned;
lnat any_work;
lnat no_work;
lnat scav_find_work;
......
......@@ -115,6 +115,7 @@ todo_block_full (nat size, step_workspace *ws)
ASSERT(bd->link == NULL);
ASSERT(bd->step == ws->step);
gct->copied += ws->todo_free - bd->free;
bd->free = ws->todo_free;
// If the global list is not empty, or there's not much work in
......
......@@ -1378,6 +1378,9 @@ scavenge_large (step_workspace *ws)
recordMutableGen_GC((StgClosure *)p, ws->step->gen);
}
}
// stats
gct->scanned += closure_sizeW((StgClosure*)p);
}
}
......@@ -1428,12 +1431,14 @@ loop:
continue;
}
ws = &gct->steps[s];
if (ws->todo_bd != NULL)
{
ws->todo_bd->free = ws->todo_free;
bd = ws->todo_bd;
gct->copied += ws->todo_free - bd->free;
bd->free = ws->todo_free;
}
// If we have a todo block and no scan block, start
// scanning the todo block.
if (ws->scan_bd == NULL && ws->todo_bd != NULL)
......
......@@ -444,15 +444,15 @@ scavenge_block (bdescr *bd)
}
if (p > bd->free) {
gct->copied += ws->todo_free - bd->free;
bd->free = p;
}
debugTrace(DEBUG_gc, " scavenged %ld bytes",
(unsigned long)((bd->free - bd->u.scan) * sizeof(W_)));
// update stats: this is a block that has been copied & scavenged
gct->copied += bd->free - bd->u.scan;
// update stats: this is a block that has been scavenged
gct->scanned += bd->free - bd->u.scan;
bd->u.scan = bd->free;
}
......
......@@ -166,6 +166,7 @@ initStorage( void )
gen->no = g;
gen->mut_list = allocBlock();
gen->collections = 0;
gen->par_collections = 0;
gen->failed_promotions = 0;
gen->max_blocks = 0;
}
......
......@@ -388,6 +388,7 @@ sub process_stats_file {
local($count) = 0;
$GCWork = 0;
$GCs = 0;
while (<STATS>) {
if (! /Gen:\s+0/ && /^\s*\d+\s+\d+\s+(\d+)\s+\d+\.\d+/ ) {
$max_live = $1 if $max_live < $1;
......@@ -407,7 +408,7 @@ sub process_stats_file {
# $MaxResidency = $1; $ResidencySamples = $2;
# }
$GCs = $1 if /^\s*([0-9,]+) collections? in generation 0/;
$GCs += $1 if /^\s*Generation\s*\d+:\s*([0-9,]+) collections/;
if ( /^\s+([0-9]+)\s+M[Bb] total memory/ ) {
$TotMem = $1;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment