Commit 0a778ebe authored by Ben Gamari's avatar Ben Gamari 🐢
Browse files

Revert "rts, base: Refactor stats.c to improve --machine-readable report"

This reverts commit 2d4bda2e.
parent 57001d93
......@@ -192,10 +192,6 @@ typedef struct _RTSStats {
// (we use signed values here because due to inaccuracies in timers
// the values can occasionally go slightly negative)
// Total CPU time used by the init phase
Time init_cpu_ns;
// Total elapsed time used by the init phase
Time init_elapsed_ns;
// Total CPU time used by the mutator
Time mutator_cpu_ns;
// Total elapsed time used by the mutator
......@@ -232,8 +228,8 @@ typedef struct _RTSStats {
// The number of times a GC thread has checked for work across all parallel
// GCs
uint64_t any_work;
// The number of times a GC thread has checked for work and found none
// across all parallel GCs
// The number of times a GC thread has checked for work and found none across
// all parallel GCs
uint64_t no_work;
// The number of times a GC thread has iterated it's outer loop across all
// parallel GCs
......
......@@ -83,12 +83,6 @@ data RTSStats = RTSStats {
-- (we use signed values here because due to inaccuracies in timers
-- the values can occasionally go slightly negative)
-- | Total CPU time used by the init phase
-- @since 4.12.0.0
, init_cpu_ns :: RtsTime
-- | Total elapsed time used by the init phase
-- @since 4.12.0.0
, init_elapsed_ns :: RtsTime
-- | Total CPU time used by the mutator
, mutator_cpu_ns :: RtsTime
-- | Total elapsed time used by the mutator
......@@ -181,8 +175,6 @@ getRTSStats = do
(# peek RTSStats, cumulative_par_max_copied_bytes) p
cumulative_par_balanced_copied_bytes <-
(# peek RTSStats, cumulative_par_balanced_copied_bytes) p
init_cpu_ns <- (# peek RTSStats, init_cpu_ns) p
init_elapsed_ns <- (# peek RTSStats, init_elapsed_ns) p
mutator_cpu_ns <- (# peek RTSStats, mutator_cpu_ns) p
mutator_elapsed_ns <- (# peek RTSStats, mutator_elapsed_ns) p
gc_cpu_ns <- (# peek RTSStats, gc_cpu_ns) p
......
......@@ -24,7 +24,6 @@
#include "ThreadPaused.h"
#include "Messages.h"
#include <string.h> // for memset
#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
......@@ -93,9 +92,6 @@ mut_user_time_until( Time t )
return TimeToSecondsDbl(t - stats.gc_cpu_ns);
// heapCensus() time is included in GC_tot_cpu, so we don't need
// to subtract it here.
// TODO: This seems wrong to me. Surely we should be subtracting
// (at least) start_init_cpu?
}
double
......@@ -173,8 +169,6 @@ initStats0(void)
.any_work = 0,
.no_work = 0,
.scav_find_work = 0,
.init_cpu_ns = 0,
.init_elapsed_ns = 0,
.mutator_cpu_ns = 0,
.mutator_elapsed_ns = 0,
.gc_cpu_ns = 0,
......@@ -246,8 +240,6 @@ void
stat_endInit(void)
{
getProcessTimes(&end_init_cpu, &end_init_elapsed);
stats.init_cpu_ns = end_init_cpu - start_init_cpu;
stats.init_elapsed_ns = end_init_elapsed - start_init_elapsed;
}
/* -----------------------------------------------------------------------------
......@@ -427,8 +419,8 @@ stat_endGC (Capability *cap, gc_thread *gct, W_ live, W_ copied, W_ slop,
stats.gc.copied_bytes,
stats.gc.slop_bytes,
/* current loss due to fragmentation */
(mblocks_allocated * BLOCKS_PER_MBLOCK
- n_alloc_blocks) * BLOCK_SIZE,
(mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks)
* BLOCK_SIZE,
par_n_threads,
stats.gc.par_max_copied_bytes,
stats.gc.copied_bytes,
......@@ -523,8 +515,7 @@ stat_endRP(
fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
fprintf(prof_file, "\tMax auxiliary stack size = %u\n", maxStackSize);
#endif
fprintf(prof_file, "\tAverage number of visits per object = %f\n",
averageNumVisit);
fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
}
#endif /* PROFILING */
......@@ -601,67 +592,90 @@ StgInt TOTAL_CALLS=1;
statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
/*
Note [RTS Stats Reporting]
==========================
There are currently three reporting functions:
* report_summary:
Responsible for producing '+RTS -s' output.
Will report internal counters if the RTS flag --internal-counters is
passed. See [Internal Counters Stats]
* report_machine_readable:
Responsible for producing '+RTS -t --machine-readable' output.
* report_one_line:
Responsible for productin '+RTS -t' output
Stats are accumulated into the global variable 'stats' as the program runs, then
in 'stat_exit' we do the following:
* Finalise 'stats'. This involves setting final running times and allocations
that have not yet been accounted for.
* Create a RTSSummaryStats. This contains all data for reports that is not
included in stats (because they do not make sense before the program has
completed) or in a global variable.
* call the appropriate report_* function, passing the newly constructed
RTSSummaryStats.
To ensure that the data in the different reports is kept consistent, the
report_* functions should not do any calculation, excepting unit changes and
formatting. If you need to add a new calculated field, add it to
RTSSummaryStats.
STATIC_INLINE Time get_init_cpu(void) {
return end_init_cpu - start_init_cpu;
}
*/
STATIC_INLINE Time get_init_elapsed(void) {
return end_init_elapsed - start_init_elapsed;
}
static void init_RTSSummaryStats(RTSSummaryStats* sum)
void
stat_exit (void)
{
const size_t sizeof_gc_summary_stats =
RtsFlags.GcFlags.generations * sizeof(GenerationSummaryStats);
memset(sum, 0, sizeof(RTSSummaryStats));
sum->gc_summary_stats =
stgMallocBytes(sizeof_gc_summary_stats,
"alloc_RTSSummaryStats.gc_summary_stats");
memset(sum->gc_summary_stats, 0, sizeof_gc_summary_stats);
}
generation *gen;
Time gc_cpu = 0;
Time gc_elapsed = 0;
Time init_cpu = 0;
Time init_elapsed = 0;
Time mut_cpu = 0;
Time mut_elapsed = 0;
Time exit_cpu = 0;
Time exit_elapsed = 0;
Time exit_gc_cpu = 0;
Time exit_gc_elapsed = 0;
static void free_RTSSummaryStats(RTSSummaryStats * sum)
{
if (!sum) { return; }
if (!sum->gc_summary_stats) {
stgFree(sum->gc_summary_stats);
sum->gc_summary_stats = NULL;
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
char temp[512];
Time tot_cpu;
Time tot_elapsed;
uint32_t g;
getProcessTimes( &tot_cpu, &tot_elapsed );
tot_cpu -= start_init_cpu;
tot_elapsed -= start_init_elapsed;
uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);
// allocated since the last GC
stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;
stats.allocated_bytes = tot_alloc_bytes;
/* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
if (tot_cpu <= 0) tot_cpu = 1;
if (tot_elapsed <= 0) tot_elapsed = 1;
if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
statsPrintf("%9" FMT_Word " %9.9s %9.9s",
(W_)stats.gc.allocated_bytes, "", "");
statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0);
}
}
static void report_summary(const RTSSummaryStats* sum)
{
// We should do no calculation, other than unit changes and formatting, and
// we should not not use any data from outside of globals, sum and stats
// here. See Note [RTS Stats Reporting]
// heapCensus() is called by the GC, so RP and HC time are
// included in the GC stats. We therefore subtract them to
// obtain the actual GC cpu time.
gc_cpu = stats.gc_cpu_ns - PROF_VAL(RP_tot_time + HC_tot_time);
gc_elapsed = stats.gc_elapsed_ns - PROF_VAL(RPe_tot_time + HCe_tot_time);
uint32_t g;
char temp[512];
init_cpu = get_init_cpu();
init_elapsed = get_init_elapsed();
// We do a GC during the EXIT phase. We'll attribute the cost of that
// to GC instead of EXIT, so carefully subtract it from the EXIT time.
exit_gc_cpu = stats.gc_cpu_ns - start_exit_gc_cpu;
exit_gc_elapsed = stats.gc_elapsed_ns - start_exit_gc_elapsed;
exit_cpu = end_exit_cpu - start_exit_cpu - exit_gc_cpu;
exit_elapsed = end_exit_elapsed - start_exit_elapsed - exit_gc_elapsed;
mut_elapsed = start_exit_elapsed - end_init_elapsed -
(gc_elapsed - exit_gc_elapsed) -
PROF_VAL(RPe_tot_time + HCe_tot_time);
mut_cpu = start_exit_cpu - end_init_cpu - (gc_cpu - exit_gc_cpu)
- PROF_VAL(RP_tot_time + HC_tot_time);
if (mut_cpu < 0) { mut_cpu = 0; }
// The subdivision of runtime into INIT/EXIT/GC/MUT is just adding and
// subtracting, so the parts should add up to the total exactly. Note
// that tot_elapsed is captured a tiny bit later than end_exit_elapsed,
// so we don't use it here.
ASSERT(init_elapsed + mut_elapsed + gc_elapsed + exit_elapsed
== end_exit_elapsed - start_init_elapsed);
if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
showStgWord64(stats.allocated_bytes, temp, true/*commas*/);
statsPrintf("%16s bytes allocated in the heap\n", temp);
......@@ -680,96 +694,109 @@ static void report_summary(const RTSSummaryStats* sum)
statsPrintf("%16" FMT_SizeT " MB total memory in use (%"
FMT_SizeT " MB lost due to fragmentation)\n\n",
stats.max_live_bytes / (1024 * 1024),
sum->fragmentation_bytes / (1024 * 1024));
(size_t)(peak_mblocks_allocated * MBLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)),
(size_t)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
/* Print garbage collections in each gen */
statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
const GenerationSummaryStats * gen_stats =
&sum->gc_summary_stats[g];
statsPrintf(" Gen %2d %5d colls"
", %5d par %6.3fs %6.3fs %3.4fs %3.4fs\n",
g, // REVIEWERS: this used to be gen->no
//, this can't ever be different right?
gen_stats->collections,
gen_stats->par_collections,
TimeToSecondsDbl(gen_stats->cpu_ns),
TimeToSecondsDbl(gen_stats->elapsed_ns),
TimeToSecondsDbl(gen_stats->avg_pause_ns),
TimeToSecondsDbl(gen_stats->max_pause_ns));
gen = &generations[g];
statsPrintf(" Gen %2d %5d colls, %5d par %6.3fs %6.3fs %3.4fs %3.4fs\n",
gen->no,
gen->collections,
gen->par_collections,
TimeToSecondsDbl(GC_coll_cpu[g]),
TimeToSecondsDbl(GC_coll_elapsed[g]),
gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections),
TimeToSecondsDbl(GC_coll_max_pause[g]));
}
statsPrintf("\n");
#if defined(THREADED_RTS)
if (RtsFlags.ParFlags.parGcEnabled && sum->work_balance > 0) {
if (RtsFlags.ParFlags.parGcEnabled && stats.par_copied_bytes > 0) {
// See Note [Work Balance]
statsPrintf(" Parallel GC work balance: "
"%.2f%% (serial 0%%, perfect 100%%)\n\n",
sum->work_balance * 100);
statsPrintf("\n Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
100 * (double)stats.cumulative_par_balanced_copied_bytes /
(double)stats.par_copied_bytes);
}
#endif
statsPrintf("\n");
statsPrintf(" TASKS: %d "
"(%d bound, %d peak workers (%d total), using -N%d)\n\n",
taskCount, sum->bound_task_count,
#if defined(THREADED_RTS)
statsPrintf(" TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
taskCount, taskCount - workerCount,
peakWorkerCount, workerCount,
n_capabilities);
statsPrintf(" SPARKS: %" FMT_Word
"(%" FMT_Word " converted, %" FMT_Word " overflowed, %"
FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
sum->sparks_count,
sum->sparks.converted, sum->sparks.overflowed,
sum->sparks.dud, sum->sparks.gcd,
sum->sparks.fizzled);
statsPrintf("\n");
{
uint32_t i;
SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
for (i = 0; i < n_capabilities; i++) {
sparks.created += capabilities[i]->spark_stats.created;
sparks.dud += capabilities[i]->spark_stats.dud;
sparks.overflowed+= capabilities[i]->spark_stats.overflowed;
sparks.converted += capabilities[i]->spark_stats.converted;
sparks.gcd += capabilities[i]->spark_stats.gcd;
sparks.fizzled += capabilities[i]->spark_stats.fizzled;
}
statsPrintf(" SPARKS: %" FMT_Word " (%" FMT_Word " converted, %" FMT_Word " overflowed, %" FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
sparks.created + sparks.dud + sparks.overflowed,
sparks.converted, sparks.overflowed, sparks.dud,
sparks.gcd, sparks.fizzled);
}
#endif
statsPrintf(" INIT time %7.3fs (%7.3fs elapsed)\n",
TimeToSecondsDbl(stats.init_cpu_ns),
TimeToSecondsDbl(stats.init_elapsed_ns));
TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed));
statsPrintf(" MUT time %7.3fs (%7.3fs elapsed)\n",
TimeToSecondsDbl(stats.mutator_cpu_ns),
TimeToSecondsDbl(stats.mutator_elapsed_ns));
TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed));
statsPrintf(" GC time %7.3fs (%7.3fs elapsed)\n",
TimeToSecondsDbl(stats.gc_cpu_ns),
TimeToSecondsDbl(stats.gc_elapsed_ns));
TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
#if defined(PROFILING)
statsPrintf(" RP time %7.3fs (%7.3fs elapsed)\n",
TimeToSecondsDbl(sum->rc_cpu_ns),
TimeToSecondsDbl(sum->rc_elapsed_ns));
TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time));
statsPrintf(" PROF time %7.3fs (%7.3fs elapsed)\n",
TimeToSecondsDbl(sum->hc_cpu_ns),
TimeToSecondsDbl(sum->hc_elapsed_ns));
TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time));
#endif
statsPrintf(" EXIT time %7.3fs (%7.3fs elapsed)\n",
TimeToSecondsDbl(sum->exit_cpu_ns),
TimeToSecondsDbl(sum->exit_elapsed_ns));
TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed));
statsPrintf(" Total time %7.3fs (%7.3fs elapsed)\n\n",
TimeToSecondsDbl(stats.cpu_ns),
TimeToSecondsDbl(stats.elapsed_ns));
TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed));
#if !defined(THREADED_RTS)
statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
sum->gc_cpu_percent * 100,
sum->gc_elapsed_percent * 100);
TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu),
TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed));
#endif
showStgWord64(sum->alloc_rate, temp, true/*commas*/);
if (mut_cpu == 0) {
showStgWord64(0, temp, true/*commas*/);
} else {
showStgWord64(
(StgWord64)((double)stats.allocated_bytes /
TimeToSecondsDbl(mut_cpu)),
temp, true/*commas*/);
}
statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);
statsPrintf(" Productivity %5.1f%% of total user, "
"%.1f%% of total elapsed\n\n",
sum->productivity_cpu_percent * 100,
sum->productivity_elapsed_percent * 100);
statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
TimeToSecondsDbl(tot_cpu - gc_cpu -
PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
/ TimeToSecondsDbl(tot_cpu),
TimeToSecondsDbl(tot_elapsed - gc_elapsed -
PROF_VAL(RPe_tot_time + HCe_tot_time) - init_elapsed) * 100
/ TimeToSecondsDbl(tot_elapsed));
// See Note [Internal Counter Stats] for a description of the
// following counters. If you add a counter here, please remember
// to update the Note.
if (RtsFlags.MiscFlags.internalCounters) {
#if defined(THREADED_RTS) && defined(PROF_SPIN)
uint32_t g;
const int32_t col_width[] = {4, -30, 14, 14};
statsPrintf("Internal Counters:\n");
statsPrintf("%*s" "%*s" "%*s" "%*s" "\n"
......@@ -854,394 +881,51 @@ static void report_summary(const RTSSummaryStats* sum)
statsPrintf("Internal Counters require the threaded RTS");
#endif
}
}
static void report_machine_readable (const RTSSummaryStats * sum)
{
// We should do no calculation, other than unit changes and formatting, and
// we should not not use any data from outside of globals, sum and stats
// here. See Note [RTS Stats Reporting]
uint32_t g;
#define MR_STAT(field_name,format,value) \
statsPrintf(" ,(\"" field_name "\", \"%" format "\")\n", value)
#define MR_STAT_GEN(gen,field_name,format,value) \
statsPrintf(" ,(\"gen_%" FMT_Word32 "_" field_name "\", \"%" \
format "\")\n", g, value)
// These first values are for backwards compatibility.
// Some of these first fields are duplicated with more machine-readable
// names, or to match the name in RtsStats.
// we don't use for the first field helper macro here because the prefix is
// different
statsPrintf(" [(\"%s\", \"%" FMT_Word64 "\")\n", "bytes allocated",
stats.allocated_bytes);
MR_STAT("num_GCs", FMT_Word32, stats.gcs);
MR_STAT("average_bytes_used", FMT_Word64, sum->average_bytes_used);
MR_STAT("max_bytes_used", FMT_Word64, stats.max_live_bytes);
MR_STAT("num_byte_usage_samples", FMT_Word32, stats.major_gcs);
MR_STAT("peak_megabytes_allocated", FMT_SizeT,
stats.max_mem_in_use_bytes / (1024 * 1024));
MR_STAT("init_cpu_seconds", "f", TimeToSecondsDbl(stats.init_cpu_ns));
MR_STAT("init_wall_seconds", "f", TimeToSecondsDbl(stats.init_elapsed_ns));
MR_STAT("mut_cpu_seconds", "f", TimeToSecondsDbl(stats.mutator_cpu_ns));
MR_STAT("mut_wall_seconds", "f",
TimeToSecondsDbl(stats.mutator_elapsed_ns));
MR_STAT("GC_cpu_seconds", "f", TimeToSecondsDbl(stats.gc_cpu_ns));
MR_STAT("GC_wall_seconds", "f", TimeToSecondsDbl(stats.gc_elapsed_ns));
// end backward compatibility
// First, the rest of the times
MR_STAT("exit_cpu_seconds", "f", TimeToSecondsDbl(sum->exit_cpu_ns));
MR_STAT("exit_wall_seconds", "f", TimeToSecondsDbl(sum->exit_elapsed_ns));
#if defined(PROFILING)
MR_STAT("rp_cpu_seconds", "f", TimeToSecondsDbl(sum->rp_cpu_ns));
MR_STAT("rp_wall_seconds", "f", TimeToSecondsDbl(sum->rp_elapsed_ns));
MR_STAT("hc_cpu_seconds", "f", TimeToSecondsDbl(sum->hp_cpu_ns));
MR_STAT("hc_wall_seconds", "f", TimeToSecondsDbl(sum->hp_elapsed_ns));
#endif
MR_STAT("total_cpu_seconds", "f", TimeToSecondsDbl(stats.cpu_ns));
MR_STAT("total_wall_seconds", "f",
TimeToSecondsDbl(stats.elapsed_ns));
// next, the remainder of the fields of RTSStats, except internal counters
// The first two are duplicates of those above, but have more machine
// readable names that match the field names in RTSStats.
// gcs has been done as num_GCs above
MR_STAT("major_gcs", FMT_Word32, stats.major_gcs);
MR_STAT("allocated_bytes", FMT_Word64, stats.allocated_bytes);
MR_STAT("max_live_bytes", FMT_Word64, stats.max_live_bytes);
MR_STAT("max_large_objects_bytes", FMT_Word64,
stats.max_large_objects_bytes);
MR_STAT("max_compact_bytes", FMT_Word64, stats.max_compact_bytes);
MR_STAT("max_slop_bytes", FMT_Word64, stats.max_slop_bytes);
// This duplicates, except for unit, peak_megabytes_allocated above
MR_STAT("max_mem_in_use_bytes", FMT_Word64, stats.max_mem_in_use_bytes);
MR_STAT("cumulative_live_bytes", FMT_Word64, stats.cumulative_live_bytes);
MR_STAT("copied_bytes", FMT_Word64, stats.copied_bytes);
MR_STAT("par_copied_bytes", FMT_Word64, stats.par_copied_bytes);
MR_STAT("cumulative_par_max_copied_bytes", FMT_Word64,
stats.cumulative_par_max_copied_bytes);
MR_STAT("cumulative_par_balanced_copied_bytes", FMT_Word64,
stats.cumulative_par_balanced_copied_bytes);
// next, the computed fields in RTSSummaryStats
#if !defined(THREADED_RTS) // THREADED_RTS
MR_STAT("gc_cpu_percent", "f", sum->gc_cpu_percent);
MR_STAT("gc_wall_percent", "f", sum->gc_cpu_percent);
#endif
MR_STAT("fragmentation_bytes", FMT_SizeT, sum->fragmentation_bytes);
// average_bytes_used is done above
MR_STAT("alloc_rate", FMT_Word64, sum->alloc_rate);
MR_STAT("productivity_cpu_percent", "f", sum->productivity_cpu_percent);
MR_STAT("productivity_wall_percent", "f",
sum->productivity_elapsed_percent);
// next, the THREADED_RTS fields in RTSSummaryStats
#if defined(THREADED_RTS)
MR_STAT("bound_task_count", FMT_Word32, sum->bound_task_count);
MR_STAT("sparks_count", FMT_Word64, sum->sparks_count);
MR_STAT("sparks_converted", FMT_Word64, sum->sparks.converted);
MR_STAT("sparks_overflowed", FMT_Word64, sum->sparks.overflowed);
MR_STAT("sparks_dud ", FMT_Word64, sum->sparks.dud);
MR_STAT("sparks_gcd", FMT_Word64, sum->sparks.gcd);
MR_STAT("sparks_fizzled", FMT_Word64, sum->sparks.fizzled);
MR_STAT("work_balance", "f", sum->work_balance);
// next, globals (other than internal counters)
MR_STAT("n_capabilities", FMT_Word32, n_capabilities);
MR_STAT("task_count", FMT_Word32, taskCount);
MR_STAT("peak_worker_count", FMT_Word32, peakWorkerCount);
MR_STAT("worker_count", FMT_Word32, workerCount);
// next, internal counters
#if defined(PROF_SPIN)
MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
MR_STAT("gc_alloc_block_sync_yield", FMT_Word64,
gc_alloc_block_sync.yield);
MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
MR_STAT("gc_spin_spin", FMT_Word64, stats.gc_spin_spin);
MR_STAT("gc_spin_yield", FMT_Word64, stats.gc_spin_yield);
MR_STAT("mut_spin_spin", FMT_Word64, stats.mut_spin_spin);
MR_STAT("mut_spin_yield", FMT_Word64, stats.mut_spin_yield);
MR_STAT("waitForGcThreads_spin", FMT_Word64, waitForGcThreads_spin);
MR_STAT("waitForGcThreads_yield", FMT_Word64,
waitForGcThreads_yield);
MR_STAT("whitehole_gc_spin", FMT_Word64, whitehole_gc_spin);
MR_STAT("whitehole_lockClosure_spin", FMT_Word64,
whitehole_lockClosure_spin);
MR_STAT("whitehole_lockClosure_yield", FMT_Word64,
whitehole_lockClosure_yield);
MR_STAT("whitehole_executeMessage_spin", FMT_Word64,
whitehole_executeMessage_spin);
MR_STAT("whitehole_threadPaused_spin", FMT_Word64,
whitehole_threadPaused_spin);
MR_STAT("any_work", FMT_Word64,
stats.any_work);
MR_STAT("no_work", FMT_Word64,
stats.no_work);
MR_STAT("scav_find_work", FMT_Word64,
stats.scav_find_work);
#endif // PROF_SPIN
#endif // THREADED_RTS
// finally, per-generation stats. Named as, for example for generation 0,
// gen_0_collections
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
const GenerationSummaryStats* gc_sum = &sum->gc_summary_stats[g];
MR_STAT_GEN(g, "collections", FMT_Word32, gc_sum->collections);
MR_STAT_GEN(g, "par_collections", FMT_Word32, gc_sum->par_collections);
MR_STAT_GEN(g, "cpu_seconds", "f", TimeToSecondsDbl(gc_sum->cpu_ns));
MR_STAT_GEN(g, "wall_seconds", "f",
TimeToSecondsDbl(gc_sum->elapsed_ns));
MR_STAT_GEN(g, "max_pause_seconds", "f",
TimeToSecondsDbl(gc_sum->max_pause_ns));
MR_STAT_GEN(g, "avg_pause_seconds", "f",
TimeToSecondsDbl(gc_sum->avg_pause_ns));
#if defined(THREADED_RTS) && defined(PROF_SPIN)
MR_STAT_GEN(g, "sync_spin", FMT_Word64, gc_sum->sync_spin);
MR_STAT_GEN(g, "sync_yield", FMT_Word64, gc_sum->sync_yield);
#endif
}
statsPrintf(" ]\n");
}
static void report_one_line(const RTSSummaryStats * sum)
{
// We should do no calculation, other than unit changes and formatting, and
// we should not not use any data from outside of globals, sum and stats
// here. See Note [RTS Stats Reporting]
/* print the long long separately to avoid bugginess on mingwin (2001-07-02,
mingw-0.5) */
statsPrintf("<<ghc: %" FMT_Word64 " bytes, "
if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
char *fmt;
if (RtsFlags.MiscFlags.machineReadable) {
fmt =
" [(\"bytes allocated\", \"%" FMT_Word64 "\")\n"
" ,(\"num_GCs\", \"%" FMT_Word32 "\")\n"
" ,(\"average_bytes_used\", \"%" FMT_Word64 "\")\n"
" ,(\"max_bytes_used\", \"%" FMT_Word64 "\")\n"
" ,(\"num_byte_usage_samples\", \"%" FMT_Word32 "\")\n"
" ,(\"peak_megabytes_allocated\", \"%" FMT_Word64 "\")\n"
" ,(\"init_cpu_seconds\", \"%.3f\")\n"
" ,(\"init_wall_seconds\", \"%.3f\")\n"
" ,(\"mutator_cpu_seconds\", \"%.3f\")\n"
" ,(\"mutator_wall_seconds\", \"%.3f\")\n"
" ,(\"GC_cpu_seconds\", \"%.3f\")\n"
" ,(\"GC_wall_seconds\", \"%.3f\")\n"
" ]\n";
}
else {
fmt =
"<<ghc: %" FMT_Word64 " bytes, "
"%" FMT_Word32 " GCs, "
"%" FMT_Word64 "/%" FMT_Word64 " avg/max bytes residency "
"(%" FMT_Word32 " samples), "
"%" FMT_Word64 "/%" FMT_Word64 " avg/max bytes residency (%" FMT_Word32 " samples), "
"%" FMT_Word64 "M in use, "
"%.3f INIT (%.3f elapsed), "
"%.3f MUT (%.3f elapsed), "
"%.3f GC (%.3f elapsed) :ghc>>\n",
"%.3f GC (%.3f elapsed) :ghc>>\n";
}
/* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
statsPrintf(fmt,
stats.allocated_bytes,
stats.gcs,
sum->average_bytes_used,
(uint64_t)
(stats.major_gcs == 0 ? 0 :