Commit 1fb38442 authored by Simon Marlow

Refactoring and tidy up

This is a port of some of the changes from my private local-GC branch
(which is still in darcs; I haven't converted it to git yet).  There
are a couple of small functional differences in the GC stats: first,
per-thread GC timings should now be more accurate, and second, we now
report average and maximum pause times, e.g. from minimax +RTS -N8 -s:

                                    Tot time (elapsed)  Avg pause  Max pause
  Gen  0      2755 colls,  2754 par   13.16s    0.93s     0.0003s    0.0150s
  Gen  1       769 colls,   769 par    3.71s    0.26s     0.0003s    0.0059s
parent 7bf5bf37
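
The new per-generation summary derives the "Avg pause" column from the accumulated
elapsed GC time divided by the number of collections in that generation, and tracks
"Max pause" separately as the longest single collection (see the stat_exit hunk near
the end of this diff).  The standalone sketch below shows only that arithmetic; the
struct, names, and figures are invented for illustration and are not RTS code:

    #include <stdio.h>

    /* Illustrative stand-in for per-generation GC totals. */
    typedef struct {
        int    collections;   /* collections performed in this generation */
        double elapsed;       /* total elapsed GC time, in seconds        */
        double max_pause;     /* longest single collection, in seconds    */
    } GenStats;

    int main(void)
    {
        GenStats gens[2] = {
            { 2755, 0.93, 0.0150 },   /* gen 0, figures from the example above */
            {  769, 0.26, 0.0059 },   /* gen 1 */
        };
        printf("         colls   Tot elapsed  Avg pause  Max pause\n");
        for (int g = 0; g < 2; g++) {
            double avg = gens[g].collections == 0
                       ? 0.0 : gens[g].elapsed / gens[g].collections;
            printf("  Gen %d  %5d  %10.2fs  %8.4fs  %8.4fs\n",
                   g, gens[g].collections, gens[g].elapsed,
                   avg, gens[g].max_pause);
        }
        return 0;
    }
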
@@ -842,11 +842,9 @@ freeCapabilities (void)
------------------------------------------------------------------------ */
void
markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta,
rtsBool no_mark_sparks USED_IF_THREADS)
markCapability (evac_fn evac, void *user, Capability *cap,
rtsBool no_mark_sparks USED_IF_THREADS)
{
nat i;
Capability *cap;
InCall *incall;
// Each GC thread is responsible for following roots from the
@@ -854,39 +852,31 @@ markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta,
// or fewer Capabilities as GC threads, but just in case there
// are more, we mark every Capability whose number is the GC
// thread's index plus a multiple of the number of GC threads.
for (i = i0; i < n_capabilities; i += delta) {
cap = &capabilities[i];
evac(user, (StgClosure **)(void *)&cap->run_queue_hd);
evac(user, (StgClosure **)(void *)&cap->run_queue_tl);
#if defined(THREADED_RTS)
evac(user, (StgClosure **)(void *)&cap->inbox);
#endif
for (incall = cap->suspended_ccalls; incall != NULL;
incall=incall->next) {
evac(user, (StgClosure **)(void *)&incall->suspended_tso);
}
#if defined(THREADED_RTS)
if (!no_mark_sparks) {
traverseSparkQueue (evac, user, cap);
}
#endif
#if !defined(THREADED_RTS)
evac(user, (StgClosure **)(void *)&blocked_queue_hd);
evac(user, (StgClosure **)(void *)&blocked_queue_tl);
evac(user, (StgClosure **)(void *)&sleeping_queue);
#endif
// Free STM structures for this Capability
stmPreGCHook(cap);
}
void
markCapabilities (evac_fn evac, void *user)
{
markSomeCapabilities(evac, user, 0, 1, rtsFalse);
nat n;
for (n = 0; n < n_capabilities; n++) {
markCapability(evac, user, &capabilities[n], rtsFalse);
}
}
/* -----------------------------------------------------------------------------
Messages
-------------------------------------------------------------------------- */
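
In the old scheme each GC thread walked a strided subset of Capabilities through
markSomeCapabilities(evac, user, i0, delta, ...); after this change the striding
decision moves to the caller and markCapability marks exactly one Capability, with
markCapabilities looping over all of them for the serial path.  A rough standalone
illustration of the two iteration shapes, using a plain array instead of RTS types
(purely illustrative, not the GC's actual call sites):

    #include <stdio.h>

    #define N_ITEMS 8   /* stands in for n_capabilities */

    static void mark_one(int i) { printf("marking %d\n", i); }

    /* Old shape: every worker walks the whole array with a stride,
     * starting at its own index (i0) and stepping by the worker count. */
    static void mark_some(int i0, int delta)
    {
        for (int i = i0; i < N_ITEMS; i += delta)
            mark_one(i);
    }

    /* New shape: the caller decides which items a worker owns and marks
     * each one individually, mirroring markCapability(evac, user, cap, ...). */
    static void mark_all(void)
    {
        for (int n = 0; n < N_ITEMS; n++)
            mark_one(n);
    }

    int main(void)
    {
        mark_some(1, 4);   /* roughly what GC worker 1 of 4 used to do */
        mark_all();        /* serial path, like the new markCapabilities() */
        return 0;
    }
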
@@ -278,9 +278,11 @@ INLINE_HEADER void contextSwitchCapability(Capability *cap);
void freeCapabilities (void);
// For the GC:
void markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta,
rtsBool no_mark_sparks);
void markCapability (evac_fn evac, void *user, Capability *cap,
rtsBool no_mark_sparks USED_IF_THREADS);
void markCapabilities (evac_fn evac, void *user);
void traverseSparkQueues (evac_fn evac, void *user);
/* -----------------------------------------------------------------------------
@@ -879,17 +879,12 @@ static StgBool check_read_only(StgTRecHeader *trec STG_UNUSED) {
/************************************************************************/
void stmPreGCHook() {
nat i;
void stmPreGCHook (Capability *cap) {
lock_stm(NO_TREC);
TRACE("stmPreGCHook");
for (i = 0; i < n_capabilities; i ++) {
Capability *cap = &capabilities[i];
cap -> free_tvar_watch_queues = END_STM_WATCH_QUEUE;
cap -> free_trec_chunks = END_STM_CHUNK_LIST;
cap -> free_trec_headers = NO_TREC;
}
cap->free_tvar_watch_queues = END_STM_WATCH_QUEUE;
cap->free_trec_chunks = END_STM_CHUNK_LIST;
cap->free_trec_headers = NO_TREC;
unlock_stm(NO_TREC);
}
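
stmPreGCHook used to iterate over every Capability itself; it now resets the STM
free lists of just the Capability it is handed, and is invoked per Capability from
markCapability (see the Capability.c hunk above).  A minimal standalone model of
that per-capability reset, using simplified stand-in types rather than the real
StgTRecHeader machinery:

    #include <stddef.h>
    #include <stdio.h>

    /* Simplified stand-ins for the per-Capability STM free lists. */
    typedef struct Capability_ {
        void *free_tvar_watch_queues;
        void *free_trec_chunks;
        void *free_trec_headers;
    } Capability;

    /* New shape: reset one Capability's lists; the caller supplies the cap. */
    static void stmPreGCHook(Capability *cap)
    {
        cap->free_tvar_watch_queues = NULL;   /* END_STM_WATCH_QUEUE in the RTS */
        cap->free_trec_chunks       = NULL;   /* END_STM_CHUNK_LIST             */
        cap->free_trec_headers      = NULL;   /* NO_TREC                        */
    }

    int main(void)
    {
        Capability caps[4] = {{ NULL, NULL, NULL }};
        /* Each capability is reset as it is marked, instead of one global
         * loop hidden inside stmPreGCHook itself. */
        for (int n = 0; n < 4; n++)
            stmPreGCHook(&caps[n]);
        printf("reset %d capabilities\n", 4);
        return 0;
    }
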
@@ -48,7 +48,7 @@
--------------
*/
void stmPreGCHook(void);
void stmPreGCHook(Capability *cap);
/*----------------------------------------------------------------------
@@ -2069,6 +2069,16 @@ freeScheduler( void )
#endif
}
void markScheduler (evac_fn evac USED_IF_NOT_THREADS,
void *user USED_IF_NOT_THREADS)
{
#if !defined(THREADED_RTS)
evac(user, (StgClosure **)(void *)&blocked_queue_hd);
evac(user, (StgClosure **)(void *)&blocked_queue_tl);
evac(user, (StgClosure **)(void *)&sleeping_queue);
#endif
}
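
The non-threaded RTS keeps blocked_queue_hd, blocked_queue_tl and sleeping_queue as
scheduler globals; marking them moves out of the Capability-marking code (see the
markSomeCapabilities hunk above) into this new markScheduler hook, giving the GC a
single entry point for scheduler-owned roots.  A tiny self-contained model of an
evac-style root walker over a few globals, with a simplified callback type rather
than the real evac_fn/StgClosure signature:

    #include <stdio.h>

    /* Simplified stand-in for the RTS evac_fn callback. */
    typedef void (*evac_fn)(void *user, void **root);

    /* Stand-ins for the non-threaded scheduler's global queues. */
    static void *blocked_queue_hd, *blocked_queue_tl, *sleeping_queue;

    /* One hook that hands every scheduler-owned root to the collector,
     * in the spirit of the new markScheduler(). */
    static void markScheduler(evac_fn evac, void *user)
    {
        evac(user, &blocked_queue_hd);
        evac(user, &blocked_queue_tl);
        evac(user, &sleeping_queue);
    }

    static void count_root(void *user, void **root)
    {
        (void)root;
        (*(int *)user)++;
    }

    int main(void)
    {
        int n_roots = 0;
        markScheduler(count_root, &n_roots);
        printf("visited %d scheduler roots\n", n_roots);
        return 0;
    }
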
/* -----------------------------------------------------------------------------
performGC
@@ -23,6 +23,7 @@
void initScheduler (void);
void exitScheduler (rtsBool wait_foreign);
void freeScheduler (void);
void markScheduler (evac_fn evac, void *user);
// Place a new thread on the run queue of the current Capability
void scheduleThread (Capability *cap, StgTSO *tso);
@@ -16,6 +16,8 @@
#include "GetTime.h"
#include "sm/Storage.h"
#include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
#include "sm/GCThread.h"
#include "sm/BlockAlloc.h"
#if USE_PAPI
#include "Papi.h"
@@ -26,31 +28,23 @@
#define TICK_TO_DBL(t) ((double)(t) / TICKS_PER_SECOND)
static Ticks ElapsedTimeStart = 0;
static Ticks
start_init_cpu, start_init_elapsed,
end_init_cpu, end_init_elapsed,
start_exit_cpu, start_exit_elapsed,
end_exit_cpu, end_exit_elapsed;
static Ticks InitUserTime = 0;
static Ticks InitElapsedTime = 0;
static Ticks InitElapsedStamp = 0;
static Ticks GC_tot_cpu = 0;
static Ticks MutUserTime = 0;
static Ticks MutElapsedTime = 0;
static Ticks MutElapsedStamp = 0;
static Ticks ExitUserTime = 0;
static Ticks ExitElapsedTime = 0;
static StgWord64 GC_tot_alloc = 0;
static StgWord64 GC_tot_copied = 0;
static StgWord64 GC_par_max_copied = 0;
static StgWord64 GC_par_avg_copied = 0;
static Ticks GC_start_time = 0, GC_tot_time = 0; /* User GC Time */
static Ticks GCe_start_time = 0, GCe_tot_time = 0; /* Elapsed GC time */
#ifdef PROFILING
static Ticks RP_start_time = 0, RP_tot_time = 0; /* retainer prof user time */
static Ticks RPe_start_time = 0, RPe_tot_time = 0; /* retainer prof elap time */
static Ticks RP_start_time = 0, RP_tot_time = 0; // retainer prof user time
static Ticks RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time
static Ticks HC_start_time, HC_tot_time = 0; // heap census prof user time
static Ticks HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
@@ -62,99 +56,81 @@ static Ticks HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
#define PROF_VAL(x) 0
#endif
static lnat MaxResidency = 0; // in words; for stats only
static lnat AvgResidency = 0;
static lnat ResidencySamples = 0; // for stats only
static lnat MaxSlop = 0;
static lnat max_residency = 0; // in words; for stats only
static lnat avg_residency = 0;
static lnat residency_samples = 0; // for stats only
static lnat max_slop = 0;
static lnat GC_start_faults = 0, GC_end_faults = 0;
static lnat GC_end_faults = 0;
static Ticks *GC_coll_times = NULL;
static Ticks *GC_coll_etimes = NULL;
static Ticks *GC_coll_cpu = NULL;
static Ticks *GC_coll_elapsed = NULL;
static Ticks *GC_coll_max_pause = NULL;
static void statsFlush( void );
static void statsClose( void );
Ticks stat_getElapsedGCTime(void)
{
return GCe_tot_time;
}
/* -----------------------------------------------------------------------------
Current elapsed time
------------------------------------------------------------------------- */
Ticks stat_getElapsedTime(void)
{
return getProcessElapsedTime() - ElapsedTimeStart;
return getProcessElapsedTime() - start_init_elapsed;
}
/* mut_user_time_during_GC() and mut_user_time()
*
* The former function can be used to get the current mutator time
* *during* a GC, i.e. between stat_startGC and stat_endGC. This is
* used in the heap profiler for accurately time stamping the heap
* sample.
*
* ATTENTION: mut_user_time_during_GC() relies on GC_start_time being
* defined in stat_startGC() - to minimise system calls,
* GC_start_time is, however, only defined when really needed (check
* stat_startGC() for details)
*/
double
mut_user_time_during_GC( void )
{
return TICK_TO_DBL(GC_start_time - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time));
}
/* ---------------------------------------------------------------------------
Measure the current MUT time, for profiling
------------------------------------------------------------------------ */
double
mut_user_time( void )
{
Ticks user;
user = getProcessCPUTime();
return TICK_TO_DBL(user - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time));
Ticks cpu;
cpu = getProcessCPUTime();
return TICK_TO_DBL(cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
}
#ifdef PROFILING
/*
mut_user_time_during_RP() is similar to mut_user_time_during_GC();
it returns the MUT time during retainer profiling.
mut_user_time_during_RP() returns the MUT time during retainer profiling.
The same applies to mut_user_time_during_HC().
*/
double
mut_user_time_during_RP( void )
{
return TICK_TO_DBL(RP_start_time - GC_tot_time - RP_tot_time - HC_tot_time);
return TICK_TO_DBL(RP_start_time - GC_tot_cpu - RP_tot_time - HC_tot_time);
}
double
mut_user_time_during_heap_census( void )
{
return TICK_TO_DBL(HC_start_time - GC_tot_time - RP_tot_time - HC_tot_time);
return TICK_TO_DBL(HC_start_time - GC_tot_cpu - RP_tot_time - HC_tot_time);
}
#endif /* PROFILING */
// initStats0() has no dependencies, it can be called right at the beginning
/* ---------------------------------------------------------------------------
initStats0() has no dependencies, it can be called right at the beginning
------------------------------------------------------------------------ */
void
initStats0(void)
{
ElapsedTimeStart = 0;
InitUserTime = 0;
InitElapsedTime = 0;
InitElapsedStamp = 0;
start_init_cpu = 0;
start_init_elapsed = 0;
end_init_cpu = 0;
end_init_elapsed = 0;
MutUserTime = 0;
MutElapsedTime = 0;
MutElapsedStamp = 0;
ExitUserTime = 0;
ExitElapsedTime = 0;
start_exit_cpu = 0;
start_exit_elapsed = 0;
end_exit_cpu = 0;
end_exit_elapsed = 0;
GC_tot_alloc = 0;
GC_tot_copied = 0;
GC_par_max_copied = 0;
GC_par_avg_copied = 0;
GC_start_time = 0;
GC_tot_time = 0;
GCe_start_time = 0;
GCe_tot_time = 0;
GC_tot_cpu = 0;
#ifdef PROFILING
RP_start_time = 0;
@@ -168,16 +144,18 @@ initStats0(void)
HCe_tot_time = 0;
#endif
MaxResidency = 0;
AvgResidency = 0;
ResidencySamples = 0;
MaxSlop = 0;
max_residency = 0;
avg_residency = 0;
residency_samples = 0;
max_slop = 0;
GC_start_faults = 0;
GC_end_faults = 0;
}
// initStats1() can be called after setupRtsFlags()
/* ---------------------------------------------------------------------------
initStats1() can be called after setupRtsFlags()
------------------------------------------------------------------------ */
void
initStats1 (void)
{
@@ -187,17 +165,22 @@ initStats1 (void)
statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n");
statsPrintf(" bytes bytes bytes user elap user elap\n");
}
GC_coll_times =
GC_coll_cpu =
(Ticks *)stgMallocBytes(
sizeof(Ticks)*RtsFlags.GcFlags.generations,
"initStats");
GC_coll_elapsed =
(Ticks *)stgMallocBytes(
sizeof(Ticks)*RtsFlags.GcFlags.generations,
"initStats");
GC_coll_etimes =
GC_coll_max_pause =
(Ticks *)stgMallocBytes(
sizeof(Ticks)*RtsFlags.GcFlags.generations,
"initStats");
for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
GC_coll_times[i] = 0;
GC_coll_etimes[i] = 0;
GC_coll_cpu[i] = 0;
GC_coll_elapsed[i] = 0;
GC_coll_max_pause[i] = 0;
}
}
@@ -208,26 +191,14 @@ initStats1 (void)
void
stat_startInit(void)
{
Ticks elapsed;
elapsed = getProcessElapsedTime();
ElapsedTimeStart = elapsed;
getProcessTimes(&start_init_cpu, &start_init_elapsed);
}
void
stat_endInit(void)
{
Ticks user, elapsed;
getProcessTimes(&user, &elapsed);
getProcessTimes(&end_init_cpu, &end_init_elapsed);
InitUserTime = user;
InitElapsedStamp = elapsed;
if (ElapsedTimeStart > elapsed) {
InitElapsedTime = 0;
} else {
InitElapsedTime = elapsed - ElapsedTimeStart;
}
#if USE_PAPI
/* We start counting events for the mutator
* when garbage collection starts
@@ -249,18 +220,7 @@ stat_endInit(void)
void
stat_startExit(void)
{
Ticks user, elapsed;
getProcessTimes(&user, &elapsed);
MutElapsedStamp = elapsed;
MutElapsedTime = elapsed - GCe_tot_time -
PROF_VAL(RPe_tot_time + HCe_tot_time) - InitElapsedStamp;
if (MutElapsedTime < 0) { MutElapsedTime = 0; } /* sometimes -0.00 */
MutUserTime = user - GC_tot_time -
PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime;
if (MutUserTime < 0) { MutUserTime = 0; }
getProcessTimes(&start_exit_cpu, &start_exit_elapsed);
#if USE_PAPI
/* We stop counting mutator events
@@ -269,25 +229,13 @@ stat_startExit(void)
/* This flag is needed, because GC is run once more after this function */
papi_is_reporting = 0;
#endif
}
void
stat_endExit(void)
{
Ticks user, elapsed;
getProcessTimes(&user, &elapsed);
ExitUserTime = user - MutUserTime - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime;
ExitElapsedTime = elapsed - MutElapsedStamp;
if (ExitUserTime < 0) {
ExitUserTime = 0;
}
if (ExitElapsedTime < 0) {
ExitElapsedTime = 0;
}
getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
}
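
stat_startExit/stat_endExit no longer compute the mutator and exit times on the
spot; they just record start/end CPU and elapsed stamps, and the breakdown is
presumably derived later in stat_exit from those stamps together with the
accumulated GC time (the exact formulas are outside the hunks shown here).  A
standalone sketch of that kind of derivation, elapsed-time side only, with invented
numbers and names:

    #include <stdio.h>

    int main(void)
    {
        /* Invented timestamps in seconds since process start, standing in for
         * the start/end_{init,exit}_elapsed stamps recorded by the RTS. */
        double start_init_elapsed = 0.00, end_init_elapsed = 0.02;
        double start_exit_elapsed = 9.80, end_exit_elapsed = 9.85;
        double gc_elapsed         = 1.19;   /* sum of GC_coll_elapsed[] */

        double init_elapsed = end_init_elapsed - start_init_elapsed;
        double exit_elapsed = end_exit_elapsed - start_exit_elapsed;
        /* Mutator time is whatever remains between init finishing and exit
         * starting, minus the time attributed to GC. */
        double mut_elapsed  = start_exit_elapsed - end_init_elapsed - gc_elapsed;
        if (mut_elapsed < 0) mut_elapsed = 0;   /* guard against -0.00 */

        printf("INIT %6.2fs  MUT %6.2fs  GC %6.2fs  EXIT %6.2fs\n",
               init_elapsed, mut_elapsed, gc_elapsed, exit_elapsed);
        return 0;
    }
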
/* -----------------------------------------------------------------------------
@@ -296,13 +244,8 @@ stat_endExit(void)
static nat rub_bell = 0;
/* initialise global variables needed during GC
*
* * GC_start_time is read in mut_user_time_during_GC(), which in turn is
* needed if either PROFILING or DEBUGing is enabled
*/
void
stat_startGC(void)
stat_startGC (gc_thread *gct)
{
nat bell = RtsFlags.GcFlags.ringBell;
@@ -315,16 +258,6 @@ stat_startGC(void)
}
}
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS
|| RtsFlags.ProfFlags.doHeapProfile)
// heap profiling needs GC_tot_time
{
getProcessTimes(&GC_start_time, &GCe_start_time);
if (RtsFlags.GcFlags.giveStats) {
GC_start_faults = getPageFaults();
}
}
#if USE_PAPI
if(papi_is_reporting) {
/* Switch to counting GC events */
@@ -333,6 +266,40 @@ stat_startGC(void)
}
#endif
getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
gct->gc_start_thread_cpu = getThreadCPUTime();
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
{
gct->gc_start_faults = getPageFaults();
}
}
void
stat_gcWorkerThreadStart (gc_thread *gct)
{
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
{
getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
gct->gc_start_thread_cpu = getThreadCPUTime();
}
}
void
stat_gcWorkerThreadDone (gc_thread *gct)
{
Ticks thread_cpu, elapsed, gc_cpu, gc_elapsed;
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
{
elapsed = getProcessElapsedTime();
thread_cpu = getThreadCPUTime();
gc_cpu = thread_cpu - gct->gc_start_thread_cpu;
gc_elapsed = elapsed - gct->gc_start_elapsed;
taskDoneGC(gct->cap->running_task, gc_cpu, gc_elapsed);
}
}
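
The more accurate per-thread GC timings come from sampling a per-thread CPU clock
(getThreadCPUTime) at the start and end of each worker's GC, rather than
apportioning the process-wide CPU time.  On Linux, one plausible way to read such a
clock is CLOCK_THREAD_CPUTIME_ID; the sketch below is an assumption about how a
getThreadCPUTime-style helper could look, not the RTS's actual GetTime.c code:

    #include <stdio.h>
    #include <time.h>

    /* Hypothetical helper: CPU time consumed by the calling thread, in
     * nanoseconds (Linux/POSIX; the real RTS abstracts this per platform). */
    static long long thread_cpu_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
        return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
    }

    int main(void)
    {
        long long start = thread_cpu_ns();

        /* Stand-in for this thread's share of GC work. */
        volatile double x = 0;
        for (int i = 0; i < 1000000; i++) x += i;

        long long gc_cpu = thread_cpu_ns() - start;   /* cf. gc_start_thread_cpu */
        printf("this thread spent %lld ns of CPU time\n", gc_cpu);
        return 0;
    }
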
/* -----------------------------------------------------------------------------
@@ -340,67 +307,65 @@ stat_startGC(void)
-------------------------------------------------------------------------- */
void
stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen,
stat_endGC (gc_thread *gct,
lnat alloc, lnat live, lnat copied, nat gen,
lnat max_copied, lnat avg_copied, lnat slop)
{
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
RtsFlags.ProfFlags.doHeapProfile)
// heap profiling needs GC_tot_time
{
Ticks time, etime, gc_time, gc_etime;
Ticks cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed;
getProcessTimes(&time, &etime);
gc_time = time - GC_start_time;
gc_etime = etime - GCe_start_time;
if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
getProcessTimes(&cpu, &elapsed);
gc_elapsed = elapsed - gct->gc_start_elapsed;
thread_gc_cpu = getThreadCPUTime() - gct->gc_start_thread_cpu;
gc_cpu = cpu - gct->gc_start_cpu;
taskDoneGC(gct->cap->running_task, thread_gc_cpu, gc_elapsed);
if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
nat faults = getPageFaults();
statsPrintf("%9ld %9ld %9ld",
alloc*sizeof(W_), copied*sizeof(W_),
live*sizeof(W_));
statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2ld)\n",
TICK_TO_DBL(gc_time),
TICK_TO_DBL(gc_etime),
TICK_TO_DBL(time),
TICK_TO_DBL(etime - ElapsedTimeStart),
faults - GC_start_faults,
GC_start_faults - GC_end_faults,
gen);
GC_end_faults = faults;
statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2d)\n",
TICK_TO_DBL(gc_cpu),
TICK_TO_DBL(gc_elapsed),
TICK_TO_DBL(cpu),
TICK_TO_DBL(elapsed - start_init_elapsed),
faults - gct->gc_start_faults,
gct->gc_start_faults - GC_end_faults,
gen);
GC_end_faults = faults;
statsFlush();
}
GC_coll_times[gen] += gc_time;
GC_coll_etimes[gen] += gc_etime;
GC_coll_cpu[gen] += gc_cpu;
GC_coll_elapsed[gen] += gc_elapsed;
if (GC_coll_max_pause[gen] < gc_elapsed) {
GC_coll_max_pause[gen] = gc_elapsed;
}
GC_tot_copied += (StgWord64) copied;
GC_tot_alloc += (StgWord64) alloc;
GC_par_max_copied += (StgWord64) max_copied;
GC_par_avg_copied += (StgWord64) avg_copied;
GC_tot_time += gc_time;
GCe_tot_time += gc_etime;
#if defined(THREADED_RTS)
{
Task *task;
if ((task = myTask()) != NULL) {
task->gc_time += gc_time;
task->gc_etime += gc_etime;
}
}
#endif
GC_tot_cpu += gc_cpu;
if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
if (live > MaxResidency) {
MaxResidency = live;
if (live > max_residency) {
max_residency = live;
}
ResidencySamples++;
AvgResidency += live;
residency_samples++;
avg_residency += live;
}
if (slop > MaxSlop) MaxSlop = slop;
if (slop > max_slop) max_slop = slop;
}
if (rub_bell) {
@@ -539,20 +504,28 @@ StgInt TOTAL_CALLS=1;
statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
extern lnat hw_alloc_blocks;
void
stat_exit(int alloc)
{
generation *gen;
Ticks gc_cpu = 0;
Ticks gc_elapsed = 0;
Ticks init_cpu = 0;
Ticks init_elapsed = 0;
Ticks mut_cpu = 0;
Ticks mut_elapsed = 0;
Ticks exit_cpu = 0;
Ticks exit_elapsed = 0;
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
char temp[BIG_STRING_LEN];
Ticks time;
Ticks etime;
nat g, total_collections = 0;
Ticks tot_cpu;
Ticks tot_elapsed;
nat i, g, total_collections = 0;
getProcessTimes( &time, &etime );
etime -= ElapsedTimeStart;
getProcessTimes( &tot_cpu, &tot_elapsed );
tot_elapsed -= start_init_elapsed;
GC_tot_alloc += alloc;
@@ -560,15 +533,20 @@ stat_exit(int alloc)
for (g = 0; g < RtsFlags.GcFlags.generations; g++)
total_collections += generations[g].collections;
/* avoid divide by zero if time is measured as 0.00 seconds -- SDM */
if (time == 0.0) time = 1;
if (etime == 0.0) etime = 1;
/* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
if (tot_cpu == 0.0) tot_cpu = 1;
if (tot_elapsed == 0.0) tot_elapsed = 1;
if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
statsPrintf("%9ld %9.9s %9.9s", (lnat)alloc*sizeof(W_), "", "");
statsPrintf(" %5.2f %5.2f\n\n", 0.0, 0.0);
}
for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
gc_cpu += GC_coll_cpu[i];
gc_elapsed += GC_coll_elapsed[i];
}
if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
showStgWord64(GC_tot_alloc*sizeof(W_),
temp, rtsTrue/*commas*/);
@@ -578,14 +556,14 @@ stat_exit(int alloc)
temp, rtsTrue/*commas*/);
statsPrintf("%16s bytes copied during GC\n", temp);
if ( ResidencySamples > 0 ) {
showStgWord64(MaxResidency*sizeof(W_),
if ( residency_samples > 0 ) {
showStgWord64(max_residency*sizeof(W_),
temp, rtsTrue/*commas*/);
statsPrintf("%16s bytes maximum residency (%ld sample(s))\n",
temp, ResidencySamples);
temp, residency_samples);
}
showStgWord64(MaxSlop*sizeof(W_), temp, rtsTrue/*commas*/);
showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
statsPrintf("%16s bytes maximum slop\n", temp);
statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n",
@@ -593,13 +571,18 @@ stat_exit(int alloc)
(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
/* Print garbage collections in each gen */
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
statsPrintf(" Generation %d: %5d collections, %5d parallel, %5.2fs, %5.2fs elapsed\n",
g, generations[g].collections,
generations[g].par_collections,
TICK_TO_DBL(GC_coll_times[g]),
TICK_TO_DBL(GC_coll_etimes[g]));
}
statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
gen = &generations[g];
statsPrintf(" Gen %2d %5d colls, %5d par %5.2fs %5.2fs %3.4fs %3.4fs\n",
gen->no,
gen->collections,
gen->par_collections,
TICK_TO_DBL(GC_coll_cpu[g]),
TICK_TO_DBL(GC_coll_elapsed[g]),
gen->collections == 0 ? 0 : TICK_TO_DBL(GC_coll_elapsed[g] / gen->collections),
TICK_TO_DBL(GC_coll_max_pause[g]));
}
#if defined(THREADED_RTS)
if (RtsFlags.ParFlags.parGcEnabled) {
@@ -610,8 +593,7 @@ stat_exit(int alloc)
);
}
#endif
statsPrintf("\n");
#if defined(THREADED_RTS)
{
@@ -653,44 +635,60 @@ stat_exit(int alloc)
}
#endif
statsPrintf(" INIT time %6.2fs (%6.2fs elapsed)\n",