Commit 65a0e1eb authored by Simon Marlow

Simplify the allocation stats accounting

We were doing it in two different ways and asserting that the results
were the same.  In most cases they were, but I found one case where
they weren't: the GC itself allocates some memory for running
finalizers, and this memory was accounted for one way but not the
other.

It was simpler to remove the old way of counting allocation than to
try to fix it up, so I did that.
parent e5085db5
......@@ -77,7 +77,7 @@ typedef struct generation_ {
memcount n_large_blocks; // no. of blocks used by large objs
memcount n_large_words; // no. of words used by large objs
memcount n_new_large_words; // words of new large objects
// (for allocation stats)
// (for doYouWantToGC())
memcount max_blocks; // max blocks
......
......@@ -334,15 +334,39 @@ stat_gcWorkerThreadDone (gc_thread *gct STG_UNUSED)
#endif
}
/* -----------------------------------------------------------------------------
* Calculate the total allocated memory since the start of the
* program. Also emits events reporting the per-cap allocation
* totals.
* -------------------------------------------------------------------------- */
static StgWord
calcTotalAllocated(void)
{
W_ tot_alloc = 0;
W_ n;
for (n = 0; n < n_capabilities; n++) {
tot_alloc += capabilities[n].total_allocated;
traceEventHeapAllocated(&capabilities[n],
CAPSET_HEAP_DEFAULT,
capabilities[n].total_allocated * sizeof(W_));
}
return tot_alloc;
}
/* -----------------------------------------------------------------------------
Called at the end of each GC
-------------------------------------------------------------------------- */
void
stat_endGC (Capability *cap, gc_thread *gct,
W_ alloc, W_ live, W_ copied, W_ slop, nat gen,
W_ live, W_ copied, W_ slop, nat gen,
nat par_n_threads, W_ par_max_copied, W_ par_tot_copied)
{
W_ tot_alloc;
W_ alloc;
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
RtsFlags.ProfFlags.doHeapProfile)
// heap profiling needs GC_tot_time
......@@ -380,6 +404,17 @@ stat_endGC (Capability *cap, gc_thread *gct,
gc_elapsed = elapsed - gct->gc_start_elapsed;
gc_cpu = cpu - gct->gc_start_cpu;
/* Allocation is accounted per capability: the running total is the
 * sum of each capability's total_allocated counter, and the amount
 * allocated since the last GC is the difference between that sum and
 * the previous total held in GC_tot_alloc.
 */
tot_alloc = calcTotalAllocated();
// allocated since the last GC
alloc = tot_alloc - GC_tot_alloc;
GC_tot_alloc = tot_alloc;
if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
W_ faults = getPageFaults();
......@@ -406,29 +441,10 @@ stat_endGC (Capability *cap, gc_thread *gct,
}
GC_tot_copied += (StgWord64) copied;
GC_tot_alloc += (StgWord64) alloc;
GC_par_max_copied += (StgWord64) par_max_copied;
GC_par_tot_copied += (StgWord64) par_tot_copied;
GC_tot_cpu += gc_cpu;
/* For the moment we calculate both per-HEC and total allocation.
* There is thus redundancy here, but for the moment we will calculate
* it both the old and new way and assert they're the same.
* When we're sure it's working OK then we can simplify things.
* TODO: simplify calcAllocated and clearNurseries so they don't have
* to calculate the total
*/
{
W_ tot_alloc = 0;
W_ n;
for (n = 0; n < n_capabilities; n++) {
tot_alloc += capabilities[n].total_allocated;
traceEventHeapAllocated(&capabilities[n],
CAPSET_HEAP_DEFAULT,
capabilities[n].total_allocated * sizeof(W_));
}
ASSERT(GC_tot_alloc == tot_alloc);
}
traceEventHeapSize(cap,
CAPSET_HEAP_DEFAULT,
mblocks_allocated * MBLOCK_SIZE_W * sizeof(W_));
......@@ -587,8 +603,9 @@ StgInt TOTAL_CALLS=1;
static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; }
static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; }
void
stat_exit(int alloc)
stat_exit (void)
{
generation *gen;
Time gc_cpu = 0;
......@@ -599,6 +616,8 @@ stat_exit(int alloc)
Time mut_elapsed = 0;
Time exit_cpu = 0;
Time exit_elapsed = 0;
W_ tot_alloc;
W_ alloc;
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
......@@ -610,13 +629,11 @@ stat_exit(int alloc)
getProcessTimes( &tot_cpu, &tot_elapsed );
tot_elapsed -= start_init_elapsed;
GC_tot_alloc += alloc;
tot_alloc = calcTotalAllocated();
for (i = 0; i < n_capabilities; i++) {
traceEventHeapAllocated(&capabilities[i],
CAPSET_HEAP_DEFAULT,
capabilities[i].total_allocated * sizeof(W_));
}
// allocated since the last GC
alloc = tot_alloc - GC_tot_alloc;
GC_tot_alloc = tot_alloc;
/* Count total garbage collections */
for (g = 0; g < RtsFlags.GcFlags.generations; g++)
......
......@@ -29,7 +29,7 @@ void stat_endInit(void);
void stat_startGC(Capability *cap, struct gc_thread_ *_gct);
void stat_endGC (Capability *cap, struct gc_thread_ *_gct,
W_ alloc, W_ live, W_ copied, W_ slop, nat gen,
W_ live, W_ copied, W_ slop, nat gen,
nat n_gc_threads, W_ par_max_copied, W_ par_tot_copied);
void stat_gcWorkerThreadStart (struct gc_thread_ *_gct);
......@@ -52,7 +52,7 @@ void stat_endHeapCensus(void);
void stat_startExit(void);
void stat_endExit(void);
void stat_exit(int alloc);
void stat_exit(void);
void stat_workerStop(void);
void initStats0(void);
......
......@@ -156,7 +156,7 @@ static StgWord dec_running (void);
static void wakeup_gc_threads (nat me);
static void shutdown_gc_threads (nat me);
static void collect_gct_blocks (void);
static StgWord collect_pinned_object_blocks (void);
static void collect_pinned_object_blocks (void);
#if 0 && defined(DEBUG)
static void gcCAFs (void);
......@@ -186,7 +186,7 @@ GarbageCollect (nat collect_gen,
{
bdescr *bd;
generation *gen;
StgWord live_blocks, live_words, allocated, par_max_copied, par_tot_copied;
StgWord live_blocks, live_words, par_max_copied, par_tot_copied;
#if defined(THREADED_RTS)
gc_thread *saved_gct;
#endif
......@@ -243,11 +243,6 @@ GarbageCollect (nat collect_gen,
}
#endif
/* Approximate how much we allocated.
* Todo: only when generating stats?
*/
allocated = countLargeAllocated(); /* don't count the nursery yet */
/* Figure out which generation to collect
*/
N = collect_gen;
......@@ -304,7 +299,7 @@ GarbageCollect (nat collect_gen,
// gather blocks allocated using allocatePinned() from each capability
// and put them on the g0->large_object list.
allocated += collect_pinned_object_blocks();
collect_pinned_object_blocks();
// Initialise all the generations/steps that we're collecting.
for (g = 0; g <= N; g++) {
......@@ -419,7 +414,7 @@ GarbageCollect (nat collect_gen,
}
if (!DEBUG_IS_ON && n_gc_threads != 1) {
gct->allocated = clearNursery(cap);
clearNursery(cap);
}
shutdown_gc_threads(gct->thread_index);
......@@ -659,17 +654,14 @@ GarbageCollect (nat collect_gen,
// Reset the nursery: make the blocks empty
if (DEBUG_IS_ON || n_gc_threads == 1) {
for (n = 0; n < n_capabilities; n++) {
allocated += clearNursery(&capabilities[n]);
clearNursery(&capabilities[n]);
}
} else {
// When doing parallel GC, clearNursery() is called by the
// worker threads, and the value returned is stored in
// gct->allocated.
// worker threads
for (n = 0; n < n_capabilities; n++) {
if (gc_threads[n]->idle) {
allocated += clearNursery(&capabilities[n]);
} else {
allocated += gc_threads[n]->allocated;
clearNursery(&capabilities[n]);
}
}
}
......@@ -781,7 +773,7 @@ GarbageCollect (nat collect_gen,
#endif
// ok, GC over: tell the stats department what happened.
stat_endGC(cap, gct, allocated, live_words, copied,
stat_endGC(cap, gct, live_words, copied,
live_blocks * BLOCK_SIZE_W - live_words /* slop */,
N, n_gc_threads, par_max_copied, par_tot_copied);
......@@ -1094,7 +1086,7 @@ gcWorkerThread (Capability *cap)
scavenge_until_all_done();
if (!DEBUG_IS_ON) {
gct->allocated = clearNursery(cap);
clearNursery(cap);
}
#ifdef THREADED_RTS
......@@ -1439,17 +1431,15 @@ collect_gct_blocks (void)
purposes.
-------------------------------------------------------------------------- */
static StgWord
static void
collect_pinned_object_blocks (void)
{
nat n;
bdescr *bd, *prev;
StgWord allocated = 0;
for (n = 0; n < n_capabilities; n++) {
prev = NULL;
for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
allocated += bd->free - bd->start;
prev = bd;
}
if (prev != NULL) {
......@@ -1461,8 +1451,6 @@ collect_pinned_object_blocks (void)
capabilities[n].pinned_object_blocks = 0;
}
}
return allocated;
}
/* -----------------------------------------------------------------------------
......@@ -1480,7 +1468,6 @@ init_gc_thread (gc_thread *t)
t->failed_to_evac = rtsFalse;
t->eager_promotion = rtsTrue;
t->thunk_selector_depth = 0;
t->allocated = 0;
t->copied = 0;
t->scanned = 0;
t->any_work = 0;
......
......@@ -176,7 +176,6 @@ typedef struct gc_thread_ {
// -------------------
// stats
W_ allocated; // result of clearNursery()
W_ copied;
W_ scanned;
W_ any_work;
......
......@@ -240,8 +240,8 @@ void storageAddCapabilities (nat from, nat to)
void
exitStorage (void)
{
W_ allocated = updateNurseriesStats();
stat_exit(allocated);
updateNurseriesStats();
stat_exit();
}
void
......@@ -508,22 +508,18 @@ allocNurseries (nat from, nat to)
assignNurseriesToCapabilities(from, to);
}
W_
void
clearNursery (Capability *cap)
{
bdescr *bd;
W_ allocated = 0;
for (bd = nurseries[cap->no].blocks; bd; bd = bd->link) {
allocated += (W_)(bd->free - bd->start);
cap->total_allocated += (W_)(bd->free - bd->start);
bd->free = bd->start;
ASSERT(bd->gen_no == 0);
ASSERT(bd->gen == g0);
IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
}
return allocated;
}
void
......@@ -771,6 +767,7 @@ allocatePinned (Capability *cap, W_ n)
// g0->large_objects.
if (bd != NULL) {
dbl_link_onto(bd, &cap->pinned_object_blocks);
// add it to the allocation stats when the block is full
cap->total_allocated += bd->free - bd->start;
}
......@@ -927,32 +924,19 @@ dirty_MVAR(StgRegTable *reg, StgClosure *p)
* updateNurseriesStats()
*
* Update the per-cap total_allocated numbers with an approximation of
* the amount of memory used in each cap's nursery. Also return the
* total across all caps.
*
* the amount of memory used in each cap's nursery.
*
* Since this update is also performed by clearNursery(), we only
* need this function for the final stats when the RTS is shutting down.
* -------------------------------------------------------------------------- */
W_
updateNurseriesStats (void)
void updateNurseriesStats (void)
{
W_ allocated = 0;
nat i;
for (i = 0; i < n_capabilities; i++) {
int cap_allocated = countOccupied(nurseries[i].blocks);
capabilities[i].total_allocated += cap_allocated;
allocated += cap_allocated;
capabilities[i].total_allocated += countOccupied(nurseries[i].blocks);
}
return allocated;
}
W_
countLargeAllocated (void)
{
return g0->n_new_large_words;
}
W_ countOccupied (bdescr *bd)
......
......@@ -82,7 +82,7 @@ void dirty_TVAR(Capability *cap, StgTVar *p);
extern nursery *nurseries;
void resetNurseries ( void );
W_ clearNursery ( Capability *cap );
void clearNursery ( Capability *cap );
void resizeNurseries ( W_ blocks );
void resizeNurseriesFixed ( W_ blocks );
W_ countNurseryBlocks ( void );
......@@ -91,7 +91,7 @@ W_ countNurseryBlocks ( void );
Stats 'n' DEBUG stuff
-------------------------------------------------------------------------- */
W_ updateNurseriesStats (void);
void updateNurseriesStats (void);
W_ countLargeAllocated (void);
W_ countOccupied (bdescr *bd);
W_ calcNeeded (rtsBool force_major, W_ *blocks_needed);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment