Commit f49d6fb2 authored by Douglas Wilson, committed by Marge Bot

rts: stats: Some fixes to stats for sequential gcs

Solves #19147. When n_capabilities > 1, we were not correctly accounting
for gc time in sequential collections: in this case par_n_gcthreads == 1,
but it is not guaranteed that the single gc thread is capability 0.

A similar issue for the copied statistic is addressed as well.
parent e07ba458
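
To make the failure mode concrete, here is a minimal, self-contained sketch (hypothetical sizes and names such as gc_thread_t and N_CAPABILITIES; not the RTS code itself) of how summing over gc_threads[0..par_n_threads) misreads a sequential collection run by a capability other than 0:

    #include <stdio.h>
    #include <stdint.h>

    typedef struct { uint64_t gc_start_cpu, gc_end_cpu; } gc_thread_t;

    #define N_CAPABILITIES 4
    static gc_thread_t gc_threads[N_CAPABILITIES];

    int main(void)
    {
        /* Sequential collection carried out by capability 2: only its
         * counters are updated for this GC cycle. */
        gc_threads[2].gc_start_cpu = 100;
        gc_threads[2].gc_end_cpu   = 350;

        uint32_t par_n_threads = 1;  /* sequential GC */
        uint64_t cpu_ns = 0;

        /* Buggy: iterating gc_threads[0..par_n_threads) assumes the
         * single gc thread sits in slot 0, i.e. is capability 0. */
        for (uint32_t i = 0; i < par_n_threads; i++)
            cpu_ns += gc_threads[i].gc_end_cpu - gc_threads[i].gc_start_cpu;
        printf("buggy: %llu\n", (unsigned long long)cpu_ns);  /* 0, not 250 */

        /* Fixed: read the initiating thread's counters directly. */
        gc_thread_t *initiating_gct = &gc_threads[2];
        cpu_ns = initiating_gct->gc_end_cpu - initiating_gct->gc_start_cpu;
        printf("fixed: %llu\n", (unsigned long long)cpu_ns);  /* 250 */
        return 0;
    }

This mirrors the shape of the fix below: the sequential case reads the initiating thread rather than indexing the array by loop position.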
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -507,10 +507,18 @@ stat_endGC (Capability *cap, gc_thread *initiating_gct, W_ live, W_ copied, W_ s
             initiating_gct->gc_start_elapsed - initiating_gct->gc_sync_start_elapsed;
         stats.gc.elapsed_ns = current_elapsed - initiating_gct->gc_start_elapsed;
         stats.gc.cpu_ns = 0;
-        for (unsigned int i=0; i < par_n_threads; i++) {
-            gc_thread *gct = gc_threads[i];
-            ASSERT(gct->gc_end_cpu >= gct->gc_start_cpu);
-            stats.gc.cpu_ns += gct->gc_end_cpu - gct->gc_start_cpu;
-        }
+        // see Note [n_gc_threads]
+        // par_n_threads is set to n_gc_threads at the single callsite of this
+        // function
+        if (par_n_threads == 1) {
+            ASSERT(initiating_gct->gc_end_cpu >= initiating_gct->gc_start_cpu);
+            stats.gc.cpu_ns += initiating_gct->gc_end_cpu - initiating_gct->gc_start_cpu;
+        } else {
+            for (unsigned int i=0; i < par_n_threads; i++) {
+                gc_thread *gct = gc_threads[i];
+                ASSERT(gct->gc_end_cpu >= gct->gc_start_cpu);
+                stats.gc.cpu_ns += gct->gc_end_cpu - gct->gc_start_cpu;
+            }
+        }
     }
 // -------------------------------------------------
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -136,8 +136,22 @@ StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)]
                        ATTRIBUTE_ALIGNED(64);
 #endif
 
-// Number of threads running in *this* GC. Affects how many
-// step->todos[] lists we have to look in to find work.
+/* Note [n_gc_threads]
+This is a global variable that originally tracked the number of threads
+participating in the current gc. Its meaning has diverged from this somewhat.
+In practice, it now takes one of the values {1, n_capabilities}. Don't be
+tricked into thinking this means garbage collections must have 1 or
+n_capabilities threads participating: idle capabilities (idle_cap[cap->no])
+are included in the n_gc_threads count.
+
+Clearly this is in need of some tidying up, but for now we tread carefully.
+We check n_gc_threads > 1 to see whether we are in a parallel or sequential
+collection. We ensure n_gc_threads > 1 before iterating over gc_threads, a la:
+
+    for (i = 0; i < n_gc_threads; ++i) { foo(gc_threads[i]); }
+
+Omitting this check has led to issues such as #19147.
+*/
 uint32_t n_gc_threads;
 
 // For stats:
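
A sketch of the idiom the Note prescribes, under hypothetical names (gc_thread_t, total_copied; not the RTS's own declarations):

    #include <stdint.h>

    typedef struct { uint64_t copied; } gc_thread_t;

    static uint32_t n_gc_threads;
    static gc_thread_t *gc_threads[8];

    /* Sum the "copied" counters for the collection that just finished. */
    static uint64_t total_copied(gc_thread_t *current_gct)
    {
        uint64_t copied = 0;
        if (n_gc_threads > 1) {
            /* Parallel collection: every slot in
             * gc_threads[0..n_gc_threads) took part, so walking the
             * array is safe. */
            for (uint32_t i = 0; i < n_gc_threads; i++) {
                copied += gc_threads[i]->copied;
            }
        } else {
            /* Sequential collection: only the initiating thread did
             * work, and it is not necessarily gc_threads[0]. */
            copied = current_gct->copied;
        }
        return copied;
    }

The check on n_gc_threads does double duty: it selects the parallel code path and guards the array walk that is only meaningful on that path.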
@@ -552,12 +566,13 @@ GarbageCollect (uint32_t collect_gen,
         uint64_t par_balanced_copied_acc = 0;
         const gc_thread* thread;
 
-        for (i=0; i < n_gc_threads; i++) {
-            copied += RELAXED_LOAD(&gc_threads[i]->copied);
-        }
-        for (i=0; i < n_gc_threads; i++) {
-            thread = gc_threads[i];
-            if (n_gc_threads > 1) {
+        // see Note [n_gc_threads]
+        if (n_gc_threads > 1) {
+            for (i=0; i < n_gc_threads; i++) {
+                copied += RELAXED_LOAD(&gc_threads[i]->copied);
+            }
+            for (i=0; i < n_gc_threads; i++) {
+                thread = gc_threads[i];
                 debugTrace(DEBUG_gc,"thread %d:", i);
                 debugTrace(DEBUG_gc,"   copied           %ld",
                            RELAXED_LOAD(&thread->copied) * sizeof(W_));
@@ -585,12 +600,12 @@ GarbageCollect (uint32_t collect_gen,
                 par_balanced_copied_acc +=
                     stg_min(n_gc_threads * RELAXED_LOAD(&thread->copied), copied);
             }
-        }
-        if (n_gc_threads > 1) {
             // See Note [Work Balance] for an explanation of this computation
             par_balanced_copied =
                 (par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) /
                 (n_gc_threads - 1);
+        } else {
+            copied += gct->copied;
         }
     }
 
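The par_balanced_copied line in the last hunk adds (n_gc_threads - 1) / 2 before dividing, so the integer division rounds to nearest instead of truncating. A small worked example with made-up per-thread byte counts (the numbers are illustrative, not from any real run):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t per_thread[] = {40, 30, 20, 10};  /* hypothetical copied/thread */
        uint64_t n = 4, copied = 0, acc = 0;

        for (int i = 0; i < 4; i++) copied += per_thread[i];           /* 100 */
        for (int i = 0; i < 4; i++) {
            uint64_t x = n * per_thread[i];
            acc += x < copied ? x : copied;  /* stg_min(n * copied_i, copied) */
        }                                    /* 100 + 100 + 80 + 40 = 320 */

        /* (acc - copied + (n-1)/2) / (n-1) = (220 + 1) / 3 = 73,
         * i.e. 220/3 = 73.33 rounded to nearest rather than truncated. */
        uint64_t par_balanced_copied = (acc - copied + (n - 1) / 2) / (n - 1);
        printf("%llu\n", (unsigned long long)par_balanced_copied);     /* 73 */
        return 0;
    }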