From bedb4f0de102936099bda4e995cc83f1c344366c Mon Sep 17 00:00:00 2001
From: Teo Camarasu <teofilcamarasu@gmail.com>
Date: Fri, 14 Jul 2023 15:26:29 +0100
Subject: [PATCH] nonmoving: Add support for heap profiling

Add support for heap profiling while using the nonmoving collector.

We greatly simplify the implementation by disabling concurrent collection
when heap profiling is enabled. This entails that the marked objects on
the nonmoving heap are exactly the live objects.

Note that we match the nonmoving collector's live bytes accounting by
taking the size of an object on the nonmoving heap to be that of its
segment's block rather than that of the object itself.

Resolves #22221
---
 docs/users_guide/9.10.1-notes.rst          |   2 +
 rts/Capability.h                           |   2 +-
 rts/ProfHeap.c                             | 128 +++++++++++++++++++++
 rts/RtsFlags.c                             |   5 -
 rts/sm/GC.c                                |   4 +-
 testsuite/tests/profiling/should_run/all.T |   8 +-
 6 files changed, 137 insertions(+), 12 deletions(-)

diff --git a/docs/users_guide/9.10.1-notes.rst b/docs/users_guide/9.10.1-notes.rst
index 385d4328da44..c33584be62c7 100644
--- a/docs/users_guide/9.10.1-notes.rst
+++ b/docs/users_guide/9.10.1-notes.rst
@@ -168,6 +168,8 @@ Runtime system
   In one real-world application, this has reduced resident set size by about 20% and modestly improved run-time.
   See :ghc-ticket:`23340`. :rts-flag:`--nonmoving-dense-allocator-count=⟨count⟩` has been added to fine-tune this behaviour.
 
+- Add support for heap profiling with the non-moving GC.
+  See :ghc-ticket:`22221`.
 - Add a :rts-flag:`--no-automatic-time-samples` flag which stops time profiling samples being automatically started on
   startup. Time profiling can be controlled manually using functions in ``GHC.Profiling``.
 
diff --git a/rts/Capability.h b/rts/Capability.h
index a039aae23533..463e03cf133d 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -98,7 +98,7 @@ struct Capability_ {
     // The update remembered set for the non-moving collector
     UpdRemSet upd_rem_set;
     // Array of current segments for the non-moving collector.
-    // Of length NONMOVING_ALLOCA_CNT.
+    // Of length nonmoving_alloca_cnt.
    struct NonmovingSegment **current_segments;
 
    // block for allocating pinned objects into
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c
index dad27da10223..65044b5e801e 100644
--- a/rts/ProfHeap.c
+++ b/rts/ProfHeap.c
@@ -1280,6 +1280,116 @@ heapCensusBlock(Census *census, bdescr *bd)
     }
 }
 
+// Determine whether a closure should be assigned to the PRIM cost-centre.
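+// PRIM covers RTS-internal closures (e.g. TSO, STACK, MVAR, ARR_WORDS) that
+// have no user-written source to charge; user-level closures (THUNK, FUN,
+// CONSTR, ...) are instead attributed to their own cost centres.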
+static bool
+closureIsPrim (StgPtr p)
+{
+    bool prim = false;
+    const StgInfoTable *info = get_itbl((const StgClosure *)p);
+    switch (info->type) {
+    case THUNK:
+    case THUNK_1_1:
+    case THUNK_0_2:
+    case THUNK_2_0:
+    case THUNK_1_0:
+    case THUNK_0_1:
+    case THUNK_SELECTOR:
+    case FUN:
+    case BLACKHOLE:
+    case BLOCKING_QUEUE:
+    case FUN_1_0:
+    case FUN_0_1:
+    case FUN_1_1:
+    case FUN_0_2:
+    case FUN_2_0:
+    case CONSTR:
+    case CONSTR_NOCAF:
+    case CONSTR_1_0:
+    case CONSTR_0_1:
+    case CONSTR_1_1:
+    case CONSTR_0_2:
+    case CONSTR_2_0:
+    case IND:
+    case AP:
+    case PAP:
+    case AP_STACK:
+    case CONTINUATION:
+        prim = false;
+        break;
+
+    case BCO:
+    case MVAR_CLEAN:
+    case MVAR_DIRTY:
+    case TVAR:
+    case WEAK:
+    case PRIM:
+    case MUT_PRIM:
+    case MUT_VAR_CLEAN:
+    case MUT_VAR_DIRTY:
+    case ARR_WORDS:
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
+    case MUT_ARR_PTRS_FROZEN_CLEAN:
+    case MUT_ARR_PTRS_FROZEN_DIRTY:
+    case SMALL_MUT_ARR_PTRS_CLEAN:
+    case SMALL_MUT_ARR_PTRS_DIRTY:
+    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+    case TSO:
+    case STACK:
+    case TREC_CHUNK:
+        prim = true;
+        break;
+
+    case COMPACT_NFDATA:
+        barf("heapCensus, found compact object in the wrong list");
+        break;
+
+    default:
+        barf("heapCensus, unknown object: %d", info->type);
+    }
+    return prim;
+}
+
+static void
+heapCensusSegment (Census* census, struct NonmovingSegment* seg)
+{
+    unsigned int block_size = nonmovingSegmentBlockSize(seg);
+    unsigned int block_count = nonmovingSegmentBlockCount(seg);
+
+    for (unsigned int b = 0; b < block_count; b++) {
+        StgPtr p = nonmovingSegmentGetBlock(seg, b);
+        // ignore unmarked heap objects
+        if (!nonmovingClosureMarkedThisCycle(p)) continue;
+        // NB: We round up the size of objects to the segment block size.
+        // This aligns with live bytes accounting for the nonmoving collector.
+        heapProfObject(census, (StgClosure*)p, block_size / sizeof(W_), closureIsPrim(p));
+    }
+}
+
+/* Note [Non-concurrent nonmoving collector heap census]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * When using the nonmoving collector, we currently disable concurrent
+ * collection to simplify heap census accounting.
+ *
+ * Without concurrent allocation, the marked objects on the nonmoving heap
+ * are exactly the live objects.
+ *
+ * We disable concurrent collection both for GCs that lead to a heap census
+ * and for those that do not. This is because a concurrent collection can
+ * overlap with a GC that is meant to perform a heap census. Alternatively
+ * we could better handle the case where a non-concurrent collection is
+ * triggered while a concurrent collection is running.
+ */
+
+static void
+heapCensusSegmentList (Census* census, struct NonmovingSegment* seg)
+{
+    for (; seg; seg = seg->link) {
+        heapCensusSegment(census, seg);
+    }
+}
+
 /* -----------------------------------------------------------------------------
  * Code to perform a heap census.
  * -------------------------------------------------------------------------- */
@@ -1350,6 +1460,24 @@ void heapCensus (Time t)
         }
     }
 
+    if (RtsFlags.GcFlags.useNonmoving) {
+        for (unsigned int i = 0; i < nonmoving_alloca_cnt; i++) {
+            heapCensusSegmentList(census, nonmovingHeap.allocators[i].filled);
+            heapCensusSegmentList(census, nonmovingHeap.allocators[i].saved_filled);
+            heapCensusSegmentList(census, nonmovingHeap.allocators[i].active);
+
+            // segments living on capabilities
+            for (unsigned int j = 0; j < getNumCapabilities(); j++) {
+                Capability* cap = getCapability(j);
+                heapCensusSegment(census, cap->current_segments[i]);
+            }
+        }
+
+        // these lists are global rather than per-allocator, so walk them once
+        heapCensusChain(census, nonmoving_large_objects);
+        heapCensusCompactList(census, nonmoving_compact_objects);
+    }
+
     // dump out the census info
 #if defined(PROFILING)
     // We can't generate any info for LDV profiling until
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index d658a523efbb..36f8bd485aec 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -1987,11 +1987,6 @@ static void normaliseRtsOpts (void)
     }
 #endif
 
-    if (RtsFlags.ProfFlags.doHeapProfile != NO_HEAP_PROFILING &&
-        RtsFlags.GcFlags.useNonmoving) {
-        barf("The non-moving collector doesn't support profiling");
-    }
-
     if (RtsFlags.GcFlags.compact && RtsFlags.GcFlags.useNonmoving) {
         errorBelch("The non-moving collector cannot be used in conjunction with\n"
                    "the compacting collector.");
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index eef653d1cf67..90afde90facf 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -874,7 +874,9 @@ GarbageCollect (struct GcConfig config,
     ASSERT(oldest_gen->old_weak_ptr_list == NULL);
 
 #if defined(THREADED_RTS)
-    concurrent = !config.nonconcurrent;
+    // Concurrent collection is currently incompatible with heap profiling.
+    // See Note [Non-concurrent nonmoving collector heap census]
+    concurrent = !config.nonconcurrent && !RtsFlags.ProfFlags.doHeapProfile;
 #else
     // In the non-threaded runtime this is the only time we push to the
     // upd_rem_set
diff --git a/testsuite/tests/profiling/should_run/all.T b/testsuite/tests/profiling/should_run/all.T
index b161f1afe1b8..11e6aa643d2a 100644
--- a/testsuite/tests/profiling/should_run/all.T
+++ b/testsuite/tests/profiling/should_run/all.T
@@ -13,9 +13,9 @@ test('T11489', [req_profiling], makefile_test, ['T11489'])
 
 test('dynamic-prof', [], compile_and_run, [''])
 
-test('dynamic-prof2', [only_ways(['normal']), extra_run_opts('+RTS -hT --no-automatic-heap-samples')], compile_and_run, [''])
+test('dynamic-prof2', [only_ways(['normal', 'nonmoving_thr']), extra_run_opts('+RTS -hT --no-automatic-heap-samples')], compile_and_run, [''])
 
-test('dynamic-prof3', [only_ways(['normal']), extra_run_opts('+RTS -hT --no-automatic-heap-samples')], compile_and_run, [''])
+test('dynamic-prof3', [only_ways(['normal', 'nonmoving_thr']), extra_run_opts('+RTS -hT --no-automatic-heap-samples')], compile_and_run, [''])
 
 # Remove the ipName field as it's volatile (depends on e.g. architecture and may change with every new GHC version)
 def normalise_InfoProv_ipName(str):
@@ -34,9 +34,7 @@ test('staticcallstack002',
                 ['-O0 -g3 -fdistinct-constructor-tables -finfo-table-map'])
 
 test('T21455',
-     [extra_run_opts('+RTS -hT -postem'),
-      # Nonmoving collector doesn't support -hT
-      omit_ways(['nonmoving', 'nonmoving_thr', 'nonmoving_thr_sanity'])],
+     [extra_run_opts('+RTS -hT -postem')],
      compile_and_run,
     [''])
-- 
GitLab
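
A quick usage sketch (not part of the patch; the program name below is
hypothetical): with this change applied, any program built with -rtsopts can
take a heap census under the nonmoving collector, for example with the
build-independent -hT (profile by closure type) census used in the tests
above:

    -- Hypothetical Main.hs, assuming a GHC with this patch applied.
    -- Build:  ghc -rtsopts Main.hs
    -- Run:    ./Main +RTS --nonmoving-gc -hT -RTS
    -- This writes a closure-type heap profile to Main.hp, with censuses
    -- taken by the (now non-concurrent) nonmoving collector.
    module Main where

    import Data.List (foldl')

    main :: IO ()
    main = print (foldl' (+) 0 [1 .. 1000000 :: Integer])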