Commits on Source (38)
@@ -40,6 +40,7 @@ module CLabel (
mkAsmTempDieLabel,
mkDirty_MUT_VAR_Label,
mkNonmovingWriteBarrierEnabledLabel,
mkUpdInfoLabel,
mkBHUpdInfoLabel,
mkIndStaticInfoLabel,
@@ -484,7 +485,9 @@ mkBlockInfoTableLabel name c = IdLabel name c BlockInfoTable
-- See Note [Proc-point local block entry-point].
-- Constructing Cmm Labels
-mkDirty_MUT_VAR_Label, mkUpdInfoLabel,
+mkDirty_MUT_VAR_Label,
+mkNonmovingWriteBarrierEnabledLabel,
+mkUpdInfoLabel,
mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel,
mkMAP_FROZEN_CLEAN_infoLabel, mkMAP_FROZEN_DIRTY_infoLabel,
mkMAP_DIRTY_infoLabel,
@@ -494,6 +497,8 @@ mkDirty_MUT_VAR_Label, mkUpdInfoLabel,
mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel,
mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel :: CLabel
mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction
mkNonmovingWriteBarrierEnabledLabel
= CmmLabel rtsUnitId (fsLit "nonmoving_write_barrier_enabled") CmmData
mkUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_upd_frame") CmmInfo
mkBHUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_bh_upd_frame" ) CmmInfo
mkIndStaticInfoLabel = CmmLabel rtsUnitId (fsLit "stg_IND_STATIC") CmmInfo
......
@@ -631,6 +631,7 @@ emitBlackHoleCode node = do
-- work with profiling.
when eager_blackholing $ do
whenUpdRemSetEnabled dflags $ emitUpdRemSetPushThunk node
emitStore (cmmOffsetW dflags node (fixedHdrSizeW dflags)) currentTSOExpr
emitPrimCall [] MO_WriteBarrier []
emitStore node (CmmReg (CmmGlobal EagerBlackholeInfo))
......
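The ordering in emitBlackHoleCode is the point of the change: the about-to-be-overwritten thunk is pushed to the update remembered set first, then the owning TSO is stored into the indirectee field, and only after a write barrier is the blackhole info pointer published. A minimal C sketch of that sequence, with simplified, hypothetical types standing in for the real RTS definitions:

#include <stdatomic.h>

/* Hypothetical, simplified closure layout for illustration only. */
typedef struct Closure {
    const void *info;        /* info-table pointer, published last */
    void       *payload[1];  /* payload[0] holds the indirectee    */
} Closure;

extern int  nonmoving_write_barrier_enabled;         /* RTS global flag     */
extern void upd_rem_set_push_thunk(Closure *thunk);  /* hypothetical helper */

void eager_blackhole(Closure *node, void *current_tso, const void *bh_info)
{
    /* 1. Preserve the collector's snapshot: push the thunk before it is
     *    overwritten (only when the nonmoving barrier is enabled). */
    if (nonmoving_write_barrier_enabled)
        upd_rem_set_push_thunk(node);
    /* 2. Record the owning thread in the indirectee field. */
    node->payload[0] = current_tso;
    /* 3. Make the payload write visible before the info pointer changes. */
    atomic_thread_fence(memory_order_release);
    /* 4. Publish: the closure is now a blackhole. */
    node->info = bh_info;
}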
@@ -37,6 +37,7 @@ import BlockId
import MkGraph
import StgSyn
import Cmm
import Module ( rtsUnitId )
import Type ( Type, tyConAppTyCon )
import TyCon
import CLabel
@@ -314,14 +315,21 @@ emitPrimOp dflags [res] ReadMutVarOp [mutv]
= emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
 emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
- = do -- Without this write barrier, other CPUs may see this pointer before
+ = do old_val <- CmmLocal <$> newTemp (cmmExprType dflags var)
+      emitAssign old_val (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
+      -- Without this write barrier, other CPUs may see this pointer before
       -- the writes for the closure it points to have occurred.
+      -- Note that this also must come after we read the old value to ensure
+      -- that the read of old_val comes before another core's write to the
+      -- MutVar's value.
       emitPrimCall res MO_WriteBarrier []
       emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
       emitCCall
               [{-no results-}]
               (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
-              [(baseExpr, AddrHint), (mutv,AddrHint)]
+              [(baseExpr, AddrHint), (mutv, AddrHint), (CmmReg old_val, AddrHint)]
-- #define sizzeofByteArrayzh(r,a) \
-- r = ((StgArrBytes *)(a))->bytes
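To see why the old value must be read before the barrier and the store, here is the same ordering written out in C. The names are hypothetical stand-ins; the real code is the Cmm emitted above together with dirty_MUT_VAR in the RTS:

#include <stdatomic.h>

/* Hypothetical, simplified MutVar; the real StgMutVar also has a header. */
typedef struct MutVar { void *var; } MutVar;

extern void dirty_mut_var(MutVar *mv, void *old);  /* stand-in for dirty_MUT_VAR */

void write_mut_var(MutVar *mv, void *new_val)
{
    /* Read the old value first: this snapshot of the previous pointer is
     * what dirty_MUT_VAR must push to the update remembered set. */
    void *old = mv->var;
    /* The barrier publishes the writes that initialised *new_val and also
     * keeps the read of `old` from being reordered past the store below. */
    atomic_thread_fence(memory_order_seq_cst);
    mv->var = new_val;
    dirty_mut_var(mv, old);
}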
@@ -1622,17 +1630,21 @@ doWritePtrArrayOp :: CmmExpr
 doWritePtrArrayOp addr idx val
   = do dflags <- getDynFlags
        let ty = cmmExprType dflags val
+           hdr_size = arrPtrsHdrSize dflags
+       -- Update remembered set for non-moving collector
+       whenUpdRemSetEnabled dflags
+           $ emitUpdRemSetPush (cmmLoadIndexOffExpr dflags hdr_size ty addr ty idx)
        -- This write barrier is to ensure that the heap writes to the object
        -- referred to by val have happened before we write val into the array.
        -- See #12469 for details.
        emitPrimCall [] MO_WriteBarrier []
-       mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
+       mkBasicIndexedWrite hdr_size Nothing addr ty idx val
        emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
        -- the write barrier. We must write a byte into the mark table:
        --     bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
        emit $ mkStore (
          cmmOffsetExpr dflags
-          (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
+          (cmmOffsetExprW dflags (cmmOffsetB dflags addr hdr_size)
           (loadArrPtrsSize dflags addr))
          (CmmMachOp (mo_wordUShr dflags) [idx,
                                           mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
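The Cmm above marks the card covering the written element: it shifts the element index right by mUT_ARR_PTRS_CARD_BITS and stores a single byte. In C the computation is just this (the CARD_BITS value is an assumption for illustration):

#include <stdint.h>

#define CARD_BITS 7  /* assumed stand-in for mUT_ARR_PTRS_CARD_BITS */

/* After writing element `idx`, dirty the card byte that covers it so the
 * GC rescans only the cards that actually changed. */
void mark_card(uint8_t *card_table, uintptr_t idx)
{
    card_table[idx >> CARD_BITS] = 1;
}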
@@ -2223,6 +2235,8 @@ emitCopyArray copy src0 src_off dst0 dst_off0 n =
dst <- assignTempE dst0
dst_off <- assignTempE dst_off0
emitCopyUpdRemSetPush dflags (arrPtrsHdrSizeW dflags) dst dst_off n
-- Set the dirty bit in the header.
emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
@@ -2285,6 +2299,8 @@ emitCopySmallArray copy src0 src_off dst0 dst_off n =
src <- assignTempE src0
dst <- assignTempE dst0
emitCopyUpdRemSetPush dflags (smallArrPtrsHdrSizeW dflags) dst dst_off n
-- Set the dirty bit in the header.
emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
@@ -2413,6 +2429,12 @@ doWriteSmallPtrArrayOp :: CmmExpr
doWriteSmallPtrArrayOp addr idx val = do
dflags <- getDynFlags
let ty = cmmExprType dflags val
-- Update remembered set for non-moving collector
tmp <- newTemp ty
mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing ty tmp addr ty idx
whenUpdRemSetEnabled dflags $ emitUpdRemSetPush (CmmReg (CmmLocal tmp))
emitPrimCall [] MO_WriteBarrier [] -- #12469
mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
@@ -2592,3 +2614,31 @@ emitCtzCall res x width = do
[ res ]
(MO_Ctz width)
[ x ]
---------------------------------------------------------------------------
-- Pushing to the update remembered set
---------------------------------------------------------------------------
-- | Push to the update remembered set the range of pointer-array elements
-- that is about to be overwritten by a copy.
emitCopyUpdRemSetPush :: DynFlags
-> WordOff -- ^ array header size
-> CmmExpr -- ^ destination array
-> CmmExpr -- ^ offset in destination array (in words)
-> Int -- ^ number of elements to copy
-> FCode ()
emitCopyUpdRemSetPush _dflags _hdr_size _dst _dst_off 0 = return ()
emitCopyUpdRemSetPush dflags hdr_size dst dst_off n =
whenUpdRemSetEnabled dflags $ do
updfr_off <- getUpdFrameOff
graph <- mkCall lbl (NativeNodeCall,NativeReturn) [] args updfr_off []
emit graph
where
lbl = mkLblExpr $ mkPrimCallLabel
$ PrimCall (fsLit "stg_copyArray_barrier") rtsUnitId
args =
[ mkIntExpr dflags hdr_size
, dst
, dst_off
, mkIntExpr dflags n
]
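Conceptually, the stg_copyArray_barrier routine called here walks the destination range and pushes each pointer that the copy is about to overwrite. A hedged C sketch of that loop, with hypothetical helper names:

extern void upd_rem_set_push(void *p);  /* hypothetical push primitive */

/* Push every element of dst[dst_off .. dst_off+n) to the update remembered
 * set before the copy overwrites them. */
void copy_array_barrier(void **dst_payload, int dst_off, int n)
{
    for (int i = 0; i < n; i++)
        upd_rem_set_push(dst_payload[dst_off + i]);
}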
@@ -39,6 +39,11 @@ module StgCmmUtils (
mkWordCLit,
newStringCLit, newByteStringCLit,
blankWord,
-- * Update remembered set operations
whenUpdRemSetEnabled,
emitUpdRemSetPush,
emitUpdRemSetPushThunk,
) where
#include "HsVersions.h"
@@ -576,3 +581,40 @@ assignTemp' e
let reg = CmmLocal lreg
emitAssign reg e
return (CmmReg reg)
---------------------------------------------------------------------------
-- Pushing to the update remembered set
---------------------------------------------------------------------------
whenUpdRemSetEnabled :: DynFlags -> FCode a -> FCode ()
whenUpdRemSetEnabled dflags code = do
do_it <- getCode code
the_if <- mkCmmIfThenElse' is_enabled do_it mkNop (Just False)
emit the_if
where
enabled = CmmLoad (CmmLit $ CmmLabel mkNonmovingWriteBarrierEnabledLabel) (bWord dflags)
zero = zeroExpr dflags
is_enabled = cmmNeWord dflags enabled zero
-- | Emit code to add a now-overwritten pointer to the update remembered
-- set.
emitUpdRemSetPush :: CmmExpr -- ^ value of pointer which was overwritten
-> FCode ()
emitUpdRemSetPush ptr = do
emitRtsCall
rtsUnitId
(fsLit "updateRemembSetPushClosure_")
[(CmmReg (CmmGlobal BaseReg), AddrHint),
(ptr, AddrHint)]
False
emitUpdRemSetPushThunk :: CmmExpr -- ^ the thunk
-> FCode ()
emitUpdRemSetPushThunk ptr = do
emitRtsCall
rtsUnitId
(fsLit "updateRemembSetPushThunk_")
[(CmmReg (CmmGlobal BaseReg), AddrHint),
(ptr, AddrHint)]
False
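Both helpers compile to an out-of-line RTS call guarded by whenUpdRemSetEnabled, so when the concurrent collector is not running the barrier costs one load, one compare, and a not-taken branch. Roughly, in C (the names are stand-ins for the generated code):

extern unsigned long nonmoving_write_barrier_enabled;           /* RTS global */
extern void updateRemembSetPushClosure_(void *reg, void *ptr);  /* RTS entry  */

/* The shape of the code whenUpdRemSetEnabled wraps around a push: a single
 * test of the global flag guards the out-of-line call. */
void upd_rem_set_push_guarded(void *base_reg, void *overwritten_ptr)
{
    if (nonmoving_write_barrier_enabled != 0)
        updateRemembSetPushClosure_(base_reg, overwritten_ptr);
}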
@@ -832,6 +832,10 @@
__gen = TO_W_(bdescr_gen_no(__bd)); \
if (__gen > 0) { recordMutableCap(__p, __gen); }
/* -----------------------------------------------------------------------------
Update remembered set write barrier
-------------------------------------------------------------------------- */
/* -----------------------------------------------------------------------------
Arrays
-------------------------------------------------------------------------- */
@@ -934,3 +938,25 @@
prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \
\
return (dst);
//
// Nonmoving write barrier helpers
//
// See Note [Update remembered set] in NonMovingMark.c.
#if defined(THREADED_RTS)
#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
if (W_[nonmoving_write_barrier_enabled] != 0) (likely: False)
#else
// A similar measure is also taken in rts/NonMoving.h, but that isn't visible from C--
#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
if (0)
#define nonmoving_write_barrier_enabled 0
#endif
// A useful helper for pushing a pointer to the update remembered set.
#define updateRemembSetPushPtr(p) \
IF_NONMOVING_WRITE_BARRIER_ENABLED { \
ccall updateRemembSetPushClosure_(BaseReg "ptr", p "ptr"); \
}
@@ -74,6 +74,10 @@ extern "C" {
#define RTS_UNREACHABLE abort()
#endif
/* Prefetch primitives */
#define prefetchForRead(ptr) __builtin_prefetch(ptr, 0)
#define prefetchForWrite(ptr) __builtin_prefetch(ptr, 1)
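These wrap GCC/Clang's __builtin_prefetch, whose second argument selects read (0) or write (1) intent. A typical, purely illustrative use is hiding the latency of chasing a linked structure:

#include <stddef.h>

#define prefetchForRead(ptr)  __builtin_prefetch(ptr, 0)
#define prefetchForWrite(ptr) __builtin_prefetch(ptr, 1)

struct node { struct node *next; int payload; };

/* Hypothetical example: prefetch the next node while summing this one. */
int sum_list(struct node *n)
{
    int acc = 0;
    for (; n != NULL; n = n->next) {
        if (n->next != NULL)
            prefetchForRead(n->next);
        acc += n->payload;
    }
    return acc;
}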
/* Fix for mingw stat problem (done here so it's early enough) */
#if defined(mingw32_HOST_OS)
#define __MSVCRT__ 1
@@ -189,6 +193,7 @@ void _assertFail(const char *filename, unsigned int linenum)
#include "rts/storage/ClosureMacros.h"
#include "rts/storage/MBlock.h"
#include "rts/storage/GC.h"
#include "rts/NonMoving.h"
/* Other RTS external APIs */
#include "rts/Parallel.h"
......
@@ -151,6 +151,23 @@ typedef struct GCDetails_ {
Time cpu_ns;
// The time elapsed during GC itself
Time elapsed_ns;
//
// Concurrent garbage collector
//
// The CPU time used during the post-mark pause phase of the concurrent
// nonmoving GC.
Time nonmoving_gc_sync_cpu_ns;
// The time elapsed during the post-mark pause phase of the concurrent
// nonmoving GC.
Time nonmoving_gc_sync_elapsed_ns;
// The CPU time used during the concurrent mark phase of the nonmoving
// GC.
Time nonmoving_gc_cpu_ns;
// The time elapsed during the concurrent mark phase of the nonmoving
// GC.
Time nonmoving_gc_elapsed_ns;
} GCDetails;
//
@@ -241,6 +258,28 @@ typedef struct _RTSStats {
// The number of times a GC thread has iterated its outer loop across all
// parallel GCs
uint64_t scav_find_work;
// ----------------------------------
// Concurrent garbage collector
// The CPU time used during the post-mark pause phase of the concurrent
// nonmoving GC.
Time nonmoving_gc_sync_cpu_ns;
// The time elapsed during the post-mark pause phase of the concurrent
// nonmoving GC.
Time nonmoving_gc_sync_elapsed_ns;
// The maximum time elapsed during the post-mark pause phase of the
// concurrent nonmoving GC.
Time nonmoving_gc_sync_max_elapsed_ns;
// The CPU time used during the concurrent mark phase of the nonmoving
// GC.
Time nonmoving_gc_cpu_ns;
// The time elapsed during the concurrent mark phase of the nonmoving
// GC.
Time nonmoving_gc_elapsed_ns;
// The maximum time elapsed during any concurrent mark phase of the
// nonmoving GC.
Time nonmoving_gc_max_elapsed_ns;
} RTSStats;
void getRTSStats (RTSStats *s);
......
@@ -182,12 +182,21 @@
#define EVENT_USER_BINARY_MSG 181
#define EVENT_CONC_MARK_BEGIN 200
#define EVENT_CONC_MARK_END 201
#define EVENT_CONC_SYNC_BEGIN 202
#define EVENT_CONC_SYNC_END 203
#define EVENT_CONC_SWEEP_BEGIN 204
#define EVENT_CONC_SWEEP_END 205
#define EVENT_CONC_UPD_REM_SET_FLUSH 206
#define EVENT_NONMOVING_HEAP_CENSUS 207
/*
* The highest event code +1 that ghc itself emits. Note that some event
* ranges higher than this are reserved but not currently emitted by ghc.
* This must match the size of the EventDesc[] array in EventLog.c
*/
-#define NUM_GHC_EVENT_TAGS 182
+#define NUM_GHC_EVENT_TAGS 208
#if 0 /* DEPRECATED EVENTS: */
/* we don't actually need to record the thread, it's implicit */
......
@@ -169,6 +169,7 @@ typedef struct _TRACE_FLAGS {
bool timestamp; /* show timestamp in stderr output */
bool scheduler; /* trace scheduler events */
bool gc; /* trace GC events */
bool nonmoving_gc; /* trace nonmoving GC events */
bool sparks_sampled; /* trace spark events by a sampled method */
bool sparks_full; /* trace spark events 100% accurately */
bool user; /* trace user events (emitted from Haskell code) */
......
/* -----------------------------------------------------------------------------
*
* (c) The GHC Team, 2018-2019
*
* Non-moving garbage collector
*
* Do not #include this file directly: #include "Rts.h" instead.
*
* To understand the structure of the RTS headers, see the wiki:
* http://ghc.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes
*
* -------------------------------------------------------------------------- */
#pragma once
/* This is called by the code generator */
extern DLL_IMPORT_RTS
void updateRemembSetPushClosure_(StgRegTable *reg, StgClosure *p);
void updateRemembSetPushClosure(Capability *cap, StgClosure *p);
void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p);
// Note that RTS code should not condition on this directly but rather
// use the IF_NONMOVING_WRITE_BARRIER_ENABLED macro to ensure that
// the barrier is eliminated in the non-threaded RTS.
extern StgWord DLL_IMPORT_DATA_VAR(nonmoving_write_barrier_enabled);
......@@ -107,6 +107,20 @@ INLINE_HEADER const StgConInfoTable *get_con_itbl(const StgClosure *c)
return CON_INFO_PTR_TO_STRUCT((c)->header.info);
}
/* Used when we expect another thread to be mutating the info table pointer of
* a closure (e.g. when busy-waiting on a WHITEHOLE).
*/
INLINE_HEADER const StgInfoTable *get_volatile_itbl(StgClosure *c) {
// The volatile here is important to ensure that the compiler does not
// optimise away multiple loads, e.g. in a busy-wait loop. Note that
// we can't use VOLATILE_LOAD here as the casts result in strict aliasing
// rule violations and this header may be compiled outside of the RTS
// (where we use -fno-strict-aliasing).
StgInfoTable * *volatile p = (StgInfoTable * *volatile) &c->header.info;
return INFO_PTR_TO_STRUCT(*p);
}
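For example, a busy-wait on a WHITEHOLE must re-read the info pointer on every iteration; with a plain load the compiler may hoist it out of the loop. A self-contained sketch of the pattern, with simplified types and a hypothetical tag value:

typedef struct { int type; } InfoTable;
typedef struct { const InfoTable *info; } Closure;

#define WHITEHOLE 1  /* hypothetical closure-type tag, for illustration */

/* The volatile-qualified pointer forces a fresh load of the info pointer
 * on every call, without casting the pointed-to data itself (which is what
 * creates the strict-aliasing problem mentioned above). */
static inline const InfoTable *volatile_itbl(Closure *c)
{
    const InfoTable * volatile *p = (const InfoTable * volatile *) &c->info;
    return *p;
}

void wait_while_whitehole(Closure *c)
{
    while (volatile_itbl(c)->type == WHITEHOLE)
        ;  /* spin until the owning thread publishes the real info table */
}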
INLINE_HEADER StgHalfWord GET_TAG(const StgClosure *con)
{
return get_itbl(con)->srt;
......
@@ -234,7 +234,7 @@ void setKeepCAFs (void);
and is put on the mutable list.
-------------------------------------------------------------------------- */
-void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p);
+void dirty_MUT_VAR(StgRegTable *reg, StgMutVar *mv, StgClosure *old);
/* set to disable CAF garbage collection in GHCi. */
/* (needed when dynamic libraries are used). */
......
@@ -185,6 +185,53 @@ typedef struct StgTSO_ {
} *StgTSOPtr; // StgTSO defined in rts/Types.h
/* Note [StgStack dirtiness flags and concurrent marking]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* Without concurrent collection by the nonmoving collector the stack dirtiness story
* is quite simple: The stack is either STACK_DIRTY (meaning it has been added to mut_list)
* or not.
*
* However, things are considerably more complicated with concurrent collection
* (namely, when nonmoving_write_barrier_enabled is set): In addition to adding
* the stack to mut_list and flagging it as STACK_DIRTY, we also must ensure
* that stacks are marked in accordance with the nonmoving collector's snapshot
 * invariant. That is, every stack alive at the time the snapshot is taken must
 * be marked at some point after the snapshot is taken and before it is
 * mutated or the sweep phase commences.
*
* This marking may be done by the concurrent mark phase (in the case of a
* thread that never runs during the concurrent mark) or by the mutator when
* dirtying the stack. However, it is unsafe for the concurrent collector to
* traverse the stack while it is under mutation. Consequently, the following
* handshake is obeyed by the mutator's write barrier and the concurrent mark to
* ensure this doesn't happen:
*
* 1. The entity seeking to mark first checks that the stack lives in the nonmoving
* generation; if not then the stack was not alive at the time the snapshot
* was taken and therefore we need not mark it.
*
* 2. The entity seeking to mark checks the stack's mark bit. If it is set then
* no mark is necessary.
*
* 3. The entity seeking to mark tries to lock the stack for marking by
* atomically setting its `marking` field to the current non-moving mark
* epoch:
*
* a. If the mutator finds the concurrent collector has already locked the
* stack then it waits until it is finished (indicated by the mark bit
* being set) before proceeding with execution.
*
* b. If the concurrent collector finds that the mutator has locked the stack
* then it moves on, leaving the mutator to mark it. There is no need to wait;
* the mark is guaranteed to finish before sweep due to the post-mark
* synchronization with mutators.
*
* c. Whoever succeeds in locking the stack is responsible for marking it and
* setting the stack's mark bit (either the BF_MARKED bit for large objects
* or otherwise its bit in its segment's mark bitmap).
*
*/
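A compact sketch of the locking step (3) using C11 atomics may make the handshake concrete. The field names mirror the Note; everything else is simplified and hypothetical:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical, stripped-down stack object: only the fields in the Note. */
typedef struct {
    _Atomic uintptr_t marking;   /* non-zero: locked for marking (step 3)  */
    atomic_bool       mark_bit;  /* set once marking has finished (step c) */
} Stack;

/* Step 3: try to lock the stack by CASing `marking` from 0 to the current
 * mark epoch. Whoever wins the race must mark the stack and then set its
 * mark bit. */
static bool try_lock_for_marking(Stack *s, uintptr_t epoch)
{
    uintptr_t expected = 0;
    return atomic_compare_exchange_strong(&s->marking, &expected, epoch);
}

/* Mutator side, steps 3a and c: either mark the stack ourselves, or wait
 * for the collector to finish before mutating the stack. */
void mutator_dirty_stack(Stack *s, uintptr_t epoch)
{
    if (try_lock_for_marking(s, epoch)) {
        /* ... mark the stack's frames here ... */
        atomic_store(&s->mark_bit, true);
    } else {
        while (!atomic_load(&s->mark_bit))
            ;  /* step 3a: the collector holds the lock; spin until marked */
    }
}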
#define STACK_DIRTY 1
// used by sanity checker to verify that all dirty stacks are on the mutable list
@@ -193,7 +240,8 @@ typedef struct StgTSO_ {
typedef struct StgStack_ {
StgHeader header;
StgWord32 stack_size; // stack size in *words*
-    StgWord32  dirty;          // non-zero => dirty
+    StgWord    dirty;          // non-zero => dirty
+    StgWord    marking;        // non-zero => someone is currently marking the stack
StgPtr sp; // current stack pointer
StgWord stack[];
} StgStack;
......
@@ -542,5 +542,6 @@ void * pushCostCentre (void *ccs, void *cc);
// Capability.c
extern unsigned int n_capabilities;
extern void updateRemembSetPushThunk_(void *reg, void *p1);
#endif
@@ -292,6 +292,8 @@ data TraceFlags = TraceFlags
, timestamp :: Bool -- ^ show timestamp in stderr output
, traceScheduler :: Bool -- ^ trace scheduler events
, traceGc :: Bool -- ^ trace GC events
, traceNonmovingGc
:: Bool -- ^ trace nonmoving GC heap census samples
, sparksSampled :: Bool -- ^ trace spark events by a sampled method
, sparksFull :: Bool -- ^ trace spark events 100% accurately
, user :: Bool -- ^ trace user events (emitted from Haskell code)
@@ -525,6 +527,8 @@ getTraceFlags = do
(#{peek TRACE_FLAGS, scheduler} ptr :: IO CBool))
<*> (toBool <$>
(#{peek TRACE_FLAGS, gc} ptr :: IO CBool))
<*> (toBool <$>
(#{peek TRACE_FLAGS, nonmoving_gc} ptr :: IO CBool))
<*> (toBool <$>
(#{peek TRACE_FLAGS, sparks_sampled} ptr :: IO CBool))
<*> (toBool <$>
......
@@ -103,6 +103,25 @@ data RTSStats = RTSStats {
-- | Total elapsed time (at the previous GC)
, elapsed_ns :: RtsTime
-- | The CPU time used during the post-mark pause phase of the concurrent
-- nonmoving GC.
, nonmoving_gc_sync_cpu_ns :: RtsTime
-- | The time elapsed during the post-mark pause phase of the concurrent
-- nonmoving GC.
, nonmoving_gc_sync_elapsed_ns :: RtsTime
-- | The maximum time elapsed during the post-mark pause phase of the
-- concurrent nonmoving GC.
, nonmoving_gc_sync_max_elapsed_ns :: RtsTime
-- | The CPU time used during the concurrent mark phase of the nonmoving
-- GC.
, nonmoving_gc_cpu_ns :: RtsTime
-- | The time elapsed during the concurrent mark phase of the nonmoving
-- GC.
, nonmoving_gc_elapsed_ns :: RtsTime
-- | The maximum time elapsed during any concurrent mark phase of the
-- nonmoving GC.
, nonmoving_gc_max_elapsed_ns :: RtsTime
-- | Details about the most recent GC
, gc :: GCDetails
} deriving ( Read -- ^ @since 4.10.0.0
@@ -146,6 +165,13 @@ data GCDetails = GCDetails {
, gcdetails_cpu_ns :: RtsTime
-- | The time elapsed during GC itself
, gcdetails_elapsed_ns :: RtsTime
-- | The CPU time used during the post-mark pause phase of the concurrent
-- nonmoving GC.
, gcdetails_nonmoving_gc_sync_cpu_ns :: RtsTime
-- | The time elapsed during the post-mark pause phase of the concurrent
-- nonmoving GC.
, gcdetails_nonmoving_gc_sync_elapsed_ns :: RtsTime
} deriving ( Read -- ^ @since 4.10.0.0
, Show -- ^ @since 4.10.0.0
)
@@ -192,6 +218,12 @@ getRTSStats = do
gc_elapsed_ns <- (# peek RTSStats, gc_elapsed_ns) p
cpu_ns <- (# peek RTSStats, cpu_ns) p
elapsed_ns <- (# peek RTSStats, elapsed_ns) p
nonmoving_gc_sync_cpu_ns <- (# peek RTSStats, nonmoving_gc_sync_cpu_ns) p
nonmoving_gc_sync_elapsed_ns <- (# peek RTSStats, nonmoving_gc_sync_elapsed_ns) p
nonmoving_gc_sync_max_elapsed_ns <- (# peek RTSStats, nonmoving_gc_sync_max_elapsed_ns) p
nonmoving_gc_cpu_ns <- (# peek RTSStats, nonmoving_gc_cpu_ns) p
nonmoving_gc_elapsed_ns <- (# peek RTSStats, nonmoving_gc_elapsed_ns) p
nonmoving_gc_max_elapsed_ns <- (# peek RTSStats, nonmoving_gc_max_elapsed_ns) p
let pgc = (# ptr RTSStats, gc) p
gc <- do
gcdetails_gen <- (# peek GCDetails, gen) pgc
@@ -211,5 +243,7 @@ getRTSStats = do
gcdetails_sync_elapsed_ns <- (# peek GCDetails, sync_elapsed_ns) pgc
gcdetails_cpu_ns <- (# peek GCDetails, cpu_ns) pgc
gcdetails_elapsed_ns <- (# peek GCDetails, elapsed_ns) pgc
gcdetails_nonmoving_gc_sync_cpu_ns <- (# peek GCDetails, nonmoving_gc_sync_cpu_ns) pgc
gcdetails_nonmoving_gc_sync_elapsed_ns <- (# peek GCDetails, nonmoving_gc_sync_elapsed_ns) pgc
return GCDetails{..}
return RTSStats{..}
@@ -2,7 +2,11 @@ test('heap_all',
[when(have_profiling(), extra_ways(['prof'])),
# These ways produce slightly different heap representations.
# Currently we don't test them.
-      omit_ways(['ghci', 'hpc'])
+      omit_ways(['ghci', 'hpc',
+                 'nonmoving', 'nonmoving_thr', 'nonmoving_thr_ghc']),
# The debug RTS initializes some fields with 0xaa and so
# this test spuriously fails.
when(compiler_debugged(), skip)
],
compile_and_run, [''])
......
@@ -652,6 +652,8 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK")
/* someone else beat us to it */
jump ENTRY_LBL(stg_WHITEHOLE) (ap);
}
// Can't add StgInd_indirectee(ap) to UpdRemSet here because the old value is
// not reachable.
StgInd_indirectee(ap) = CurrentTSO;
prim_write_barrier;
SET_INFO(ap, __stg_EAGER_BLACKHOLE_info);
......
@@ -292,6 +292,11 @@ initCapability (Capability *cap, uint32_t i)
RtsFlags.GcFlags.generations,
"initCapability");
// At this point the storage manager is not initialized yet, so this
// field will instead be initialized in initStorage().
cap->upd_rem_set.queue.blocks = NULL;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
cap->mut_lists[g] = NULL;
}
@@ -861,16 +866,27 @@ yieldCapability (Capability** pCap, Task *task, bool gcAllowed)
{
PendingSync *sync = pending_sync;
-    if (sync && sync->type == SYNC_GC_PAR) {
-        if (! sync->idle[cap->no]) {
-            traceEventGcStart(cap);
-            gcWorkerThread(cap);
-            traceEventGcEnd(cap);
-            traceSparkCounters(cap);
-            // See Note [migrated bound threads 2]
-            if (task->cap == cap) {
-                return true;
-            }
-        }
-    }
+    if (sync) {
+        switch (sync->type) {
+        case SYNC_GC_PAR:
+            if (! sync->idle[cap->no]) {
+                traceEventGcStart(cap);
+                gcWorkerThread(cap);
+                traceEventGcEnd(cap);
+                traceSparkCounters(cap);
+                // See Note [migrated bound threads 2]
+                if (task->cap == cap) {
+                    return true;
+                }
+            }
+            break;
+
+        case SYNC_FLUSH_UPD_REM_SET:
+            debugTrace(DEBUG_nonmoving_gc, "Flushing update remembered set blocks...");
+            break;
+
+        default:
+            break;
+        }
+    }
......
@@ -85,6 +85,9 @@ struct Capability_ {
bdescr **mut_lists;
bdescr **saved_mut_lists; // tmp use during GC
// The update remembered set for the non-moving collector
UpdRemSet upd_rem_set;
// block for allocating pinned objects into
bdescr *pinned_object_block;
// full pinned object blocks allocated since the last GC
@@ -257,7 +260,8 @@ extern Capability **capabilities;
typedef enum {
SYNC_OTHER,
SYNC_GC_SEQ,
-    SYNC_GC_PAR
+    SYNC_GC_PAR,
+    SYNC_FLUSH_UPD_REM_SET
} SyncType;
//
......