......@@ -40,6 +40,7 @@ module CLabel (
mkAsmTempDieLabel,
mkDirty_MUT_VAR_Label,
mkNonmovingWriteBarrierEnabledLabel,
mkUpdInfoLabel,
mkBHUpdInfoLabel,
mkIndStaticInfoLabel,
......@@ -484,7 +485,9 @@ mkBlockInfoTableLabel name c = IdLabel name c BlockInfoTable
-- See Note [Proc-point local block entry-point].
-- Constructing Cmm Labels
mkDirty_MUT_VAR_Label, mkUpdInfoLabel,
mkDirty_MUT_VAR_Label,
mkNonmovingWriteBarrierEnabledLabel,
mkUpdInfoLabel,
mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel,
mkMAP_FROZEN_CLEAN_infoLabel, mkMAP_FROZEN_DIRTY_infoLabel,
mkMAP_DIRTY_infoLabel,
......@@ -494,6 +497,8 @@ mkDirty_MUT_VAR_Label, mkUpdInfoLabel,
mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel,
mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel :: CLabel
mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction
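-- | Label of the RTS's global @nonmoving_write_barrier_enabled@ flag;
-- see Note [Update remembered set] in NonMovingMark.c.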
mkNonmovingWriteBarrierEnabledLabel
= CmmLabel rtsUnitId (fsLit "nonmoving_write_barrier_enabled") CmmData
mkUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_upd_frame") CmmInfo
mkBHUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_bh_upd_frame" ) CmmInfo
mkIndStaticInfoLabel = CmmLabel rtsUnitId (fsLit "stg_IND_STATIC") CmmInfo
......
......@@ -631,6 +631,7 @@ emitBlackHoleCode node = do
-- work with profiling.
when eager_blackholing $ do
whenUpdRemSetEnabled dflags $ emitUpdRemSetPushThunk node
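-- Store the current TSO as the blackhole's indirectee before the write
-- barrier and the info-table update below publish the blackhole.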
emitStore (cmmOffsetW dflags node (fixedHdrSizeW dflags)) currentTSOExpr
emitPrimCall [] MO_WriteBarrier []
emitStore node (CmmReg (CmmGlobal EagerBlackholeInfo))
......
......@@ -37,6 +37,7 @@ import BlockId
import MkGraph
import StgSyn
import Cmm
import Module ( rtsUnitId )
import Type ( Type, tyConAppTyCon )
import TyCon
import CLabel
......@@ -314,14 +315,21 @@ emitPrimOp dflags [res] ReadMutVarOp [mutv]
= emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
= do -- Without this write barrier, other CPUs may see this pointer before
= do old_val <- CmmLocal <$> newTemp (cmmExprType dflags var)
emitAssign old_val (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
-- Without this write barrier, other CPUs may see this pointer before
-- the writes for the closure it points to have occurred.
-- Note that this also must come after we read the old value to ensure
-- that the read of old_val comes before another core's write to the
-- MutVar's value.
emitPrimCall res MO_WriteBarrier []
emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
emitCCall
[{-no results-}]
(CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
[(baseExpr, AddrHint), (mutv,AddrHint)]
[(baseExpr, AddrHint), (mutv, AddrHint), (CmmReg old_val, AddrHint)]
-- #define sizzeofByteArrayzh(r,a) \
-- r = ((StgArrBytes *)(a))->bytes
......@@ -1622,17 +1630,21 @@ doWritePtrArrayOp :: CmmExpr
doWritePtrArrayOp addr idx val
= do dflags <- getDynFlags
let ty = cmmExprType dflags val
hdr_size = arrPtrsHdrSize dflags
-- Update remembered set for non-moving collector
whenUpdRemSetEnabled dflags
$ emitUpdRemSetPush (cmmLoadIndexOffExpr dflags hdr_size ty addr ty idx)
-- This write barrier is to ensure that the heap writes to the object
-- referred to by val have happened before we write val into the array.
-- See #12469 for details.
emitPrimCall [] MO_WriteBarrier []
mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
mkBasicIndexedWrite hdr_size Nothing addr ty idx val
emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
-- the write barrier. We must write a byte into the mark table:
-- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
emit $ mkStore (
cmmOffsetExpr dflags
(cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
(cmmOffsetExprW dflags (cmmOffsetB dflags addr hdr_size)
(loadArrPtrsSize dflags addr))
(CmmMachOp (mo_wordUShr dflags) [idx,
mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
......@@ -2223,6 +2235,8 @@ emitCopyArray copy src0 src_off dst0 dst_off0 n =
dst <- assignTempE dst0
dst_off <- assignTempE dst_off0
emitCopyUpdRemSetPush dflags (arrPtrsHdrSizeW dflags) dst dst_off n
-- Set the dirty bit in the header.
emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
......@@ -2285,6 +2299,8 @@ emitCopySmallArray copy src0 src_off dst0 dst_off n =
src <- assignTempE src0
dst <- assignTempE dst0
emitCopyUpdRemSetPush dflags (smallArrPtrsHdrSizeW dflags) dst dst_off n
-- Set the dirty bit in the header.
emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
......@@ -2413,6 +2429,12 @@ doWriteSmallPtrArrayOp :: CmmExpr
doWriteSmallPtrArrayOp addr idx val = do
dflags <- getDynFlags
let ty = cmmExprType dflags val
-- Update remembered set for non-moving collector
tmp <- newTemp ty
mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing ty tmp addr ty idx
whenUpdRemSetEnabled dflags $ emitUpdRemSetPush (CmmReg (CmmLocal tmp))
emitPrimCall [] MO_WriteBarrier [] -- #12469
mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
......@@ -2592,3 +2614,31 @@ emitCtzCall res x width = do
[ res ]
(MO_Ctz width)
[ x ]
---------------------------------------------------------------------------
-- Pushing to the update remembered set
---------------------------------------------------------------------------
-- | Push to the update remembered set a range of pointer-array elements that
-- are about to be overwritten by a copy.
emitCopyUpdRemSetPush :: DynFlags
-> WordOff -- ^ array header size
-> CmmExpr -- ^ destination array
-> CmmExpr -- ^ offset in destination array (in words)
-> Int -- ^ number of elements to copy
-> FCode ()
emitCopyUpdRemSetPush _dflags _hdr_size _dst _dst_off 0 = return ()
emitCopyUpdRemSetPush dflags hdr_size dst dst_off n =
whenUpdRemSetEnabled dflags $ do
updfr_off <- getUpdFrameOff
graph <- mkCall lbl (NativeNodeCall,NativeReturn) [] args updfr_off []
emit graph
where
lbl = mkLblExpr $ mkPrimCallLabel
$ PrimCall (fsLit "stg_copyArray_barrier") rtsUnitId
args =
[ mkIntExpr dflags hdr_size
, dst
, dst_off
, mkIntExpr dflags n
]
......@@ -39,6 +39,11 @@ module StgCmmUtils (
mkWordCLit,
newStringCLit, newByteStringCLit,
blankWord,
-- * Update remembered set operations
whenUpdRemSetEnabled,
emitUpdRemSetPush,
emitUpdRemSetPushThunk,
) where
#include "HsVersions.h"
......@@ -576,3 +581,40 @@ assignTemp' e
let reg = CmmLocal lreg
emitAssign reg e
return (CmmReg reg)
---------------------------------------------------------------------------
-- Pushing to the update remembered set
---------------------------------------------------------------------------
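-- | Emit the given code only when the nonmoving collector's update remembered
-- set write barrier is enabled, as determined by a runtime test of the RTS's
-- @nonmoving_write_barrier_enabled@ flag.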
whenUpdRemSetEnabled :: DynFlags -> FCode a -> FCode ()
whenUpdRemSetEnabled dflags code = do
do_it <- getCode code
the_if <- mkCmmIfThenElse' is_enabled do_it mkNop (Just False)
emit the_if
where
enabled = CmmLoad (CmmLit $ CmmLabel mkNonmovingWriteBarrierEnabledLabel) (bWord dflags)
zero = zeroExpr dflags
is_enabled = cmmNeWord dflags enabled zero
-- | Emit code to add an entry to a now-overwritten pointer to the update
-- remembered set.
emitUpdRemSetPush :: CmmExpr -- ^ value of pointer which was overwritten
-> FCode ()
emitUpdRemSetPush ptr = do
emitRtsCall
rtsUnitId
(fsLit "updateRemembSetPushClosure_")
[(CmmReg (CmmGlobal BaseReg), AddrHint),
(ptr, AddrHint)]
False
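-- | Emit code to push a thunk to the update remembered set; used when
-- blackholing (see emitBlackHoleCode).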
emitUpdRemSetPushThunk :: CmmExpr -- ^ the thunk
-> FCode ()
emitUpdRemSetPushThunk ptr = do
emitRtsCall
rtsUnitId
(fsLit "updateRemembSetPushThunk_")
[(CmmReg (CmmGlobal BaseReg), AddrHint),
(ptr, AddrHint)]
False
......@@ -313,6 +313,24 @@ collection. Hopefully, you won't need any of these in normal operation,
but there are several things that can be tweaked for maximum
performance.
.. rts-flag:: -xn
:default: off
:since: 8.8.1
.. index::
single: concurrent mark and sweep
Enable the concurrent mark-and-sweep garbage collector for oldest-generation
collections. By default GHC uses a stop-the-world copying garbage collector
for all generations, which can cause long pauses in execution during major
garbage collections. :rts-flag:`-xn` instead collects the oldest generation
with a concurrent mark-and-sweep collector, allowing oldest-generation
garbage collection to proceed concurrently with mutation.

Note that :rts-flag:`-xn` cannot be used with ``-G1`` or :rts-flag:`-c`.
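For example, to run a program with the concurrent collector enabled::

    $ ./my-program +RTS -xn -RTS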
.. rts-flag:: -A ⟨size⟩
:default: 1MB
......
......@@ -832,6 +832,10 @@
__gen = TO_W_(bdescr_gen_no(__bd)); \
if (__gen > 0) { recordMutableCap(__p, __gen); }
/* -----------------------------------------------------------------------------
Update remembered set write barrier
-------------------------------------------------------------------------- */
/* -----------------------------------------------------------------------------
Arrays
-------------------------------------------------------------------------- */
......@@ -934,3 +938,25 @@
prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \
\
return (dst);
//
// Nonmoving write barrier helpers
//
// See Note [Update remembered set] in NonMovingMark.c.
#if defined(THREADED_RTS)
#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
if (W_[nonmoving_write_barrier_enabled] != 0) (likely: False)
#else
// A similar measure is also taken in rts/NonMoving.h, but that isn't visible from C--
#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
if (0)
#define nonmoving_write_barrier_enabled 0
#endif
// A useful helper for pushing a pointer to the update remembered set.
#define updateRemembSetPushPtr(p) \
IF_NONMOVING_WRITE_BARRIER_ENABLED { \
ccall updateRemembSetPushClosure_(BaseReg "ptr", p "ptr"); \
}
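// For example (illustrative, not part of this patch), a primop about to
// overwrite a MutVar's pointer field could first record the old value with:
//
//     updateRemembSetPushPtr(StgMutVar_var(mv));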
......@@ -74,6 +74,10 @@ extern "C" {
#define RTS_UNREACHABLE abort()
#endif
/* Prefetch primitives */
#define prefetchForRead(ptr) __builtin_prefetch(ptr, 0)
#define prefetchForWrite(ptr) __builtin_prefetch(ptr, 1)
/* Fix for mingw stat problem (done here so it's early enough) */
#if defined(mingw32_HOST_OS)
#define __MSVCRT__ 1
......@@ -189,6 +193,7 @@ void _assertFail(const char *filename, unsigned int linenum)
#include "rts/storage/ClosureMacros.h"
#include "rts/storage/MBlock.h"
#include "rts/storage/GC.h"
#include "rts/NonMoving.h"
/* Other RTS external APIs */
#include "rts/Parallel.h"
......
......@@ -151,6 +151,23 @@ typedef struct GCDetails_ {
Time cpu_ns;
// The time elapsed during GC itself
Time elapsed_ns;
//
// Concurrent garbage collector
//
// The CPU time used during the post-mark pause phase of the concurrent
// nonmoving GC.
Time nonmoving_gc_sync_cpu_ns;
// The time elapsed during the post-mark pause phase of the concurrent
// nonmoving GC.
Time nonmoving_gc_sync_elapsed_ns;
// The CPU time used during the concurrent nonmoving GC.
Time nonmoving_gc_cpu_ns;
// The time elapsed during the concurrent nonmoving GC.
Time nonmoving_gc_elapsed_ns;
} GCDetails;
//
......@@ -241,6 +258,28 @@ typedef struct _RTSStats {
// The number of times a GC thread has iterated its outer loop across all
// parallel GCs
uint64_t scav_find_work;
// ----------------------------------
// Concurrent garbage collector
// The CPU time used during the post-mark pause phase of the concurrent
// nonmoving GC.
Time nonmoving_gc_sync_cpu_ns;
// The time elapsed during the post-mark pause phase of the concurrent
// nonmoving GC.
Time nonmoving_gc_sync_elapsed_ns;
// The maximum time elapsed during the post-mark pause phase of the
// concurrent nonmoving GC.
Time nonmoving_gc_sync_max_elapsed_ns;
// The CPU time used during the concurrent nonmoving GC.
Time nonmoving_gc_cpu_ns;
// The time elapsed during the concurrent nonmoving GC.
Time nonmoving_gc_elapsed_ns;
// The maximum time elapsed during the concurrent nonmoving GC.
Time nonmoving_gc_max_elapsed_ns;
} RTSStats;
void getRTSStats (RTSStats *s);
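/* A usage sketch (illustrative, not part of this patch; run the program with
 * `+RTS -T` so that statistics are collected):
 *
 *     RTSStats stats;
 *     getRTSStats(&stats);
 *     // stats.nonmoving_gc_elapsed_ns: elapsed time of the most recent
 *     // concurrent nonmoving collection.
 */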
......
......@@ -182,12 +182,21 @@
#define EVENT_USER_BINARY_MSG 181
#define EVENT_CONC_MARK_BEGIN 200
#define EVENT_CONC_MARK_END 201
#define EVENT_CONC_SYNC_BEGIN 202
#define EVENT_CONC_SYNC_END 203
#define EVENT_CONC_SWEEP_BEGIN 204
#define EVENT_CONC_SWEEP_END 205
#define EVENT_CONC_UPD_REM_SET_FLUSH 206
#define EVENT_NONMOVING_HEAP_CENSUS 207
/*
* The highest event code +1 that ghc itself emits. Note that some event
* ranges higher than this are reserved but not currently emitted by ghc.
* This must match the size of the EventDesc[] array in EventLog.c
*/
#define NUM_GHC_EVENT_TAGS 182
#define NUM_GHC_EVENT_TAGS 208
#if 0 /* DEPRECATED EVENTS: */
/* we don't actually need to record the thread, it's implicit */
......
......@@ -52,6 +52,7 @@ typedef struct _GC_FLAGS {
double oldGenFactor;
double pcFreeHeap;
bool useNonmoving;
uint32_t generations;
bool squeezeUpdFrames;
......@@ -95,6 +96,7 @@ typedef struct _DEBUG_FLAGS {
bool weak; /* 'w' */
bool gccafs; /* 'G' */
bool gc; /* 'g' */
bool nonmoving_gc; /* 'n' */
bool block_alloc; /* 'b' */
bool sanity; /* 'S' warning: might be expensive! */
bool stable; /* 't' */
......@@ -167,6 +169,7 @@ typedef struct _TRACE_FLAGS {
bool timestamp; /* show timestamp in stderr output */
bool scheduler; /* trace scheduler events */
bool gc; /* trace GC events */
bool nonmoving_gc; /* trace nonmoving GC events */
bool sparks_sampled; /* trace spark events by a sampled method */
bool sparks_full; /* trace spark events 100% accurately */
bool user; /* trace user events (emitted from Haskell code) */
......
/* -----------------------------------------------------------------------------
*
* (c) The GHC Team, 2018-2019
*
* Non-moving garbage collector
*
* Do not #include this file directly: #include "Rts.h" instead.
*
* To understand the structure of the RTS headers, see the wiki:
* http://ghc.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes
*
* -------------------------------------------------------------------------- */
#pragma once
/* This is called by the code generator */
extern DLL_IMPORT_RTS
void updateRemembSetPushClosure_(StgRegTable *reg, StgClosure *p);
void updateRemembSetPushClosure(Capability *cap, StgClosure *p);
void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p);
// Note that RTS code should not condition on this directly but rather
// use the IF_NONMOVING_WRITE_BARRIER_ENABLED macro to ensure that
// the barrier is eliminated in the non-threaded RTS.
extern StgWord DLL_IMPORT_DATA_VAR(nonmoving_write_barrier_enabled);
......@@ -97,6 +97,8 @@ typedef struct bdescr_ {
// block allocator. In particular, the
// value (StgPtr)(-1) is used to
// indicate that a block is unallocated.
//
// Unused by the non-moving allocator.
struct bdescr_ *link; // used for chaining blocks together
......@@ -141,7 +143,8 @@ typedef struct bdescr_ {
#define BF_LARGE 2
/* Block is pinned */
#define BF_PINNED 4
/* Block is to be marked, not copied */
/* Block is to be marked, not copied. Also used for marked large objects in
* non-moving heap. */
#define BF_MARKED 8
/* Block is executable */
#define BF_EXEC 32
......@@ -153,6 +156,12 @@ typedef struct bdescr_ {
#define BF_SWEPT 256
/* Block is part of a Compact */
#define BF_COMPACT 512
/* A non-moving allocator segment (see NonMoving.c) */
#define BF_NONMOVING 1024
/* A large object which has been moved off of oldest_gen->large_objects and
 * onto nonmoving_large_objects. The mark phase ignores objects which aren't
 * so-flagged. */
#define BF_NONMOVING_SWEEPING 2048
/* Maximum flag value (do not define anything higher than this!) */
#define BF_FLAG_MAX (1 << 15)
......@@ -290,6 +299,13 @@ EXTERN_INLINE bdescr* allocBlock(void)
bdescr *allocGroupOnNode(uint32_t node, W_ n);
// Allocate n blocks, aligned at an n-block boundary. The returned bdescr
// will satisfy this invariant:
//
//     bdescr->start % (BLOCK_SIZE * n) == 0
//
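// For instance, allocAlignedGroupOnNode(node, 4) returns a group whose start
// address is a multiple of 4 * BLOCK_SIZE.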
bdescr *allocAlignedGroupOnNode(uint32_t node, W_ n);
EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node);
EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node)
{
......
......@@ -107,6 +107,14 @@ INLINE_HEADER const StgConInfoTable *get_con_itbl(const StgClosure *c)
return CON_INFO_PTR_TO_STRUCT((c)->header.info);
}
/* Used when we expect another thread to be mutating the info table pointer of
* a closure (e.g. when busy-waiting on a WHITEHOLE).
*/
INLINE_HEADER const StgInfoTable *get_volatile_itbl(StgClosure *c) {
return INFO_PTR_TO_STRUCT((StgInfoTable*) VOLATILE_LOAD(&c->header.info));
}
INLINE_HEADER StgHalfWord GET_TAG(const StgClosure *con)
{
return get_itbl(con)->srt;
......
......@@ -234,7 +234,7 @@ void setKeepCAFs (void);
and is put on the mutable list.
-------------------------------------------------------------------------- */
void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p);
void dirty_MUT_VAR(StgRegTable *reg, StgMutVar *mv, StgClosure *old);
/* set to disable CAF garbage collection in GHCi. */
/* (needed when dynamic libraries are used). */
......
......@@ -185,6 +185,53 @@ typedef struct StgTSO_ {
} *StgTSOPtr; // StgTSO defined in rts/Types.h
/* Note [StgStack dirtiness flags and concurrent marking]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* Without concurrent collection by the nonmoving collector the stack dirtiness story
* is quite simple: The stack is either STACK_DIRTY (meaning it has been added to mut_list)
* or not.
*
* However, things are considerably more complicated with concurrent collection
* (namely, when nonmoving_write_barrier_enabled is set): In addition to adding
* the stack to mut_list and flagging it as STACK_DIRTY, we also must ensure
* that stacks are marked in accordance with the nonmoving collector's snapshot
* invariant, namely: every stack alive at the time the snapshot is taken must
* be marked at some point after the snapshot is taken and before it is mutated
* or the sweep phase commences.
*
* This marking may be done by the concurrent mark phase (in the case of a
* thread that never runs during the concurrent mark) or by the mutator when
* dirtying the stack. However, it is unsafe for the concurrent collector to
* traverse the stack while it is under mutation. Consequently, the following
* handshake is obeyed by the mutator's write barrier and the concurrent mark to
* ensure this doesn't happen:
*
* 1. The entity seeking to mark first checks that the stack lives in the nonmoving
* generation; if not then the stack was not alive at the time the snapshot
* was taken and therefore we need not mark it.
*
* 2. The entity seeking to mark checks the stack's mark bit. If it is set then
* no mark is necessary.
*
* 3. The entity seeking to mark tries to lock the stack for marking by
* atomically setting its `marking` field to the current non-moving mark
* epoch:
*
* a. If the mutator finds the concurrent collector has already locked the
* stack then it waits until it is finished (indicated by the mark bit
* being set) before proceeding with execution.
*
* b. If the concurrent collector finds that the mutator has locked the stack
* then it moves on, leaving the mutator to mark it. There is no need to wait;
* the mark is guaranteed to finish before sweep due to the post-mark
* synchronization with mutators.
*
* c. Whoever succeeds in locking the stack is responsible for marking it and
* setting the stack's mark bit (either the BF_MARKED bit for large objects
* or otherwise its bit in its segment's mark bitmap).
*
*/
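/* A minimal sketch of the locking step (3) above, using C11 atomics. The
 * names (sketch_stack, try_lock_stack_for_marking, mark_epoch) are assumed
 * for illustration; this is not the RTS's actual implementation:
 *
 *     #include <stdatomic.h>
 *     #include <stdbool.h>
 *     #include <stdint.h>
 *
 *     typedef struct {
 *         _Atomic uint64_t marking; // 0, or the mark epoch of whoever locked it
 *     } sketch_stack;
 *
 *     // Returns true iff the caller won the race and must mark the stack.
 *     static bool try_lock_stack_for_marking(sketch_stack *stack,
 *                                            uint64_t mark_epoch)
 *     {
 *         uint64_t expected = 0;
 *         return atomic_compare_exchange_strong(&stack->marking,
 *                                               &expected, mark_epoch);
 *     }
 */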
#define STACK_DIRTY 1
// used by the sanity checker to verify that all dirty stacks are on the mutable list
......@@ -193,7 +240,8 @@ typedef struct StgTSO_ {
typedef struct StgStack_ {
StgHeader header;
StgWord32 stack_size; // stack size in *words*
StgWord32 dirty; // non-zero => dirty
StgWord dirty; // non-zero => dirty
StgWord marking; // non-zero => someone is currently marking the stack
StgPtr sp; // current stack pointer
StgWord stack[];
} StgStack;
......
......@@ -542,5 +542,6 @@ void * pushCostCentre (void *ccs, void *cc);
// Capability.c
extern unsigned int n_capabilities;
extern void updateRemembSetPushThunk_(void *reg, void *p1);
#endif
......@@ -150,21 +150,22 @@ data MiscFlags = MiscFlags
--
-- @since 4.8.0.0
data DebugFlags = DebugFlags
{ scheduler :: Bool -- ^ @s@
, interpreter :: Bool -- ^ @i@
, weak :: Bool -- ^ @w@
, gccafs :: Bool -- ^ @G@
, gc :: Bool -- ^ @g@
, block_alloc :: Bool -- ^ @b@
, sanity :: Bool -- ^ @S@
, stable :: Bool -- ^ @t@
, prof :: Bool -- ^ @p@
, linker :: Bool -- ^ @l@ the object linker
, apply :: Bool -- ^ @a@
, stm :: Bool -- ^ @m@
, squeeze :: Bool -- ^ @z@ stack squeezing & lazy blackholing
, hpc :: Bool -- ^ @c@ coverage
, sparks :: Bool -- ^ @r@
{ scheduler :: Bool -- ^ @s@
, interpreter :: Bool -- ^ @i@
, weak :: Bool -- ^ @w@
, gccafs :: Bool -- ^ @G@
, gc :: Bool -- ^ @g@
, nonmoving_gc :: Bool -- ^ @n@
, block_alloc :: Bool -- ^ @b@
, sanity :: Bool -- ^ @S@
, stable :: Bool -- ^ @t@
, prof :: Bool -- ^ @p@
, linker :: Bool -- ^ @l@ the object linker
, apply :: Bool -- ^ @a@
, stm :: Bool -- ^ @m@
, squeeze :: Bool -- ^ @z@ stack squeezing & lazy blackholing
, hpc :: Bool -- ^ @c@ coverage
, sparks :: Bool -- ^ @r@
} deriving ( Show -- ^ @since 4.8.0.0
)
......@@ -291,6 +292,8 @@ data TraceFlags = TraceFlags
, timestamp :: Bool -- ^ show timestamp in stderr output
, traceScheduler :: Bool -- ^ trace scheduler events
, traceGc :: Bool -- ^ trace GC events
, traceNonmovingGc
:: Bool -- ^ trace nonmoving GC heap census samples
, sparksSampled :: Bool -- ^ trace spark events by a sampled method
, sparksFull :: Bool -- ^ trace spark events 100% accurately
, user :: Bool -- ^ trace user events (emitted from Haskell code)
......@@ -462,6 +465,8 @@ getDebugFlags = do
(#{peek DEBUG_FLAGS, gccafs} ptr :: IO CBool))
<*> (toBool <$>
(#{peek DEBUG_FLAGS, gc} ptr :: IO CBool))
<*> (toBool <$>
(#{peek DEBUG_FLAGS, nonmoving_gc} ptr :: IO CBool))
<*> (toBool <$>
(#{peek DEBUG_FLAGS, block_alloc} ptr :: IO CBool))
<*> (toBool <$>
......@@ -522,6 +527,8 @@ getTraceFlags = do
(#{peek TRACE_FLAGS, scheduler} ptr :: IO CBool))
<*> (toBool <$>
(#{peek TRACE_FLAGS, gc} ptr :: IO CBool))
<*> (toBool <$>
(#{peek TRACE_FLAGS, nonmoving_gc} ptr :: IO CBool))
<*> (toBool <$>
(#{peek TRACE_FLAGS, sparks_sampled} ptr :: IO CBool))
<*> (toBool <$>
......
......@@ -103,6 +103,25 @@ data RTSStats = RTSStats {
-- | Total elapsed time (at the previous GC)
, elapsed_ns :: RtsTime
-- | The CPU time used during the post-mark pause phase of the concurrent
-- nonmoving GC.
, nonmoving_gc_sync_cpu_ns :: RtsTime
-- | The time elapsed during the post-mark pause phase of the concurrent
-- nonmoving GC.
, nonmoving_gc_sync_elapsed_ns :: RtsTime
-- | The maximum time elapsed during the post-mark pause phase of the
-- concurrent nonmoving GC.
, nonmoving_gc_sync_max_elapsed_ns :: RtsTime
-- | The CPU time used during the concurrent nonmoving GC.
, nonmoving_gc_cpu_ns :: RtsTime
-- | The time elapsed during the concurrent nonmoving GC.
, nonmoving_gc_elapsed_ns :: RtsTime
-- | The maximum time elapsed during the concurrent nonmoving GC.
, nonmoving_gc_max_elapsed_ns :: RtsTime
-- | Details about the most recent GC
, gc :: GCDetails
} deriving ( Read -- ^ @since 4.10.0.0
......@@ -146,6 +165,13 @@ data GCDetails = GCDetails {
, gcdetails_cpu_ns :: RtsTime
-- | The time elapsed during GC itself
, gcdetails_elapsed_ns :: RtsTime
-- | The CPU time used during the post-mark pause phase of the concurrent
-- nonmoving GC.
, gcdetails_nonmoving_gc_sync_cpu_ns :: RtsTime
-- | The time elapsed during the post-mark pause phase of the concurrent
-- nonmoving GC.
, gcdetails_nonmoving_gc_sync_elapsed_ns :: RtsTime
} deriving ( Read -- ^ @since 4.10.0.0
, Show -- ^ @since 4.10.0.0
)
......@@ -192,6 +218,12 @@ getRTSStats = do
gc_elapsed_ns <- (# peek RTSStats, gc_elapsed_ns) p
cpu_ns <- (# peek RTSStats, cpu_ns) p
elapsed_ns <- (# peek RTSStats, elapsed_ns) p
nonmoving_gc_sync_cpu_ns <- (# peek RTSStats, nonmoving_gc_sync_cpu_ns) p
nonmoving_gc_sync_elapsed_ns <- (# peek RTSStats, nonmoving_gc_sync_elapsed_ns) p
nonmoving_gc_sync_max_elapsed_ns <- (# peek RTSStats, nonmoving_gc_sync_max_elapsed_ns) p
nonmoving_gc_cpu_ns <- (# peek RTSStats, nonmoving_gc_cpu_ns) p
nonmoving_gc_elapsed_ns <- (# peek RTSStats, nonmoving_gc_elapsed_ns) p
nonmoving_gc_max_elapsed_ns <- (# peek RTSStats, nonmoving_gc_max_elapsed_ns) p
let pgc = (# ptr RTSStats, gc) p
gc <- do
gcdetails_gen <- (# peek GCDetails, gen) pgc
......@@ -211,5 +243,7 @@ getRTSStats = do
gcdetails_sync_elapsed_ns <- (# peek GCDetails, sync_elapsed_ns) pgc
gcdetails_cpu_ns <- (# peek GCDetails, cpu_ns) pgc
gcdetails_elapsed_ns <- (# peek GCDetails, elapsed_ns) pgc
gcdetails_nonmoving_gc_sync_cpu_ns <- (# peek GCDetails, nonmoving_gc_sync_cpu_ns) pgc
gcdetails_nonmoving_gc_sync_elapsed_ns <- (# peek GCDetails, nonmoving_gc_sync_elapsed_ns) pgc
return GCDetails{..}
return RTSStats{..}
......@@ -2,7 +2,11 @@ test('heap_all',
[when(have_profiling(), extra_ways(['prof'])),
# These ways produce slightly different heap representations.
# Currently we don't test them.
omit_ways(['ghci', 'hpc'])
omit_ways(['ghci', 'hpc',
'nonmoving', 'nonmoving_thr', 'nonmoving_thr_ghc']),
# The debug RTS initializes some fields with 0xaa and so
# this test spuriously fails.
when(compiler_debugged(), skip)
],
compile_and_run, [''])
......
......@@ -652,6 +652,8 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK")
/* someone else beat us to it */
jump ENTRY_LBL(stg_WHITEHOLE) (ap);
}
// Can't add StgInd_indirectee(ap) to UpdRemSet here because the old value is
// not reachable.
StgInd_indirectee(ap) = CurrentTSO;
prim_write_barrier;
SET_INFO(ap, __stg_EAGER_BLACKHOLE_info);
......