...
 
Commits (76)
...@@ -743,12 +743,13 @@ oneSRT dflags staticFuns blockids lbls isCAF cafs = do ...@@ -743,12 +743,13 @@ oneSRT dflags staticFuns blockids lbls isCAF cafs = do
-- build an SRT object at all, instead we put the singleton SRT -- build an SRT object at all, instead we put the singleton SRT
-- entry in the info table. -- entry in the info table.
[one@(SRTEntry lbl)] [one@(SRTEntry lbl)]
| -- Info tables refer to SRTs by offset (as noted in the section | uSE_INLINE_SRT_FIELD dflags
-- Info tables refer to SRTs by offset (as noted in the section
-- "Referring to an SRT from the info table" of Note [SRTs]). However, -- "Referring to an SRT from the info table" of Note [SRTs]). However,
-- when dynamic linking is used we cannot guarantee that the offset -- when dynamic linking is used we cannot guarantee that the offset
-- between the SRT and the info table will fit in the offset field. -- between the SRT and the info table will fit in the offset field.
-- Consequently we build a singleton SRT in in this case. -- Consequently we build a singleton SRT in in this case.
not (labelDynamic dflags this_mod lbl) && not (labelDynamic dflags this_mod lbl)
-- MachO relocations can't express offsets between compilation units at -- MachO relocations can't express offsets between compilation units at
-- all, so we are always forced to build a singleton SRT in this case. -- all, so we are always forced to build a singleton SRT in this case.
......
...@@ -950,19 +950,23 @@ ...@@ -950,19 +950,23 @@
return (dst); return (dst);
//
// Nonmoving write barrier helpers
//
// See Note [Update remembered set] in NonMovingMark.c.
#if defined(THREADED_RTS) #if defined(THREADED_RTS)
#define IF_WRITE_BARRIER_ENABLED \ #define IF_NONMOVING_WRITE_BARRIER_ENABLED \
if (W_[nonmoving_write_barrier_enabled] != 0) (likely: False) if (W_[nonmoving_write_barrier_enabled] != 0) (likely: False)
#else #else
// A similar measure is also taken in rts/NonMoving.h, but that isn't visible from C-- // A similar measure is also taken in rts/NonMoving.h, but that isn't visible from C--
#define IF_WRITE_BARRIER_ENABLED \ #define IF_NONMOVING_WRITE_BARRIER_ENABLED \
if (0) if (0)
#define nonmoving_write_barrier_enabled 0 #define nonmoving_write_barrier_enabled 0
#endif #endif
// A useful helper for pushing a pointer to the update remembered set. // A useful helper for pushing a pointer to the update remembered set.
// See Note [Update remembered set] in NonMovingMark.c.
#define updateRemembSetPushPtr(p) \ #define updateRemembSetPushPtr(p) \
IF_WRITE_BARRIER_ENABLED { \ IF_NONMOVING_WRITE_BARRIER_ENABLED { \
ccall updateRemembSetPushClosure_(BaseReg "ptr", p "ptr"); \ ccall updateRemembSetPushClosure_(BaseReg "ptr", p "ptr"); \
} }
...@@ -74,6 +74,10 @@ extern "C" { ...@@ -74,6 +74,10 @@ extern "C" {
#define RTS_UNREACHABLE abort() #define RTS_UNREACHABLE abort()
#endif #endif
/* Prefetch primitives.
 *
 * Thin wrappers around the compiler's __builtin_prefetch. The second argument
 * of the builtin selects the kind of access being prepared for: 0 for a read,
 * 1 for a write.
 */
#define prefetchForRead(ptr) __builtin_prefetch(ptr, 0)
#define prefetchForWrite(ptr) __builtin_prefetch(ptr, 1)
/* Fix for mingw stat problem (done here so it's early enough) */ /* Fix for mingw stat problem (done here so it's early enough) */
#if defined(mingw32_HOST_OS) #if defined(mingw32_HOST_OS)
#define __MSVCRT__ 1 #define __MSVCRT__ 1
......
...@@ -151,6 +151,23 @@ typedef struct GCDetails_ { ...@@ -151,6 +151,23 @@ typedef struct GCDetails_ {
Time cpu_ns; Time cpu_ns;
// The time elapsed during GC itself // The time elapsed during GC itself
Time elapsed_ns; Time elapsed_ns;
//
// Concurrent garbage collector
//
// The CPU time used during the post-mark pause phase of the concurrent
// nonmoving GC.
Time nonmoving_gc_sync_cpu_ns;
// The time elapsed during the post-mark pause phase of the concurrent
// nonmoving GC.
Time nonmoving_gc_sync_elapsed_ns;
// The CPU time used by the concurrent nonmoving GC as a whole (not only
// its post-mark pause phase).
Time nonmoving_gc_cpu_ns;
// The time elapsed during the concurrent nonmoving GC as a whole (not only
// its post-mark pause phase).
Time nonmoving_gc_elapsed_ns;
} GCDetails; } GCDetails;
// //
...@@ -241,6 +258,28 @@ typedef struct _RTSStats { ...@@ -241,6 +258,28 @@ typedef struct _RTSStats {
// The number of times a GC thread has iterated it's outer loop across all // The number of times a GC thread has iterated it's outer loop across all
// parallel GCs // parallel GCs
uint64_t scav_find_work; uint64_t scav_find_work;
// ----------------------------------
// Concurrent garbage collector
// The CPU time used during the post-mark pause phase of the concurrent
// nonmoving GC.
Time nonmoving_gc_sync_cpu_ns;
// The time elapsed during the post-mark pause phase of the concurrent
// nonmoving GC.
Time nonmoving_gc_sync_elapsed_ns;
// The maximum time elapsed during the post-mark pause phase of the
// concurrent nonmoving GC.
Time nonmoving_gc_sync_max_elapsed_ns;
// The CPU time used by the concurrent nonmoving GC as a whole (not only
// its post-mark pause phase).
Time nonmoving_gc_cpu_ns;
// The time elapsed during the concurrent nonmoving GC as a whole (not only
// its post-mark pause phase).
Time nonmoving_gc_elapsed_ns;
// The maximum time elapsed during any single run of the concurrent
// nonmoving GC (not only its post-mark pause phase).
Time nonmoving_gc_max_elapsed_ns;
} RTSStats; } RTSStats;
void getRTSStats (RTSStats *s); void getRTSStats (RTSStats *s);
......
...@@ -183,12 +183,21 @@ ...@@ -183,12 +183,21 @@
#define EVENT_USER_BINARY_MSG 181 #define EVENT_USER_BINARY_MSG 181
#define EVENT_CONC_MARK_BEGIN 200
#define EVENT_CONC_MARK_END 201
#define EVENT_CONC_SYNC_BEGIN 202
#define EVENT_CONC_SYNC_END 203
#define EVENT_CONC_SWEEP_BEGIN 204
#define EVENT_CONC_SWEEP_END 205
#define EVENT_CONC_UPD_REM_SET_FLUSH 206
#define EVENT_NONMOVING_HEAP_CENSUS 207
/* /*
* The highest event code +1 that ghc itself emits. Note that some event * The highest event code +1 that ghc itself emits. Note that some event
* ranges higher than this are reserved but not currently emitted by ghc. * ranges higher than this are reserved but not currently emitted by ghc.
* This must match the size of the EventDesc[] array in EventLog.c * This must match the size of the EventDesc[] array in EventLog.c
*/ */
#define NUM_GHC_EVENT_TAGS 182 #define NUM_GHC_EVENT_TAGS 208
#if 0 /* DEPRECATED EVENTS: */ #if 0 /* DEPRECATED EVENTS: */
/* we don't actually need to record the thread, it's implicit */ /* we don't actually need to record the thread, it's implicit */
......
...@@ -52,7 +52,9 @@ typedef struct _GC_FLAGS { ...@@ -52,7 +52,9 @@ typedef struct _GC_FLAGS {
double oldGenFactor; double oldGenFactor;
double pcFreeHeap; double pcFreeHeap;
bool useNonmoving; bool useNonmoving; // default = false
bool nonmovingSelectorOpt; // Do selector optimization in the
// non-moving heap, default = false
uint32_t generations; uint32_t generations;
bool squeezeUpdFrames; bool squeezeUpdFrames;
...@@ -170,6 +172,7 @@ typedef struct _TRACE_FLAGS { ...@@ -170,6 +172,7 @@ typedef struct _TRACE_FLAGS {
bool timestamp; /* show timestamp in stderr output */ bool timestamp; /* show timestamp in stderr output */
bool scheduler; /* trace scheduler events */ bool scheduler; /* trace scheduler events */
bool gc; /* trace GC events */ bool gc; /* trace GC events */
bool nonmoving_gc; /* trace nonmoving GC events */
bool sparks_sampled; /* trace spark events by a sampled method */ bool sparks_sampled; /* trace spark events by a sampled method */
bool sparks_full; /* trace spark events 100% accurately */ bool sparks_full; /* trace spark events 100% accurately */
bool user; /* trace user events (emitted from Haskell code) */ bool user; /* trace user events (emitted from Haskell code) */
......
...@@ -21,4 +21,7 @@ void updateRemembSetPushClosure(Capability *cap, StgClosure *p); ...@@ -21,4 +21,7 @@ void updateRemembSetPushClosure(Capability *cap, StgClosure *p);
void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p); void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p);
// Note that RTS code should not condition on this directly but rather
// use the IF_NONMOVING_WRITE_BARRIER_ENABLED macro to ensure that
// the barrier is eliminated in the non-threaded RTS.
extern StgWord DLL_IMPORT_DATA_VAR(nonmoving_write_barrier_enabled); extern StgWord DLL_IMPORT_DATA_VAR(nonmoving_write_barrier_enabled);
...@@ -200,8 +200,10 @@ void freeThreadLocalKey (ThreadLocalKey *key); ...@@ -200,8 +200,10 @@ void freeThreadLocalKey (ThreadLocalKey *key);
void setThreadAffinity (uint32_t n, uint32_t m); void setThreadAffinity (uint32_t n, uint32_t m);
void setThreadNode (uint32_t node); void setThreadNode (uint32_t node);
void releaseThreadNode (void); void releaseThreadNode (void);
#endif // !CMINUSMINUS #endif // !CMINUSMINUS
#if defined(THREADED_RTS) #if defined(THREADED_RTS)
#define ACQUIRE_LOCK(l) OS_ACQUIRE_LOCK(l) #define ACQUIRE_LOCK(l) OS_ACQUIRE_LOCK(l)
......
...@@ -33,6 +33,7 @@ typedef int64_t Time; ...@@ -33,6 +33,7 @@ typedef int64_t Time;
#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION) #define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION)
#define TimeToSeconds(t) ((t) / TIME_RESOLUTION) #define TimeToSeconds(t) ((t) / TIME_RESOLUTION)
#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
// Use instead of SecondsToTime() when we have a floating-point // Use instead of SecondsToTime() when we have a floating-point
// seconds value, to avoid truncating it. // seconds value, to avoid truncating it.
......
...@@ -88,17 +88,23 @@ typedef struct bdescr_ { ...@@ -88,17 +88,23 @@ typedef struct bdescr_ {
StgPtr start; // [READ ONLY] start addr of memory StgPtr start; // [READ ONLY] start addr of memory
StgPtr free; // First free byte of memory. union {
// allocGroup() sets this to the value of start. StgPtr free; // First free byte of memory.
// NB. during use this value should lie // allocGroup() sets this to the value of start.
// between start and start + blocks * // NB. during use this value should lie
// BLOCK_SIZE. Values outside this // between start and start + blocks *
// range are reserved for use by the // BLOCK_SIZE. Values outside this
// block allocator. In particular, the // range are reserved for use by the
// value (StgPtr)(-1) is used to // block allocator. In particular, the
// indicate that a block is unallocated. // value (StgPtr)(-1) is used to
// // indicate that a block is unallocated.
// Unused by the non-moving allocator. //
// Unused by the non-moving allocator.
struct NonmovingSegmentInfo {
StgWord8 log_block_size;
StgWord16 next_free_snap;
} nonmoving_segment;
};
struct bdescr_ *link; // used for chaining blocks together struct bdescr_ *link; // used for chaining blocks together
......
...@@ -338,7 +338,7 @@ typedef struct StgConInfoTable_ { ...@@ -338,7 +338,7 @@ typedef struct StgConInfoTable_ {
* info must be a Stg[Ret|Thunk]InfoTable* (an info table that has a SRT) * info must be a Stg[Ret|Thunk]InfoTable* (an info table that has a SRT)
*/ */
#if defined(TABLES_NEXT_TO_CODE) #if defined(TABLES_NEXT_TO_CODE)
#if defined(x86_64_HOST_ARCH) #if defined(USE_INLINE_SRT_FIELD)
#define GET_SRT(info) \ #define GET_SRT(info) \
((StgClosure*) (((StgWord) ((info)+1)) + (info)->i.srt)) ((StgClosure*) (((StgWord) ((info)+1)) + (info)->i.srt))
#else #else
...@@ -365,7 +365,7 @@ typedef struct StgConInfoTable_ { ...@@ -365,7 +365,7 @@ typedef struct StgConInfoTable_ {
* info must be a StgFunInfoTable* * info must be a StgFunInfoTable*
*/ */
#if defined(TABLES_NEXT_TO_CODE) #if defined(TABLES_NEXT_TO_CODE)
#if defined(x86_64_HOST_ARCH) #if defined(USE_INLINE_SRT_FIELD)
#define GET_FUN_SRT(info) \ #define GET_FUN_SRT(info) \
((StgClosure*) (((StgWord) ((info)+1)) + (info)->i.srt)) ((StgClosure*) (((StgWord) ((info)+1)) + (info)->i.srt))
#else #else
......
...@@ -231,6 +231,9 @@ typedef struct StgTSO_ { ...@@ -231,6 +231,9 @@ typedef struct StgTSO_ {
* setting the stack's mark bit (either the BF_MARKED bit for large objects * setting the stack's mark bit (either the BF_MARKED bit for large objects
* or otherwise its bit in its segment's mark bitmap). * or otherwise its bit in its segment's mark bitmap).
* *
* To ensure that mutation does not proceed until the stack is fully marked the
* mark phase must not set the mark bit until it has finished tracing.
*
*/ */
#define STACK_DIRTY 1 #define STACK_DIRTY 1
......
...@@ -292,6 +292,8 @@ data TraceFlags = TraceFlags ...@@ -292,6 +292,8 @@ data TraceFlags = TraceFlags
, timestamp :: Bool -- ^ show timestamp in stderr output , timestamp :: Bool -- ^ show timestamp in stderr output
, traceScheduler :: Bool -- ^ trace scheduler events , traceScheduler :: Bool -- ^ trace scheduler events
, traceGc :: Bool -- ^ trace GC events , traceGc :: Bool -- ^ trace GC events
, traceNonmovingGc
:: Bool -- ^ trace nonmoving GC heap census samples
, sparksSampled :: Bool -- ^ trace spark events by a sampled method , sparksSampled :: Bool -- ^ trace spark events by a sampled method
, sparksFull :: Bool -- ^ trace spark events 100% accurately , sparksFull :: Bool -- ^ trace spark events 100% accurately
, user :: Bool -- ^ trace user events (emitted from Haskell code) , user :: Bool -- ^ trace user events (emitted from Haskell code)
...@@ -525,6 +527,8 @@ getTraceFlags = do ...@@ -525,6 +527,8 @@ getTraceFlags = do
(#{peek TRACE_FLAGS, scheduler} ptr :: IO CBool)) (#{peek TRACE_FLAGS, scheduler} ptr :: IO CBool))
<*> (toBool <$> <*> (toBool <$>
(#{peek TRACE_FLAGS, gc} ptr :: IO CBool)) (#{peek TRACE_FLAGS, gc} ptr :: IO CBool))
<*> (toBool <$>
(#{peek TRACE_FLAGS, nonmoving_gc} ptr :: IO CBool))
<*> (toBool <$> <*> (toBool <$>
(#{peek TRACE_FLAGS, sparks_sampled} ptr :: IO CBool)) (#{peek TRACE_FLAGS, sparks_sampled} ptr :: IO CBool))
<*> (toBool <$> <*> (toBool <$>
......
...@@ -103,6 +103,25 @@ data RTSStats = RTSStats { ...@@ -103,6 +103,25 @@ data RTSStats = RTSStats {
-- | Total elapsed time (at the previous GC) -- | Total elapsed time (at the previous GC)
, elapsed_ns :: RtsTime , elapsed_ns :: RtsTime
-- | The CPU time used during the post-mark pause phase of the concurrent
-- nonmoving GC.
, nonmoving_gc_sync_cpu_ns :: RtsTime
-- | The time elapsed during the post-mark pause phase of the concurrent
-- nonmoving GC.
, nonmoving_gc_sync_elapsed_ns :: RtsTime
-- | The maximum time elapsed during the post-mark pause phase of the
-- concurrent nonmoving GC.
, nonmoving_gc_sync_max_elapsed_ns :: RtsTime
-- | The CPU time used by the concurrent nonmoving GC as a whole (not only
-- its post-mark pause phase).
, nonmoving_gc_cpu_ns :: RtsTime
-- | The time elapsed during the concurrent nonmoving GC as a whole (not only
-- its post-mark pause phase).
, nonmoving_gc_elapsed_ns :: RtsTime
-- | The maximum time elapsed during any single run of the concurrent
-- nonmoving GC (not only its post-mark pause phase).
, nonmoving_gc_max_elapsed_ns :: RtsTime
-- | Details about the most recent GC -- | Details about the most recent GC
, gc :: GCDetails , gc :: GCDetails
} deriving ( Read -- ^ @since 4.10.0.0 } deriving ( Read -- ^ @since 4.10.0.0
...@@ -146,6 +165,13 @@ data GCDetails = GCDetails { ...@@ -146,6 +165,13 @@ data GCDetails = GCDetails {
, gcdetails_cpu_ns :: RtsTime , gcdetails_cpu_ns :: RtsTime
-- | The time elapsed during GC itself -- | The time elapsed during GC itself
, gcdetails_elapsed_ns :: RtsTime , gcdetails_elapsed_ns :: RtsTime
-- | The CPU time used during the post-mark pause phase of the concurrent
-- nonmoving GC.
, gcdetails_nonmoving_gc_sync_cpu_ns :: RtsTime
-- | The time elapsed during the post-mark pause phase of the concurrent
-- nonmoving GC.
, gcdetails_nonmoving_gc_sync_elapsed_ns :: RtsTime
} deriving ( Read -- ^ @since 4.10.0.0 } deriving ( Read -- ^ @since 4.10.0.0
, Show -- ^ @since 4.10.0.0 , Show -- ^ @since 4.10.0.0
) )
...@@ -192,6 +218,12 @@ getRTSStats = do ...@@ -192,6 +218,12 @@ getRTSStats = do
gc_elapsed_ns <- (# peek RTSStats, gc_elapsed_ns) p gc_elapsed_ns <- (# peek RTSStats, gc_elapsed_ns) p
cpu_ns <- (# peek RTSStats, cpu_ns) p cpu_ns <- (# peek RTSStats, cpu_ns) p
elapsed_ns <- (# peek RTSStats, elapsed_ns) p elapsed_ns <- (# peek RTSStats, elapsed_ns) p
nonmoving_gc_sync_cpu_ns <- (# peek RTSStats, nonmoving_gc_sync_cpu_ns) p
nonmoving_gc_sync_elapsed_ns <- (# peek RTSStats, nonmoving_gc_sync_elapsed_ns) p
nonmoving_gc_sync_max_elapsed_ns <- (# peek RTSStats, nonmoving_gc_sync_max_elapsed_ns) p
nonmoving_gc_cpu_ns <- (# peek RTSStats, nonmoving_gc_cpu_ns) p
nonmoving_gc_elapsed_ns <- (# peek RTSStats, nonmoving_gc_elapsed_ns) p
nonmoving_gc_max_elapsed_ns <- (# peek RTSStats, nonmoving_gc_max_elapsed_ns) p
let pgc = (# ptr RTSStats, gc) p let pgc = (# ptr RTSStats, gc) p
gc <- do gc <- do
gcdetails_gen <- (# peek GCDetails, gen) pgc gcdetails_gen <- (# peek GCDetails, gen) pgc
...@@ -211,5 +243,7 @@ getRTSStats = do ...@@ -211,5 +243,7 @@ getRTSStats = do
gcdetails_sync_elapsed_ns <- (# peek GCDetails, sync_elapsed_ns) pgc gcdetails_sync_elapsed_ns <- (# peek GCDetails, sync_elapsed_ns) pgc
gcdetails_cpu_ns <- (# peek GCDetails, cpu_ns) pgc gcdetails_cpu_ns <- (# peek GCDetails, cpu_ns) pgc
gcdetails_elapsed_ns <- (# peek GCDetails, elapsed_ns) pgc gcdetails_elapsed_ns <- (# peek GCDetails, elapsed_ns) pgc
gcdetails_nonmoving_gc_sync_cpu_ns <- (# peek GCDetails, nonmoving_gc_sync_cpu_ns) pgc
gcdetails_nonmoving_gc_sync_elapsed_ns <- (# peek GCDetails, nonmoving_gc_sync_elapsed_ns) pgc
return GCDetails{..} return GCDetails{..}
return RTSStats{..} return RTSStats{..}
...@@ -74,7 +74,7 @@ test('length001', ...@@ -74,7 +74,7 @@ test('length001',
# excessive amounts of stack space. So we specifically set a low # excessive amounts of stack space. So we specifically set a low
# stack limit and mark it as failing under a few conditions. # stack limit and mark it as failing under a few conditions.
[extra_run_opts('+RTS -K8m -RTS'), [extra_run_opts('+RTS -K8m -RTS'),
expect_fail_for(['normal', 'threaded1', 'llvm'])], expect_fail_for(['normal', 'threaded1', 'llvm', 'nonmoving', 'nonmoving_thr', 'nonmoving_thr_ghc'])],
compile_and_run, ['']) compile_and_run, [''])
test('ratio001', normal, compile_and_run, ['']) test('ratio001', normal, compile_and_run, [''])
......
...@@ -2,7 +2,11 @@ test('heap_all', ...@@ -2,7 +2,11 @@ test('heap_all',
[when(have_profiling(), extra_ways(['prof'])), [when(have_profiling(), extra_ways(['prof'])),
# These ways produce slightly different heap representations. # These ways produce slightly different heap representations.
# Currently we don't test them. # Currently we don't test them.
omit_ways(['ghci', 'hpc']) omit_ways(['ghci', 'hpc',
'nonmoving', 'nonmoving_thr', 'nonmoving_thr_ghc']),
# The debug RTS initializes some fields with 0xaa and so
# this test spuriously fails.
when(compiler_debugged(), skip)
], ],
compile_and_run, ['']) compile_and_run, [''])
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
void initializeTimer (void); void initializeTimer (void);
Time getProcessCPUTime (void); Time getProcessCPUTime (void);
Time getMyThreadCPUTime (void);
void getProcessTimes (Time *user, Time *elapsed); void getProcessTimes (Time *user, Time *elapsed);
/* Get the current date and time. /* Get the current date and time.
......
/* ---------------------------------------------------------------------------
*
* (c) The GHC Team, 2001-2005
*
* Catching long lock-acquisition pauses.
*
* --------------------------------------------------------------------------*/
#include "PosixSource.h"
#include "Rts.h"
#include "Trace.h"
#include "LongPause.h"
#if defined(THREADED_RTS)
/* Report a timed region that overran its budget.
 *
 * Emits a TRACE_gc trace message containing the caller-supplied description
 * and the measured duration, converted from nanoseconds to seconds.
 *
 * Both parameters carry STG_UNUSED although they are referenced below --
 * presumably because trace() can expand to nothing in some build
 * configurations (TODO confirm against Trace.h).
 */
void
longPauseCb (uint64_t dur_ns STG_UNUSED, const char *desc STG_UNUSED)
{
    trace(TRACE_gc, "LONG PAUSE(%s) %f", desc, (double) dur_ns / 1e9);
}
/* Acquire `mutex`, measuring how long the acquisition takes.
 *
 * The acquisition is bracketed by LONG_PAUSE_START / LONG_PAUSE_END, so a
 * wait longer than `max_msec` milliseconds is reported together with `desc`.
 * The lock is held on return.
 */
void
ACQUIRE_LOCK_CHECKED_ (Mutex *mutex, int max_msec, const char *desc)
{
    struct long_pause_ctx timer;

    LONG_PAUSE_START(&timer);
    ACQUIRE_LOCK(mutex);
    LONG_PAUSE_END(&timer, max_msec, desc);
}
#endif
/* ---------------------------------------------------------------------------
*
* (c) The GHC Team, 2001-2005
*
* Catching long lock-acquisition pauses: helpers for timing a region of
* code and reporting when it takes longer than a given threshold.
*
* --------------------------------------------------------------------------*/
#pragma once
#include "rts/OSThreads.h"
#include "BeginPrivate.h"
/* Callback invoked by LONG_PAUSE_END when a timed region ran for longer than
 * its threshold. `dur_ns` is the measured duration in nanoseconds and `desc`
 * is the caller-supplied description of the region.
 */
void longPauseCb (uint64_t dur_ns, const char *desc);
#if !defined(CMINUSMINUS)
#if defined(THREADED_RTS)
#include <time.h>
/* Timing state shared by one LONG_PAUSE_START / LONG_PAUSE_END pair. */
struct long_pause_ctx {
    struct timespec start;   // CLOCK_MONOTONIC reading taken by LONG_PAUSE_START
};

/* Begin timing a region: store the current CLOCK_MONOTONIC time in `ctx`. */
INLINE_HEADER void
LONG_PAUSE_START (struct long_pause_ctx *ctx)
{
    struct timespec *begin = &ctx->start;
    clock_gettime(CLOCK_MONOTONIC, begin);
}
/* Finish timing a region started with LONG_PAUSE_START.
 *
 * Reads CLOCK_MONOTONIC again, computes the elapsed time in nanoseconds and,
 * if it exceeds `max_msec` milliseconds, reports it through longPauseCb
 * together with `desc`.
 *
 *   ctx      - context previously filled in by LONG_PAUSE_START
 *   max_msec - threshold in milliseconds
 *   desc     - description of the timed region, passed on to longPauseCb
 */
INLINE_HEADER void
LONG_PAUSE_END (struct long_pause_ctx *ctx, int max_msec, const char *desc)
{
    struct timespec end;
    clock_gettime(CLOCK_MONOTONIC, &end);

    // Widen to 64 bits *before* scaling: where time_t is 32 bits wide,
    // multiplying a seconds difference by 10^9 in time_t arithmetic would
    // overflow (undefined behaviour) after roughly two seconds.
    int64_t secs  = (int64_t) end.tv_sec  - (int64_t) ctx->start.tv_sec;
    int64_t nsecs = (int64_t) end.tv_nsec - (int64_t) ctx->start.tv_nsec;
    int64_t dt    = secs * 1000*1000*1000 + nsecs;

    // Likewise compute the threshold in 64 bits; in plain int arithmetic
    // any max_msec above 2147 would overflow when converted to nanoseconds.
    int64_t limit_ns = (int64_t) max_msec * 1000*1000;

    if (dt > limit_ns) {
        longPauseCb(dt, desc);
    }
}
/* Acquire the given lock, checking that it takes no more than `max_msec`
 * milliseconds to do so. The acquisition is timed with LONG_PAUSE_START /
 * LONG_PAUSE_END; `desc` identifies the lock in the resulting report.
 */
void ACQUIRE_LOCK_CHECKED_(Mutex *mutex, int max_msec, const char *desc);
/* Acquire the given lock, checking that doing so takes a reasonable amount
 * of time. This is ACQUIRE_LOCK_CHECKED_ with a fixed threshold of 100
 * milliseconds; `desc` identifies the lock in any resulting report.
 */
INLINE_HEADER void
ACQUIRE_LOCK_CHECKED (Mutex *mutex, const char *desc)
{
    const int default_max_msec = 100;
    ACQUIRE_LOCK_CHECKED_(mutex, default_max_msec, desc);
}
#else
/* Non-threaded RTS: the timing helpers do nothing and the checked
 * lock-acquisition wrappers expand to nothing.
 */
struct long_pause_ctx {
    // Placeholder member: ISO C does not permit a struct with no members
    // (an empty struct is only accepted as a compiler extension).
    char unused;
};

/* No-op counterpart of the threaded LONG_PAUSE_START. */
INLINE_HEADER void LONG_PAUSE_START(struct long_pause_ctx *ctx STG_UNUSED) {}

/* No-op counterpart of the threaded LONG_PAUSE_END. */
INLINE_HEADER void LONG_PAUSE_END(struct long_pause_ctx *ctx STG_UNUSED, int max_msec STG_UNUSED, const char *desc STG_UNUSED) {}

#define ACQUIRE_LOCK_CHECKED(l,desc)
#define ACQUIRE_LOCK_CHECKED_(l,m,desc)
#endif /* defined(THREADED_RTS) */
#endif /* !CMINUSMINUS */
#include "EndPrivate.h"
...@@ -262,7 +262,7 @@ loop: ...@@ -262,7 +262,7 @@ loop:
// point to the BLOCKING_QUEUE from the BLACKHOLE // point to the BLOCKING_QUEUE from the BLACKHOLE
write_barrier(); // make the BQ visible, see Note [Heap memory barriers]. write_barrier(); // make the BQ visible, see Note [Heap memory barriers].
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) { IF_NONMOVING_WRITE_BARRIER_ENABLED {
updateRemembSetPushClosure(cap, (StgClosure*)p); updateRemembSetPushClosure(cap, (StgClosure*)p);
} }
((StgInd*)bh)->indirectee = (StgClosure *)bq; ((StgInd*)bh)->indirectee = (StgClosure *)bq;
...@@ -293,7 +293,7 @@ loop: ...@@ -293,7 +293,7 @@ loop:
} }
#endif #endif
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) { IF_NONMOVING_WRITE_BARRIER_ENABLED {
// We are about to overwrite bq->queue; make sure its current value // We are about to overwrite bq->queue; make sure its current value
// makes it into the update remembered set // makes it into the update remembered set
updateRemembSetPushClosure(cap, (StgClosure*)bq->queue); updateRemembSetPushClosure(cap, (StgClosure*)bq->queue);
......
...@@ -478,7 +478,7 @@ stg_copyArray_barrier ( W_ hdr_size, gcptr dst, W_ dst_off, W_ n) ...@@ -478,7 +478,7 @@ stg_copyArray_barrier ( W_ hdr_size, gcptr dst, W_ dst_off, W_ n)
end = p + WDS(n); end = p + WDS(n);
again: again:
IF_WRITE_BARRIER_ENABLED { IF_NONMOVING_WRITE_BARRIER_ENABLED {
ccall updateRemembSetPushClosure_(BaseReg "ptr", W_[p] "ptr"); ccall updateRemembSetPushClosure_(BaseReg "ptr", W_[p] "ptr");
} }
p = p + WDS(1); p = p + WDS(1);
...@@ -494,7 +494,7 @@ stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n) ...@@ -494,7 +494,7 @@ stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
W_ dst_p, src_p, bytes; W_ dst_p, src_p, bytes;
if (n > 0) { if (n > 0) {
IF_WRITE_BARRIER_ENABLED { IF_NONMOVING_WRITE_BARRIER_ENABLED {
call stg_copyArray_barrier(SIZEOF_StgSmallMutArrPtrs, call stg_copyArray_barrier(SIZEOF_StgSmallMutArrPtrs,
dst, dst_off, n); dst, dst_off, n);
} }
...@@ -515,7 +515,7 @@ stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n ...@@ -515,7 +515,7 @@ stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n
W_ dst_p, src_p, bytes; W_ dst_p, src_p, bytes;
if (n > 0) { if (n > 0) {
IF_WRITE_BARRIER_ENABLED { IF_NONMOVING_WRITE_BARRIER_ENABLED {
call stg_copyArray_barrier(SIZEOF_StgSmallMutArrPtrs, call stg_copyArray_barrier(SIZEOF_StgSmallMutArrPtrs,
dst, dst_off, n); dst, dst_off, n);
} }
......
...@@ -1179,6 +1179,8 @@ heapCensusChain( Census *census, bdescr *bd ) ...@@ -1179,6 +1179,8 @@ heapCensusChain( Census *census, bdescr *bd )
} }
} }
// t is the process CPU time at the beginning of the current GC; it is used
// as the mutator CPU time reported as the census timestamp.
void heapCensus (Time t) void heapCensus (Time t)
{ {
uint32_t g, n; uint32_t g, n;
...@@ -1186,7 +1188,7 @@ void heapCensus (Time t) ...@@ -1186,7 +1188,7 @@ void heapCensus (Time t)
gen_workspace *ws; gen_workspace *ws;
census = &censuses[era]; census = &censuses[era];
census->time = mut_user_time_until(t); census->time = TimeToSecondsDbl(t);
census->rtime = TimeToNS(stat_getElapsedTime()); census->rtime = TimeToNS(stat_getElapsedTime());
......
...@@ -157,6 +157,7 @@ void initRtsFlagsDefaults(void) ...@@ -157,6 +157,7 @@ void initRtsFlagsDefaults(void)
RtsFlags.GcFlags.pcFreeHeap = 3; /* 3% */ RtsFlags.GcFlags.pcFreeHeap = 3; /* 3% */
RtsFlags.GcFlags.oldGenFactor = 2; RtsFlags.GcFlags.oldGenFactor = 2;
RtsFlags.GcFlags.useNonmoving = false; RtsFlags.GcFlags.useNonmoving = false;
RtsFlags.GcFlags.nonmovingSelectorOpt = false;
RtsFlags.GcFlags.generations = 2; RtsFlags.GcFlags.generations = 2;
RtsFlags.GcFlags.squeezeUpdFrames = true; RtsFlags.GcFlags.squeezeUpdFrames = true;
RtsFlags.GcFlags.compact = false; RtsFlags.GcFlags.compact = false;
...@@ -222,6 +223,7 @@ void initRtsFlagsDefaults(void) ...@@ -222,6 +223,7 @@ void initRtsFlagsDefaults(void)
RtsFlags.TraceFlags.timestamp = false; RtsFlags.TraceFlags.timestamp = false;
RtsFlags.TraceFlags.scheduler = false; RtsFlags.TraceFlags.scheduler = false;
RtsFlags.TraceFlags.gc = false; RtsFlags.TraceFlags.gc = false;
RtsFlags.TraceFlags.nonmoving_gc = false;
RtsFlags.TraceFlags.sparks_sampled= false; RtsFlags.TraceFlags.sparks_sampled= false;
RtsFlags.TraceFlags.sparks_full = false; RtsFlags.TraceFlags.sparks_full = false;
RtsFlags.TraceFlags.user = false; RtsFlags.TraceFlags.user = false;
...@@ -1541,6 +1543,10 @@ error = true; ...@@ -1541,6 +1543,10 @@ error = true;
OPTION_SAFE; OPTION_SAFE;
RtsFlags.GcFlags.useNonmoving = true; RtsFlags.GcFlags.useNonmoving = true;
unchecked_arg_start++; unchecked_arg_start++;
if (rts_argv[arg][3] == 's') {
RtsFlags.GcFlags.nonmovingSelectorOpt = true;
unchecked_arg_start++;
}
break; break;
case 'c': /* Debugging tool: show current cost centre on case 'c': /* Debugging tool: show current cost centre on
...@@ -2131,6 +2137,10 @@ static void read_trace_flags(const char *arg) ...@@ -2131,6 +2137,10 @@ static void read_trace_flags(const char *arg)
RtsFlags.TraceFlags.gc = enabled; RtsFlags.TraceFlags.gc = enabled;
enabled = true; enabled = true;
break; break;
case 'n':
RtsFlags.TraceFlags.nonmoving_gc = enabled;
enabled = true;
break;
case 'u': case 'u':
RtsFlags.TraceFlags.user = enabled; RtsFlags.TraceFlags.user = enabled;
enabled = true; enabled = true;
......
...@@ -297,8 +297,10 @@ static StgClosure *lock_tvar(Capability *cap, ...@@ -297,8 +297,10 @@ static StgClosure *lock_tvar(Capability *cap,
} while (cas((void *)&(s -> current_value), } while (cas((void *)&(s -> current_value),
(StgWord)result, (StgWord)trec) != (StgWord)result); (StgWord)result, (StgWord)trec) != (StgWord)result);
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled && result)) {
updateRemembSetPushClosure(cap, result); IF_NONMOVING_WRITE_BARRIER_ENABLED {
if (result)
updateRemembSetPushClosure(cap, result);
} }
return result; return result;
} }
...@@ -323,8 +325,9 @@ static StgBool cond_lock_tvar(Capability *cap, ...@@ -323,8 +325,9 @@ static StgBool cond_lock_tvar(Capability *cap,
TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected); TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected);
w = cas((void *)&(s -> current_value), (StgWord)expected, (StgWord)trec); w = cas((void *)&(s -> current_value), (StgWord)expected, (StgWord)trec);
result = (StgClosure *)w; result = (StgClosure *)w;
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled && result)) { IF_NONMOVING_WRITE_BARRIER_ENABLED {
updateRemembSetPushClosure(cap, expected); if (result)
updateRemembSetPushClosure(cap, expected);
} }
TRACE("%p : %s", trec, result ? "success" : "failure"); TRACE("%p : %s", trec, result ? "success" : "failure");
return (result == expected); return (result == expected);
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "sm/Sanity.h" #include "sm/Sanity.h"
#include "Stats.h" #include "Stats.h"
#include "STM.h" #include "STM.h"
#include "LongPause.h"
#include "Prelude.h" #include "Prelude.h"
#include "ThreadLabels.h" #include "ThreadLabels.h"
#include "Updates.h" #include "Updates.h"
...@@ -164,7 +165,8 @@ static void scheduleHandleThreadBlocked( StgTSO *t ); ...@@ -164,7 +165,8 @@ static void scheduleHandleThreadBlocked( StgTSO *t );
static bool scheduleHandleThreadFinished( Capability *cap, Task *task, static bool scheduleHandleThreadFinished( Capability *cap, Task *task,
StgTSO *t ); StgTSO *t );
static bool scheduleNeedHeapProfile(bool ready_to_gc); static bool scheduleNeedHeapProfile(bool ready_to_gc);
static void scheduleDoGC(Capability **pcap, Task *task, bool force_major); static void scheduleDoGC( Capability **pcap, Task *task,
bool force_major, bool deadlock_detect );
static void deleteThread (StgTSO *tso); static void deleteThread (StgTSO *tso);
static void deleteAllThreads (void); static void deleteAllThreads (void);
...@@ -264,7 +266,7 @@ schedule (Capability *initialCapability, Task *task) ...@@ -264,7 +266,7 @@ schedule (Capability *initialCapability, Task *task)
case SCHED_INTERRUPTING: case SCHED_INTERRUPTING:
debugTrace(DEBUG_sched, "SCHED_INTERRUPTING"); debugTrace(DEBUG_sched, "SCHED_INTERRUPTING");
/* scheduleDoGC() deletes all the threads */ /* scheduleDoGC() deletes all the threads */
scheduleDoGC(&cap,task,true); scheduleDoGC(&cap,task,true,false);
// after scheduleDoGC(), we must be shutting down. Either some // after scheduleDoGC(), we must be shutting down. Either some
// other Capability did the final GC, or we did it above, // other Capability did the final GC, or we did it above,
...@@ -561,7 +563,7 @@ run_thread: ...@@ -561,7 +563,7 @@ run_thread:
} }
if (ready_to_gc || scheduleNeedHeapProfile(ready_to_gc)) { if (ready_to_gc || scheduleNeedHeapProfile(ready_to_gc)) {
scheduleDoGC(&cap,task,false); scheduleDoGC(&cap,task,false,false);
} }
} /* end of while() */ } /* end of while() */
} }
...@@ -935,7 +937,7 @@ scheduleDetectDeadlock (Capability **pcap, Task *task) ...@@ -935,7 +937,7 @@ scheduleDetectDeadlock (Capability **pcap, Task *task)
// they are unreachable and will therefore be sent an // they are unreachable and will therefore be sent an
// exception. Any threads thus released will be immediately // exception. Any threads thus released will be immediately
// runnable. // runnable.
scheduleDoGC (pcap, task, true/*force major GC*/); scheduleDoGC (pcap, task, true/*force major GC*/, true/*deadlock detection*/);
cap = *pcap; cap = *pcap;
// when force_major == true. scheduleDoGC sets // when force_major == true. scheduleDoGC sets
// recent_activity to ACTIVITY_DONE_GC and turns off the timer // recent_activity to ACTIVITY_DONE_GC and turns off the timer
...@@ -1005,7 +1007,7 @@ scheduleProcessInbox (Capability **pcap USED_IF_THREADS) ...@@ -1005,7 +1007,7 @@ scheduleProcessInbox (Capability **pcap USED_IF_THREADS)
while (!emptyInbox(cap)) { while (!emptyInbox(cap)) {
// Executing messages might use heap, so we should check for GC. // Executing messages might use heap, so we should check for GC.
if (doYouWantToGC(cap)) { if (doYouWantToGC(cap)) {
scheduleDoGC(pcap, cap->running_task, false); scheduleDoGC(pcap, cap->running_task, false, false);
cap = *pcap; cap = *pcap;
} }
...@@ -1461,7 +1463,7 @@ static bool requestSync ( ...@@ -1461,7 +1463,7 @@ static bool requestSync (
// mark thread). Consequently we must wait until the pending sync is // mark thread). Consequently we must wait until the pending sync is
// finished before proceeding to ensure we don't loop. // finished before proceeding to ensure we don't loop.
// TODO: Don't busy-wait // TODO: Don't busy-wait
ACQUIRE_LOCK(&sync_finished_mutex); ACQUIRE_LOCK_CHECKED(&sync_finished_mutex, "sync_finished_mutex");
while (pending_sync) { while (pending_sync) {
waitCondition(&sync_finished_cond, &sync_finished_mutex); waitCondition(&sync_finished_cond, &sync_finished_mutex);
} }
...@@ -1552,9 +1554,11 @@ void releaseAllCapabilities(uint32_t n, Capability *keep_cap, Task *task) ...@@ -1552,9 +1554,11 @@ void releaseAllCapabilities(uint32_t n, Capability *keep_cap, Task *task)
* Perform a garbage collection if necessary * Perform a garbage collection if necessary
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
// N.B. See Note [Deadlock detection under nonmoving collector] for rationale
// behind deadlock_detect argument.
static void static void
scheduleDoGC (Capability **pcap, Task *task USED_IF_THREADS, scheduleDoGC (Capability **pcap, Task *task USED_IF_THREADS,
bool force_major) bool force_major, bool deadlock_detect)
{ {
Capability *cap = *pcap; Capability *cap = *pcap;
bool heap_census; bool heap_census;
...@@ -1847,9 +1851,9 @@ delete_threads_and_gc: ...@@ -1847,9 +1851,9 @@ delete_threads_and_gc:
// emerge they don't immediately re-enter the GC. // emerge they don't immediately re-enter the GC.
pending_sync = 0; pending_sync = 0;
signalCondition(&sync_finished_cond); signalCondition(&sync_finished_cond);
GarbageCollect(collect_gen, heap_census, gc_type, cap, idle_cap); GarbageCollect(collect_gen, heap_census, deadlock_detect, gc_type, cap, idle_cap);
#else #else
GarbageCollect(collect_gen, heap_census, 0, cap, NULL); GarbageCollect(collect_gen, heap_census, deadlock_detect, 0, cap, NULL);
#endif #endif
// If we're shutting down, don't leave any idle GC work to do. // If we're shutting down, don't leave any idle GC work to do.
...@@ -2257,6 +2261,12 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS) ...@@ -2257,6 +2261,12 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
// structures, the nursery, etc. // structures, the nursery, etc.
// //
for (n = new_n_capabilities; n < enabled_capabilities; n++) { for (n = new_n_capabilities; n < enabled_capabilities; n++) {
// Disabled capabilities do not participate in the nonmoving collector's
// final mark synchronization. Consequently it is very important that we
// flush a capability's update remembered set before disabling it since
// otherwise the nonmoving collector may not see live references.
nonmovingAddUpdRemSetBlocks(&capabilities[n]->upd_rem_set.queue);
capabilities[n]->disabled = true; capabilities[n]->disabled = true;
traceCapDisable(capabilities[n]); traceCapDisable(capabilities[n]);
} }
...@@ -2500,7 +2510,7 @@ resumeThread (void *task_) ...@@ -2500,7 +2510,7 @@ resumeThread (void *task_)
incall->suspended_tso = NULL; incall->suspended_tso = NULL;
incall->suspended_cap = NULL; incall->suspended_cap = NULL;
// we will modify tso->_link // we will modify tso->_link
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) { IF_NONMOVING_WRITE_BARRIER_ENABLED {
updateRemembSetPushClosure(cap, (StgClosure *)tso->_link); updateRemembSetPushClosure(cap, (StgClosure *)tso->_link);
} }
tso->_link = END_TSO_QUEUE; tso->_link = END_TSO_QUEUE;
...@@ -2717,7 +2727,7 @@ exitScheduler (bool wait_foreign USED_IF_THREADS) ...@@ -2717,7 +2727,7 @@ exitScheduler (bool wait_foreign USED_IF_THREADS)
nonmovingStop(); nonmovingStop();
Capability *cap = task->cap; Capability *cap = task->cap;
waitForCapability(&cap,task); waitForCapability(&cap,task);
scheduleDoGC(&cap,task,true); scheduleDoGC(&cap,task,true,false);
ASSERT(task->incall->tso == NULL); ASSERT(task->incall->tso == NULL);
releaseCapability(cap); releaseCapability(cap);
} }
...@@ -2785,7 +2795,7 @@ performGC_(bool force_major) ...@@ -2785,7 +2795,7 @@ performGC_(bool force_major)
// TODO: do we need to traceTask*() here? // TODO: do we need to traceTask*() here?
waitForCapability(&cap,task); waitForCapability(&cap,task);
scheduleDoGC(&cap,task,force_major); scheduleDoGC(&cap,task,force_major,false);
releaseCapability(cap); releaseCapability(cap);
boundTaskExiting(task); boundTaskExiting(task);
} }
...@@ -3070,6 +3080,7 @@ resurrectThreads (StgTSO *threads) ...@@ -3070,6 +3080,7 @@ resurrectThreads (StgTSO *threads)
for (tso = threads; tso != END_TSO_QUEUE; tso = next) { for (tso = threads; tso != END_TSO_QUEUE; tso = next) {
next = tso->global_link; next = tso->global_link;
debugTrace(DEBUG_weak, "resurrect %p\n", tso);
gen = Bdescr((P_)tso)->gen; gen = Bdescr((P_)tso)->gen;
tso->global_link = gen->threads; tso->global_link = gen->threads;
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include "RtsAPI.h" #include "RtsAPI.h"
#include "Hash.h" #include "Hash.h"
#include "LongPause.h"
#include "RtsUtils.h" #include "RtsUtils.h"
#include "Trace.h" #include "Trace.h"
#include "StableName.h" #include "StableName.h"
...@@ -42,7 +43,7 @@ void ...@@ -42,7 +43,7 @@ void
stableNameLock(void) stableNameLock(void)
{ {
initStableNameTable(); initStableNameTable();
ACQUIRE_LOCK(&stable_name_mutex); ACQUIRE_LOCK_CHECKED(&stable_name_mutex, "stable_name_mutex");
} }
void void
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include "RtsAPI.h" #include "RtsAPI.h"
#include "Hash.h" #include "Hash.h"
#include "LongPause.h"
#include "RtsUtils.h" #include "RtsUtils.h"
#include "Trace.h" #include "Trace.h"
#include "StablePtr.h" #include "StablePtr.h"
...@@ -125,7 +126,7 @@ void ...@@ -125,7 +126,7 @@ void
stablePtrLock(void) stablePtrLock(void)
{ {
initStablePtrTable(); initStablePtrTable();
ACQUIRE_LOCK(&stable_ptr_mutex); ACQUIRE_LOCK_CHECKED(&stable_ptr_mutex, "stable_ptr_mutex");
} }
void void
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include "Rts.h" #include "Rts.h"
#include "RtsUtils.h" #include "RtsUtils.h"
#include "Hash.h" #include "Hash.h"
#include "LongPause.h"
#include "StablePtr.h" #include "StablePtr.h"
static HashTable * spt = NULL; static HashTable * spt = NULL;
...@@ -44,7 +45,7 @@ void hs_spt_insert_stableptr(StgWord64 key[2], StgStablePtr *entry) { ...@@ -44,7 +45,7 @@ void hs_spt_insert_stableptr(StgWord64 key[2], StgStablePtr *entry) {
#endif #endif
} }
ACQUIRE_LOCK(&spt_lock); ACQUIRE_LOCK_CHECKED(&spt_lock, "spt_lock");
insertHashTable(spt, (StgWord)key, entry); insertHashTable(spt, (StgWord)key, entry);
RELEASE_LOCK(&spt_lock); RELEASE_LOCK(&spt_lock);
} }
...@@ -67,7 +68,7 @@ static void freeSptEntry(void* entry) { ...@@ -67,7 +68,7 @@ static void freeSptEntry(void* entry) {
void hs_spt_remove(StgWord64 key[2]) { void hs_spt_remove(StgWord64 key[2]) {
if (spt) { if (spt) {
ACQUIRE_LOCK(&spt_lock); ACQUIRE_LOCK_CHECKED(&spt_lock, "spt_lock");
StgStablePtr* entry = removeHashTable(spt, (StgWord)key, NULL); StgStablePtr* entry = removeHashTable(spt, (StgWord)key, NULL);
RELEASE_LOCK(&spt_lock); RELEASE_LOCK(&spt_lock);
...@@ -78,7 +79,7 @@ void hs_spt_remove(StgWord64 key[2]) { ...@@ -78,7 +79,7 @@ void hs_spt_remove(StgWord64 key[2]) {
StgPtr hs_spt_lookup(StgWord64 key1, StgWord64 key2) { StgPtr hs_spt_lookup(StgWord64 key1, StgWord64 key2) {
if (spt) { if (spt) {
ACQUIRE_LOCK(&spt_lock); ACQUIRE_LOCK_CHECKED(&spt_lock, "spt_lock");
StgWord64 key[2] = { key1, key2 }; StgWord64 key[2] = { key1, key2 };
const StgStablePtr * entry = lookupHashTable(spt, (StgWord)key); const StgStablePtr * entry = lookupHashTable(spt, (StgWord)key);
const StgPtr ret = entry ? deRefStablePtr(*entry) : NULL; const StgPtr ret = entry ? deRefStablePtr(*entry) : NULL;
...@@ -90,7 +91,7 @@ StgPtr hs_spt_lookup(StgWord64 key1, StgWord64 key2) { ...@@ -90,7 +91,7 @@ StgPtr hs_spt_lookup(StgWord64 key1, StgWord64 key2) {
int hs_spt_keys(StgPtr keys[], int szKeys) { int hs_spt_keys(StgPtr keys[], int szKeys) {
if (spt) { if (spt) {
ACQUIRE_LOCK(&spt_lock); ACQUIRE_LOCK_CHECKED(&spt_lock, "spt_lock");
const int ret = keysHashTable(spt, (StgWord*)keys, szKeys); const int ret = keysHashTable(spt, (StgWord*)keys, szKeys);
RELEASE_LOCK(&spt_lock); RELEASE_LOCK(&spt_lock);
return ret; return ret;
......
This diff is collapsed.
...@@ -30,13 +30,21 @@ void stat_endInit(void); ...@@ -30,13 +30,21 @@ void stat_endInit(void);
void stat_startGCSync(struct gc_thread_ *_gct); void stat_startGCSync(struct gc_thread_ *_gct);
void stat_startGC(Capability *cap, struct gc_thread_ *_gct); void stat_startGC(Capability *cap, struct gc_thread_ *_gct);
void stat_endGC (Capability *cap, struct gc_thread_ *_gct, W_ live, void stat_startGCWorker (Capability *cap, struct gc_thread_ *_gct);
W_ copied, W_ slop, uint32_t gen, uint32_t n_gc_threads, void stat_endGCWorker (Capability *cap, struct gc_thread_ *_gct);
void stat_endGC (Capability *cap, struct gc_thread_ *initiating_gct, W_ live,
W_ copied, W_ slop, uint32_t gen,
uint32_t n_gc_threads, struct gc_thread_ **gc_threads,
W_ par_max_copied, W_ par_balanced_copied, W_ par_max_copied, W_ par_balanced_copied,
W_ gc_spin_spin, W_ gc_spin_yield, W_ mut_spin_spin, W_ gc_spin_spin, W_ gc_spin_yield, W_ mut_spin_spin,
W_ mut_spin_yield, W_ any_work, W_ no_work, W_ mut_spin_yield, W_ any_work, W_ no_work,
W_ scav_find_work); W_ scav_find_work);
void stat_startNonmovingGcSync(void);
void stat_endNonmovingGcSync(void);
void stat_startNonmovingGc (void);
void stat_endNonmovingGc (void);
#if defined(PROFILING) #if defined(PROFILING)
void stat_startRP(void); void stat_startRP(void);
void stat_endRP(uint32_t, int, double); void stat_endRP(uint32_t, int, double);
......
...@@ -244,6 +244,7 @@ INFO_TABLE(stg_IND,1,0,IND,"IND","IND") ...@@ -244,6 +244,7 @@ INFO_TABLE(stg_IND,1,0,IND,"IND","IND")
TICK_ENT_DYN_IND(); /* tick */ TICK_ENT_DYN_IND(); /* tick */
node = UNTAG(StgInd_indirectee(node)); node = UNTAG(StgInd_indirectee(node));
TICK_ENT_VIA_NODE(); TICK_ENT_VIA_NODE();
W_[ben_IND] = W_[ben_IND] + 1;
jump %GET_ENTRY(node) (node); jump %GET_ENTRY(node) (node);
} }
#else #else
...@@ -252,6 +253,7 @@ INFO_TABLE(stg_IND,1,0,IND,"IND","IND") ...@@ -252,6 +253,7 @@ INFO_TABLE(stg_IND,1,0,IND,"IND","IND")
TICK_ENT_DYN_IND(); /* tick */ TICK_ENT_DYN_IND(); /* tick */
R1 = UNTAG(StgInd_indirectee(R1)); R1 = UNTAG(StgInd_indirectee(R1));
TICK_ENT_VIA_NODE(); TICK_ENT_VIA_NODE();
W_[ben_IND] = W_[ben_IND] + 1;
jump %GET_ENTRY(R1) [R1]; jump %GET_ENTRY(R1) [R1];
} }
#endif #endif
...@@ -262,6 +264,7 @@ INFO_TABLE(stg_IND_direct,1,0,IND,"IND","IND") ...@@ -262,6 +264,7 @@ INFO_TABLE(stg_IND_direct,1,0,IND,"IND","IND")
TICK_ENT_DYN_IND(); /* tick */ TICK_ENT_DYN_IND(); /* tick */
node = StgInd_indirectee(node); node = StgInd_indirectee(node);
TICK_ENT_VIA_NODE(); TICK_ENT_VIA_NODE();
W_[ben_IND] = W_[ben_IND] + 1;
jump %ENTRY_CODE(Sp(0)) (node); jump %ENTRY_CODE(Sp(0)) (node);
} }
...@@ -290,6 +293,7 @@ INFO_TABLE(stg_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE") ...@@ -290,6 +293,7 @@ INFO_TABLE(stg_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE")
P_ p, bq, msg; P_ p, bq, msg;
TICK_ENT_DYN_IND(); /* tick */ TICK_ENT_DYN_IND(); /* tick */
W_[ben_IND] = W_[ben_IND] + 1;
retry: retry:
prim_read_barrier; prim_read_barrier;
......
...@@ -334,15 +334,16 @@ threadPaused(Capability *cap, StgTSO *tso) ...@@ -334,15 +334,16 @@ threadPaused(Capability *cap, StgTSO *tso)
} }
#endif #endif
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled IF_NONMOVING_WRITE_BARRIER_ENABLED {
&& ip_THUNK(INFO_PTR_TO_STRUCT(bh_info)))) { if (ip_THUNK(INFO_PTR_TO_STRUCT(bh_info))) {
// We are about to replace a thunk with a blackhole. // We are about to replace a thunk with a blackhole.
// Add the free variables of the closure we are about to // Add the free variables of the closure we are about to
// overwrite to the update remembered set. // overwrite to the update remembered set.
// N.B. We caught the WHITEHOLE case above. // N.B. We caught the WHITEHOLE case above.
updateRemembSetPushThunkEager(cap, updateRemembSetPushThunkEager(cap,
THUNK_INFO_PTR_TO_STRUCT(bh_info), THUNK_INFO_PTR_TO_STRUCT(bh_info),
(StgThunk *) bh); (StgThunk *) bh);
}
} }
// The payload of the BLACKHOLE points to the TSO // The payload of the BLACKHOLE points to the TSO
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "Rts.h" #include "Rts.h"
#include "Capability.h" #include "Capability.h"
#include "LongPause.h"
#include "Updates.h" #include "Updates.h"
#include "Threads.h" #include "Threads.h"
#include "STM.h" #include "STM.h"
...@@ -124,7 +125,7 @@ createThread(Capability *cap, W_ size) ...@@ -124,7 +125,7 @@ createThread(Capability *cap, W_ size)
/* Link the new thread on the global thread list. /* Link the new thread on the global thread list.
*/ */
ACQUIRE_LOCK(&sched_mutex); ACQUIRE_LOCK_CHECKED(&sched_mutex, "sched_mutex");
tso->id = next_thread_id++; // while we have the mutex tso->id = next_thread_id++; // while we have the mutex
tso->global_link = g0->threads; tso->global_link = g0->threads;
/* Mutations above need no memory barrier since this lock will provide /* Mutations above need no memory barrier since this lock will provide
...@@ -723,7 +724,7 @@ threadStackUnderflow (Capability *cap, StgTSO *tso) ...@@ -723,7 +724,7 @@ threadStackUnderflow (Capability *cap, StgTSO *tso)
barf("threadStackUnderflow: not enough space for return values"); barf("threadStackUnderflow: not enough space for return values");
} }
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) { IF_NONMOVING_WRITE_BARRIER_ENABLED {
// ensure that values that we copy into the new stack are marked // ensure that values that we copy into the new stack are marked
// for the nonmoving collector. Note that these values won't // for the nonmoving collector. Note that these values won't
// necessarily form a full closure so we need to handle them // necessarily form a full closure so we need to handle them
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
// events // events
int TRACE_sched; int TRACE_sched;
int TRACE_gc; int TRACE_gc;
int TRACE_nonmoving_gc;
int TRACE_spark_sampled; int TRACE_spark_sampled;
int TRACE_spark_full; int TRACE_spark_full;
int TRACE_user; int TRACE_user;
...@@ -72,6 +73,9 @@ void initTracing (void) ...@@ -72,6 +73,9 @@ void initTracing (void)
RtsFlags.GcFlags.giveStats = COLLECT_GC_STATS; RtsFlags.GcFlags.giveStats = COLLECT_GC_STATS;
} }
TRACE_nonmoving_gc =
RtsFlags.TraceFlags.nonmoving_gc;
TRACE_spark_sampled = TRACE_spark_sampled =
RtsFlags.TraceFlags.sparks_sampled; RtsFlags.TraceFlags.sparks_sampled;
...@@ -802,6 +806,55 @@ void traceThreadLabel_(Capability *cap, ...@@ -802,6 +806,55 @@ void traceThreadLabel_(Capability *cap,
} }
} }
/* Post an EVENT_CONC_MARK_BEGIN event to the eventlog.
 *
 * The event is emitted through postEventNoCap and only when
 * eventlog_enabled is non-zero; otherwise this function does nothing.
 *
 * The parameter list is spelled (void) so that the definition is a proper
 * prototype matching the declaration in the header.
 */
void traceConcMarkBegin(void)
{
    if (eventlog_enabled) {
        postEventNoCap(EVENT_CONC_MARK_BEGIN);
    }
}
/* Forward marked_obj_count to postConcMarkEnd.
 *
 * Nothing is posted when eventlog_enabled is zero.
 */
void traceConcMarkEnd(StgWord32 marked_obj_count)
{
    if (!eventlog_enabled) {
        return;
    }
    postConcMarkEnd(marked_obj_count);
}
/* Post an EVENT_CONC_SYNC_BEGIN event to the eventlog.
 *
 * The event is emitted through postEventNoCap and only when
 * eventlog_enabled is non-zero; otherwise this function does nothing.
 *
 * The parameter list is spelled (void) so that the definition is a proper
 * prototype matching the declaration in the header.
 */
void traceConcSyncBegin(void)
{
    if (eventlog_enabled) {
        postEventNoCap(EVENT_CONC_SYNC_BEGIN);
    }
}
/* Post an EVENT_CONC_SYNC_END event to the eventlog.
 *
 * The event is emitted through postEventNoCap and only when
 * eventlog_enabled is non-zero; otherwise this function does nothing.
 *
 * The parameter list is spelled (void) so that the definition is a proper
 * prototype matching the declaration in the header.
 */
void traceConcSyncEnd(void)
{
    if (eventlog_enabled) {
        postEventNoCap(EVENT_CONC_SYNC_END);
    }
}
/* Post an EVENT_CONC_SWEEP_BEGIN event to the eventlog.
 *
 * The event is emitted through postEventNoCap and only when
 * eventlog_enabled is non-zero; otherwise this function does nothing.
 *
 * The parameter list is spelled (void) so that the definition is a proper
 * prototype matching the declaration in the header.
 */
void traceConcSweepBegin(void)
{
    if (eventlog_enabled) {
        postEventNoCap(EVENT_CONC_SWEEP_BEGIN);
    }
}
/* Post an EVENT_CONC_SWEEP_END event to the eventlog.
 *
 * The event is emitted through postEventNoCap and only when
 * eventlog_enabled is non-zero; otherwise this function does nothing.
 *
 * The parameter list is spelled (void) so that the definition is a proper
 * prototype matching the declaration in the header.
 */
void traceConcSweepEnd(void)
{
    if (eventlog_enabled) {
        postEventNoCap(EVENT_CONC_SWEEP_END);
    }
}
/* Hand cap to postConcUpdRemSetFlush.
 *
 * Nothing is posted when eventlog_enabled is zero.
 */
void traceConcUpdRemSetFlush(Capability *cap)
{
    if (!eventlog_enabled) {
        return;
    }
    postConcUpdRemSetFlush(cap);
}
/* Forward a nonmoving heap census to postNonmovingHeapCensus.
 *
 * log_blk_size and census are passed through unchanged. The census is
 * posted only when both eventlog_enabled and TRACE_nonmoving_gc are
 * non-zero; if either is zero this function does nothing.
 */
void traceNonmovingHeapCensus(uint32_t log_blk_size,
                              const struct NonmovingAllocCensus *census)
{
    if (!eventlog_enabled || !TRACE_nonmoving_gc) {
        return;
    }
    postNonmovingHeapCensus(log_blk_size, census);
}
void traceThreadStatus_ (StgTSO *tso USED_IF_DEBUG) void traceThreadStatus_ (StgTSO *tso USED_IF_DEBUG)
{ {
#if defined(DEBUG) #if defined(DEBUG)
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#pragma once #pragma once
#include "rts/EventLogFormat.h" #include "rts/EventLogFormat.h"
#include "sm/NonMovingCensus.h"
#include "Capability.h" #include "Capability.h"
#if defined(DTRACE) #if defined(DTRACE)
...@@ -72,6 +73,7 @@ extern int TRACE_spark_sampled; ...@@ -72,6 +73,7 @@ extern int TRACE_spark_sampled;
extern int TRACE_spark_full; extern int TRACE_spark_full;
/* extern int TRACE_user; */ // only used in Trace.c /* extern int TRACE_user; */ // only used in Trace.c
extern int TRACE_cap; extern int TRACE_cap;
extern int TRACE_nonmoving_gc;
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Posting events // Posting events
...@@ -304,6 +306,16 @@ void traceHeapProfSampleCostCentre(StgWord8 profile_id, ...@@ -304,6 +306,16 @@ void traceHeapProfSampleCostCentre(StgWord8 profile_id,
CostCentreStack *stack, StgWord residency); CostCentreStack *stack, StgWord residency);
#endif /* PROFILING */ #endif /* PROFILING */
/* Trace hooks for the concurrent mark, sync and sweep events, update
 * remembered set flushes, and the nonmoving heap census.
 * These are the function declarations used in the TRACING branch of this
 * header.
 */
void traceConcMarkBegin(void);
void traceConcMarkEnd(StgWord32 marked_obj_count);
void traceConcSyncBegin(void);
void traceConcSyncEnd(void);
void traceConcSweepBegin(void);
void traceConcSweepEnd(void);
void traceConcUpdRemSetFlush(Capability *cap);
void traceNonmovingHeapCensus(uint32_t log_blk_size,
const struct NonmovingAllocCensus *census);
void flushTrace(void); void flushTrace(void);
#else /* !TRACING */ #else /* !TRACING */
...@@ -344,6 +356,15 @@ void flushTrace(void); ...@@ -344,6 +356,15 @@ void flushTrace(void);
#define traceHeapProfSampleCostCentre(profile_id, stack, residency) /* nothing */ #define traceHeapProfSampleCostCentre(profile_id, stack, residency) /* nothing */
#define traceHeapProfSampleString(profile_id, label, residency) /* nothing */ #define traceHeapProfSampleString(profile_id, label, residency) /* nothing */
/* !TRACING variants of the concurrent mark/sync/sweep, update remembered
 * set flush and nonmoving heap census trace hooks: each expands to nothing,
 * so call sites need no conditional compilation of their own.
 */
#define traceConcMarkBegin() /* nothing */
#define traceConcMarkEnd(marked_obj_count) /* nothing */
#define traceConcSyncBegin() /* nothing */
#define traceConcSyncEnd() /* nothing */
#define traceConcSweepBegin() /* nothing */
#define traceConcSweepEnd() /* nothing */
#define traceConcUpdRemSetFlush(cap) /* nothing */
#define traceNonmovingHeapCensus(blk_size, census) /* nothing */
#define flushTrace() /* nothing */ #define flushTrace() /* nothing */
#endif /* TRACING */ #endif /* TRACING */
......
...@@ -50,7 +50,7 @@ ...@@ -50,7 +50,7 @@
\ \
prim_write_barrier; \ prim_write_barrier; \
OVERWRITING_CLOSURE(p1); \ OVERWRITING_CLOSURE(p1); \
IF_WRITE_BARRIER_ENABLED { \ IF_NONMOVING_WRITE_BARRIER_ENABLED { \
ccall updateRemembSetPushThunk_(BaseReg, p1 "ptr"); \ ccall updateRemembSetPushThunk_(BaseReg, p1 "ptr"); \
} \ } \
StgInd_indirectee(p1) = p2; \ StgInd_indirectee(p1) = p2; \
...@@ -81,7 +81,7 @@ INLINE_HEADER void updateWithIndirection (Capability *cap, ...@@ -81,7 +81,7 @@ INLINE_HEADER void updateWithIndirection (Capability *cap,
/* See Note [Heap memory barriers] in SMP.h */ /* See Note [Heap memory barriers] in SMP.h */
write_barrier(); write_barrier();
OVERWRITING_CLOSURE(p1); OVERWRITING_CLOSURE(p1);
if (RTS_UNLIKELY(nonmoving_write_barrier_enabled)) { IF_NONMOVING_WRITE_BARRIER_ENABLED {
updateRemembSetPushThunk(cap, (StgThunk*)p1); updateRemembSetPushThunk(cap, (StgThunk*)p1);
} }
((StgInd *)p1)->indirectee = p2; ((StgInd *)p1)->indirectee = p2;
......
...@@ -107,7 +107,15 @@ char *EventDesc[] = { ...@@ -107,7 +107,15 @@ char *EventDesc[] = {
[EVENT_HEAP_PROF_SAMPLE_END] = "End of heap profile sample", [EVENT_HEAP_PROF_SAMPLE_END] = "End of heap profile sample",
[EVENT_HEAP_PROF_SAMPLE_STRING] = "Heap profile string sample", [EVENT_HEAP_PROF_SAMPLE_STRING] = "Heap profile string sample",
[EVENT_HEAP_PROF_SAMPLE_COST_CENTRE] = "Heap profile cost-centre sample", [EVENT_HEAP_PROF_SAMPLE_COST_CENTRE] = "Heap profile cost-centre sample",
[EVENT_USER_BINARY_MSG] = "User binary message" [EVENT_USER_BINARY_MSG] = "User binary message",
[EVENT_CONC_MARK_BEGIN] = "Begin concurrent mark phase",
[EVENT_CONC_MARK_END] = "End concurrent mark phase",
[EVENT_CONC_SYNC_BEGIN] = "Begin concurrent GC synchronisation",
[EVENT_CONC_SYNC_END] = "End concurrent GC synchronisation",
[EVENT_CONC_SWEEP_BEGIN] = "Begin concurrent sweep",
[EVENT_CONC_SWEEP_END] = "End concurrent sweep",
[EVENT_CONC_UPD_REM_SET_FLUSH] = "Update remembered set flushed",
[EVENT_NONMOVING_HEAP_CENSUS] = "Nonmoving heap census"
}; };
// Event type. // Event type.
...@@ -446,6 +454,27 @@ init_event_types(void) ...@@ -446,6 +454,27 @@ init_event_types(void)
eventTypes[t].size = EVENT_SIZE_DYNAMIC; eventTypes[t].size = EVENT_SIZE_DYNAMIC;
break; break;
case EVENT_CONC_MARK_BEGIN:
case EVENT_CONC_SYNC_BEGIN:
case EVENT_CONC_SYNC_END:
case EVENT_CONC_SWEEP_BEGIN:
case EVENT_CONC_SWEEP_END:
eventTypes[t].size = 0;
break;
case EVENT_CONC_MARK_END:
eventTypes[t].size = 4;