Commit eba7b660 authored by Simon Marlow
Browse files

Merge the smp and threaded RTS ways

Now, the threaded RTS also includes SMP support.  The -smp flag is a
synonym for -threaded.  The performance implications of this are small
to negligible, and it results in a code cleanup and reduces the number
of combinations we have to test.
parent 608bc3f3
......@@ -124,7 +124,7 @@ static_flags = [
, ( "ticky" , NoArg (addWay WayTicky) )
, ( "parallel" , NoArg (addWay WayPar) )
, ( "gransim" , NoArg (addWay WayGran) )
, ( "smp" , NoArg (addWay WaySMP) )
, ( "smp" , NoArg (addWay WayThreaded) ) -- backwards compat.
, ( "debug" , NoArg (addWay WayDebug) )
, ( "ndp" , NoArg (addWay WayNDP) )
, ( "threaded" , NoArg (addWay WayThreaded) )
......@@ -416,7 +416,6 @@ data WayName
| WayTicky
| WayPar
| WayGran
| WaySMP
| WayNDP
| WayUser_a
| WayUser_b
......@@ -554,16 +553,6 @@ way_details =
, "-optc-DGRAN"
, "-package concurrent" ]),
(WaySMP, Way "s" True "SMP"
[
#if !defined(mingw32_TARGET_OS)
"-optc-pthread"
#endif
#if !defined(mingw32_TARGET_OS) && !defined(freebsd_TARGET_OS)
, "-optl-pthread"
#endif
]),
(WayNDP, Way "ndp" False "Nested data parallelism"
[ "-fparr"
, "-fflatten"]),
......
......@@ -356,7 +356,7 @@ typedef struct {
StgHeader header;
StgClosure *volatile current_value;
StgTVarWaitQueue *volatile first_wait_queue_entry;
#if defined(SMP)
#if defined(THREADED_RTS)
StgInt volatile num_updates;
#endif
} StgTVar;
......@@ -367,7 +367,7 @@ typedef struct {
StgTVar *tvar;
StgClosure *expected_value;
StgClosure *new_value;
#if defined(SMP)
#if defined(THREADED_RTS)
StgInt num_updates;
#endif
} TRecEntry;
......
......@@ -25,7 +25,8 @@
#include "gmp.h" // Needs MP_INT definition
/*
* Spark pools: used to store pending sparks (SMP & PARALLEL_HASKELL only)
* Spark pools: used to store pending sparks
* (THREADED_RTS & PARALLEL_HASKELL only)
* This is a circular buffer. Invariants:
* - base <= hd < lim
* - base <= tl < lim
......@@ -107,15 +108,16 @@ typedef struct StgRegTable_ {
struct bdescr_ *rCurrentNursery; /* Hp/HpLim point into this block */
struct bdescr_ *rCurrentAlloc; /* for allocation using allocate() */
StgWord rHpAlloc; /* number of *bytes* being allocated in heap */
// rmp_tmp1..rmp_result2 are only used in SMP builds to avoid per-thread temps
// in bss, but are currently always included here so we just run mkDerivedConstants once
// rmp_tmp1..rmp_result2 are only used in THREADED_RTS builds to
// avoid per-thread temps in bss, but are currently always included here
// so we just run mkDerivedConstants once
StgInt rmp_tmp_w;
MP_INT rmp_tmp1;
MP_INT rmp_tmp2;
MP_INT rmp_result1;
MP_INT rmp_result2;
StgWord rRet; // holds the return code of the thread
#if defined(SMP) || defined(PAR)
#if defined(THREADED_RTS) || defined(PAR)
StgSparkPool rSparks; /* per-task spark pool */
#endif
} StgRegTable;
......@@ -330,10 +332,10 @@ struct PartCapability_ {
StgRegTable r;
};
/* No such thing as a MainCapability under SMP - each thread must have
/* No such thing as a MainCapability under THREADED_RTS - each thread must have
* its own Capability.
*/
#if IN_STG_CODE && !defined(SMP)
#if IN_STG_CODE && !defined(THREADED_RTS)
extern W_ MainCapability[];
#endif
......@@ -349,8 +351,8 @@ extern W_ MainCapability[];
GLOBAL_REG_DECL(StgRegTable *,BaseReg,REG_Base)
#define ASSIGN_BaseReg(e) (BaseReg = (e))
#else
#ifdef SMP
#error BaseReg must be in a register for SMP
#ifdef THREADED_RTS
#error BaseReg must be in a register for THREADED_RTS
#endif
#define BaseReg (&((struct PartCapability_ *)MainCapability)->r)
#define ASSIGN_BaseReg(e) /*nothing*/
......@@ -628,8 +630,8 @@ GLOBAL_REG_DECL(bdescr *,SparkLim,REG_SparkLim)
#endif
#ifdef CALLER_SAVES_Base
#ifdef SMP
#error "Can't have caller-saved BaseReg with SMP"
#ifdef THREADED_RTS
#error "Can't have caller-saved BaseReg with THREADED_RTS"
#endif
#define CALLER_SAVE_Base /* nothing */
#define CALLER_RESTORE_Base BaseReg = &MainRegTable;
......
......@@ -70,14 +70,6 @@ extern void _assertFail (char *, unsigned int);
#define doNothing() do { } while (0)
#ifdef SMP
#define USED_IF_SMP
#define USED_IF_NOT_SMP STG_UNUSED
#else
#define USED_IF_SMP STG_UNUSED
#define USED_IF_NOT_SMP
#endif
#ifdef DEBUG
#define USED_IF_DEBUG
#define USED_IF_NOT_DEBUG STG_UNUSED
......
......@@ -39,7 +39,7 @@
/* TICKY_TICKY needs EAGER_BLACKHOLING to verify no double-entries of
* single-entry thunks.
*/
/* #if defined(TICKY_TICKY) || defined(SMP) */
/* #if defined(TICKY_TICKY) || defined(THREADED_RTS) */
#if defined(TICKY_TICKY)
# define EAGER_BLACKHOLING
#else
......
......@@ -161,12 +161,12 @@ struct PAR_FLAGS {
};
#endif /* PAR */
#ifdef SMP
#ifdef THREADED_RTS
struct PAR_FLAGS {
nat nNodes; /* number of threads to run simultaneously */
unsigned int maxLocalSparks;
};
#endif /* SMP */
#endif /* THREADED_RTS */
#ifdef GRAN
struct GRAN_STATS_FLAGS {
......@@ -240,7 +240,7 @@ struct GRAN_FLAGS {
struct GRAN_COST_FLAGS Costs; /* cost metric for simulation */
struct GRAN_DEBUG_FLAGS Debug; /* debugging options */
nat maxThreads; /* ToDo: share with SMP and GUM */
nat maxThreads; /* ToDo: share with THREADED_RTS and GUM */
/* rtsBool labelling; */
nat packBufferSize;
nat packBufferSize_internal;
......@@ -300,7 +300,7 @@ typedef struct _RTS_FLAGS {
struct PROFILING_FLAGS ProfFlags;
struct TICKY_FLAGS TickyFlags;
#if defined(SMP) || defined(PAR)
#if defined(THREADED_RTS) || defined(PAR)
struct PAR_FLAGS ParFlags;
#endif
#ifdef GRAN
......
......@@ -2,25 +2,24 @@
*
* (c) The GHC Team, 2005
*
* Macros for SMP support
* Macros for THREADED_RTS support
*
* -------------------------------------------------------------------------- */
#ifndef SMP_H
#define SMP_H
/* SMP is currently not compatible with the following options:
/* THREADED_RTS is currently not compatible with the following options:
*
* INTERPRETER
* PROFILING
* PROFILING (but only 1 CPU supported)
* TICKY_TICKY
* and unregisterised builds.
* Unregisterised builds are ok, but only 1 CPU supported.
*/
#if defined(SMP)
#if defined(THREADED_RTS)
#if defined(PROFILING) || defined(TICKY_TICKY)
#error Build options incompatible with SMP.
#if defined(TICKY_TICKY)
#error Build options incompatible with THREADED_RTS.
#endif
/*
......@@ -113,7 +112,7 @@ unlockClosure(StgClosure *p, StgInfoTable *info)
#endif
}
#else /* !SMP */
#else /* !THREADED_RTS */
#define wb() /* nothing */
......@@ -125,6 +124,6 @@ xchg(StgPtr p, StgWord w)
return old;
}
#endif /* !SMP */
#endif /* !THREADED_RTS */
#endif /* SMP_H */
......@@ -33,7 +33,7 @@
#ifndef STM_H
#define STM_H
#ifdef SMP
#ifdef THREADED_RTS
//#define STM_CG_LOCK
#define STM_FG_LOCKS
#else
......
......@@ -140,7 +140,7 @@ extern void exitStorage(void);
via allocate() since the last GC.
Used in the reporting of statistics.
SMP: allocate and doYouWantToGC can be used from STG code, they are
THREADED_RTS: allocate and doYouWantToGC can be used from STG code, they are
surrounded by a mutex.
-------------------------------------------------------------------------- */
......@@ -198,11 +198,11 @@ extern void GarbageCollect(void (*get_roots)(evac_fn),rtsBool force_major_gc);
/*
* Storage manager mutex
*/
#if defined(SMP)
#if defined(THREADED_RTS)
extern Mutex sm_mutex;
#endif
#if defined(SMP)
#if defined(THREADED_RTS)
#define ACQUIRE_SM_LOCK ACQUIRE_LOCK(&sm_mutex);
#define RELEASE_SM_LOCK RELEASE_LOCK(&sm_mutex);
#define ASSERT_SM_LOCK() ASSERT_LOCK_HELD(&sm_mutex);
......
/* -----------------------------------------------------------------------------
*
* (c) The GHC Team 1998-2000
* (c) The GHC Team 1998-2006
*
* The block allocator and free list manager.
*
......@@ -29,8 +29,7 @@ static void initMBlock(void *mblock);
static bdescr *allocMegaGroup(nat mblocks);
static void freeMegaGroup(bdescr *bd);
// In SMP mode, the free list is protected by sm_mutex. In the
// threaded RTS, it is protected by the Capability.
// In THREADED_RTS mode, the free list is protected by sm_mutex.
static bdescr *free_list = NULL;
/* -----------------------------------------------------------------------------
......
......@@ -10,9 +10,9 @@
* STG execution, a pointer to the capability is kept in a
* register (BaseReg; actually it is a pointer to cap->r).
*
* Only in an SMP build will there be multiple capabilities, for
* the threaded RTS and other non-threaded builds, there is only
* one global capability, namely MainCapability.
* Only in a THREADED_RTS build will there be multiple capabilities,
* for non-threaded builds there is only one global capability, namely
* MainCapability.
*
* --------------------------------------------------------------------------*/
......@@ -26,9 +26,9 @@
#include "Schedule.h"
#include "Sparks.h"
#if !defined(SMP)
Capability MainCapability; // for non-SMP, we have one global capability
#endif
// one global capability, this is the Capability for non-threaded
// builds, and for +RTS -N1
Capability MainCapability;
nat n_capabilities;
Capability *capabilities = NULL;
......@@ -152,7 +152,7 @@ initCapability( Capability *cap, nat i )
/* ---------------------------------------------------------------------------
* Function: initCapabilities()
*
* Purpose: set up the Capability handling. For the SMP build,
* Purpose: set up the Capability handling. For the THREADED_RTS build,
* we keep a table of them, the size of which is
* controlled by the user via the RTS flag -N.
*
......@@ -160,8 +160,8 @@ initCapability( Capability *cap, nat i )
void
initCapabilities( void )
{
#if defined(SMP)
nat i,n;
#if defined(THREADED_RTS)
nat i;
#ifndef REG_BaseReg
// We can't support multiple CPUs if BaseReg is not a register
......@@ -171,18 +171,31 @@ initCapabilities( void )
}
#endif
n_capabilities = n = RtsFlags.ParFlags.nNodes;
capabilities = stgMallocBytes(n * sizeof(Capability), "initCapabilities");
n_capabilities = RtsFlags.ParFlags.nNodes;
if (n_capabilities == 1) {
capabilities = &MainCapability;
// THREADED_RTS must work on builds that don't have a mutable
// BaseReg (eg. unregisterised), so in this case
// capabilities[0] must coincide with &MainCapability.
} else {
capabilities = stgMallocBytes(n_capabilities * sizeof(Capability),
"initCapabilities");
}
for (i = 0; i < n; i++) {
for (i = 0; i < n_capabilities; i++) {
initCapability(&capabilities[i], i);
}
IF_DEBUG(scheduler, sched_belch("allocated %d capabilities", n));
#else
IF_DEBUG(scheduler, sched_belch("allocated %d capabilities",
n_capabilities));
#else /* !THREADED_RTS */
n_capabilities = 1;
capabilities = &MainCapability;
initCapability(&MainCapability, 0);
#endif
// There are no free capabilities to begin with. We will start
......@@ -263,15 +276,7 @@ releaseCapability_ (Capability* cap)
return;
}
// If we have an unbound thread on the run queue, or if there's
// anything else to do, give the Capability to a worker thread.
if (!emptyRunQueue(cap) || !emptySparkPoolCap(cap) || globalWorkToDo()) {
if (cap->spare_workers) {
giveCapabilityToTask(cap,cap->spare_workers);
// The worker Task pops itself from the queue;
return;
}
if (!cap->spare_workers) {
// Create a worker thread if we don't have one. If the system
// is interrupted, we only create a worker task if there
// are threads that need to be completed. If the system is
......@@ -284,6 +289,16 @@ releaseCapability_ (Capability* cap)
}
}
// If we have an unbound thread on the run queue, or if there's
// anything else to do, give the Capability to a worker thread.
if (!emptyRunQueue(cap) || !emptySparkPoolCap(cap) || globalWorkToDo()) {
if (cap->spare_workers) {
giveCapabilityToTask(cap,cap->spare_workers);
// The worker Task pops itself from the queue;
return;
}
}
last_free_capability = cap;
IF_DEBUG(scheduler, sched_belch("freeing capability %d", cap->no));
}
......@@ -512,6 +527,7 @@ prodCapabilities(rtsBool all)
}
RELEASE_LOCK(&cap->lock);
}
return;
}
void
......
/* ---------------------------------------------------------------------------
*
* (c) The GHC Team, 2001-2003
* (c) The GHC Team, 2001-2006
*
* Capabilities
*
* The notion of a capability is used when operating in multi-threaded
* environments (which the SMP and Threads builds of the RTS do), to
* environments (which the THREADED_RTS build of the RTS does), to
* hold all the state an OS thread/task needs to run Haskell code:
* its STG registers, a pointer to its TSO, a nursery etc. During
* STG execution, a pointer to the capability is kept in a
* register (BaseReg).
*
* Only in an SMP build will there be multiple capabilities, the threaded
* RTS and other non-threaded builds, there is one global capability,
* namely MainRegTable.
* Only in a THREADED_RTS build will there be multiple capabilities,
* in the non-threaded builds there is one global capability, namely
* MainCapability.
*
* This header file contains the functions for working with capabilities.
* (the main, and only, consumer of this interface is the scheduler).
......@@ -140,8 +140,8 @@ INLINE_HEADER void releaseCapability (Capability* cap STG_UNUSED) {};
INLINE_HEADER void releaseCapability_ (Capability* cap STG_UNUSED) {};
#endif
#if !IN_STG_CODE && !defined(SMP)
// for non-SMP, we have one global capability
#if !IN_STG_CODE
// one global capability
extern Capability MainCapability;
#endif
......
......@@ -1154,7 +1154,9 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
ACQUIRE_SM_LOCK;
// send exceptions to any threads which were about to die
RELEASE_SM_LOCK;
resurrectThreads(resurrected_threads);
ACQUIRE_SM_LOCK;
// Update the stable pointer hash table.
updateStablePtrTable(major_gc);
......
......@@ -217,7 +217,7 @@ void initRtsFlagsDefaults(void)
RtsFlags.ConcFlags.ctxtSwitchTime = CS_MIN_MILLISECS; /* In milliseconds */
#ifdef SMP
#ifdef THREADED_RTS
RtsFlags.ParFlags.nNodes = 1;
#endif
......@@ -244,9 +244,9 @@ void initRtsFlagsDefaults(void)
RtsFlags.ParFlags.fishDelay = FISH_DELAY;
#endif
#if defined(PAR) || defined(SMP)
#if defined(PAR) || defined(THREADED_RTS)
RtsFlags.ParFlags.maxLocalSparks = 4096;
#endif /* PAR || SMP */
#endif /* PAR || THREADED_RTS */
#if defined(GRAN)
/* ToDo: check defaults for GranSim and GUM */
......@@ -435,10 +435,10 @@ usage_text[] = {
" -Dz DEBUG: stack squezing",
"",
#endif /* DEBUG */
#if defined(SMP)
#if defined(THREADED_RTS)
" -N<n> Use <n> OS threads (default: 1)",
#endif
#if defined(SMP) || defined(PAR)
#if defined(THREADED_RTS) || defined(PAR)
" -e<size> Size of spark pools (default 100)",
#endif
#if defined(PAR)
......@@ -448,7 +448,7 @@ usage_text[] = {
" -qd Turn on PVM-ish debugging",
" -qO Disable output for performance measurement",
#endif
#if defined(SMP) || defined(PAR)
#if defined(THREADED_RTS) || defined(PAR)
" -e<n> Maximum number of outstanding local sparks (default: 4096)",
#endif
#if defined(PAR)
......@@ -611,14 +611,6 @@ errorBelch("not built for: -prof"); \
error = rtsTrue;
#endif
#ifdef SMP
# define SMP_BUILD_ONLY(x) x
#else
# define SMP_BUILD_ONLY(x) \
errorBelch("not built for: -smp"); \
error = rtsTrue;
#endif
#ifdef PAR
# define PAR_BUILD_ONLY(x) x
#else
......@@ -627,10 +619,18 @@ errorBelch("not built for: -parallel"); \
error = rtsTrue;
#endif
#if defined(SMP) || defined(PAR)
# define PAR_OR_SMP_BUILD_ONLY(x) x
#ifdef THREADED_RTS
# define THREADED_BUILD_ONLY(x) x
#else
# define THREADED_BUILD_ONLY(x) \
errorBelch("not built for: -smp"); \
error = rtsTrue;
#endif
#if defined(THREADED_RTS) || defined(PAR)
# define PAR_OR_THREADED_BUILD_ONLY(x) x
#else
# define PAR_OR_SMP_BUILD_ONLY(x) \
# define PAR_OR_THREADED_BUILD_ONLY(x) \
errorBelch("not built for: -parallel or -smp"); \
error = rtsTrue;
#endif
......@@ -1037,9 +1037,9 @@ error = rtsTrue;
}
break;
#ifdef SMP
#ifdef THREADED_RTS
case 'N':
SMP_BUILD_ONLY(
THREADED_BUILD_ONLY(
if (rts_argv[arg][2] != '\0') {
RtsFlags.ParFlags.nNodes
= strtol(rts_argv[arg]+2, (char **) NULL, 10);
......@@ -1052,7 +1052,7 @@ error = rtsTrue;
#endif
/* =========== PARALLEL =========================== */
case 'e':
PAR_OR_SMP_BUILD_ONLY(
PAR_OR_THREADED_BUILD_ONLY(
if (rts_argv[arg][2] != '\0') {
RtsFlags.ParFlags.maxLocalSparks
= strtol(rts_argv[arg]+2, (char **) NULL, 10);
......
......@@ -17,6 +17,7 @@
#include "Stats.h" /* initStats */
#include "STM.h" /* initSTM */
#include "Signals.h"
#include "RtsSignals.h"
#include "Timer.h" /* startTimer, stopTimer */
#include "Weak.h"
#include "Ticky.h"
......@@ -275,12 +276,8 @@ void
hs_add_root(void (*init_root)(void))
{
bdescr *bd;
#ifdef SMP
Capability cap;
#else
#define cap MainCapability
#endif
nat init_sp;
Capability *cap = &MainCapability;
if (hs_init_count <= 0) {
barf("hs_add_root() must be called after hs_init()");
......@@ -296,8 +293,8 @@ hs_add_root(void (*init_root)(void))
init_stack[--init_sp] = (F_)init_root;
}
cap.r.rSp = (P_)(init_stack + init_sp);
StgRun((StgFunPtr)stg_init, &cap.r);
cap->r.rSp = (P_)(init_stack + init_sp);
StgRun((StgFunPtr)stg_init, &cap->r);
freeGroup_lock(bd);
......
......@@ -28,7 +28,7 @@
* in STM.h:
*
* STM_UNIPROC assumes that the caller serialises invocations on the STM interface.
* In the Haskell RTS this means it is suitable only for non-SMP builds.
* In the Haskell RTS this means it is suitable only for non-THREADED_RTS builds.
*
* STM_CG_LOCK uses coarse-grained locking -- a single 'stm lock' is acquired during
* an invocation on the STM interface. Note that this does not mean that
......@@ -97,8 +97,8 @@
#define TRUE 1
#define FALSE 0
// ACQ_ASSERT is used for assertions which are only required for SMP builds with
// fine-grained locking.
// ACQ_ASSERT is used for assertions which are only required for
// THREADED_RTS builds with fine-grained locking.
#if defined(STM_FG_LOCKS)
#define ACQ_ASSERT(_X) ASSERT(_X)
......@@ -794,7 +794,7 @@ static volatile StgInt64 max_commits = 0;
static volatile StgBool token_locked = FALSE;
#if defined(SMP)
#if defined(THREADED_RTS)
static void getTokenBatch(Capability *cap) {
while (cas(&token_locked, FALSE, TRUE) == TRUE) { /* nothing */ }
max_commits += TOKEN_BATCH_SIZE;
......@@ -1252,7 +1252,7 @@ StgTVar *stmNewTVar(Capability *cap,
SET_HDR (result, &stg_TVAR_info, CCS_SYSTEM);
result -> current_value = new_value;
result -> first_wait_queue_entry = END_STM_WAIT_QUEUE;
#if defined(SMP)
#if defined(THREADED_RTS)
result -> num_updates = 0;
#endif
return result;
......
......@@ -549,7 +549,7 @@ checkHeap(bdescr *bd)
{
StgPtr p;
#if defined(SMP)
#if defined(THREADED_RTS)
// heap sanity checking doesn't work with SMP, because we can't
// zero the slop (see Updates.h).
return;
......
......@@ -175,7 +175,7 @@ rtsBool shutting_down_scheduler = rtsFalse;
/*
* This mutex protects most of the global scheduler data in
* the THREADED_RTS and (inc. SMP) runtime.
* the THREADED_RTS runtime.
*/
#if defined(THREADED_RTS)
Mutex sched_mutex;
......@@ -199,7 +199,7 @@ static Capability *schedule (Capability *initialCapability, Task *task);
// scheduler clearer.
//
static void schedulePreLoop (void);
#if defined(SMP)
#if defined(THREADED_RTS)
static void schedulePushWork(Capability *cap, Task *task);
#endif
static void scheduleStartSignalHandlers (Capability *cap);
......@@ -227,7 +227,8 @@ static void scheduleHandleThreadBlocked( StgTSO *t );
static rtsBool scheduleHandleThreadFinished( Capability *cap, Task *task,