Commit 76ee2607, authored and committed by Simon Marlow

Allow limiting the number of GC threads (+RTS -qn<n>)

This allows the GC to use fewer threads than the number of capabilities.
At each GC, we choose some of the capabilities to be "idle", which means
that the thread running on that capability (if any) will sleep for the
duration of the GC, and the other threads will do its work.  We choose
capabilities that are already idle (if any) to be the idle capabilities.

The idea is that this helps in the following situation:

* We want to use a large -N value so as to make use of hyperthreaded
  cores
* We use a large heap size, so GC is infrequent
* But we don't want to use all -N threads in the GC, because that
  thrashes the memory too much.

See docs for usage.
parent f9d93751
......@@ -437,6 +437,30 @@ performance.
program it is sometimes beneficial to disable load-balancing
entirely with ``-qb``.
.. rts-flag:: -qn <x>
:default: the value of ``-N``
:since: 8.2.1
.. index::
single: GC threads, setting the number of
By default, all of the capabilities participate in parallel
garbage collection. If we want to use a very large ``-N`` value,
however, this can reduce the performance of the GC. For this
reason, the ``-qn`` flag can be used to specify a lower number for
the threads that should participate in GC. During GC, if there
are more than this number of workers active, some of them will
sleep for the duration of the GC.
The ``-qn`` flag may be useful when running with a large ``-A`` value
(so that GC is infrequent), and a large ``-N`` value (so as to make
use of hyperthreaded cores, for example). For example, on a
24-core machine with 2 hyperthreads per core, we might use
``-N48 -qn24 -A128m`` to specify that the mutator should use
hyperthreads but the GC should only use real cores. Note that
this configuration would use 6GB for the allocation area
(48 capabilities, each with a 128MB allocation area).
.. rts-flag:: -H [⟨size⟩]
:default: 0
......
......@@ -203,6 +203,10 @@ typedef struct _PAR_FLAGS {
* non-load-balancing parallel GC.
* (zero disables) */
nat parGcThreads;
/* Use this many threads for parallel
* GC (default: use all nNodes). */
rtsBool setAffinity; /* force thread affinity with CPUs */
} PAR_FLAGS;
#endif /* THREADED_RTS */
......
......@@ -55,10 +55,9 @@ static Capability *last_free_capability = NULL;
/*
* Indicates that the RTS wants to synchronise all the Capabilities
* for some reason. All Capabilities should stop and return to the
* scheduler.
* for some reason. All Capabilities should yieldCapability().
*/
volatile StgWord pending_sync = 0;
PendingSync * volatile pending_sync = 0;
/* Let foreign code get the current Capability -- assuming there is one!
* This is useful for unsafe foreign calls because they are called with
......@@ -477,13 +476,19 @@ releaseCapability_ (Capability* cap,
return;
}
// If there is a pending sync, then we should just leave the
// Capability free. The thread trying to sync will be about to
// call waitForCapability().
if (pending_sync != 0 && pending_sync != SYNC_GC_PAR) {
last_free_capability = cap; // needed?
debugTrace(DEBUG_sched, "sync pending, set capability %d free", cap->no);
return;
// If there is a pending sync, then we should just leave the Capability
// free. The thread trying to sync will be about to call
// waitForCapability().
//
// Note: this is *after* we check for a returning task above,
// because the task attempting to acquire all the capabilities may
// be currently in waitForCapability() waiting for this
// capability, in which case simply setting it as free would not
// wake up the waiting task.
PendingSync *sync = pending_sync;
if (sync && (sync->type != SYNC_GC_PAR || sync->idle[cap->no])) {
debugTrace(DEBUG_sched, "sync pending, freeing capability %d", cap->no);
return;
}
// If the next thread on the run queue is a bound thread,
......@@ -795,14 +800,21 @@ yieldCapability (Capability** pCap, Task *task, rtsBool gcAllowed)
{
Capability *cap = *pCap;
if ((pending_sync == SYNC_GC_PAR) && gcAllowed) {
traceEventGcStart(cap);
gcWorkerThread(cap);
traceEventGcEnd(cap);
traceSparkCounters(cap);
// See Note [migrated bound threads 2]
if (task->cap == cap) {
return rtsTrue;
if (gcAllowed)
{
PendingSync *sync = pending_sync;
if (sync && sync->type == SYNC_GC_PAR) {
if (! sync->idle[cap->no]) {
traceEventGcStart(cap);
gcWorkerThread(cap);
traceEventGcEnd(cap);
traceSparkCounters(cap);
// See Note [migrated bound threads 2]
if (task->cap == cap) {
return rtsTrue;
}
}
}
}
......
......@@ -224,15 +224,30 @@ INLINE_HEADER void releaseCapability_ (Capability* cap STG_UNUSED,
//
extern Capability **capabilities;
//
// Types of global synchronisation
//
typedef enum {
SYNC_OTHER, // NOTE(review): presumably any sync that is not a GC -- confirm
SYNC_GC_SEQ, // NOTE(review): presumably a sequential GC -- confirm
SYNC_GC_PAR // parallel GC: yieldCapability() runs gcWorkerThread() on
            // capabilities that are not marked idle for this sync
} SyncType;
//
// Details about a global synchronisation
//
typedef struct {
SyncType type; // The kind of synchronisation
rtsBool *idle; // Per-capability flags, indexed by capability number
               // (cap->no). For a SYNC_GC_PAR sync, a capability whose
               // entry is set does not run gcWorkerThread() in
               // yieldCapability() and is left free by
               // releaseCapability_().
Task *task; // The Task performing the sync
} PendingSync;
//
// Indicates that the RTS wants to synchronise all the Capabilities
// for some reason. All Capabilities should stop and return to the
// scheduler.
//
#define SYNC_GC_SEQ 1
#define SYNC_GC_PAR 2
#define SYNC_OTHER 3
extern volatile StgWord pending_sync;
extern PendingSync * volatile pending_sync;
// Acquires a capability at a return point. If *cap is non-NULL, then
// this is taken as a preference for the Capability we wish to
......
......@@ -226,6 +226,7 @@ void initRtsFlagsDefaults(void)
RtsFlags.ParFlags.parGcLoadBalancingEnabled = rtsTrue;
RtsFlags.ParFlags.parGcLoadBalancingGen = 1;
RtsFlags.ParFlags.parGcNoSyncWithIdle = 0;
RtsFlags.ParFlags.parGcThreads = 0; /* defaults to -N */
RtsFlags.ParFlags.setAffinity = 0;
#endif
......@@ -388,6 +389,7 @@ usage_text[] = {
" (default: 0, -qg alone turns off parallel GC)",
" -qb[<n>] Use load-balancing in the parallel GC only for generations >= <n>",
" (default: 1, -qb alone turns off load-balancing)",
" -qn<n> Use <n> threads for parallel GC (defaults to value of -N)",
" -qa Use the OS to set thread affinity (experimental)",
" -qm Don't automatically migrate threads between CPUs",
" -qi<n> If a processor has been idle for the last <n> GCs, do not",
......@@ -1130,6 +1132,17 @@ error = rtsTrue;
RtsFlags.ParFlags.parGcNoSyncWithIdle
= strtol(rts_argv[arg]+3, (char **) NULL, 10);
break;
case 'n': {
int threads;
threads = strtol(rts_argv[arg]+3, (char **) NULL, 10);
if (threads <= 0) {
errorBelch("-qn must be 1 or greater");
error = rtsTrue;
} else {
RtsFlags.ParFlags.parGcThreads = threads;
}
break;
}
case 'a':
RtsFlags.ParFlags.setAffinity = rtsTrue;
break;
......@@ -1370,6 +1383,13 @@ static void normaliseRtsOpts (void)
"of the stack chunk size (-kc)");
errorUsage();
}
#ifdef THREADED_RTS
if (RtsFlags.ParFlags.parGcThreads > RtsFlags.ParFlags.nNodes) {
errorBelch("GC threads (-qn) must be between 1 and the value of -N");
errorUsage();
}
#endif
}
static void errorUsage (void)
......
This diff is collapsed.
......@@ -125,18 +125,16 @@ typedef struct Task_ {
rtsBool wakeup;
#endif
// This points to the Capability that the Task "belongs" to. If
// the Task owns a Capability, then task->cap points to it. If
// the task does not own a Capability, then either (a) if the task
// is a worker, then task->cap points to the Capability it belongs
// to, or (b) it is returning from a foreign call, then task->cap
// points to the Capability with the returning_worker queue that this
// Task is on.
// If the task owns a Capability, task->cap points to it. (occasionally a
// task may own multiple capabilities, in which case task->cap may point to
// any of them. We must be careful to set task->cap to the appropriate one
// when using Capability APIs.)
//
// When a task goes to sleep, it may be migrated to a different
// Capability. Hence, we always check task->cap on wakeup. To
// synchronise between the migrater and the migratee, task->lock
// must be held when modifying task->cap.
// If the task is a worker, task->cap points to the Capability on which it
// is queued.
//
// If the task is in an unsafe foreign call, then task->cap can be used to
// retrieve the capability (see rts_unsafeGetMyCapability()).
struct Capability_ *cap;
// The current top-of-stack InCall
......
......@@ -619,8 +619,7 @@ GarbageCollect (nat collect_gen,
live_words += genLiveWords(gen);
live_blocks += genLiveBlocks(gen);
// add in the partial blocks in the gen_workspaces, but ignore gen 0
// if this is a local GC (we can't count another capability's part_list)
// add in the partial blocks in the gen_workspaces
{
nat i;
for (i = 0; i < n_capabilities; i++) {
......@@ -1071,7 +1070,6 @@ waitForGcThreads (Capability *cap USED_IF_THREADS)
stat_startGCSync(gc_threads[cap->no]);
while(retry) {
for (i=0; i < n_threads; i++) {
if (i == me || gc_threads[i]->idle) continue;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment