Commit 304e7fb7 authored by Simon Marlow

Instead of a separate context-switch flag, set HpLim to zero

This reduces the latency between a context-switch being triggered and
the thread returning to the scheduler, which in turn should reduce the
cost of the GC barrier when there are many cores.

We still retain the old context_switch flag which is checked at the
end of each block of allocation.  The idea is that setting HpLim may
fail if the target thread is modifying HpLim at the same time; the
context_switch flag is a fallback.  It also allows us to "context
switch soon" without forcing an immediate switch, which can be costly.
parent c197fe60
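
The change hinges on the mutator's heap-check fast path: compiled code compares Hp against HpLim after allocating, so forcing HpLim to zero guarantees that the very next heap check fails and the thread drops back into the scheduler, while the context_switch flag covers the case where the store to HpLim races with the running thread. A minimal C sketch of the idea (illustrative only; request_context_switch, heap_check_failed and the cut-down register table are not names from the patch):

#include <stdint.h>

/* Cut-down stand-in for the real StgRegTable/Capability fields. */
typedef struct {
    uintptr_t rHp;            /* heap allocation pointer               */
    uintptr_t rHpLim;         /* heap limit; 0 forces the next check   */
    int       context_switch; /* soft flag, checked per nursery block  */
} RegTable;

/* Scheduler side: ask the thread to return as soon as possible.
 * Zeroing HpLim makes its next heap check fail; the flag is the
 * fallback in case the thread is updating HpLim concurrently.      */
static void request_context_switch(RegTable *r)
{
    r->rHpLim = 0;
    r->context_switch = 1;
}

/* Mutator side: the allocation fast path only compares Hp against
 * HpLim, so a zeroed HpLim looks exactly like an exhausted nursery. */
static int heap_check_failed(RegTable *r, uintptr_t bytes)
{
    r->rHp += bytes;
    return r->rHp > r->rHpLim;    /* nonzero => back to the scheduler */
}
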
......@@ -95,7 +95,6 @@
s0 $9 Sp
s2 $11 SpLim
s3 $12 Hp
- s4 $13 HpLim
t8 $22 NCG_reserved
t12 $27 NCG_reserved
-------------------------------------------------------------------------- */
......@@ -134,7 +133,6 @@
# define REG_SpLim 11
# define REG_Hp 12
- # define REG_HpLim 13
# define NCG_Reserved_I1 22
# define NCG_Reserved_I2 27
......@@ -189,7 +187,6 @@
#define REG_SpLim r6
#define REG_Hp r7
- #define REG_HpLim r8
#define NCG_Reserved_I1 r28
#define NCG_Reserved_I2 r29
......@@ -215,7 +212,7 @@
esi R1
edi Hp
- Leaving SpLim, and HpLim out of the picture.
+ Leaving SpLim out of the picture.
-------------------------------------------------------------------------- */
......@@ -284,12 +281,12 @@
#define REG_Sp rbp
#define REG_Hp r12
#define REG_R1 rbx
- #define REG_R2 rsi
- #define REG_R3 rdi
- #define REG_R4 r8
- #define REG_R5 r9
- #define REG_SpLim r14
- #define REG_HpLim r15
+ #define REG_R2 r14
+ #define REG_R3 rsi
+ #define REG_R4 rdi
+ #define REG_R5 r8
+ #define REG_R6 r9
+ #define REG_SpLim r15
#define REG_F1 xmm1
#define REG_F2 xmm2
......@@ -299,10 +296,10 @@
#define REG_D1 xmm5
#define REG_D2 xmm6
- #define CALLER_SAVES_R2
#define CALLER_SAVES_R3
#define CALLER_SAVES_R4
#define CALLER_SAVES_R5
+ #define CALLER_SAVES_R6
#define CALLER_SAVES_F1
#define CALLER_SAVES_F2
......@@ -312,7 +309,7 @@
#define CALLER_SAVES_D1
#define CALLER_SAVES_D2
- #define MAX_REAL_VANILLA_REG 5
+ #define MAX_REAL_VANILLA_REG 6
#define MAX_REAL_FLOAT_REG 4
#define MAX_REAL_DOUBLE_REG 2
#define MAX_REAL_LONG_REG 0
......@@ -361,7 +358,6 @@
#define REG_SpLim d3
#define REG_Hp d4
- #define REG_HpLim d5
#define REG_R1 a5
#define REG_R2 d6
......@@ -425,7 +421,6 @@
#define REG_SpLim 21
#define REG_Hp 22
- #define REG_HpLim 23
#define REG_Base 30
......@@ -500,7 +495,6 @@
#define REG_SpLim r24
#define REG_Hp r25
- #define REG_HpLim r26
#define REG_Base r27
......@@ -543,7 +537,6 @@
#define REG_SpLim loc26
#define REG_Hp loc27
- #define REG_HpLim loc28
#endif /* ia64 */
......@@ -597,7 +590,6 @@
%i1 Base
%i2 SpLim
%i3 Hp
- %i4 HpLim
%i5 R6
%i6 C frame ptr
%i7 C ret addr
......@@ -666,7 +658,6 @@
#define REG_SpLim i2
#define REG_Hp i3
- #define REG_HpLim i4
#define REG_Base i1
......
......@@ -133,7 +133,6 @@ typedef struct StgRegTable_ {
#define SAVE_SpLim (CurrentTSO->splim)
#define SAVE_Hp (BaseReg->rHp)
- #define SAVE_HpLim (BaseReg->rHpLim)
#define SAVE_CurrentTSO (BaseReg->rCurrentTSO)
#define SAVE_CurrentNursery (BaseReg->rCurrentNursery)
......@@ -349,7 +348,7 @@ GLOBAL_REG_DECL(P_,Hp,REG_Hp)
#endif
#if defined(REG_HpLim) && !defined(NO_GLOBAL_REG_DECLS)
- GLOBAL_REG_DECL(P_,HpLim,REG_HpLim)
+ #error HpLim cannot be in a register
#else
#define HpLim (BaseReg->rHpLim)
#endif
......@@ -570,14 +569,6 @@ GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc)
#define CALLER_RESTORE_Hp /* nothing */
#endif
- #ifdef CALLER_SAVES_HpLim
- #define CALLER_SAVE_HpLim SAVE_HpLim = HpLim;
- #define CALLER_RESTORE_HpLim HpLim = SAVE_HpLim;
- #else
- #define CALLER_SAVE_HpLim /* nothing */
- #define CALLER_RESTORE_HpLim /* nothing */
- #endif
#ifdef CALLER_SAVES_Base
#ifdef THREADED_RTS
#error "Can't have caller-saved BaseReg with THREADED_RTS"
......@@ -644,7 +635,6 @@ GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc)
CALLER_SAVE_Sp \
CALLER_SAVE_SpLim \
CALLER_SAVE_Hp \
- CALLER_SAVE_HpLim \
CALLER_SAVE_CurrentTSO \
CALLER_SAVE_CurrentNursery \
CALLER_SAVE_Base
......@@ -673,7 +663,6 @@ GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc)
CALLER_RESTORE_Sp \
CALLER_RESTORE_SpLim \
CALLER_RESTORE_Hp \
- CALLER_RESTORE_HpLim \
CALLER_RESTORE_CurrentTSO \
CALLER_RESTORE_CurrentNursery
......
......@@ -294,10 +294,10 @@ initCapabilities( void )
void setContextSwitches(void)
{
- nat i;
- for (i=0; i < n_capabilities; i++) {
- capabilities[i].context_switch = 1;
- }
+ nat i;
+ for (i=0; i < n_capabilities; i++) {
+ contextSwitchCapability(&capabilities[i]);
+ }
}
/* ----------------------------------------------------------------------------
......@@ -482,14 +482,17 @@ waitForReturnCapability (Capability **pCap, Task *task)
if (!cap->running_task) {
nat i;
// otherwise, search for a free capability
+ cap = NULL;
for (i = 0; i < n_capabilities; i++) {
- cap = &capabilities[i];
- if (!cap->running_task) {
+ if (!capabilities[i].running_task) {
+ cap = &capabilities[i];
break;
}
}
- // Can't find a free one, use last_free_capability.
- cap = last_free_capability;
+ if (cap == NULL) {
+ // Can't find a free one, use last_free_capability.
+ cap = last_free_capability;
+ }
}
// record the Capability as the one this Task is now assocated with.
......
......@@ -276,6 +276,7 @@ extern void grabCapability (Capability **pCap);
// cause all capabilities to context switch as soon as possible.
void setContextSwitches(void);
+ INLINE_HEADER void contextSwitchCapability(Capability *cap);
// Free all capabilities
void freeCapabilities (void);
......@@ -322,4 +323,16 @@ discardSparksCap (Capability *cap)
{ return discardSparks(cap->sparks); }
#endif
+ INLINE_HEADER void
+ contextSwitchCapability (Capability *cap)
+ {
+ // setting HpLim to NULL ensures that the next heap check will
+ // fail, and the thread will return to the scheduler.
+ cap->r.rHpLim = NULL;
+ // But just in case it didn't work (the target thread might be
+ // modifying HpLim at the same time), we set the end-of-block
+ // context-switch flag too:
+ cap->context_switch = 1;
+ }
#endif /* CAPABILITY_H */
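
The header above therefore offers two strengths of request, and the remaining hunks pick between them: the signal handler calls contextSwitchCapability() for an immediate yield, while forkIO#/forkOn# and thread wake-up set only cap->context_switch so the running thread finishes its current allocation block first. A hedged C sketch of that contrast (the names switch_now and switch_soon and the stub CapStub are illustrative, not from the patch):

#include <stddef.h>

/* Stub with just the two fields this distinction needs. */
typedef struct {
    struct { void *rHpLim; } r;
    int context_switch;
} CapStub;

/* Hard request, as in contextSwitchCapability(): the target thread
 * bounces back to the scheduler at its very next heap check.        */
static void switch_now(CapStub *cap)
{
    cap->r.rHpLim = NULL;      /* next heap check fails immediately  */
    cap->context_switch = 1;   /* fallback if the store above races  */
}

/* Soft request, as used after forkIO# or when waking a thread: only
 * the end-of-block flag is set, so the current thread is not forced
 * off the capability at its very next heap check.                   */
static void switch_soon(CapStub *cap)
{
    cap->context_switch = 1;
}
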
......@@ -23,8 +23,11 @@ import LeaveCriticalSection;
*
* On discovering that a stack or heap check has failed, we do the following:
*
- * - If the context_switch flag is set, indicating that there are more
- * threads waiting to run, we yield to the scheduler
+ * - If HpLim==0, indicating that we should context-switch, we yield
+ * to the scheduler (return ThreadYielding).
+ *
+ * - If the context_switch flag is set (the backup plan if setting HpLim
+ * to 0 didn't trigger a context switch), we yield to the scheduler
* (return ThreadYielding).
*
* - If Hp > HpLim, we've had a heap check failure. This means we've
......@@ -60,6 +63,10 @@ import LeaveCriticalSection;
#define GC_GENERIC \
DEBUG_ONLY(foreign "C" heapCheckFail()); \
if (Hp > HpLim) { \
+ if (HpLim == 0) { \
+ R1 = ThreadYielding; \
+ goto sched; \
+ } \
Hp = Hp - HpAlloc/*in bytes*/; \
if (HpAlloc <= BLOCK_SIZE \
&& bdescr_link(CurrentNursery) != NULL) { \
......
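
Because a zeroed HpLim makes Hp > HpLim succeed even when the nursery is not exhausted, GC_GENERIC has to test HpLim == 0 before it treats the failure as a real overflow (and before it rewinds Hp by HpAlloc). A small C model of that ordering, under the same simplifying assumptions as the sketches above (the enum values are placeholders, not the RTS's real return codes):

#include <stdint.h>

enum check_outcome { NoFailure = 0, Yielding, HeapOverflow };  /* placeholders */

/* Model of the decision in GC_GENERIC: a forced yield (HpLim == 0)
 * must be recognised before the failed check is interpreted as a
 * genuine out-of-nursery condition.                                 */
static enum check_outcome heap_check_outcome(uintptr_t Hp, uintptr_t HpLim)
{
    if (Hp > HpLim) {
        if (HpLim == 0) {
            return Yielding;       /* a context switch was requested */
        }
        return HeapOverflow;       /* really out of nursery space    */
    }
    return NoFailure;              /* e.g. only the stack check fired */
}
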
......@@ -196,6 +196,9 @@ interpretBCO (Capability* cap)
LOAD_STACK_POINTERS;
+ cap->r.rHpLim = (P_)1; // HpLim is the context-switch flag; when it
+ // goes to zero we must return to the scheduler.
// ------------------------------------------------------------------------
// Case 1:
//
......@@ -1281,7 +1284,7 @@ run_BCO:
// context switching: sometimes the scheduler can invoke
// the interpreter with context_switch == 1, particularly
// if the -C0 flag has been given on the cmd line.
- if (cap->context_switch) {
+ if (cap->r.rHpLim == NULL) {
Sp--; Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER(ThreadInterpret, ThreadYielding);
}
......
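
The interpreter has no compiled heap check of its own, so it repurposes rHpLim as a simple poll flag: it primes the field to a dummy non-zero value on entry and tests it as it runs byte code, yielding when the scheduler has zeroed it. A self-contained C sketch of that polling pattern (the loop structure and the run_one_instruction placeholder are assumptions, not the interpreter's real shape):

#include <stdbool.h>
#include <stdint.h>

/* Placeholder for decoding and executing one interpreted instruction;
 * returns false when the current BCO has finished.                   */
static bool run_one_instruction(void) { return false; }

/* Sketch of the interpreter's use of rHpLim as a context-switch flag:
 * primed to a dummy non-zero value on entry, polled while running.   */
static void interp_loop(uintptr_t *rHpLim)
{
    *rHpLim = 1;                   /* "no context switch requested"   */
    while (run_one_instruction()) {
        if (*rHpLim == 0) {        /* scheduler zeroed it: yield now  */
            return;
        }
    }
}
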
......@@ -1073,7 +1073,8 @@ forkzh_fast
foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
- // switch at the earliest opportunity
+ // context switch soon, but not immediately: we don't want every
+ // forkIO to force a context-switch.
Capability_context_switch(MyCapability()) = 1 :: CInt;
RET_P(threadid);
......@@ -1102,7 +1103,8 @@ forkOnzh_fast
foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
- // switch at the earliest opportunity
+ // context switch soon, but not immediately: we don't want every
+ // forkIO to force a context-switch.
Capability_context_switch(MyCapability()) = 1 :: CInt;
RET_P(threadid);
......
......@@ -1268,7 +1268,7 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
"--<< thread %ld (%s) stopped: HeapOverflow",
(long)t->id, whatNext_strs[t->what_next]);
- if (cap->context_switch) {
+ if (cap->r.rHpLim == NULL || cap->context_switch) {
// Sometimes we miss a context switch, e.g. when calling
// primitives in a tight loop, MAYBE_GC() doesn't check the
// context switch flag, and we end up waiting for a GC.
......
......@@ -28,9 +28,7 @@
ASSERT(Hp != 0); \
ASSERT(Sp != 0); \
ASSERT(SpLim != 0); \
- ASSERT(HpLim != 0); \
- ASSERT(SpLim - WDS(RESERVED_STACK_WORDS) <= Sp); \
- ASSERT(HpLim >= Hp);
+ ASSERT(SpLim - WDS(RESERVED_STACK_WORDS) <= Sp);
/* -----------------------------------------------------------------------------
Returning from the STG world.
......
......@@ -505,8 +505,10 @@ unblockOne_ (Capability *cap, StgTSO *tso,
}
tso->cap = cap;
appendToRunQueue(cap,tso);
- // we're holding a newly woken thread, make sure we context switch
- // quickly so we can migrate it if necessary.
+ // context-switch soonish so we can migrate the new thread if
+ // necessary. NB. not contextSwitchCapability(cap), which would
+ // force a context switch immediately.
cap->context_switch = 1;
} else {
// we'll try to wake it up on the Capability it was last on.
......@@ -514,6 +516,10 @@ unblockOne_ (Capability *cap, StgTSO *tso,
}
#else
appendToRunQueue(cap,tso);
+ // context-switch soonish so we can migrate the new thread if
+ // necessary. NB. not contextSwitchCapability(cap), which would
+ // force a context switch immediately.
+ cap->context_switch = 1;
#endif
......
......@@ -214,7 +214,7 @@ generic_handler(int sig USED_IF_THREADS,
stg_exit(EXIT_FAILURE);
}
- MainCapability.context_switch = 1;
+ contextSwitchCapability(&MainCapability);
#endif /* THREADED_RTS */
}
......