Commit 59847290 authored by Simon Marlow's avatar Simon Marlow

Fix a lost-wakeup bug in BLACKHOLE handling (#13751)

Summary:
The problem occurred when
* Threads A & B evaluate the same thunk
* Thread A context-switches, so the thunk gets blackholed
* Thread C enters the blackhole, creates a BLOCKING_QUEUE attached to
  the blackhole and thread A's `tso->bq` queue
* Thread B updates the blackhole with a value, overwriting the BLOCKING_QUEUE
* We GC, replacing A's update frame with stg_enter_checkbh
* Throw an exception in A, which ignores the stg_enter_checkbh frame

Now we have C blocked on A's tso->bq queue, but we forgot to check the
queue because the stg_enter_checkbh frame has been thrown away by the
exception.

The solution and alternative designs are discussed in Note [upd-black-hole].

This also exposed a bug in the interpreter, whereby we were sometimes
context-switching without calling `threadPaused()`.  I've fixed this
and added some Notes.

Test Plan:
* `cd testsuite/tests/concurrent && make slow`
* validate

Reviewers: niteria, bgamari, austin, erikd

Reviewed By: erikd

Subscribers: rwbarton, thomie

GHC Trac Issues: #13751

Differential Revision: https://phabricator.haskell.org/D3630
parent bca56bd0
...@@ -275,8 +275,6 @@ RTS_FUN_DECL(stg_PAP_apply); ...@@ -275,8 +275,6 @@ RTS_FUN_DECL(stg_PAP_apply);
RTS_FUN_DECL(stg_gc_noregs); RTS_FUN_DECL(stg_gc_noregs);
RTS_RET(stg_enter_checkbh);
RTS_RET(stg_ret_v); RTS_RET(stg_ret_v);
RTS_RET(stg_ret_p); RTS_RET(stg_ret_p);
RTS_RET(stg_ret_n); RTS_RET(stg_ret_n);
......
...@@ -229,30 +229,6 @@ stg_gc_prim_p_ll ...@@ -229,30 +229,6 @@ stg_gc_prim_p_ll
jump stg_gc_noregs []; jump stg_gc_noregs [];
} }
/* -----------------------------------------------------------------------------
stg_enter_checkbh is just like stg_enter, except that we also call
checkBlockingQueues(). The point of this is that the GC can
replace an stg_marked_upd_frame with an stg_enter_checkbh if it
finds that the BLACKHOLE has already been updated by another
thread. It would be unsafe to use stg_enter, because there might
be an orphaned BLOCKING_QUEUE now.
-------------------------------------------------------------------------- */
/* The stg_enter_checkbh frame has the same shape as an update frame: */
INFO_TABLE_RET ( stg_enter_checkbh, RET_SMALL,
UPDATE_FRAME_FIELDS(W_,P_,info_ptr,ccs,p2,updatee))
return (P_ ret)
{
foreign "C" checkBlockingQueues(MyCapability() "ptr",
CurrentTSO);
// we need to return updatee now. Note that it might be a pointer
// to an indirection or a tagged value, we don't know which, so we
// need to ENTER() rather than return().
ENTER(updatee);
}
/* ----------------------------------------------------------------------------- /* -----------------------------------------------------------------------------
Info tables for returning values of various types. These are used Info tables for returning values of various types. These are used
when we want to push a frame on the stack that will return a value when we want to push a frame on the stack that will return a value
......
...@@ -115,6 +115,16 @@ ...@@ -115,6 +115,16 @@
cap->r.rRet = (retcode); \ cap->r.rRet = (retcode); \
return cap; return cap;
// Note [avoiding threadPaused]
//
// Switching between the interpreter to compiled code can happen very
// frequently, so we don't want to call threadPaused(), which is
// expensive. BUT we must be careful not to violate the invariant
// that threadPaused() has been called on all threads before we GC
// (see Note [upd-black-hole]). So the scheduler must ensure that when
// we return in this way, we definitely run the thread again immediately
// and don't GC or do something else.
//
#define RETURN_TO_SCHEDULER_NO_PAUSE(todo,retcode) \ #define RETURN_TO_SCHEDULER_NO_PAUSE(todo,retcode) \
SAVE_THREAD_STATE(); \ SAVE_THREAD_STATE(); \
cap->r.rCurrentTSO->what_next = (todo); \ cap->r.rCurrentTSO->what_next = (todo); \
......
...@@ -289,7 +289,9 @@ loop: ...@@ -289,7 +289,9 @@ loop:
recordClosureMutated(cap,(StgClosure*)bq); recordClosureMutated(cap,(StgClosure*)bq);
} }
debugTraceCap(DEBUG_sched, cap, "thread %d blocked on thread %d", debugTraceCap(DEBUG_sched, cap,
"thread %d blocked on existing BLOCKING_QUEUE "
"owned by thread %d",
(W_)msg->tso->id, (W_)owner->id); (W_)msg->tso->id, (W_)owner->id);
// See above, #3838 // See above, #3838
......
...@@ -1215,10 +1215,9 @@ scheduleHandleYield( Capability *cap, StgTSO *t, uint32_t prev_what_next ) ...@@ -1215,10 +1215,9 @@ scheduleHandleYield( Capability *cap, StgTSO *t, uint32_t prev_what_next )
ASSERT(t->_link == END_TSO_QUEUE); ASSERT(t->_link == END_TSO_QUEUE);
// Shortcut if we're just switching evaluators: don't bother // Shortcut if we're just switching evaluators: just run the thread. See
// doing stack squeezing (which can be expensive), just run the // Note [avoiding threadPaused] in Interpreter.c.
// thread. if (t->what_next != prev_what_next) {
if (cap->context_switch == 0 && t->what_next != prev_what_next) {
debugTrace(DEBUG_sched, debugTrace(DEBUG_sched,
"--<< thread %ld (%s) stopped to switch evaluators", "--<< thread %ld (%s) stopped to switch evaluators",
(long)t->id, what_next_strs[t->what_next]); (long)t->id, what_next_strs[t->what_next]);
......
...@@ -127,6 +127,9 @@ stg_returnToSched /* no args: explicit stack layout */ ...@@ -127,6 +127,9 @@ stg_returnToSched /* no args: explicit stack layout */
// current thread. This is used for switching from compiled execution to the // current thread. This is used for switching from compiled execution to the
// interpreter, where calling threadPaused() on every switch would be too // interpreter, where calling threadPaused() on every switch would be too
// expensive. // expensive.
//
// See Note [avoiding threadPaused] in Interpreter.c
//
stg_returnToSchedNotPaused /* no args: explicit stack layout */ stg_returnToSchedNotPaused /* no args: explicit stack layout */
{ {
SAVE_THREAD_STATE(); SAVE_THREAD_STATE();
......
...@@ -34,6 +34,7 @@ StgWord64 whitehole_spin = 0; ...@@ -34,6 +34,7 @@ StgWord64 whitehole_spin = 0;
#if defined(THREADED_RTS) && !defined(PARALLEL_GC) #if defined(THREADED_RTS) && !defined(PARALLEL_GC)
#define evacuate(p) evacuate1(p) #define evacuate(p) evacuate1(p)
#define evacuate_BLACKHOLE(p) evacuate_BLACKHOLE1(p)
#define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p) #define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p)
#endif #endif
...@@ -532,7 +533,6 @@ loop: ...@@ -532,7 +533,6 @@ loop:
ASSERTM(LOOKS_LIKE_CLOSURE_PTR(q), "invalid closure, info=%p", q->header.info); ASSERTM(LOOKS_LIKE_CLOSURE_PTR(q), "invalid closure, info=%p", q->header.info);
if (!HEAP_ALLOCED_GC(q)) { if (!HEAP_ALLOCED_GC(q)) {
if (!major_gc) return; if (!major_gc) return;
info = get_itbl(q); info = get_itbl(q);
...@@ -872,6 +872,68 @@ loop: ...@@ -872,6 +872,68 @@ loop:
barf("evacuate"); barf("evacuate");
} }
/* -----------------------------------------------------------------------------
Evacuate a pointer that is guaranteed to point to a BLACKHOLE.
This is used for evacuating the updatee of an update frame on the stack. We
want to copy the blackhole even if it has been updated by another thread and
is now an indirection, because the original update frame still needs to
update it.
See also Note [upd-black-hole] in sm/Scav.c.
-------------------------------------------------------------------------- */
void
evacuate_BLACKHOLE(StgClosure **p)
{
bdescr *bd;
uint32_t gen_no;
StgClosure *q;
const StgInfoTable *info;
q = *p;
// closure is required to be a heap-allocated BLACKHOLE
ASSERT(HEAP_ALLOCED_GC(q));
// a BLACKHOLE is never a tagged pointer
ASSERT(GET_CLOSURE_TAG(q) == 0);
bd = Bdescr((P_)q);
// blackholes can't be in a compact, or large
ASSERT((bd->flags & (BF_COMPACT | BF_LARGE)) == 0);
// Already evacuated this GC: nothing to copy.  But if the closure
// now lives in a younger generation than the one we are evacuating
// into, record the failed promotion so the caller can put it on the
// mutable list.
if (bd->flags & BF_EVACUATED) {
if (bd->gen_no < gct->evac_gen_no) {
gct->failed_to_evac = true;
TICK_GC_FAILED_PROMOTION();
}
return;
}
// Block is collected by marking rather than copying (presumably the
// mark/compact or mark-region collector -- TODO confirm): mark the
// closure and push it on the mark stack instead of copying it.
if (bd->flags & BF_MARKED) {
if (!is_marked((P_)q,bd)) {
mark((P_)q,bd);
push_mark_stack((P_)q);
}
return;
}
// Destination generation for this block's objects.
gen_no = bd->dest_no;
info = q->header.info;
// Another GC thread (or an earlier visit) already evacuated this
// closure: follow the forwarding pointer instead of copying again.
if (IS_FORWARDING_PTR(info))
{
StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info);
*p = e;
// optimisation: only consult the destination's bdescr when the
// source generation was already too young.
if (gen_no < gct->evac_gen_no) { // optimisation
if (Bdescr((P_)e)->gen_no < gct->evac_gen_no) {
gct->failed_to_evac = true;
TICK_GC_FAILED_PROMOTION();
}
}
return;
}
// At this point it must really be a BLACKHOLE; copy it.  A BLACKHOLE
// has the same layout/size as an indirection, hence sizeofW(StgInd).
ASSERT(INFO_PTR_TO_STRUCT(info)->type == BLACKHOLE);
copy(p,info,q,sizeofW(StgInd),gen_no);
}
/* ----------------------------------------------------------------------------- /* -----------------------------------------------------------------------------
Evaluate a THUNK_SELECTOR if possible. Evaluate a THUNK_SELECTOR if possible.
......
...@@ -34,6 +34,9 @@ ...@@ -34,6 +34,9 @@
REGPARM1 void evacuate (StgClosure **p); REGPARM1 void evacuate (StgClosure **p);
REGPARM1 void evacuate1 (StgClosure **p); REGPARM1 void evacuate1 (StgClosure **p);
void evacuate_BLACKHOLE(StgClosure **p);
void evacuate_BLACKHOLE1(StgClosure **p);
extern W_ thunk_selector_depth; extern W_ thunk_selector_depth;
#include "EndPrivate.h" #include "EndPrivate.h"
...@@ -39,6 +39,7 @@ static void scavenge_large_bitmap (StgPtr p, ...@@ -39,6 +39,7 @@ static void scavenge_large_bitmap (StgPtr p,
#if defined(THREADED_RTS) && !defined(PARALLEL_GC) #if defined(THREADED_RTS) && !defined(PARALLEL_GC)
# define evacuate(a) evacuate1(a) # define evacuate(a) evacuate1(a)
# define evacuate_BLACKHOLE(a) evacuate_BLACKHOLE1(a)
# define scavenge_loop(a) scavenge_loop1(a) # define scavenge_loop(a) scavenge_loop1(a)
# define scavenge_block(a) scavenge_block1(a) # define scavenge_block(a) scavenge_block1(a)
# define scavenge_mutable_list(bd,g) scavenge_mutable_list1(bd,g) # define scavenge_mutable_list(bd,g) scavenge_mutable_list1(bd,g)
...@@ -1891,6 +1892,7 @@ scavenge_stack(StgPtr p, StgPtr stack_end) ...@@ -1891,6 +1892,7 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
case UPDATE_FRAME: case UPDATE_FRAME:
// Note [upd-black-hole] // Note [upd-black-hole]
//
// In SMP, we can get update frames that point to indirections // In SMP, we can get update frames that point to indirections
// when two threads evaluate the same thunk. We do attempt to // when two threads evaluate the same thunk. We do attempt to
// discover this situation in threadPaused(), but it's // discover this situation in threadPaused(), but it's
...@@ -1906,35 +1908,57 @@ scavenge_stack(StgPtr p, StgPtr stack_end) ...@@ -1906,35 +1908,57 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
// Now T is an indirection, and the update frame is already // Now T is an indirection, and the update frame is already
// marked on A's stack, so we won't traverse it again in // marked on A's stack, so we won't traverse it again in
// threadPaused(). We could traverse the whole stack again // threadPaused(). We could traverse the whole stack again
// before GC, but that seems like overkill. // before GC, but that would be too expensive.
// //
// Scavenging this update frame as normal would be disastrous; // Scavenging this update frame as normal would be disastrous;
// the updatee would end up pointing to the value. So we // the indirection will be shorted out, and the updatee would
// check whether the value after evacuation is a BLACKHOLE, // end up pointing to the value. The update code will then
// and if not, we change the update frame to an stg_enter // overwrite the value, instead of the BLACKHOLE it is
// frame that simply returns the value. Hence, blackholing is // expecting to write to.
// compulsory (otherwise we would have to check for thunks //
// too). // One way we could try to fix this is to detect when the
// BLACKHOLE has been updated by another thread, and then
// replace this update frame with a special frame that just
// enters the value. But this introduces some other
// complexities:
//
// - we must be careful to call checkBlockingQueues() in this
// special frame, because we might otherwise miss wakeups
// for threads that blocked on the original BLACKHOLE,
// - we must spot this frame when we're stripping the stack in
// raiseAsync() and raiseExceptionHelper(), and arrange to call
// checkBlockingQueues() there too.
//
// This is hard to get right, indeed we previously got it
// wrong (see #13751). So we now take a different approach:
// always copy the BLACKHOLE, even if it is actually an
// indirection. This way we keep the update frame, we're
// guaranteed to still perform the update, and check for
// missed wakeups even when stripping the stack in
// raiseAsync() and raiseExceptionHelper(). This is also a
// little more efficient, because evacuating a known BLACKHOLE
// is faster than evacuating an unknown closure.
//
// NOTE: for the reasons above, blackholing (either lazy or
// eager) is NOT optional. See also Note [avoiding
// threadPaused] in Interpreter.c.
// //
// One slight hiccup is that the THUNK_SELECTOR machinery can // There are a couple of alternative solutions:
// overwrite the updatee with an IND. In parallel GC, this // - if we see an update frame that points to an indirection,
// could even be happening concurrently, so we can't check for // arrange to call checkBlockingQueues() on that thread
// the IND. Fortunately if we assume that blackholing is // after GC.
// happening (either lazy or eager), then we can be sure that // - spot a BLOCKING_QUEUE that points to a value and
// the updatee is never a THUNK_SELECTOR and we're ok. // arrange to wake it up after the GC.
// NB. this is a new invariant: blackholing is not optional. //
// These are more difficult to implement, requiring an extra
// list to be maintained during GC. They also rely on more
// subtle invariants than the solution implemented here.
//
{ {
StgUpdateFrame *frame = (StgUpdateFrame *)p; StgUpdateFrame *frame = (StgUpdateFrame *)p;
StgClosure *v;
evacuate(&frame->updatee); evacuate_BLACKHOLE(&frame->updatee);
v = frame->updatee;
if (GET_CLOSURE_TAG(v) != 0 ||
(get_itbl(v)->type != BLACKHOLE)) {
// blackholing is compulsory, see above.
frame->header.info = (const StgInfoTable*)&stg_enter_checkbh_info;
}
ASSERT(v->header.info != &stg_TSO_info);
p += sizeofW(StgUpdateFrame); p += sizeofW(StgUpdateFrame);
continue; continue;
} }
......
...@@ -266,6 +266,7 @@ test('hs_try_putmvar001', ...@@ -266,6 +266,7 @@ test('hs_try_putmvar001',
# This one should work for both threaded and non-threaded RTS # This one should work for both threaded and non-threaded RTS
test('hs_try_putmvar002', test('hs_try_putmvar002',
[pre_cmd('$MAKE -s --no-print-directory hs_try_putmvar002_setup'), [pre_cmd('$MAKE -s --no-print-directory hs_try_putmvar002_setup'),
omit_ways(['ghci']),
extra_run_opts('1 8 10000')], extra_run_opts('1 8 10000')],
compile_and_run, ['hs_try_putmvar002_c.c']) compile_and_run, ['hs_try_putmvar002_c.c'])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment