Commit c5cafbcc authored by Simon Marlow's avatar Simon Marlow

Fix #3429: a tricky race condition

There were two bugs, and had it not been for the first one we would
not have noticed the second one, so this is quite fortunate.

The first bug is in stg_unblockAsyncExceptionszh_ret, when we found a
pending exception to raise, but don't end up raising it, there was a
missing adjustment to the stack pointer.  

The second bug was that this case was actually happening at all: it
ought to be incredibly rare, because the pending exception thread
would have to be killed between us finding it and attempting to raise
the exception.  This made me suspicious.  It turned out that there was
a race condition on the tso->flags field; multiple threads were
updating this bitmask field non-atomically (one of the bits is the
dirty-bit for the generational GC).  The fix is to move the dirty bit
into its own field of the TSO, making the TSO one word larger (sadly).
parent 0dffc568
......@@ -293,6 +293,7 @@ main(int argc, char *argv[])
closure_field(StgTSO, saved_errno);
closure_field(StgTSO, trec);
closure_field(StgTSO, flags);
closure_field(StgTSO, dirty);
closure_field_("StgTSO_CCCS", StgTSO, prof.CCCS);
tso_field(StgTSO, sp);
tso_field_offset(StgTSO, stack);
......
......@@ -230,19 +230,7 @@
/*
* Flags for the tso->flags field.
*
* The TSO_DIRTY flag indicates that this TSO's stack should be
* scanned during garbage collection. The link field of a TSO is
* always scanned, so we don't have to dirty a TSO just for linking
* it on a different list.
*
* TSO_DIRTY is set by
* - schedule(), just before running a thread,
* - raiseAsync(), because it modifies a thread's stack
* - resumeThread(), just before running the thread again
* and unset by the garbage collector (only).
*/
#define TSO_DIRTY 1
/*
* TSO_LOCKED is set when a TSO is locked to a particular Capability.
......
......@@ -25,7 +25,6 @@ typedef struct {
*/
typedef StgWord32 StgThreadID;
#define tsoDirty(tso) ((tso)->flags & TSO_DIRTY)
#define tsoLocked(tso) ((tso)->flags & TSO_LOCKED)
/*
......@@ -90,6 +89,25 @@ typedef struct StgTSO_ {
struct StgTSO_* global_link; /* Links all threads together */
StgWord dirty; /* non-zero => dirty */
/*
* The tso->dirty flag indicates that this TSO's stack should be
* scanned during garbage collection. It also indicates that this
* TSO is on the mutable list.
*
* NB. The dirty flag gets a word to itself, so that it can be set
* safely by multiple threads simultaneously (the flags field is
* not safe for this purpose; see #3429). It is harmless for the
* TSO to be on the mutable list multiple times.
*
* tso->dirty is set by dirty_TSO(), and unset by the garbage
* collector (only).
*
* The link field has a separate dirty bit of its own, namely the
* bit TSO_LINK_DIRTY in the tso->flags field, set by
* setTSOLink().
*/
StgWord16 what_next; /* Values defined in Constants.h */
StgWord16 why_blocked; /* Values defined in Constants.h */
StgWord32 flags;
......
......@@ -49,6 +49,8 @@ import ghczmprim_GHCziBool_True_closure;
-------------------------------------------------------------------------- */
STRING(stg_unblockAsync_err_str, "unblockAsyncExceptions#_ret")
INFO_TABLE_RET( stg_unblockAsyncExceptionszh_ret, RET_SMALL )
{
CInt r;
......@@ -81,6 +83,14 @@ INFO_TABLE_RET( stg_unblockAsyncExceptionszh_ret, RET_SMALL )
jump %ENTRY_CODE(Sp(0));
}
}
else {
/*
the thread might have been removed from the
blocked_exception list by someone else in the meantime.
Just restore the stack pointer and continue.
*/
Sp_adj(2);
}
}
Sp_adj(1);
......
......@@ -1173,7 +1173,7 @@ stg_takeMVarzh
tso = StgMVar_head(mvar);
PerformPut(tso,StgMVar_value(mvar));
if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
if (TO_W_(StgTSO_dirty(tso)) == 0) {
foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
}
......@@ -1249,7 +1249,7 @@ stg_tryTakeMVarzh
/* actually perform the putMVar for the thread that we just woke up */
tso = StgMVar_head(mvar);
PerformPut(tso,StgMVar_value(mvar));
if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
if (TO_W_(StgTSO_dirty(tso)) == 0) {
foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
}
......@@ -1329,7 +1329,7 @@ stg_putMVarzh
/* actually perform the takeMVar */
tso = StgMVar_head(mvar);
PerformTake(tso, val);
if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
if (TO_W_(StgTSO_dirty(tso)) == 0) {
foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
}
......@@ -1398,7 +1398,7 @@ stg_tryPutMVarzh
/* actually perform the takeMVar */
tso = StgMVar_head(mvar);
PerformTake(tso, R2);
if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
if (TO_W_(StgTSO_dirty(tso)) == 0) {
foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
}
......
......@@ -581,7 +581,7 @@ checkGlobalTSOList (rtsBool checkTSOs)
// If this TSO is dirty and in an old generation, it better
// be on the mutable list.
if (tso->what_next == ThreadRelocated) continue;
if (tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) {
if (tso->dirty || (tso->flags & TSO_LINK_DIRTY)) {
ASSERT(Bdescr((P_)tso)->gen_no == 0 || (tso->flags & TSO_MARKED));
tso->flags &= ~TSO_MARKED;
}
......
......@@ -75,7 +75,8 @@ createThread(Capability *cap, nat size)
tso->why_blocked = NotBlocked;
tso->blocked_exceptions = END_TSO_QUEUE;
tso->flags = TSO_DIRTY;
tso->flags = 0;
tso->dirty = 1;
tso->saved_errno = 0;
tso->bound = NULL;
......@@ -377,7 +378,7 @@ printThreadStatus(StgTSO *t)
default:
printThreadBlockage(t);
}
if (t->flags & TSO_DIRTY) {
if (t->dirty) {
debugBelch(" (TSO_DIRTY)");
} else if (t->flags & TSO_LINK_DIRTY) {
debugBelch(" (TSO_LINK_DIRTY)");
......
......@@ -89,10 +89,10 @@ scavengeTSO (StgTSO *tso)
scavenge_stack(tso->sp, &(tso->stack[tso->stack_size]));
if (gct->failed_to_evac) {
tso->flags |= TSO_DIRTY;
tso->dirty = 1;
scavenge_TSO_link(tso);
} else {
tso->flags &= ~TSO_DIRTY;
tso->dirty = 0;
scavenge_TSO_link(tso);
if (gct->failed_to_evac) {
tso->flags |= TSO_LINK_DIRTY;
......@@ -1454,7 +1454,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
continue;
case TSO: {
StgTSO *tso = (StgTSO *)p;
if ((tso->flags & TSO_DIRTY) == 0) {
if (tso->dirty == 0) {
// Must be on the mutable list because its link
// field is dirty.
ASSERT(tso->flags & TSO_LINK_DIRTY);
......
......@@ -850,7 +850,7 @@ void
setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
{
bdescr *bd;
if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
tso->flags |= TSO_LINK_DIRTY;
bd = Bdescr((StgPtr)tso);
if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
......@@ -862,11 +862,11 @@ void
dirty_TSO (Capability *cap, StgTSO *tso)
{
bdescr *bd;
if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
bd = Bdescr((StgPtr)tso);
if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
}
tso->flags |= TSO_DIRTY;
tso->dirty = 1;
}
/*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment