From c1c0985416a6f9766c03d361449f556905bf8e1d Mon Sep 17 00:00:00 2001 From: Ben Gamari <bgamari.foss@gmail.com> Date: Mon, 3 Jul 2017 19:10:07 -0400 Subject: [PATCH] Eagerly blackhole AP_STACKs This fixes #13615. See the rather lengthy Note [AP_STACKs must be eagerly blackholed] for details. Reviewers: simonmar, austin, erikd, dfeuer Subscribers: duog, dfeuer, hsyl20, rwbarton, thomie GHC Trac Issues: #13615 Differential Revision: https://phabricator.haskell.org/D3695 (cherry picked from commit fd7a7a6363d8dde1813bc23cb4ef00ebb70a49c0) --- rts/Apply.cmm | 180 +++++++++++++++++++++++++++++++++++++++++++++ rts/RaiseAsync.c | 1 + rts/ThreadPaused.c | 16 +++- 3 files changed, 193 insertions(+), 4 deletions(-) diff --git a/rts/Apply.cmm b/rts/Apply.cmm index 4c34f0f3d84d..ceeddde6ef1e 100644 --- a/rts/Apply.cmm +++ b/rts/Apply.cmm @@ -454,6 +454,172 @@ for: directly to a function, so we have to enter it using stg_ap_0. -------------------------------------------------------------------------- */ +/* + Note [AP_STACKs must be eagerly blackholed] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Trac #13615 describes a nasty concurrency issue where we can enter into the +middle of an ST action multiple times, resulting in duplication of effects. +In short, the construction of an AP_STACK allows us to suspend a computation +which should not be duplicated. When running with lazy blackholing, we can then +enter this AP_STACK multiple times, duplicating the computation with potentially +disastrous consequences. + +For instance, consider the case of a simple ST program which computes a sum +using in-place mutation, + + inplaceSum :: Num a => [a] -> a + inplaceSum xs0 = runST $ do + y <- newSTRef 0 + let go [] = readSTRef y + go (x : xs) = do + modifySTRef y (+x) + go xs + go xs0 + +Of course, it is fine if we enter an inplaceSum thunk more than once: the two +threads will inhabit different worlds with different STRefs. 
However, if we +suspend some part of inplaceSum (for instance, due to the heap check at the +beginning of go) and then multiple threads resume that suspension (as is safe in +pure computation) we will have multiple threads concurrently mutating the same +STRef. Disaster! + +Let's consider how this might happen: Consider this situation, + + ┌─────────┠┌───────┠┌───────┠┌─────────┠+ │ TSO 1 │ â•────→│ go │ │ fun │ │ TSO 2 │ + └─────────┘ │ └───────┘ └───────┘ └─────────┘ + │ │ + ┌─────────┠│ │ ┌─────────┠+ │ │──────╯ ╰──────────────│ │ + ├─────────┤ ┌─────────┠├─────────┤ + │ UPDATE_ │──────────→│ THUNK A │ â•────────│ UPDATE_ │ + │ FRAME │ updatee └─────────┘ │updatee │ FRAME │ + ├─────────┤ │ ├─────────┤ + │ ... │ │ │ etc. │ + ├─────────┤ updatee ┌─────────┠│ + │ UPDATE_ │─────────────────────→│ THUNK B │â†â”€â”€â”€â•¯ + │ FRAME │ └─────────┘ + ├─────────┤ + │ etc. │ + +Here we have two threads (TSO 1 and TSO 2) which are in currently pausing (e.g. +in threadPaused). Since they are pausing, their stacks are headed by a pointer +to the continuation code which we will run on resumption (go and fun, +respectively). We also see that there are two thunks on the heap: THUNK A and +THUNK B where THUNK A is reachable from THUNK B (for instance, as an argument or +free variable). We see that thread 1 has THUNK A under evaluation, and both +threads have THUNK B under evaluation. + +As each thread enters threadPaused, threadPaused will walk its stack looking for +duplicate computation (see Note [suspend duplicate work], although there is some +background described below as well). Let's consider what this check does: + +Say that TSO 2 begins this check first. The check will walk TSO 2's stack, until +it finds the first update frame, which updates THUNK B. Upon finding this frame, +it will try to lock THUNK B, replacing it with a BLACKHOLE owned by its TSO. 
We +now have, + + ┌─────────┠┌───────┠┌───────┠┌─────────┠+ │ TSO 1 │ â•────→│ go │ │ fun │ â•────────→│ TSO 2 │ + └─────────┘ │ └───────┘ └───────┘ │ └─────────┘ + │ ↑ â•─────╯ + ┌─────────┠│ │ │ ┌─────────┠+ │ │──────╯ ╰─────────────────│ │ + ├─────────┤ updatee ┌─────────┠│ ├─────────┤ + │ UPDATE_ │──────────→│ THUNK A │ │ â•──────────│ UPDATE_ │ + │ FRAME │ └─────────┘ │ │ updatee │ FRAME │ + ├─────────┤ │ │ ├─────────┤ + │ ... │ owner│ │ │ etc. │ + ├─────────┤ updatee ┌────────────┠│ + │ UPDATE_ │──────────────────→│ BLACKHOLE │â†â”€â•¯ + │ FRAME │ └────────────┘ + ├─────────┤ + │ etc. │ + +Now consider what happens when TSO 1 runs its duplicate-computation check. +Again, we start walking the stack from the top, where we we find the update +frame updating THUNK A. We will lock this thunk, replacing it with a BLACKHOLE +owned by its TSO. We now have, + + ┌─────────┠┌───────┠┌───────┠┌─────────┠+ │ TSO 1 │â†â”€â”€â•® â•────→│ go │ │ fun │ â•────────→│ TSO 2 │ + └─────────┘ │ │ └───────┘ └───────┘ │ └─────────┘ + │ │ ↑ â•─────╯ + ┌─────────┠╰──│─────────╮ │ │ ┌─────────┠+ │ │──────╯ │owner ╰─────────────────│ │ + ├─────────┤ ┌───────────┠│ ├─────────┤ + │ UPDATE_ │──────────→│ BLACKHOLE │ │ â•──────────│ UPDATE_ │ + │ FRAME │ updatee └───────────┘ │ │ updatee │ FRAME │ + ├─────────┤ │ │ ├─────────┤ + │ ... │ owner│ │ │ etc. │ + ├─────────┤ updatee ┌────────────┠│ + │ UPDATE_ │──────────────────→│ BLACKHOLE │â†â”€â•¯ + │ FRAME │ └────────────┘ + ├─────────┤ + │ etc. │ + +Now we will continue walking down TSO 1's stack, next coming across the second +update frame, pointing to the now-BLACKHOLE'd THUNK B. At this point +threadPaused will correctly conclude that TSO 1 is duplicating a computation +being carried out by TSO 2 and attempt to suspend it. + +The suspension process proceeds by invoking raiseAsync, which walks the stack +from the top looking for update frames. 
For each update frame we take any stack +frames preceeding it and construct an AP_STACK heap object from them. We then +replace the updatee of the frame with an indirection pointing to the AP_STACK. +So, after suspending the first update frame we have, + + ┌─────────┠┌───────┠┌───────┠┌─────────┠+ │ TSO 1 │ â•────────→│ go │â†â”€â•® │ fun │ â•───────→│ TSO 2 │ + └─────────┘ │ └───────┘ │ └───────┘ │ └─────────┘ + │ ┌───────────┠│ ↑ â•─────╯ + ┌─────────┠│ │ AP_STACK │ │ │ │ ┌─────────┠+ │ │──╯ ├───────────┤ │ ╰────────────────│ │ + ├─────────┤ │ │─╯ │ ├─────────┤ + │ UPDATE_ │───────╮ └───────────┘ │ â•──────────│ UPDATE_ │ + │ FRAME │updatee│ ↑ │ │ updatee │ FRAME │ + ├─────────┤ │ │indirectee │ │ ├─────────┤ + │ ... │ ╰→┌───────────┠│ │ │ etc. │ + ├─────────┤updatee │ BLACKHOLE │ │ │ + │ UPDATE_ │──╮ └───────────┘ owner│ │ + │ FRAME │ │ ┌────────────┠│ + ├─────────┤ ╰───────────────→│ BLACKHOLE │â†â”€â•¯ + │ etc. │ └────────────┘ + +Finally, we will replace the second update frame with a blackhole so that TSO 1 +will block on TSO 2's computation of THUNK B, + + ┌─────────┠┌───────┠┌───────┠┌─────────┠+ │ TSO 1 │ â•────────→│ go │â†â”€â•® │ fun │ â•───────→│ TSO 2 │ + └─────────┘ │ └───────┘ │ └───────┘ │ └─────────┘ + │ ┌───────────┠│ ↑ â•─────╯ + ┌─────────┠│ │ AP_STACK │ │ │ │ ┌─────────┠+ │ │──╯ ├───────────┤ │ ╰────────────────│ │ + ├─────────┤ │ │─╯ │ ├─────────┤ + │ UPDATE_ │───────╮ └───────────┘ │ â•──────────│ UPDATE_ │ + │ FRAME │updatee│ ↑ │ │ updatee │ FRAME │ + ├─────────┤ │ │indirectee │ │ ├─────────┤ + │ ... │ ╰→┌───────────┠│ │ │ etc. │ + ├─────────┤ │ BLACKHOLE │ │ │ + │ BLACK │ └───────────┘ owner│ │ + │ HOLE │───────────╮ ┌────────────┠│ + ├─────────┤indirectee ╰──────→│ BLACKHOLE │â†â”€â•¯ + │ etc. │ └────────────┘ + +At first glance there's still nothing terribly alarming here. However, consider +what would happen if some other closure held a reference to THUNK A. 
We would +now have leaked an AP_STACK capturing the state of a potentially +non-duplicatable computation to heap. Even worse, if two threads had references +to THUNK A and both attempted to enter at the same time, they would both succeed +if we allowed AP_STACKs to be lazily blackholed. This is the reason why we must +be very careful when entering AP_STACKS: they introduce the possibility that we +duplicate a computation which could never otherwise be duplicated. + +For this reason we employ an atomic blackholing strategy when entering AP_STACK +closures. + */ + + INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK") /* no args => explicit stack */ { @@ -477,6 +643,20 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK") PUSH_UPD_FRAME(Sp - SIZEOF_StgUpdateFrame, R1); Sp = Sp - SIZEOF_StgUpdateFrame - WDS(Words); + /* + * It is imperative that we blackhole lest we may duplicate computation which + * must not be duplicated. See Note [AP_STACKs must be eagerly blackholed]. + */ + W_ old_info; + (old_info) = prim %cmpxchgW(ap, stg_AP_STACK_info, stg_WHITEHOLE_info); + if (old_info != stg_AP_STACK_info) { + /* someone else beat us to it */ + jump ENTRY_LBL(stg_WHITEHOLE) (ap); + } + StgInd_indirectee(ap) = CurrentTSO; + prim_write_barrier; + SET_INFO(ap, __stg_EAGER_BLACKHOLE_info); + TICK_ENT_AP(); LDV_ENTER(ap); ENTER_CCS_THUNK(ap); diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c index b399d4167587..69d28e03cd0a 100644 --- a/rts/RaiseAsync.c +++ b/rts/RaiseAsync.c @@ -872,6 +872,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, ap->size = words; ap->fun = (StgClosure *)sp[0]; + sp++; for(i=0; i < words; ++i) { ap->payload[i] = (StgClosure *)*sp++; diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c index c270e69a51e6..7af45d25631c 100644 --- a/rts/ThreadPaused.c +++ b/rts/ThreadPaused.c @@ -275,10 +275,12 @@ threadPaused(Capability *cap, StgTSO *tso) // deadlocked on itself. 
See #5226 for an instance of // this bug. // - if ((bh_info == &stg_WHITEHOLE_info || - bh_info == &stg_BLACKHOLE_info) - && - ((StgInd*)bh)->indirectee != (StgClosure*)tso) + // Note that great care is required when entering computations + // suspended by this mechanism. See Note [AP_STACKs must be eagerly + // blackholed] for details. + if (((bh_info == &stg_BLACKHOLE_info) + && ((StgInd*)bh)->indirectee != (StgClosure*)tso) + || (bh_info == &stg_WHITEHOLE_info)) { debugTrace(DEBUG_squeeze, "suspending duplicate work: %ld words of stack", @@ -304,6 +306,12 @@ threadPaused(Capability *cap, StgTSO *tso) continue; } + // We should never have made it here in the event of blackholes that + // we already own; they should have been marked when we blackholed + // them and consequently we should have stopped our stack walk + // above. + ASSERT(!((bh_info == &stg_BLACKHOLE_info) + && (((StgInd*)bh)->indirectee == (StgClosure*)tso))); // zero out the slop so that the sanity checker can tell // where the next closure is. -- GitLab