runInBoundThread is uninterruptible, this should either be fixed or documented better
Test.hs
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Main where
import Control.Monad
import Control.Exception
import Control.Concurrent
import Control.Concurrent.STM
import Control.Concurrent.Async
import System.IO
runInBoundThread' action = withAsyncBound action wait
type MThread = MVar ThreadId
acquire = putMVar
release = takeMVar
newLock = newEmptyMVar
putFlush x = putStrLn x >> hFlush stdout
withException :: Exception e => IO a -> (e -> IO b) -> IO a
withException io what = io `catch` \e -> what e >> throwIO e
testLock :: MThread -> IO MThread
testLock env = mask_ $ do
s <- getMaskingState
t <- myThreadId
putStrLn $ "---- testLock: locking: " <> show s <> " by thread " <> show t
-- we observe thread getting stuck here, because the lock was already
-- taken by "closeEnv" in the main thread in its cleanup function. lockEnv in
-- the next line then blocks.
lockEnv env `onException` (putFlush "testLock: lock failed")
putFlush "---- testLock: locked"
pure env
lockEnv :: MThread -> IO ()
lockEnv env = do
tid <- myThreadId
acquire env tid
unlockEnv :: MThread -> IO ()
unlockEnv env = mask_ $ do
self <- myThreadId
mask_ $ do
owner <- release env
unless (self == owner) $ do
acquire env owner -- oops!
error "calling thread does not own the lock!"
testLockUnlock :: MThread -> IO ()
testLockUnlock env = do
putFlush ("-- testLockUnlock opening")
test `finally` (putFlush "-- testLockUnlock ended")
where
test = runInBoundThread $ bracket (testLock env) unlockEnv $ \_ -> do
putFlush "---- testLockUnlock inside tx 0"
threadDelay 1000000
putFlush "---- testLockUnlock inside tx 1"
openEnv :: IO MThread
openEnv = do
putFlush "openEnv starting, creating lock"
newLock
closeEnv :: MThread -> IO ()
closeEnv env = do
lockEnv env
putFlush "closeEnv finished, lock locked"
main :: IO ()
main = do
children <- newTVarIO []
let intercept (e :: SomeException) = do
putFlush $ "main: got exception: " <> (show e)
chh <- readTVarIO children
putFlush $ "main: will cancel " <> (show $ length chh) <> " children"
forM chh $ \ch -> do
putFlush $ "main: cancelling thread " <> (show (asyncThreadId ch))
uninterruptibleMask_ (cancel ch)
let realMain = do
bracket openEnv closeEnv $ \db -> do
a <- asyncLog "main" async $ do
forever $ testLockUnlock db
atomically $ modifyTVar' children (a :)
forever $ threadDelay 5000000
realMain
`withException` intercept
`finally` (putFlush "main: wait 10s then will exit... " >> threadDelay 10000000)
asyncLog :: String -> (forall x. IO x -> IO (Async x)) -> IO x -> IO (Async x)
asyncLog name fork action = do
a <- async action
link a
putFlush $ "async: spawned thread " <> name <> " as " <> show (asyncThreadId a)
pure a
The test code above (click to expand):
- spawns a child thread, which repeatedly acquires and releases a lock inside
runInBoundThread
(which itself spawns a thread each time it is called) - the main thread waits for 50 seconds. during this time, if an exception is thrown it will cancel the child thread.
There are two problems with this:
-
runInBoundThread
never communicates with its own spawned-worker-thread, e.g. to pass on received asynchronous exceptions. - While it is waiting for the own spawned-worker-thread (via
forkOS_entry_reimported
) it is uninterruptible and does not receive asynchronous exceptions. This causes the test code to hang, when the cleanup function tries to cancel the child thread.
Both of these problems are fixed in the test code by using runInBoundThread'
instead (which is defined as flip withAsyncBound wait
). However I think the GHC base library should fix this anti-pattern itself - either by documenting this behaviour and recommended people not use it, or by fixing it - i.e. making runInBoundThread
behave similarly to runInBoundThread'
.
"In the real world" this causes issues like https://github.com/dmbarbour/haskell-lmdb/issues/4 and https://github.com/verement/lmdb-simple/issues/6.