runInBoundThread is uninterruptible, this should either be fixed or documented better

Test.hs

{-# LANGUAGE RankNTypes          #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Main where

import Control.Monad
import Control.Exception

import Control.Concurrent
import Control.Concurrent.STM
import Control.Concurrent.Async

import System.IO

runInBoundThread' action = withAsyncBound action wait

type MThread = MVar ThreadId
acquire = putMVar
release = takeMVar
newLock = newEmptyMVar

putFlush x = putStrLn x >> hFlush stdout

withException :: Exception e => IO a -> (e -> IO b) -> IO a
withException io what = io `catch` \e -> what e >> throwIO e

testLock :: MThread -> IO MThread
testLock env = mask_ $ do
  s <- getMaskingState
  t <- myThreadId
  putStrLn $ "---- testLock: locking: " <> show s <> " by thread " <> show t
  -- we observe thread getting stuck here, because the lock was already
  -- taken by "closeEnv" in the main thread in its cleanup function. lockEnv in
  -- the next line then blocks.
  lockEnv env `onException` (putFlush "testLock: lock failed")
  putFlush "---- testLock: locked"
  pure env

lockEnv :: MThread -> IO ()
lockEnv env = do
  tid <- myThreadId
  acquire env tid

unlockEnv :: MThread -> IO ()
unlockEnv env = mask_ $ do
  self <- myThreadId
  mask_ $ do
    owner <- release env
    unless (self == owner) $ do
      acquire env owner -- oops!
      error "calling thread does not own the lock!"

testLockUnlock :: MThread -> IO ()
testLockUnlock env = do
  putFlush ("-- testLockUnlock opening")
  test `finally` (putFlush "-- testLockUnlock ended")
 where
  test = runInBoundThread $ bracket (testLock env) unlockEnv $ \_ -> do
    putFlush "---- testLockUnlock inside tx 0"
    threadDelay 1000000
    putFlush "---- testLockUnlock inside tx 1"

openEnv :: IO MThread
openEnv = do
  putFlush "openEnv starting, creating lock"
  newLock

closeEnv :: MThread -> IO ()
closeEnv env = do
  lockEnv env
  putFlush "closeEnv finished, lock locked"

main :: IO ()
main = do
  children <- newTVarIO []

  let intercept (e :: SomeException) = do
        putFlush $ "main: got exception: " <> (show e)
        chh <- readTVarIO children
        putFlush $ "main: will cancel " <> (show $ length chh) <> " children"
        forM chh $ \ch -> do
          putFlush $ "main: cancelling thread " <> (show (asyncThreadId ch))
          uninterruptibleMask_ (cancel ch)

  let realMain = do
        bracket openEnv closeEnv $ \db -> do
          a <- asyncLog "main" async $ do
            forever $ testLockUnlock db
          atomically $ modifyTVar' children (a :)
          forever $ threadDelay 5000000

  realMain
    `withException` intercept
    `finally`       (putFlush "main: wait 10s then will exit... " >> threadDelay 10000000)

asyncLog :: String -> (forall x. IO x -> IO (Async x)) -> IO x -> IO (Async x)
asyncLog name fork action = do
  a <- async action
  link a
  putFlush $ "async: spawned thread " <> name <> " as " <> show (asyncThreadId a)
  pure a

The test code above (click to expand):

spawns a child thread, which repeatedly acquires and releases a lock inside runInBoundThread (which itself spawns a thread each time it is called)
the main thread waits for 50 seconds. during this time, if an exception is thrown it will cancel the child thread.

There are two problems with this:

runInBoundThread never communicates with its own spawned-worker-thread, e.g. to pass on received asynchronous exceptions.
While it is waiting for the own spawned-worker-thread (via forkOS_entry_reimported) it is uninterruptible and does not receive asynchronous exceptions. This causes the test code to hang, when the cleanup function tries to cancel the child thread.

Both of these problems are fixed in the test code by using runInBoundThread' instead (which is defined as flip withAsyncBound wait). However I think the GHC base library should fix this anti-pattern itself - either by documenting this behaviour and recommended people not use it, or by fixing it - i.e. making runInBoundThread behave similarly to runInBoundThread'.

"In the real world" this causes issues like https://github.com/dmbarbour/haskell-lmdb/issues/4 and https://github.com/verement/lmdb-simple/issues/6.

Edited Jun 18, 2019 by infinity0

To upload designs, you'll need to enable LFS and have an admin enable hashed storage. More information