diff --git a/libraries/ghc-internal/ghc-internal.cabal b/libraries/ghc-internal/ghc-internal.cabal
index ec3b46c8939cd6d84154d733c812a35b378ee2b2..ae16f7c4ed5f07dd6c0bd8571d7fd22b8cea112d 100644
--- a/libraries/ghc-internal/ghc-internal.cabal
+++ b/libraries/ghc-internal/ghc-internal.cabal
@@ -445,6 +445,17 @@ Library
             GHC.JS.Prim.Internal.Build
             GHC.JS.Foreign.Callback
 
+    if arch(wasm32)
+        exposed-modules:
+            GHC.Wasm.Prim
+        other-modules:
+            GHC.Wasm.Prim.Conc
+            GHC.Wasm.Prim.Conc.Internal
+            GHC.Wasm.Prim.Exports
+            GHC.Wasm.Prim.Flag
+            GHC.Wasm.Prim.Imports
+            GHC.Wasm.Prim.Types
+
     -- We need to set the unit id to ghc-internal (without a version number)
     -- as it's magic.
     ghc-options: -this-unit-id ghc-internal
diff --git a/libraries/ghc-internal/src/GHC/Wasm/Prim.hs b/libraries/ghc-internal/src/GHC/Wasm/Prim.hs
new file mode 100644
index 0000000000000000000000000000000000000000..bba5330161bb216fe005ea19786cace25616539f
--- /dev/null
+++ b/libraries/ghc-internal/src/GHC/Wasm/Prim.hs
@@ -0,0 +1,23 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+
+module GHC.Wasm.Prim (
+  -- User-facing JSVal type and freeJSVal
+  JSVal (..),
+  freeJSVal,
+
+  -- The JSString type and conversion from/to Haskell String
+  JSString (..),
+  fromJSString,
+  toJSString,
+
+  -- Exception types related to JSFFI
+  JSException (..),
+  WouldBlockException (..),
+  PromisePendingException (..),
+
+  -- Is JSFFI used in the current wasm module?
+  isJSFFIUsed
+) where
+
+import GHC.Wasm.Prim.Flag
+import GHC.Wasm.Prim.Types
diff --git a/libraries/ghc-internal/src/GHC/Wasm/Prim/Conc.hs b/libraries/ghc-internal/src/GHC/Wasm/Prim/Conc.hs
new file mode 100644
index 0000000000000000000000000000000000000000..8496847d3495ed35a10a10b864abf1259a25c5b9
--- /dev/null
+++ b/libraries/ghc-internal/src/GHC/Wasm/Prim/Conc.hs
@@ -0,0 +1,68 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+
+module GHC.Wasm.Prim.Conc (
+  threadDelay
+) where
+
+import GHC.Base
+import GHC.IO.Unsafe
+import GHC.Stable
+
+{-
+
+Note [threadDelay on wasm]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When you compile Haskell to wasm32-wasi and use wasmtime to run it,
+threadDelay works, because the poll_oneoff syscall is properly
+implemented, so the awaitEvent() logic in the single-threaded RTS
+works. However, given constraints of the browser environment, wasi
+implementations in pure JavaScript tend to not implement poll_oneoff
+at all, so we can't pretend the old posix style polling works at all.
+
+Since we can do JSFFI async imports now, it's super easy to implement
+threadDelay using setTimeout() in JavaScript, and an implementation
+has indeed been added in GHC.Wasm.Prim.Conc.Internal. But how do we
+make the user-facing GHC.Conc.IO.threadDelay switch to it in browser
+environments and fall back to the stg_delay# implementation otherwise?
+
+A first step is the isJSFFIUsed magic boolean which fetches a C
+boolean global variable. If JSFFI is used, then JSFFI.o would be
+included by wasm-ld, and the ctor there will handle RTS initialization
+as well as setting that boolean to true. This all takes place before
+actual Haskell evaluation, so you can use isJSFFIUsed in Haskell to
+observe whether the current project makes use of JSFFI (aka targets a
+JavaScript host like browsers) at all.
+
+However, merely referring to the right threadDelay implementation
+based on the isJSFFIUsed guard is not enough! wasm-ld will
+transitively include the version that uses JSFFI in the final module
+as well as a bunch of other JSFFI related stuff, so the resulting wasm
+module will now unconditionally contain non-wasi imports :( This
+violates our principle of "don't pay for JavaScript when you don't use
+it". So we need a bit of dependency injection here.
+
+We use a global stable pointer variable in C to point to the actual
+JSFFI-based threadDelay function closure. When JSFFI is not used, it
+defaults to NULL, but it's okay since it'll not be used at runtime.
+When JSFFI is used, the ctor in JSFFI.o will inject the right
+closure's stable pointer into that variable, which will be
+dereferenced here the first time threadDelay is called. This way, we
+can ensure the JSFFI based threadDelay is used in browsers, while not
+contaminating standalone wasm32-wasi modules with JSFFI stuff.
+
+And yes, it's safe to free the stable pointer here, once the function
+closure has been fetched. This allows the CAF to be garbage collected
+when user code no longer uses threadDelay.
+
+-}
+
+{-# OPAQUE threadDelay #-}
+threadDelay :: Int -> IO ()
+threadDelay = unsafeDupablePerformIO $ do
+  f <- deRefStablePtr rts_threadDelay_sp
+  freeStablePtr rts_threadDelay_sp
+  pure f
+
+foreign import ccall unsafe "rts_threadDelay_sp"
+  rts_threadDelay_sp :: StablePtr (Int -> IO ())
diff --git a/libraries/ghc-internal/src/GHC/Wasm/Prim/Conc/Internal.hs b/libraries/ghc-internal/src/GHC/Wasm/Prim/Conc/Internal.hs
new file mode 100644
index 0000000000000000000000000000000000000000..1697286276306da911c16049ad21f04a11392e2e
--- /dev/null
+++ b/libraries/ghc-internal/src/GHC/Wasm/Prim/Conc/Internal.hs
@@ -0,0 +1,16 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+
+module GHC.Wasm.Prim.Conc.Internal (
+  threadDelay
+) where
+
+import GHC.Base
+import GHC.IO
+import GHC.Wasm.Prim.Imports ()
+
+foreign import javascript safe "new Promise(res => setTimeout(res, $1 / 1000))"
+  js_delay :: Int -> IO ()
+
+-- See Note [threadDelay on wasm] for details.
+threadDelay :: Int -> IO ()
+threadDelay t = evaluate =<< js_delay t
diff --git a/libraries/ghc-internal/src/GHC/Wasm/Prim/Exports.hs b/libraries/ghc-internal/src/GHC/Wasm/Prim/Exports.hs
new file mode 100644
index 0000000000000000000000000000000000000000..4b26b366495dee9a7a33739794485957b00ae0cb
--- /dev/null
+++ b/libraries/ghc-internal/src/GHC/Wasm/Prim/Exports.hs
@@ -0,0 +1,138 @@
+{-# LANGUAGE GHCForeignImportPrim #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnliftedFFITypes #-}
+
+module GHC.Wasm.Prim.Exports (
+  mkJSCallback,
+  runIO,
+  runNonIO,
+  js_promiseResolveUnit,
+  js_promiseResolveJSVal,
+  js_promiseResolveChar,
+  js_promiseResolveInt,
+  js_promiseResolveInt8,
+  js_promiseResolveInt16,
+  js_promiseResolveInt32,
+  js_promiseResolveInt64,
+  js_promiseResolveWord,
+  js_promiseResolveWord8,
+  js_promiseResolveWord16,
+  js_promiseResolveWord32,
+  js_promiseResolveWord64,
+  js_promiseResolvePtr,
+  js_promiseResolveFunPtr,
+  js_promiseResolveFloat,
+  js_promiseResolveDouble,
+  js_promiseResolveStablePtr,
+  js_promiseResolveBool,
+  js_promiseReject
+) where
+
+import GHC.Base
+import GHC.Exception.Type
+import GHC.Exts
+import GHC.IO
+import GHC.Int
+import GHC.Stable
+import GHC.TopHandler (flushStdHandles)
+import GHC.Wasm.Prim.Types
+import GHC.Word
+
+mkJSCallback :: (StablePtr a -> IO JSVal) -> a -> IO JSVal
+mkJSCallback adjustor f = do
+  sp@(StablePtr sp#) <- newStablePtr f
+  JSVal v w _ <- adjustor sp
+  let r = JSVal v w sp#
+  js_callback_register r sp
+  pure r
+
+foreign import javascript unsafe "__ghc_wasm_jsffi_finalization_registry.register($1, $2, $1)"
+  js_callback_register :: JSVal -> StablePtr a -> IO ()
+
+runIO :: (JSVal -> a -> IO ()) -> IO a -> IO JSVal
+runIO res m = do
+  p <- js_promiseWithResolvers
+  let topHandler :: SomeException -> IO ()
+      topHandler err = catch (realHandler err) topHandler
+      realHandler :: SomeException -> IO ()
+      realHandler (SomeException err) = do
+        let tmp@(JSString tmp_v) = toJSString $ displayException err
+        js_promiseReject p tmp
+        freeJSVal tmp_v
+  IO $ \s0 -> case fork# (unIO $ catch (res p =<< m) topHandler *> flushStdHandles) s0 of
+    (# s1, _ #) -> case stg_scheduler_loop# s1 of
+      (# s2, _ #) -> (# s2, p #)
+
+runNonIO :: (JSVal -> a -> IO ()) -> a -> IO JSVal
+runNonIO res a = runIO res $ pure a
+
+foreign import javascript unsafe "let res, rej; const p = new Promise((resolve, reject) => { res = resolve; rej = reject; }); p.resolve = res; p.reject = rej; return p;"
+  js_promiseWithResolvers :: IO JSVal
+
+foreign import prim "stg_scheduler_loopzh"
+  stg_scheduler_loop# :: State# RealWorld -> (# State# RealWorld, () #)
+
+js_promiseResolveUnit :: JSVal -> () -> IO ()
+js_promiseResolveUnit p _ = js_promiseResolveUnit' p
+
+foreign import javascript unsafe "$1.resolve()"
+  js_promiseResolveUnit' :: JSVal -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveJSVal :: JSVal -> JSVal -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveChar :: JSVal -> Char -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveInt :: JSVal -> Int -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveInt8 :: JSVal -> Int8 -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveInt16 :: JSVal -> Int16 -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveInt32 :: JSVal -> Int32 -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveInt64 :: JSVal -> Int64 -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveWord :: JSVal -> Word -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveWord8 :: JSVal -> Word8 -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveWord16 :: JSVal -> Word16 -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveWord32 :: JSVal -> Word32 -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveWord64 :: JSVal -> Word64 -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolvePtr :: JSVal -> Ptr a -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveFunPtr :: JSVal -> FunPtr a -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveFloat :: JSVal -> Float -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveDouble :: JSVal -> Double -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveStablePtr :: JSVal -> StablePtr a -> IO ()
+
+foreign import javascript unsafe "$1.resolve($2)"
+  js_promiseResolveBool :: JSVal -> Bool -> IO ()
+
+foreign import javascript unsafe "$1.reject(new WebAssembly.RuntimeError($2))"
+  js_promiseReject :: JSVal -> JSString -> IO ()
diff --git a/libraries/ghc-internal/src/GHC/Wasm/Prim/Flag.hs b/libraries/ghc-internal/src/GHC/Wasm/Prim/Flag.hs
new file mode 100644
index 0000000000000000000000000000000000000000..c6712aea659e37f0b0f7e71a148a16f233219ba8
--- /dev/null
+++ b/libraries/ghc-internal/src/GHC/Wasm/Prim/Flag.hs
@@ -0,0 +1,10 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+
+module GHC.Wasm.Prim.Flag
+  ( isJSFFIUsed,
+  )
+where
+
+import GHC.Base
+
+foreign import ccall unsafe "rts_JSFFI_used" isJSFFIUsed :: Bool
diff --git a/libraries/ghc-internal/src/GHC/Wasm/Prim/Imports.hs b/libraries/ghc-internal/src/GHC/Wasm/Prim/Imports.hs
new file mode 100644
index 0000000000000000000000000000000000000000..f1e1da36dc149629e6ba03d66a8f7ce560038c38
--- /dev/null
+++ b/libraries/ghc-internal/src/GHC/Wasm/Prim/Imports.hs
@@ -0,0 +1,171 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE GHCForeignImportPrim #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnliftedFFITypes #-}
+
+module GHC.Wasm.Prim.Imports (
+  stg_blockPromise,
+  stg_messagePromiseUnit,
+  stg_messagePromiseJSVal,
+  stg_messagePromiseChar,
+  stg_messagePromiseInt,
+  stg_messagePromiseInt8,
+  stg_messagePromiseInt16,
+  stg_messagePromiseInt32,
+  stg_messagePromiseInt64,
+  stg_messagePromiseWord,
+  stg_messagePromiseWord8,
+  stg_messagePromiseWord16,
+  stg_messagePromiseWord32,
+  stg_messagePromiseWord64,
+  stg_messagePromisePtr,
+  stg_messagePromiseFunPtr,
+  stg_messagePromiseFloat,
+  stg_messagePromiseDouble,
+  stg_messagePromiseStablePtr,
+  stg_messagePromiseBool
+) where
+
+import GHC.Base
+import GHC.Exception
+import GHC.Exts
+import GHC.IO.Unsafe
+import GHC.Stable
+import GHC.Wasm.Prim.Types
+
+{-# OPAQUE raiseJSException #-}
+raiseJSException :: JSVal -> a
+raiseJSException v = throw $ JSException v
+
+{-
+
+Note [stg_blockPromise]
+~~~~~~~~~~~~~~~~~~~~~~~
+
+When desugaring a JSFFI async import, we first emit a sync import: it
+returns a JSVal that represents a Promise. Now we need to wrap it in a
+thunk with the same return type as the user written import, so that
+when the thunk is forced, the thread will be suspended and only
+resumed later when the Promise fulfills. This is done by
+stg_blockPromise.
+
+stg_blockPromise takes two arguments: the Promise, and a "message
+promise" function which is a JSFFI sync import that sends a message to
+the Promise via invoking promise.then(). When the Promise resolves
+with a result, the callback passed in .then() invokes RTS API which
+needs to box the JavaScript result with the correct rts_mk* function,
+so for each possible return type, we need to define a distinct
+"message promise" function. This is an implementation detail, not end
+user's concern, the desugar logic picks the right one to be passed to
+stg_blockPromise.
+
+Once the thunk is forced, we first check if we're inside a C FFI
+export's main thread and if so, throw WouldBlockException. Then we pin
+the current TSO via a stable pointer and call the "message promise"
+function. At this point, the Promise fulfill logic that resumes the
+thread in the future has been set up, we can drop the Promise eagerly,
+then arrange the current thread to block.
+
+Blocking is done by calling stg_jsffi_block: it pushes a
+stg_jsffi_block frame and suspends the thread. The payload of
+stg_jsffi_block frame is a single pointer field that holds the return
+value. When the Promise is resolved with the result, the RTS fetches
+the TSO indexed by the stable pointer passed earlier, checks for the
+top stack frame to see if it's still a stg_jsffi_block frame (could be
+stripped by an async exception), fills in the boxed result and
+restarts execution. In case of a Promise rejection, the closure being
+filled is generated via raiseJSException.
+
+-}
+
+stg_blockPromise :: JSVal -> (JSVal -> StablePtr Any -> IO ()) -> r
+stg_blockPromise p msg_p = unsafeDupablePerformIO $ IO $ \s0 ->
+  case stg_jsffi_check (unsafeCoerce# $ toException WouldBlockException) s0 of
+    (# s1 #) -> case myThreadId# s1 of
+      (# s2, tso #) -> case makeStablePtr# tso s2 of
+        (# s3, sp #) ->
+          case unIO (msg_p p $ StablePtr $ unsafeCoerce# sp) s3 of
+            -- Since we eagerly free the Promise here, we must return
+            -- an updatable thunk in stg_blockPromise that can't be
+            -- evaluated more than once, regardless of optimization
+            -- level, otherwise runtime panic may happen. This
+            -- property holds because:
+            -- 1. We're using the single threaded RTS
+            -- 2. Once the thunk is evaluated the first time and that
+            --    thread is paused, lazy blackholing does happen
+            -- 3. unsafeDupablePerformIO applies lazy to the result
+            --    and prevents dmdanal from being naughty
+            (# s4, _ #) -> case unIO (freeJSVal p) s4 of
+              (# s5, _ #) ->
+                -- raiseJSException_closure is used by the RTS in case
+                -- the Promise is rejected, and it is likely a CAF. So
+                -- we need to keep it alive when we block waiting for
+                -- the Promise to resolve or reject, and also mark it
+                -- as OPAQUE just to be sure.
+                keepAlive# raiseJSException s5 $
+                  stg_jsffi_block $
+                    throw PromisePendingException
+
+foreign import prim "stg_jsffi_check"
+  stg_jsffi_check :: Any -> State# RealWorld -> (# State# RealWorld #)
+
+foreign import prim "stg_jsffi_block"
+  stg_jsffi_block :: Any -> State# RealWorld -> (# State# RealWorld, r #)
+
+foreign import javascript unsafe "$1.then(() => __exports.rts_promiseResolveUnit($2), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseUnit :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveJSVal($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseJSVal :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveChar($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseChar :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveInt($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseInt :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveInt8($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseInt8 :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveInt16($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseInt16 :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveInt32($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseInt32 :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveInt64($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseInt64 :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveWord($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseWord :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveWord8($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseWord8 :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveWord16($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseWord16 :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveWord32($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseWord32 :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveWord64($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseWord64 :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolvePtr($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromisePtr :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveFunPtr($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseFunPtr :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveFloat($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseFloat :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveDouble($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseDouble :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveStablePtr($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseStablePtr :: JSVal -> StablePtr Any -> IO ()
+
+foreign import javascript unsafe "$1.then(res => __exports.rts_promiseResolveBool($2, res), err => __exports.rts_promiseReject($2, err))"
+  stg_messagePromiseBool :: JSVal -> StablePtr Any -> IO ()
diff --git a/libraries/ghc-internal/src/GHC/Wasm/Prim/Types.hs b/libraries/ghc-internal/src/GHC/Wasm/Prim/Types.hs
new file mode 100644
index 0000000000000000000000000000000000000000..c46813ff307506842e30c3fe0120e8ea73ac2e56
--- /dev/null
+++ b/libraries/ghc-internal/src/GHC/Wasm/Prim/Types.hs
@@ -0,0 +1,152 @@
+{-# LANGUAGE GHC2021 #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE UnliftedNewtypes #-}
+
+module GHC.Wasm.Prim.Types (
+  JSVal# (..),
+  JSVal (..),
+  freeJSVal,
+  JSString (..),
+  fromJSString,
+  toJSString,
+  JSException (..),
+  WouldBlockException (..),
+  PromisePendingException (..)
+) where
+
+import GHC.Base
+import GHC.Exception.Type
+import GHC.Exts
+import GHC.Foreign
+import GHC.ForeignPtr
+import GHC.IO
+import GHC.IO.Encoding
+import GHC.Num
+import GHC.Show
+import GHC.Stable
+
+{-
+
+Note [JSVal representation for wasm]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+On wasm, the Haskell heap lives in the linear memory space, and it can
+only contain bit patterns, not opaque references of the host
+JavaScript heap. As long as we have two heaps that coexist in this
+way, the best we can do is representing JavaScript references as
+unique ids in the Haskell heap.
+
+In JavaScript, we have a JSValManager which exposes some interfaces as
+wasm imports. The JSValManager is in charge of allocating unique ids
+and managing the mapping from ids to the actual JavaScript values. In
+fact we can implement the entire JSValManager in wasm, using a wasm
+table with externref elements to hold the JavaScript values and a
+special allocator to manage free slots in the table. That'll take more
+work to implement though, with one more caveat: browsers typically
+limit max wasm table size to 10000000 which may not be large enough
+for some use cases. We can workaround the table size restriction by
+managing a pool or tree of wasm tables, but at this point we really
+should ditch the idea of doing everything in wasm just because we can.
+
+Next, we have the unlifted JSVal# type, defined in jsval.cmm and
+contains one non-pointer word which is the id allocated by
+JSValManager. On top of JSVal#, we have the user-facing lifted JSVal
+type, which carries the JSVal#, as well as a weak pointer and a stable
+pointer.
+
+The weak pointer is used to garbage collect JSVals. Its key is the
+JSVal# closure, and it has a C finalizer that tells the JSValManager
+to drop the mapping when the JSVal# closure is collected. Since we
+want to provide freeJSVal to allow eager freeing of JSVals, we need to
+carry it as a field of JSVal.
+
+The stable pointer field is NULL for normal JSVals created via foreign
+import results or foreign export arguments. But for JSFFI dynamic
+exports that wraps a Haskell function closure as a JavaScript callback
+and returns that callback's JSVal, it is a stable pointer that pins
+that Haskell function closure. If this JSVal is garbage collected,
+then we can only rely on a JavaScript FinalizerRegistry to free the
+stable pointer in the future, but if we eagerly free the callback with
+freeJSVal, then we can eagerly free this stable pointer as well.
+
+The lifted JSVal type is meant to be an abstract type. Its creation
+and consumption is mainly handled by the RTS API functions rts_mkJSVal
+and rts_getJSVal, which are used in C stub files generated when
+desugaring JSFFI foreign imports/exports.
+
+-}
+
+newtype JSVal#
+  = JSVal# (Any :: UnliftedType)
+
+data JSVal
+  = forall a . JSVal JSVal# (Weak# JSVal#) (StablePtr# a)
+
+freeJSVal :: JSVal -> IO ()
+freeJSVal v@(JSVal _ w sp) = do
+  case sp `eqStablePtr#` unsafeCoerce# nullAddr# of
+    0# -> do
+      js_callback_unregister v
+      freeStablePtr $ StablePtr sp
+    _ -> pure ()
+  IO $ \s0 -> case finalizeWeak# w s0 of
+    (# s1, _, _ #) -> (# s1, () #)
+
+foreign import javascript unsafe "if (!__ghc_wasm_jsffi_finalization_registry.unregister($1)) { throw new WebAssembly.RuntimeError('js_callback_unregister'); }"
+  js_callback_unregister :: JSVal -> IO ()
+
+newtype JSString
+  = JSString JSVal
+
+fromJSString :: JSString -> String
+fromJSString s = unsafeDupablePerformIO $ do
+  l <- js_stringLength s
+  fp <- mallocPlainForeignPtrBytes $ l * 3
+  withForeignPtr fp $ \buf -> do
+    l' <- js_encodeInto s buf $ l * 3
+    peekCStringLen utf8 (buf, l')
+
+foreign import javascript unsafe "$1.length"
+  js_stringLength :: JSString -> IO Int
+
+foreign import javascript unsafe "(new TextEncoder()).encodeInto($1, new Uint8Array(__exports.memory.buffer, $2, $3)).written"
+  js_encodeInto :: JSString -> Ptr a -> Int -> IO Int
+
+toJSString :: String -> JSString
+toJSString s = unsafeDupablePerformIO $ withCStringLen utf8 s $ \(buf, len) -> js_toJSString buf len
+
+foreign import javascript unsafe "(new TextDecoder('utf-8', {fatal: true})).decode(new Uint8Array(__exports.memory.buffer, $1, $2))"
+  js_toJSString :: Ptr a -> Int -> IO JSString
+
+newtype JSException
+  = JSException JSVal
+
+instance Show JSException where
+  showsPrec p e =
+    showParen (p >= 11) $ showString "JSException " . showsPrec 11 (jsErrorString e)
+
+jsErrorString :: JSException -> String
+jsErrorString e = unsafeDupablePerformIO $ do
+  s@(JSString v) <- js_errorToString e
+  r <- evaluate $ fromJSString s
+  freeJSVal v
+  pure r
+
+foreign import javascript unsafe "`${$1.stack ? $1.stack : $1}`"
+  js_errorToString :: JSException -> IO JSString
+
+instance Exception JSException
+
+data WouldBlockException
+  = WouldBlockException
+  deriving (Show)
+
+instance Exception WouldBlockException
+
+data PromisePendingException
+  = PromisePendingException
+  deriving (Show)
+
+instance Exception PromisePendingException
diff --git a/rts/rts.cabal b/rts/rts.cabal
index 23901437d7ff913798c9d3ac247d718abb0ab64c..fc1b784b0b36f6d596527ee8a6665caa1aea7930 100644
--- a/rts/rts.cabal
+++ b/rts/rts.cabal
@@ -519,7 +519,12 @@ library
                    wasm/GetTime.c
                    wasm/OSMem.c
                    wasm/OSThreads.c
+                   wasm/JSFFI.c
+                   wasm/JSFFIGlobals.c
                    posix/Select.c
+        cmm-sources: wasm/jsval.cmm
+                     wasm/blocker.cmm
+                     wasm/scheduler.cmm
       else
          c-sources: posix/GetEnv.c
                     posix/GetTime.c
diff --git a/rts/wasm/JSFFI.c b/rts/wasm/JSFFI.c
new file mode 100644
index 0000000000000000000000000000000000000000..20777b991ffccd62e8d27742cd041fcaddbc971c
--- /dev/null
+++ b/rts/wasm/JSFFI.c
@@ -0,0 +1,227 @@
+#include "Rts.h"
+#include "Prelude.h"
+#include "Schedule.h"
+#include "sm/Sanity.h"
+
+#if defined(__wasm_reference_types__)
+
+extern HsBool rts_JSFFI_flag;
+extern HsStablePtr rts_threadDelay_impl;
+extern StgClosure ghczminternal_GHCziWasmziPrimziConcziInternal_threadDelay_closure;
+
+int __main_void(void);
+
+int __main_argc_argv(int, char*[]);
+
+int __main_argc_argv(int argc, char *argv[]) {
+  RtsConfig __conf = defaultRtsConfig;
+  __conf.rts_opts_enabled = RtsOptsAll;
+  __conf.rts_hs_main = false;
+  hs_init_ghc(&argc, &argv, __conf);
+  // See Note [threadDelay on wasm] for details.
+  rts_JSFFI_flag = HS_BOOL_TRUE;
+  rts_threadDelay_impl = getStablePtr((StgPtr)&ghczminternal_GHCziWasmziPrimziConcziInternal_threadDelay_closure);
+  return 0;
+}
+
+// Note [JSFFI initialization]
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//
+// In the wasi preview1 spec, there are two kinds of wasm32-wasi
+// modules: commands and reactors. A command module exports _start()
+// that's intended to be invoked exactly once, after which all
+// instance state is invalidated. A reactor module supports
+// user-defined entrypoints that can be called multiple times after
+// _initialize() has been called. In Haskell, when JSFFI is used, we
+// expect user-defined JSFFI exports to be entrypoints that can be
+// called from JavaScript multiple times, in which case the user needs
+// to target wasm32-wasi reactor modules by passing the correct
+// link-time options, see user manual for details.
+//
+// What about RTS initialization? We can tell users to export
+// functions like hs_init() as well and call that before calling JSFFI
+// exports, but this is very inconvenient. So in this case, we choose
+// to do it via a ctor defined here. When JSFFI is not used, JSFFI.o
+// will not be included by wasm-ld so this ctor will not get in the
+// way. When JSFFI is indeed used, this ctor will be called by the
+// linker generated __wasm_call_ctors() function, which is called by
+// _initialize(), so the user only needs to call _initialize() once
+// and then they can call user exported functions directly.
+//
+// When it comes to ctors, we must pay close attention to ctor
+// priorities to guarantee they are invoked in the correct order. Both
+// wasi-libc and emscripten libc makes use of ctors to initialize some
+// libc state, and we want them to be invoked first, so priority range
+// [0..100] has been fully booked. If priority is ommited, it defaults
+// the lowest value 65535, and now there's a problem: there are other
+// ctors generated by the GHC codegen (e.g. registering foreign export
+// closures as GC roots), and we must ensure those ctors are invoked
+// before our RTS initialization logic kicks in!
+//
+// Therefore, on wasm32, we designate priority 101 to ctors generated
+// by the GHC codegen, and priority 102 to the initialization logic
+// here to ensure hs_init_ghc() sees everything it needs to see.
+__attribute__((constructor(102))) static void __ghc_wasm_jsffi_init(void) {
+  // See
+  // https://gitlab.haskell.org/ghc/wasi-libc/-/blob/main/libc-bottom-half/sources/__main_void.c
+  // for its definition. It initializes some libc state, then calls
+  // __main_argc_argv defined above.
+  __main_void();
+}
+
+typedef __externref_t HsJSVal;
+typedef StgWord JSValKey;
+
+extern const StgInfoTable stg_JSVAL_info;
+extern const StgInfoTable ghczminternal_GHCziWasmziPrimziTypes_JSVal_con_info;
+
+// See Note [JSVal representation for wasm] for detailed explanation.
+
+__attribute__((import_module("ghc_wasm_jsffi"), import_name("newJSVal")))
+JSValKey __imported_newJSVal(HsJSVal);
+
+__attribute__((import_module("ghc_wasm_jsffi"),
+               import_name("freeJSVal"))) void __imported_freeJSVal(JSValKey);
+
+HaskellObj rts_mkJSVal(Capability*, HsJSVal);
+HaskellObj rts_mkJSVal(Capability *cap, HsJSVal v) {
+  JSValKey k = __imported_newJSVal(v);
+
+  HaskellObj p = (HaskellObj)allocate(cap, CONSTR_sizeW(0, 1));
+  SET_HDR(p, &stg_JSVAL_info, CCS_SYSTEM);
+  p->payload[0] = (HaskellObj)k;
+
+  StgCFinalizerList *cfin =
+      (StgCFinalizerList *)allocate(cap, sizeofW(StgCFinalizerList));
+  SET_HDR(cfin, &stg_C_FINALIZER_LIST_info, CCS_SYSTEM);
+  cfin->link = &stg_NO_FINALIZER_closure;
+  cfin->fptr = (void (*)(void))__imported_freeJSVal;
+  cfin->ptr = (void *)k;
+  cfin->flag = 0;
+
+  StgWeak *w = (StgWeak *)allocate(cap, sizeofW(StgWeak));
+  SET_HDR(w, &stg_WEAK_info, CCS_SYSTEM);
+  w->cfinalizers = (StgClosure *)cfin;
+  w->key = p;
+  w->value = Unit_closure;
+  w->finalizer = &stg_NO_FINALIZER_closure;
+  w->link = cap->weak_ptr_list_hd;
+  cap->weak_ptr_list_hd = w;
+  if (cap->weak_ptr_list_tl == NULL) {
+    cap->weak_ptr_list_tl = w;
+  }
+
+  HaskellObj box = (HaskellObj)allocate(cap, CONSTR_sizeW(3, 0));
+  SET_HDR(box, &ghczminternal_GHCziWasmziPrimziTypes_JSVal_con_info, CCS_SYSTEM);
+  box->payload[0] = p;
+  box->payload[1] = (HaskellObj)w;
+  box->payload[2] = NULL;
+  return TAG_CLOSURE(1, box);
+}
+
+__attribute__((import_module("ghc_wasm_jsffi"), import_name("getJSVal")))
+HsJSVal __imported_getJSVal(JSValKey);
+
+STATIC_INLINE HsJSVal rts_getJSValzh(HaskellObj p) {
+  ASSERT(p->header.info == &stg_JSVAL_info);
+  return __imported_getJSVal((JSValKey)p->payload[0]);
+}
+
+HsJSVal rts_getJSVal(HaskellObj);
+HsJSVal rts_getJSVal(HaskellObj box) {
+  ASSERT(UNTAG_CLOSURE(box)->header.info == &ghczminternal_GHCziWasmziPrimziTypes_JSVal_con_info);
+  return rts_getJSValzh(UNTAG_CLOSURE(box)->payload[0]);
+}
+
+INLINE_HEADER void pushClosure   (StgTSO *tso, StgWord c) {
+  tso->stackobj->sp--;
+  tso->stackobj->sp[0] = (W_) c;
+}
+
+extern const StgInfoTable stg_jsffi_block_info;
+extern const StgInfoTable stg_scheduler_loop_info;
+extern StgClosure ghczminternal_GHCziWasmziPrimziImports_raiseJSException_closure;
+
+// schedule a future round of RTS scheduler loop via setImmediate(),
+// to avoid jamming the JavaScript main thread
+
+__attribute__((import_module("ghc_wasm_jsffi"), import_name("scheduleWork")))
+void __imported_scheduleWork(void);
+
+void rts_scheduleWork(void);
+void rts_scheduleWork(void) {
+  __imported_scheduleWork();
+}
+
+__attribute__((export_name("rts_schedulerLoop")))
+void rts_schedulerLoop(void);
+void rts_schedulerLoop(void) {
+  Capability *cap = rts_lock();
+  StgTSO *tso = createThread(cap, RESERVED_STACK_WORDS);
+  pushClosure(tso, (StgWord)&stg_scheduler_loop_info);
+  scheduleWaitThread(tso, NULL, &cap);
+  rts_checkSchedStatus("rts_schedulerLoop", cap);
+  rts_unlock(cap);
+}
+
+#define mk_rtsPromiseCallback(obj)                         \
+  {                                                        \
+  Capability *cap = &MainCapability;                       \
+  StgTSO *tso = (StgTSO*)deRefStablePtr(sp);               \
+  IF_DEBUG(sanity, checkTSO(tso));                         \
+  hs_free_stable_ptr(sp);                                  \
+                                                           \
+  StgStack *stack = tso->stackobj;                         \
+  IF_DEBUG(sanity, checkSTACK(stack));                     \
+                                                           \
+  if (stack->sp[0] == (StgWord)&stg_jsffi_block_info) {    \
+    dirty_TSO(cap, tso);                                   \
+    dirty_STACK(cap, stack);                               \
+    stack->sp[1] = (StgWord)(obj);                         \
+  }                                                        \
+  scheduleThreadNow(cap, tso);                             \
+  rts_schedulerLoop();                                     \
+  }
+
+#define mk_rtsPromiseResolve(T)                            \
+  __attribute__((export_name("rts_promiseResolve"#T)))     \
+  void rts_promiseResolve##T(HsStablePtr, Hs##T);          \
+  void rts_promiseResolve##T(HsStablePtr sp, Hs##T js_res) \
+  mk_rtsPromiseCallback(rts_mk##T(cap, js_res))
+
+__attribute__((export_name("rts_promiseResolveUnit")))
+void rts_promiseResolveUnit(HsStablePtr);
+void rts_promiseResolveUnit(HsStablePtr sp)
+  mk_rtsPromiseCallback(TAG_CLOSURE(1, Unit_closure))
+
+mk_rtsPromiseResolve(JSVal)
+mk_rtsPromiseResolve(Char)
+mk_rtsPromiseResolve(Int)
+mk_rtsPromiseResolve(Int8)
+mk_rtsPromiseResolve(Int16)
+mk_rtsPromiseResolve(Int32)
+mk_rtsPromiseResolve(Int64)
+mk_rtsPromiseResolve(Word)
+mk_rtsPromiseResolve(Word8)
+mk_rtsPromiseResolve(Word16)
+mk_rtsPromiseResolve(Word32)
+mk_rtsPromiseResolve(Word64)
+mk_rtsPromiseResolve(Ptr)
+mk_rtsPromiseResolve(FunPtr)
+mk_rtsPromiseResolve(Float)
+mk_rtsPromiseResolve(Double)
+mk_rtsPromiseResolve(StablePtr)
+mk_rtsPromiseResolve(Bool)
+
+__attribute__((export_name("rts_promiseReject")))
+void rts_promiseReject(HsStablePtr, HsJSVal);
+void rts_promiseReject(HsStablePtr sp, HsJSVal js_err)
+  mk_rtsPromiseCallback(rts_apply(cap, &ghczminternal_GHCziWasmziPrimziImports_raiseJSException_closure, rts_mkJSVal(cap, js_err)))
+
+__attribute__((export_name("rts_freeStablePtr")))
+void rts_freeStablePtr(HsStablePtr);
+void rts_freeStablePtr(HsStablePtr sp) {
+  hs_free_stable_ptr(sp);
+}
+
+#endif // __wasm_reference_types__
diff --git a/rts/wasm/JSFFIGlobals.c b/rts/wasm/JSFFIGlobals.c
new file mode 100644
index 0000000000000000000000000000000000000000..8e8f6d0620cafc9028604faace6719940d1d3e3a
--- /dev/null
+++ b/rts/wasm/JSFFIGlobals.c
@@ -0,0 +1,17 @@
+#include "Rts.h"
+
+// See Note [threadDelay on wasm] for details.
+
+HsBool rts_JSFFI_flag = HS_BOOL_FALSE;
+
+HsBool rts_JSFFI_used(void);
+HsBool rts_JSFFI_used(void) {
+  return rts_JSFFI_flag;
+}
+
+HsStablePtr rts_threadDelay_impl = NULL;
+
+HsStablePtr rts_threadDelay_sp(void);
+HsStablePtr rts_threadDelay_sp(void) {
+  return rts_threadDelay_impl;
+}
diff --git a/rts/wasm/blocker.cmm b/rts/wasm/blocker.cmm
new file mode 100644
index 0000000000000000000000000000000000000000..b449b5fa2264eedf1a347c93a448886b808f6170
--- /dev/null
+++ b/rts/wasm/blocker.cmm
@@ -0,0 +1,44 @@
+#include "Cmm.h"
+
+#if !defined(UnregisterisedCompiler)
+import CLOSURE STK_CHK_ctr;
+import CLOSURE stg_jsffi_block_info;
+#endif
+
+// The ret field will be the boxed result that the JSFFI async import
+// actually returns. Or a bottom closure that throws JSException in
+// case of Promise rejection.
+INFO_TABLE_RET ( stg_jsffi_block, RET_SMALL, W_ info_ptr, P_ ret )
+  return ()
+{
+  jump %ENTRY_CODE(Sp(0)) (ret);
+}
+
+// Push a stg_jsffi_block frame and suspend the current thread. bottom
+// is a placeholder that throws PromisePendingException, though in
+// theory the user should never see PromisePendingException since that
+// indicates a thread blocked for async JSFFI is mistakenly resumed
+// somehow.
+stg_jsffi_block (P_ bottom)
+{
+  Sp_adj(-2);
+  Sp(0) = stg_jsffi_block_info;
+  Sp(1) = bottom;
+
+  ASSERT(SpLim - WDS(RESERVED_STACK_WORDS) <= Sp);
+
+  jump stg_block_noregs ();
+}
+
+// Check that we're in a forked thread at the moment, since main
+// threads that are bound to an InCall frame cannot block waiting for
+// a Promise to fulfill. err is the SomeException closure of
+// WouldBlockException.
+stg_jsffi_check (P_ err)
+{
+  if (StgTSO_bound(CurrentTSO) != NULL) {
+    jump stg_raisezh (err);
+  }
+
+  return ();
+}
diff --git a/rts/wasm/jsval.cmm b/rts/wasm/jsval.cmm
new file mode 100644
index 0000000000000000000000000000000000000000..f8feb85ed5efab5dadbf894586fc9597a98e6ee3
--- /dev/null
+++ b/rts/wasm/jsval.cmm
@@ -0,0 +1,10 @@
+#include "Cmm.h"
+
+// This defines the unlifted JSVal# type. See Note [JSVal
+// representation for wasm] for detailed explanation.
+
+INFO_TABLE(stg_JSVAL, 0, 1, PRIM, "JSVAL", "JSVAL")
+  (P_ node)
+{
+  return (node);
+}
diff --git a/rts/wasm/scheduler.cmm b/rts/wasm/scheduler.cmm
new file mode 100644
index 0000000000000000000000000000000000000000..68b04ce67bad2c5cee7c2f92094fed82b95b06fc
--- /dev/null
+++ b/rts/wasm/scheduler.cmm
@@ -0,0 +1,195 @@
+// Note [Async JSFFI scheduler]
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//
+// The biggest challenge of implementing JSFFI for the GHC wasm
+// backend, is how to make asynchronous JavaScript computation work
+// with existing RTS API, which is fundamentally synchronous.
+//
+// When a Haskell function is exported via C FFI, the generated C
+// function would call one of the rts_eval* functions, which will run
+// the main thread synchronously till completion, fetch the result and
+// return it to the C function's caller. The main thread may block in
+// between, but it's fine, since the RTS scheduler is still able to
+// make progress by running other threads and when the run queue is
+// drained, block on polling the pending I/O file descriptors until
+// I/O result is available (on single-threaded RTS, that is).
+//
+// So far so good, except it doesn't work for wasm JSFFI at all.
+// Suppose a Haskell function is exported via JSFFI, and it calls an
+// async import and needs to block waiting for the result. We need a
+// way to gracefully exit the RTS while leaving the current thread in
+// the suspended state, only to be resumed later when the async import
+// result is available. But alas, that's not supported by the RTS API
+// as of today. rts_eval* as well as the scheduler loop as implemented
+// in schedule() will not exit until the main thread is either
+// complete or killed. And given JavaScript's event loop model, we
+// can't stay in the current RTS API invocation and "poll" for
+// progress in the JavaScript world, since if we don't exit, the
+// JavaScript world would be unable to make any progress!
+//
+// It is surely possible to refactor the RTS scheduler and add async
+// flavours of the RTS API that doesn't insist waiting for the main
+// thread to complete, but it takes a lot of work. There exists other
+// hacks (e.g. web worker or asyncify) to make async JSFFI sorta work,
+// though I'll not elaborate them here and they're not satisfactory
+// anyway. However, all hope is not lost! There are a few key
+// observations that lead to our current implementation:
+//
+// 1. Forked threads have some chance of execution, but the RTS
+//    scheduler only waits for the main thread to complete and won't
+//    wait for forked threads.
+// 2. There exists the yield# primop which simply yields to other
+//    threads in the queue for execution, if there's any.
+// 3. In Haskell/Cmm, you can manipulate not just TSOs, but the thread
+//    run queue as well! With a tiny bit of patch, you can easily
+//    check whether the queue is empty or not, explicitly
+//    enqueue/dequeue TSOs, etc.
+//
+// tl;dr for a showerthought pieced together from the above
+// observations: yo dawg i heard you like concurrency so i put a
+// scheduler on top of your scheduler :)
+//
+// Meme aside, the complete solution can be summarized as follows:
+//
+// 1. Each JSFFI export function is run in a forked thread and upon
+//    completion, resolves a Promise value with the result. This is
+//    handled by the runIO/runNonIO top handler functions.
+// 2. The main thread forks the computation and returns the Promise to
+//    the caller. It'll also run a special "scheduler loop" to make
+//    some progress in the forked thread, though with no guarantee
+//    that the forked thread runs to completion.
+// 3. The main thread "scheduler loop" does one simple thing: check if
+//    the thread run queue is non-empty and if so, yield to other
+//    threads for execution, otherwise exit the loop.
+// 4. When a thread blocks for a JSFFI async import result, it pins
+//    the current TSO via a stable pointer, and calls Promise.then()
+//    on the particular Promise it's blocked on. When that Promise is
+//    fulfilled in the future, it will call back into the RTS, fetches
+//    the TSO indexed by that stable pointer, passes the result and
+//    wakes up the TSO, then finally does another round of scheduler
+//    loop. This is handled by stg_blockPromise.
+//
+// The async JSFFI scheduler is idempotent, it's safe to run it
+// multiple times, now or later, though it's not safe to forget to run
+// it when there's still thread that needs to make progress.
+//
+// A design issue with the async JSFFI scheduler is how often to yield
+// back to JavaScript. Even when the run queue is not drained yet, in
+// each iteration after some progress has been made in other threads,
+// we can choose to either keep looping, or exit immediately and
+// schedule future work to happen via setImmediate() so to avoid
+// jamming the main JavaScript thread. Given each iteration of JSFFI
+// scheduler loop has the overhead of allocating a new thread, maybe
+// it's not a good idea to always yield. In the current
+// implementation, we check the monotonic clock and yield if 15ms or
+// more has passed; this is not a hard guarantee that yielding does
+// happen around 15ms, and it's subject to future change.
+//
+// Another issue we need to deal with in the async JSFFI scheduler is
+// re-entrance. Async JSFFI mechanism allows re-entrance, JavaScript
+// may call into Haskell that calls back into JavaScript that calls
+// into Haskell again, the inception never ends. But you do not want
+// two JSFFI scheduler loops running at once, since that'll lead to an
+// infinite loop! And given we start the scheduler loop eagerly
+// instead of asynchronously, re-entrance is a real danger here.
+// Therefore, we have a global variable that is set to tso->id for the
+// TSO that runs the scheduler loop. If at some point we enter the
+// scheduler loop again and figure out another thread is already doing
+// so, we can safely exit immediately.
+//
+// The async JSFFI scheduler can be implemented in either Haskell or
+// Cmm. For efficiency reasons, it's written in Cmm for the time
+// being, though it can also be called in Haskell via a foreign import
+// prim.
+
+#include "Cmm.h"
+
+// Yield back to JavaScript and schedule future work via
+// setImmediate() after BUSY_YIELD_NS nanoseconds have passed. If it's
+// not defined, the async JSFFI scheduler will only return when the
+// thread run queue is drained, and will not check the monotonic clock
+// at all. Undefining it is useful when the wasm module is run in a
+// separate worker thread and there's no concern of blocking the
+// JavaScript main thread.
+#define BUSY_YIELD_NS 15000000
+
+import CLOSURE ghczmprim_GHCziTupleziPrim_Z0T_closure;
+#if !defined(UnregisterisedCompiler)
+import CLOSURE stg_scheduler_loop_epoch;
+import CLOSURE stg_scheduler_loop_tid;
+#endif
+
+section "data" {
+  stg_scheduler_loop_epoch: I64;
+}
+
+section "data" {
+  stg_scheduler_loop_tid: I64 0 :: I64;
+}
+
+// After creating a new thread with only a stop frame on the stack,
+// push a stg_scheduler_loop frame to make it a scheduler thread. We
+// could omit this and use C FFI to export a Haskell function that
+// invokes the scheduler loop via a foreign import prim, but that is
+// of course less efficient.
+INFO_TABLE_RET (stg_scheduler_loop, RET_SMALL, W_ info_ptr)
+  return ()
+{
+  jump stg_scheduler_loopzh ();
+}
+
+// This always returns () in R1 at the end. If only run via a foreign
+// import prim, it's fine to not return anything, but when run via a
+// stg_scheduler_loop stack frame, then the stop frame expects a valid
+// closure to be returned from R1 and placed in that frame, otherwise
+// the garbage collector can be unhappy.
+stg_scheduler_loopzh ()
+{
+  I64 epoch, now;
+
+  // Only meant to be run from a "main thread" (aka bound to an InCall
+  // frame), not from a forked thread!
+  if (StgTSO_bound(CurrentTSO) == NULL) {
+    return (ghczmprim_GHCziTupleziPrim_Z0T_closure);
+  }
+
+  // Entering the scheduler loop for the first time.
+  if (I64[stg_scheduler_loop_tid] == 0 :: I64) {
+#if defined(BUSY_YIELD_NS)
+    (epoch) = ccall getMonotonicNSec();
+    I64[stg_scheduler_loop_epoch] = epoch;
+#endif
+    I64[stg_scheduler_loop_tid] = StgTSO_id(CurrentTSO);
+    goto work;
+  }
+
+  // Someone else is running the loop, not my business anymore.
+  if (I64[stg_scheduler_loop_tid] != StgTSO_id(CurrentTSO)) {
+    return (ghczmprim_GHCziTupleziPrim_Z0T_closure);
+  }
+
+work:
+  // The thread run queue is drained.
+  if (Capability_n_run_queue(MyCapability()) == 0 :: I32) {
+    goto cleanup;
+  }
+
+  // Make progress in other threads.
+  call stg_yieldzh ();
+
+#if defined(BUSY_YIELD_NS)
+  (now) = ccall getMonotonicNSec();
+
+  if ((now - I64[stg_scheduler_loop_epoch]) < BUSY_YIELD_NS :: I64) {
+    jump stg_scheduler_loopzh ();
+  }
+
+  ccall rts_scheduleWork();
+#else
+  jump stg_scheduler_loopzh ();
+#endif
+
+cleanup:
+  I64[stg_scheduler_loop_tid] = 0 :: I64;
+  return (ghczmprim_GHCziTupleziPrim_Z0T_closure);
+}