diff --git a/rts/IOManager.c b/rts/IOManager.c
index 3e6761b219a9657e9a578a5f37518a67fef3003a..a818a69110f74f819872dcb6670d741426e3f4d8 100644
--- a/rts/IOManager.c
+++ b/rts/IOManager.c
@@ -27,9 +27,18 @@
 #include "posix/Signals.h"
 #endif
 
-#if defined(mingw32_HOST_OS)
+#if defined(IOMGR_ENABLED_MIO_WIN32)
 #include "win32/ThrIOManager.h"
 #include "win32/AsyncMIO.h"
+#endif
+
+#if defined(IOMGR_ENABLED_WIN32_LEGACY)
+#include "win32/AsyncMIO.h"
+#include "win32/MIOManager.h"
+#endif
+
+#if defined(IOMGR_ENABLED_WINIO)
+#include "win32/ThrIOManager.h"
 #include "win32/AsyncWinIO.h"
 #endif
 
@@ -492,8 +501,48 @@ void syncIOWaitReady(Capability   *cap USED_IF_NOT_THREADS,
             barf("waitRead# / waitWrite# not available for current I/O manager");
     }
 }
-#pragma GCC diagnostic pop
 
+/* Register a delay of us_delay for tso with the active I/O manager. This only
+ * marks tso as blocked and queues it; the caller must then block the thread.
+ */
+void syncDelay(Capability *cap, StgTSO *tso, HsInt us_delay)
+{
+    ASSERT(tso->why_blocked == NotBlocked);
+    switch (iomgr_type) {
+#if defined(IOMGR_ENABLED_SELECT)
+        case IO_MANAGER_SELECT:
+        {
+            LowResTime wake_at = getDelayTarget(us_delay);
+            tso->block_info.target = wake_at;
+            RELEASE_STORE(&tso->why_blocked, BlockedOnDelay);
+            insertIntoSleepingQueue(cap, tso, wake_at);
+            break;
+        }
+#endif
+#if defined(IOMGR_ENABLED_WIN32_LEGACY)
+        case IO_MANAGER_WIN32_LEGACY:
+        {
+            /* The result record comes from the C heap; allocating it on the
+             * GC heap instead would make the primops more consistent.
+             */
+            StgAsyncIOResult *req = stgMallocBytes(sizeof *req, "syncDelay");
+            req->len     = 0;
+            req->errCode = 0;
+            req->reqID   = addDelayRequest(us_delay);
+            tso->block_info.async_result = req;
+            /* Every async-blocked thread lives on the blocked_queue, which
+             * keeps things simple: mark it OnDoProc and queue it there.
+             */
+            RELEASE_STORE(&tso->why_blocked, BlockedOnDoProc);
+            appendToIOBlockedQueue(cap, tso);
+            break;
+        }
+#endif
+        default:
+            barf("syncDelay not supported for I/O manager %d", iomgr_type);
+    }
+}
+#pragma GCC diagnostic pop
 
 #if defined(IOMGR_ENABLED_SELECT) || defined(IOMGR_ENABLED_WIN32_LEGACY)
 void appendToIOBlockedQueue(Capability *cap, StgTSO *tso)
diff --git a/rts/IOManager.h b/rts/IOManager.h
index df9d1b1d6f827d6480884f0afd1bc129c8854efd..c599e72f80be163e62bc0cdf46eea418c2a4a6fd 100644
--- a/rts/IOManager.h
+++ b/rts/IOManager.h
@@ -277,8 +277,15 @@ void markCapabilityIOManager(evac_fn evac, void *user, CapIOManager *iomgr);
  */
 typedef enum { IORead, IOWrite } IOReadOrWrite;
 
+/* Synchronous operations: waiting for I/O readiness (syncIOWaitReady) and
+ * delays (syncDelay). Being synchronous, they necessarily operate on a
+ * thread: the given tso is suspended until the operation completes.
+ */
+
 void syncIOWaitReady(Capability *cap, StgTSO *tso, IOReadOrWrite rw, HsInt fd);
 
+void syncDelay(Capability *cap, StgTSO *tso, HsInt us_delay);
+
 #if !defined(THREADED_RTS)
 /* Add a thread to the end of the queue of threads blocked on I/O.
  *
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 46c6c926b1e4d6c514024e6d7b31eac6df47cea0..68ea90a08cc38cc49c21d733d2dfe00e7f583e5b 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -2562,49 +2562,19 @@ stg_waitWritezh ( W_ fd )
 
 stg_delayzh ( W_ us_delay )
 {
-#if defined(mingw32_HOST_OS)
-    W_ ares;
-    CInt reqID;
-#else
-    W_ t, prev, target;
-#endif
-
-#if defined(THREADED_RTS)
-    ccall barf("delay# on threaded RTS") never returns;
-#else
-
-    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I32);
-
-#if defined(mingw32_HOST_OS)
-
-    /* could probably allocate this on the heap instead */
-    ("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult,
-                                        "stg_delayzh");
-    (reqID) = ccall addDelayRequest(us_delay);
-    StgAsyncIOResult_reqID(ares)   = reqID;
-    StgAsyncIOResult_len(ares)     = 0;
-    StgAsyncIOResult_errCode(ares) = 0;
-    StgTSO_block_info(CurrentTSO)  = ares;
+    ccall syncDelay(MyCapability() "ptr", CurrentTSO "ptr", us_delay);
 
-    /* Having all async-blocked threads reside on the blocked_queue
-     * simplifies matters, so change the status to OnDoProc put the
-     * delayed thread on the blocked_queue.
+    /* Annoyingly, how we wait for and resume the blocked thread cannot be the
+     * same on every platform. The win32 legacy I/O manager allocates a
+     * StgAsyncIOResult struct on the C heap, which has to be freed when the
+     * thread resumes, hence the separate blocking path on Windows. Arranging
+     * to allocate it on the GC heap instead is awkward, so it stays for now.
      */
-    %release StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I32;
-    ccall appendToIOBlockedQueue(MyCapability() "ptr", CurrentTSO "ptr");
+#if defined(mingw32_HOST_OS)
     jump stg_block_async_void();
-
 #else
-
-    %relaxed StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I32;
-    (target) = ccall getDelayTarget(us_delay);
-
-    StgTSO_block_info(CurrentTSO) = target;
-
-    ccall insertIntoSleepingQueue(MyCapability() "ptr", CurrentTSO "ptr", target);
     jump stg_block_noregs();
 #endif
-#endif /* !THREADED_RTS */
 }