From 615eb855416ce536e02ed935ecc5a6f25519ae16 Mon Sep 17 00:00:00 2001
From: Cheng Shao <terrorjack@type.dance>
Date: Thu, 18 Jan 2024 04:00:16 +0000
Subject: [PATCH] compiler: make genSym use C-based atomic increment on non-JS
 32-bit platforms

The pure Haskell implementation causes i386 regression in unrelated
work that can be fixed by using C-based atomic increment, see added
comment for details.
---
 compiler/GHC/Types/Unique/Supply.hs | 38 +++++++++++++++++++++++++----
 compiler/cbits/genSym.c             | 11 +++++++++
 rts/RtsSymbols.c                    |  1 -
 rts/include/stg/SMP.h               | 29 ----------------------
 4 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/compiler/GHC/Types/Unique/Supply.hs b/compiler/GHC/Types/Unique/Supply.hs
index a2bd26c2d211..a809163c070b 100644
--- a/compiler/GHC/Types/Unique/Supply.hs
+++ b/compiler/GHC/Types/Unique/Supply.hs
@@ -7,6 +7,7 @@
 {-# LANGUAGE MagicHash #-}
 {-# LANGUAGE PatternSynonyms #-}
 {-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnliftedFFITypes #-}
 
 module GHC.Types.Unique.Supply (
         -- * Main data type
@@ -48,9 +49,9 @@ import Foreign.Storable
 #define NO_FETCH_ADD
 #endif
 
-#if defined(NO_FETCH_ADD)
+#if defined(javascript_HOST_ARCH)
 import GHC.Exts ( atomicCasWord64Addr#, eqWord64# )
-#else
+#elif !defined(NO_FETCH_ADD)
 import GHC.Exts( fetchAddWordAddr#, word64ToWord#, wordToWord64# )
 #endif
 
@@ -232,9 +233,8 @@ mkSplitUniqSupply c
         (# s4, MkSplitUniqSupply (tag .|. u) x y #)
         }}}}
 
-#if defined(NO_FETCH_ADD)
--- GHC currently does not provide this operation on 32-bit platforms,
--- hence the CAS-based implementation.
+#if defined(javascript_HOST_ARCH)
+-- CAS-based pure Haskell implementation
 fetchAddWord64Addr# :: Addr# -> Word64# -> State# RealWorld
                     -> (# State# RealWorld, Word64# #)
 fetchAddWord64Addr# = go
@@ -246,7 +246,35 @@ fetchAddWord64Addr# = go
             (# s2, res #)
               | 1# <- res `eqWord64#` n0 -> (# s2, n0 #)
               | otherwise -> go ptr inc s2
+
+#elif defined(NO_FETCH_ADD)
+
+-- atomic_inc64 is defined in compiler/cbits/genSym.c. This is of
+-- course not ideal, but we need to live with it for now given the
+-- current situation:
+-- 1. There's no Haskell primop fetchAddWord64Addr# on 32-bit
+--    platforms yet
+-- 2. The Cmm %fetch_add64 primop syntax is only present in ghc 9.8
+--    but we currently bootstrap from older ghc in our CI
+-- 3. The Cmm MO_AtomicRMW operation with 64-bit width is well
+--    supported on 32-bit platforms already, but the plumbing from
+--    either Haskell or Cmm doesn't work yet because of 1 or 2
+-- 4. There's hs_atomic_add64 in ghc-prim cbits that we ought to use,
+--    but it's only available on 32-bit starting from ghc 9.8
+-- 5. The pure Haskell implementation causes mysterious i386
+--    regression in unrelated ghc work that can only be fixed by the C
+--    version here
+
+foreign import ccall unsafe "atomic_inc64" atomic_inc64 :: Addr# -> Word64# -> IO Word64
+
+fetchAddWord64Addr# :: Addr# -> Word64# -> State# RealWorld
+                    -> (# State# RealWorld, Word64# #)
+fetchAddWord64Addr# addr inc s0 =
+    case unIO (atomic_inc64 addr inc) s0 of
+      (# s1, W64# res #) -> (# s1, res #)
+
 #else
+
 fetchAddWord64Addr# :: Addr# -> Word64# -> State# RealWorld
                     -> (# State# RealWorld, Word64# #)
 fetchAddWord64Addr# addr inc s0 =
diff --git a/compiler/cbits/genSym.c b/compiler/cbits/genSym.c
index de7dd8409419..e307432d9960 100644
--- a/compiler/cbits/genSym.c
+++ b/compiler/cbits/genSym.c
@@ -16,3 +16,14 @@ HsWord64 ghc_unique_counter64 = 0;
 HsInt ghc_unique_inc     = 1;
 #endif
 
+// Only used on 32-bit non-JS platforms
+#if WORD_SIZE_IN_BITS != 64
+StgWord64 atomic_inc64(StgWord64 volatile* p, StgWord64 incr)
+{
+#if defined(HAVE_C11_ATOMICS)
+    return __atomic_fetch_add(p, incr, __ATOMIC_SEQ_CST);
+#else
+    return __sync_fetch_and_add(p, incr);
+#endif
+}
+#endif
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index baf93cc38f28..a596cc33d171 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -927,7 +927,6 @@ extern char **environ;
       SymI_HasProto(getUserEra)                                         \
       SymI_HasProto(requestHeapCensus)                                  \
       SymI_HasProto(atomic_inc)                                         \
-      SymI_HasProto(atomic_inc64)                                       \
       SymI_HasProto(atomic_dec)                                         \
       SymI_HasProto(hs_spt_lookup)                                      \
       SymI_HasProto(hs_spt_insert)                                      \
diff --git a/rts/include/stg/SMP.h b/rts/include/stg/SMP.h
index 33ad109fbf40..1b9995557324 100644
--- a/rts/include/stg/SMP.h
+++ b/rts/include/stg/SMP.h
@@ -81,16 +81,6 @@ EXTERN_INLINE StgWord cas_seq_cst_relaxed(StgVolatilePtr p, StgWord o, StgWord n
  */
 EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p, StgWord n);
 
-
-/*
- * Atomic 64-bit addition of by the provided quantity
- *
- * atomic_inc64(p, n) {
- *   return ((*p) += n);
- * }
- */
-EXTERN_INLINE StgWord64 atomic_inc64(StgWord64 volatile* p, StgWord64 n);
-
 /*
  * Atomic decrement
  *
@@ -523,16 +513,6 @@ atomic_inc(StgVolatilePtr p, StgWord incr)
 #endif
 }
 
-EXTERN_INLINE StgWord64
-atomic_inc64(StgWord64 volatile* p, StgWord64 incr)
-{
-#if defined(HAVE_C11_ATOMICS)
-    return __atomic_add_fetch(p, incr, __ATOMIC_SEQ_CST);
-#else
-    return __sync_add_and_fetch(p, incr);
-#endif
-}
-
 EXTERN_INLINE StgWord
 atomic_dec(StgVolatilePtr p)
 {
@@ -686,15 +666,6 @@ atomic_inc(StgVolatilePtr p, StgWord incr)
     return ((*p) += incr);
 }
 
-
-EXTERN_INLINE StgWord64 atomic_inc64(StgWord64 volatile* p, StgWord64 incr);
-EXTERN_INLINE StgWord64
-atomic_inc64(StgWord64 volatile* p, StgWord64 incr)
-{
-    return ((*p) += incr);
-}
-
-
 INLINE_HEADER StgWord
 atomic_dec(StgVolatilePtr p)
 {
-- 
GitLab