diff --git a/compiler/GHC/Cmm/MachOp.hs b/compiler/GHC/Cmm/MachOp.hs index 0e7a267afbd9d023512b2a327e0163a39114d6d4..81abe1a5d7b34a50b0c9fb439a8a5fc60889347a 100644 --- a/compiler/GHC/Cmm/MachOp.hs +++ b/compiler/GHC/Cmm/MachOp.hs @@ -727,6 +727,7 @@ data CallishMachOp | MO_AcquireFence | MO_ReleaseFence + | MO_SeqCstFence -- | Atomic read-modify-write. Arguments are @[dest, n]@. | MO_AtomicRMW Width AtomicMachOp diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y index 07847af274277ac6215bb94fe78343421970d7fa..18c34896cd052ba0f18d158a6884f690e4c314b7 100644 --- a/compiler/GHC/Cmm/Parser.y +++ b/compiler/GHC/Cmm/Parser.y @@ -1133,6 +1133,8 @@ callishMachOps platform = listToUFM $ -- with an overlapping token ('acquire') in the lexer. ( "fence_acquire", (MO_AcquireFence,)), ( "fence_release", (MO_ReleaseFence,)), + ( "fence_seq_cst", (MO_SeqCstFence,)), + ( "memcpy", memcpyLikeTweakArgs MO_Memcpy ), ( "memset", memcpyLikeTweakArgs MO_Memset ), ( "memmove", memcpyLikeTweakArgs MO_Memmove ), diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs index b922195404ed8c3fd2e09384c33dad6582d09b1b..65c1098202dda3637a5a1c6de3ef53adef8a4eac 100644 --- a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs @@ -1730,6 +1730,7 @@ genCCall target dest_regs arg_regs bid = do -- Memory Ordering MO_AcquireFence -> return (unitOL DMBISH, Nothing) MO_ReleaseFence -> return (unitOL DMBISH, Nothing) + MO_SeqCstFence -> return (unitOL DMBISH, Nothing) MO_Touch -> return (nilOL, Nothing) -- Keep variables live (when using interior pointers) -- Prefetch MO_Prefetch_Data _n -> return (nilOL, Nothing) -- Prefetch hint. 
diff --git a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs index 9082616bead42f10a06c3d2452340bb8d007fbc4..3877e33e5520c5a04a170452ebddd9ad6c1d193f 100644 --- a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs @@ -1132,6 +1132,8 @@ genCCall (PrimTarget MO_AcquireFence) _ _ = return $ unitOL LWSYNC genCCall (PrimTarget MO_ReleaseFence) _ _ = return $ unitOL LWSYNC +genCCall (PrimTarget MO_SeqCstFence) _ _ + = return $ unitOL HWSYNC genCCall (PrimTarget MO_Touch) _ _ = return $ nilOL @@ -2098,6 +2100,7 @@ genCCall' config gcp target dest_regs args MO_U_Mul2 {} -> unsupported MO_AcquireFence -> unsupported MO_ReleaseFence -> unsupported + MO_SeqCstFence -> unsupported MO_Touch -> unsupported MO_Prefetch_Data _ -> unsupported unsupported = panic ("outOfLineCmmOp: " ++ show mop diff --git a/compiler/GHC/CmmToAsm/Wasm/FromCmm.hs b/compiler/GHC/CmmToAsm/Wasm/FromCmm.hs index d1f4d203f26ccaad0d1cc8373e502d69da5ea5ce..92e96ab83bf28224a3dd89c77ae1b633f769e8dc 100644 --- a/compiler/GHC/CmmToAsm/Wasm/FromCmm.hs +++ b/compiler/GHC/CmmToAsm/Wasm/FromCmm.hs @@ -1189,6 +1189,7 @@ lower_CallishMachOp lbl MO_F32_Sqrt rs xs = lower_CMO_Un_Homo lbl "sqrtf" rs xs lower_CallishMachOp lbl (MO_UF_Conv w0) rs xs = lower_MO_UF_Conv lbl w0 rs xs lower_CallishMachOp _ MO_AcquireFence _ _ = pure $ WasmStatements WasmNop lower_CallishMachOp _ MO_ReleaseFence _ _ = pure $ WasmStatements WasmNop +lower_CallishMachOp _ MO_SeqCstFence _ _ = pure $ WasmStatements WasmNop lower_CallishMachOp _ MO_Touch _ _ = pure $ WasmStatements WasmNop lower_CallishMachOp _ (MO_Prefetch_Data {}) _ _ = pure $ WasmStatements WasmNop lower_CallishMachOp lbl (MO_Memcpy {}) [] xs = do diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs index afbc72f97236b73d7044f3e2e39820bbb0aa2e50..a1744566addce84d2f50258ad02fa8b61aa71afa 100644 --- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs +++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs @@ 
-2403,6 +2403,7 @@ genSimplePrim bid (MO_Memcmp align) [res] [dst,src,n] = genMemCmp bid a genSimplePrim bid (MO_Memset align) [] [dst,c,n] = genMemSet bid align dst c n genSimplePrim _ MO_AcquireFence [] [] = return nilOL -- barriers compile to no code on x86/x86-64; genSimplePrim _ MO_ReleaseFence [] [] = return nilOL -- we keep it this long in order to prevent earlier optimisations. +genSimplePrim _ MO_SeqCstFence [] [] = return $ unitOL MFENCE genSimplePrim _ MO_Touch [] [_] = return nilOL genSimplePrim _ (MO_Prefetch_Data n) [] [src] = genPrefetchData n src genSimplePrim _ (MO_BSwap width) [dst] [src] = genByteSwap width dst src @@ -4667,4 +4668,3 @@ genPred64 cond dst x y = do , SETCC cond (OpReg dst_r) , MOVZxL II8 (OpReg dst_r) (OpReg dst_r) ] - diff --git a/compiler/GHC/CmmToC.hs b/compiler/GHC/CmmToC.hs index e848dbca0aec5a17fa2f2c8911a0a63ec32ced51..e646ef1e709b7f30b49f9cdf4f89c1d69f30252c 100644 --- a/compiler/GHC/CmmToC.hs +++ b/compiler/GHC/CmmToC.hs @@ -265,6 +265,8 @@ pprStmt platform stmt = text "__atomic_thread_fence(__ATOMIC_RELEASE);" CmmUnsafeForeignCall (PrimTarget MO_AcquireFence) [] [] -> text "__atomic_thread_fence(__ATOMIC_ACQUIRE);" + CmmUnsafeForeignCall (PrimTarget MO_SeqCstFence) [] [] -> + text "__atomic_thread_fence(__ATOMIC_SEQ_CST);" CmmUnsafeForeignCall target@(PrimTarget op) results args -> fn_call @@ -959,6 +961,7 @@ pprCallishMachOp_for_C mop MO_F32_Fabs -> text "fabsf" MO_AcquireFence -> unsupported MO_ReleaseFence -> unsupported + MO_SeqCstFence -> unsupported MO_Memcpy _ -> text "__builtin_memcpy" MO_Memset _ -> text "__builtin_memset" MO_Memmove _ -> text "__builtin_memmove" diff --git a/compiler/GHC/CmmToLlvm/CodeGen.hs b/compiler/GHC/CmmToLlvm/CodeGen.hs index 168437ae9bf68545a1558b6c3d1506961930626e..6d94f2c90506c89a3d77518b7ef0f8c435c63b7b 100644 --- a/compiler/GHC/CmmToLlvm/CodeGen.hs +++ b/compiler/GHC/CmmToLlvm/CodeGen.hs @@ -180,6 +180,8 @@ genCall (PrimTarget MO_AcquireFence) _ _ = runStmtsDecls $ statement $ Fence 
False SyncAcquire genCall (PrimTarget MO_ReleaseFence) _ _ = runStmtsDecls $ statement $ Fence False SyncRelease +genCall (PrimTarget MO_SeqCstFence) _ _ = runStmtsDecls $ + statement $ Fence False SyncSeqCst genCall (PrimTarget MO_Touch) _ _ = return (nilOL, []) @@ -992,8 +994,11 @@ cmmPrimOpFunctions mop = do -- We support MO_U_Mul2 through ordinary LLVM mul instruction, see the -- appropriate case of genCall. MO_U_Mul2 {} -> unsupported + MO_ReleaseFence -> unsupported MO_AcquireFence -> unsupported + MO_SeqCstFence -> unsupported + MO_Touch -> unsupported MO_UF_Conv _ -> unsupported diff --git a/rts/include/Cmm.h b/rts/include/Cmm.h index 598b5de8a00ab59dfc706f7b64d47f1e506651a2..780a6eb8f1fac9f65dcefd1cc3f3757193d69079 100644 --- a/rts/include/Cmm.h +++ b/rts/include/Cmm.h @@ -696,6 +696,7 @@ // See Note [ThreadSanitizer and fences] #define RELEASE_FENCE prim %fence_release(); #define ACQUIRE_FENCE prim %fence_acquire(); +#define SEQ_CST_FENCE prim %fence_seq_cst(); #if TSAN_ENABLED // This may be less efficient than a fence, but TSAN can reason about it.