Commit bd41eb2a authored by Ben Gamari's avatar Ben Gamari Committed by Ben Gamari

LLVM: Implement atomic operations in terms of LLVM primitives

This fixes Trac #7883.

This adds proper support for,
  * `MO_AtomicRMW`
  * `MO_AtomicWrite`
  * `MO_CmpXChg`

Test Plan: Validate

Reviewers: rrnewton, austin

Subscribers: thomie

Differential Revision: https://phabricator.haskell.org/D1282

GHC Trac Issues: #7883
parent b29f20ed
......@@ -20,6 +20,9 @@ module Llvm (
LlvmBlocks, LlvmBlock(..), LlvmBlockId,
LlvmParamAttr(..), LlvmParameter,
-- * Atomic operations
LlvmAtomicOp(..),
-- * Fence synchronization
LlvmSyncOrdering(..),
......
......@@ -87,6 +87,22 @@ data LlvmSyncOrdering
| SyncSeqCst
deriving (Show, Eq)
-- | LLVM atomic operations. Please see the @atomicrmw@ instruction in
-- the LLVM documentation for a complete description.
data LlvmAtomicOp
= LAO_Xchg  -- ^ Exchange; printed as @xchg@
| LAO_Add   -- ^ Addition; printed as @add@
| LAO_Sub   -- ^ Subtraction; printed as @sub@
| LAO_And   -- ^ Bitwise and; printed as @and@
| LAO_Nand  -- ^ Bitwise nand; printed as @nand@
| LAO_Or    -- ^ Bitwise or; printed as @or@
| LAO_Xor   -- ^ Bitwise xor; printed as @xor@
| LAO_Max   -- ^ Signed maximum; printed as @max@
| LAO_Min   -- ^ Signed minimum; printed as @min@
| LAO_Umax  -- ^ Unsigned maximum; printed as @umax@
| LAO_Umin  -- ^ Unsigned minimum; printed as @umin@
deriving (Show, Eq)
-- | Llvm Statements
data LlvmStatement
{- |
......@@ -258,6 +274,28 @@ data LlvmExpression
-}
| Cast LlvmCastOp LlvmVar LlvmType
{- |
Atomic read-modify-write operation
* op: Atomic operation
* addr: Address to modify
* operand: Operand to operation
* ordering: Ordering requirement
-}
| AtomicRMW LlvmAtomicOp LlvmVar LlvmVar LlvmSyncOrdering
{- |
Compare-and-exchange operation
* addr: Address to modify
* old: Expected value
* new: New value
* suc_ord: Ordering required in success case
* fail_ord: Ordering required in failure case, can be no stronger than
suc_ord
Result is an @i1@, true if store was successful.
-}
| CmpXChg LlvmVar LlvmVar LlvmVar LlvmSyncOrdering LlvmSyncOrdering
{- |
Call a function. The result is the value of the expression.
* tailJumps: CallType to signal if the function should be tail called
......
......@@ -245,6 +245,8 @@ ppLlvmExpression expr
Load ptr -> ppLoad ptr
ALoad ord st ptr -> ppALoad ord st ptr
Malloc tp amount -> ppMalloc tp amount
AtomicRMW aop tgt src ordering -> ppAtomicRMW aop tgt src ordering
CmpXChg addr old new s_ord f_ord -> ppCmpXChg addr old new s_ord f_ord
Phi tp precessors -> ppPhi tp precessors
Asm asm c ty v se sk -> ppAsm asm c ty v se sk
MExpr meta expr -> ppMetaExpr meta expr
......@@ -327,6 +329,30 @@ ppSyncOrdering SyncRelease = text "release"
ppSyncOrdering SyncAcqRel = text "acq_rel"
ppSyncOrdering SyncSeqCst = text "seq_cst"
-- | Render an 'LlvmAtomicOp' as the operation mnemonic expected by the
-- LLVM @atomicrmw@ instruction.
ppAtomicOp :: LlvmAtomicOp -> SDoc
ppAtomicOp op = text $ case op of
    LAO_Xchg -> "xchg"
    LAO_Add  -> "add"
    LAO_Sub  -> "sub"
    LAO_And  -> "and"
    LAO_Nand -> "nand"
    LAO_Or   -> "or"
    LAO_Xor  -> "xor"
    LAO_Max  -> "max"
    LAO_Min  -> "min"
    LAO_Umax -> "umax"
    LAO_Umin -> "umin"
-- | Render an LLVM @atomicrmw@ instruction: the operation mnemonic,
-- the pointer operand, the value operand, and the ordering constraint.
ppAtomicRMW :: LlvmAtomicOp -> LlvmVar -> LlvmVar -> LlvmSyncOrdering -> SDoc
ppAtomicRMW op ptr val ord =
    let mnemonic = text "atomicrmw" <+> ppAtomicOp op
        pointer  = ppr ptr <> comma
    in mnemonic <+> pointer <+> ppr val <+> ppSyncOrdering ord
-- | Render an LLVM @cmpxchg@ instruction: the address, the expected old
-- value, the replacement value, and the success/failure orderings.
ppCmpXChg :: LlvmVar -> LlvmVar -> LlvmVar
          -> LlvmSyncOrdering -> LlvmSyncOrdering -> SDoc
ppCmpXChg ptr expected replacement ordSucc ordFail =
    let operands  = ppr ptr <> comma <+> ppr expected <> comma <+> ppr replacement
        orderings = ppSyncOrdering ordSucc <+> ppSyncOrdering ordFail
    in text "cmpxchg" <+> operands <+> orderings
-- XXX: On x86, vector types need to be 16-byte aligned for aligned access, but
-- we have no way of guaranteeing that this is true with GHC (we would need to
-- modify the layout of the stack and closures, change the storage manager,
......
......@@ -262,7 +262,7 @@ pLift LMVoid = error "Voids are unliftable"
pLift LMMetadata = error "Metadatas are unliftable"
pLift x = LMPointer x
-- | Lower a variable of 'LMPointer' type.
-- | Lift a variable to 'LMPointer' type.
pVarLift :: LlvmVar -> LlvmVar
pVarLift (LMGlobalVar s t l x a c) = LMGlobalVar s (pLift t) l x a c
pVarLift (LMLocalVar s t ) = LMLocalVar s (pLift t)
......
......@@ -15,7 +15,6 @@ import BlockId
import CodeGen.Platform ( activeStgRegs, callerSaves )
import CLabel
import Cmm
import CPrim
import PprCmm
import CmmUtils
import CmmSwitch
......@@ -226,16 +225,58 @@ genCall t@(PrimTarget (MO_Ctz w)) dsts args =
genCall t@(PrimTarget (MO_BSwap w)) dsts args =
genCallSimpleCast w t dsts args
genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do
(addrVar, stmts1, decls1) <- exprToVar addr
(nVar, stmts2, decls2) <- exprToVar n
let targetTy = widthToLlvmInt width
ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
(ptrVar, stmt3) <- doExpr (pLift targetTy) ptrExpr
dstVar <- getCmmReg (CmmLocal dst)
let op = case amop of
AMO_Add -> LAO_Add
AMO_Sub -> LAO_Sub
AMO_And -> LAO_And
AMO_Nand -> LAO_Nand
AMO_Or -> LAO_Or
AMO_Xor -> LAO_Xor
(retVar, stmt4) <- doExpr targetTy $ AtomicRMW op ptrVar nVar SyncSeqCst
let stmt5 = Store retVar dstVar
let stmts = stmts1 `appOL` stmts2 `snocOL`
stmt3 `snocOL` stmt4 `snocOL` stmt5
return (stmts, decls1++decls2)
genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = do
dstV <- getCmmReg (CmmLocal dst)
(v1, stmts, top) <- genLoad True addr (localRegType dst)
let stmt1 = Store v1 dstV
return (stmts `snocOL` stmt1, top)
-- TODO: implement these properly rather than calling to RTS functions.
-- genCall t@(PrimTarget (MO_AtomicWrite width)) [] [addr, val] = undefined
-- genCall t@(PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = undefined
-- genCall t@(PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = undefined
genCall (PrimTarget (MO_Cmpxchg _width)) [dst] [addr, old, new] = do
(addrVar, stmts1, decls1) <- exprToVar addr
(oldVar, stmts2, decls2) <- exprToVar old
(newVar, stmts3, decls3) <- exprToVar new
let targetTy = getVarType oldVar
ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
(ptrVar, stmt4) <- doExpr (pLift targetTy) ptrExpr
dstVar <- getCmmReg (CmmLocal dst)
(retVar, stmt5) <- doExpr (LMStructU [targetTy,i1])
$ CmpXChg ptrVar oldVar newVar SyncSeqCst SyncSeqCst
(retVar', stmt6) <- doExpr targetTy $ ExtractV retVar 0
let stmt7 = Store retVar' dstVar
stmts = stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL`
stmt4 `snocOL` stmt5 `snocOL` stmt6 `snocOL` stmt7
return (stmts, decls1 ++ decls2 ++ decls3)
genCall (PrimTarget (MO_AtomicWrite _width)) [] [addr, val] = do
(addrVar, stmts1, decls1) <- exprToVar addr
(valVar, stmts2, decls2) <- exprToVar val
let ptrTy = pLift $ getVarType valVar
ptrExpr = Cast LM_Inttoptr addrVar ptrTy
(ptrVar, stmt3) <- doExpr ptrTy ptrExpr
let stmts4 = unitOL $ Expr
$ AtomicRMW LAO_Xchg ptrVar valVar SyncSeqCst
stmts = stmts1 `appOL` stmts2 `snocOL` stmt3 `appOL` stmts4
return (stmts, decls1++decls2)
-- Handle memcpy function specifically since llvm's intrinsic version takes
-- some extra parameters.
......@@ -715,10 +756,9 @@ cmmPrimOpFunctions mop = do
MO_UF_Conv _ -> unsupported
MO_AtomicRead _ -> unsupported
MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop
MO_Cmpxchg w -> fsLit $ cmpxchgLabel w
MO_AtomicWrite w -> fsLit $ atomicWriteLabel w
MO_AtomicRMW _ _ -> unsupported
MO_AtomicWrite _ -> unsupported
MO_Cmpxchg _ -> unsupported
-- | Tail function calls
genJump :: CmmExpr -> [GlobalReg] -> LlvmM StmtData
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment