Commit d8abf85f authored by tibbe

Add more primops for atomic ops on byte arrays

Summary:
Add more primops for atomic ops on byte arrays

Adds the following primops:

 * atomicReadIntArray#
 * atomicWriteIntArray#
 * fetchSubIntArray#
 * fetchOrIntArray#
 * fetchXorIntArray#
 * fetchAndIntArray#
 * fetchNandIntArray#

Makes these pre-existing out-of-line primops inline:

 * fetchAddIntArray#
 * casIntArray#
parent a4a79b5a
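
For illustration, a minimal IO wrapper over one of these primops (a sketch only; the primop types are taken from primops.txt.pp, whose hunk is not shown here). Per the doAtomicRMW comment below, the result is the element's previous value.

{-# LANGUAGE MagicHash, UnboxedTuples #-}
import GHC.Exts
import GHC.IO (IO(..))

-- Atomically add n to the word at index ix, returning the previous value.
fetchAddInt :: MutableByteArray# RealWorld -> Int -> Int -> IO Int
fetchAddInt mba (I# ix) (I# n) = IO $ \s ->
  case fetchAddIntArray# mba ix n s of
    (# s', old #) -> (# s', I# old #)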
@@ -19,6 +19,9 @@ module CmmMachOp
-- CallishMachOp
, CallishMachOp(..), callishMachOpHints
, pprCallishMachOp
-- Atomic read-modify-write
, AtomicMachOp(..)
)
where
@@ -547,8 +550,24 @@ data CallishMachOp
| MO_PopCnt Width
| MO_BSwap Width
-- Atomic read-modify-write.
| MO_AtomicRMW Width AtomicMachOp
| MO_AtomicRead Width
| MO_AtomicWrite Width
| MO_Cmpxchg Width
deriving (Eq, Show)
-- | The operation to perform atomically.
data AtomicMachOp =
AMO_Add
| AMO_Sub
| AMO_And
| AMO_Nand
| AMO_Or
| AMO_Xor
deriving (Eq, Show)
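
For example, a word-sized fetch-and-add is described by pairing a width with an AtomicMachOp (an illustrative value, not part of the patch):

fetchAdd64 :: CallishMachOp
fetchAdd64 = MO_AtomicRMW W64 AMO_Add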
pprCallishMachOp :: CallishMachOp -> SDoc
pprCallishMachOp mo = text (show mo)
@@ -650,6 +650,10 @@ data AbsMem
-- perhaps we ought to have a special annotation for calls that can
-- modify heap/stack memory. For now we just use the conservative
-- definition here.
--
-- Some CallishMachOps imply a memory barrier (e.g. MO_AtomicRMW), and
-- therefore we should never float any memory operations across one of
-- these calls.
bothMems :: AbsMem -> AbsMem -> AbsMem
@@ -753,6 +753,10 @@ pprCallishMachOp_for_C mop
MO_Memmove -> ptext (sLit "memmove")
(MO_BSwap w) -> ptext (sLit $ bSwapLabel w)
(MO_PopCnt w) -> ptext (sLit $ popCntLabel w)
(MO_AtomicRMW w amop) -> ptext (sLit $ atomicRMWLabel w amop)
(MO_Cmpxchg w) -> ptext (sLit $ cmpxchgLabel w)
(MO_AtomicRead w) -> ptext (sLit $ atomicReadLabel w)
(MO_AtomicWrite w) -> ptext (sLit $ atomicWriteLabel w)
(MO_UF_Conv w) -> ptext (sLit $ word2FloatLabel w)
MO_S_QuotRem {} -> unsupported
@@ -769,6 +769,25 @@ emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res
emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args
-- Atomic read-modify-write
emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
doAtomicRMW res AMO_Add mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
doAtomicRMW res AMO_And mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
doAtomicRMW res AMO_Or mba ix (bWord dflags) n
emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
doAtomicReadByteArray res mba ix (bWord dflags)
emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
doAtomicWriteByteArray mba ix (bWord dflags) val
emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
doCasByteArray res mba ix (bWord dflags) old new
-- The rest just translate straightforwardly
emitPrimOp dflags [res] op [arg]
@@ -1932,6 +1951,81 @@ doWriteSmallPtrArrayOp addr idx val = do
mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
------------------------------------------------------------------------------
-- Atomic read-modify-write
-- | Emit an atomic modification to a byte array element. The result
-- reg contains the previous value of the element. Implies a full
-- memory barrier.
doAtomicRMW :: LocalReg -- ^ Result reg
-> AtomicMachOp -- ^ Atomic op (e.g. add)
-> CmmExpr -- ^ MutableByteArray#
-> CmmExpr -- ^ Index
-> CmmType -- ^ Type of element by which we are indexing
-> CmmExpr -- ^ Op argument (e.g. amount to add)
-> FCode ()
doAtomicRMW res amop mba idx idx_ty n = do
dflags <- getDynFlags
let width = typeWidth idx_ty
addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
width mba idx
emitPrimCall
[ res ]
(MO_AtomicRMW width amop)
[ addr, n ]
-- | Emit an atomic read from a byte array that acts as a memory barrier.
doAtomicReadByteArray
:: LocalReg -- ^ Result reg
-> CmmExpr -- ^ MutableByteArray#
-> CmmExpr -- ^ Index
-> CmmType -- ^ Type of element by which we are indexing
-> FCode ()
doAtomicReadByteArray res mba idx idx_ty = do
dflags <- getDynFlags
let width = typeWidth idx_ty
addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
width mba idx
emitPrimCall
[ res ]
(MO_AtomicRead width)
[ addr ]
-- | Emit an atomic write to a byte array that acts as a memory barrier.
doAtomicWriteByteArray
:: CmmExpr -- ^ MutableByteArray#
-> CmmExpr -- ^ Index
-> CmmType -- ^ Type of element by which we are indexing
-> CmmExpr -- ^ Value to write
-> FCode ()
doAtomicWriteByteArray mba idx idx_ty val = do
dflags <- getDynFlags
let width = typeWidth idx_ty
addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
width mba idx
emitPrimCall
[ {- no results -} ]
(MO_AtomicWrite width)
[ addr, val ]
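-- | Emit an atomic compare-and-swap on a byte array element. The result
-- reg contains the value found in memory, which equals the old value
-- iff the swap succeeded. Implies a full memory barrier.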
doCasByteArray
:: LocalReg -- ^ Result reg
-> CmmExpr -- ^ MutableByteArray#
-> CmmExpr -- ^ Index
-> CmmType -- ^ Type of element by which we are indexing
-> CmmExpr -- ^ Old value
-> CmmExpr -- ^ New value
-> FCode ()
doCasByteArray res mba idx idx_ty old new = do
dflags <- getDynFlags
let width = typeWidth idx_ty
addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
width mba idx
emitPrimCall
[ res ]
(MO_Cmpxchg width)
[ addr, old, new ]
------------------------------------------------------------------------------
-- Helpers for emitting function calls
@@ -65,6 +65,8 @@ data LlvmFunction = LlvmFunction {
type LlvmFunctions = [LlvmFunction]
type SingleThreaded = Bool
-- | LLVM ordering types for synchronization purposes (introduced in
-- LLVM 3.0). See the LLVM documentation for a fuller description.
data LlvmSyncOrdering
@@ -223,6 +225,11 @@ data LlvmExpression
-}
| Load LlvmVar
{- |
Atomic load of the value at location ptr
-}
| ALoad LlvmSyncOrdering SingleThreaded LlvmVar
{- |
Navigate in a structure, selecting elements
* inbound: Is the pointer inbounds? (computed pointer doesn't overflow)
@@ -239,6 +239,7 @@ ppLlvmExpression expr
Insert vec elt idx -> ppInsert vec elt idx
GetElemPtr inb ptr indexes -> ppGetElementPtr inb ptr indexes
Load ptr -> ppLoad ptr
ALoad ord st ptr -> ppALoad ord st ptr
Malloc tp amount -> ppMalloc tp amount
Phi tp predecessors -> ppPhi tp predecessors
Asm asm c ty v se sk -> ppAsm asm c ty v se sk
@@ -327,13 +328,18 @@ ppSyncOrdering SyncSeqCst = text "seq_cst"
-- of specifying alignment.
ppLoad :: LlvmVar -> SDoc
ppLoad var
| isVecPtrVar var = text "load" <+> ppr var <>
comma <+> text "align 1"
| otherwise = text "load" <+> ppr var
ppLoad var = text "load" <+> ppr var <> align
where
isVecPtrVar :: LlvmVar -> Bool
isVecPtrVar = isVector . pLower . getVarType
align | isVector . pLower . getVarType $ var = text ", align 1"
| otherwise = empty
ppALoad :: LlvmSyncOrdering -> SingleThreaded -> LlvmVar -> SDoc
ppALoad ord st var = sdocWithDynFlags $ \dflags ->
let alignment = (llvmWidthInBits dflags $ getVarType var) `quot` 8
align = text ", align" <+> ppr alignment
sThreaded | st = text " singlethread"
| otherwise = empty
in text "load atomic" <+> ppr var <> sThreaded <+> ppSyncOrdering ord <> align
ppStore :: LlvmVar -> LlvmVar -> SDoc
ppStore val dst
@@ -15,6 +15,7 @@ import BlockId
import CodeGen.Platform ( activeStgRegs, callerSaves )
import CLabel
import Cmm
import CPrim
import PprCmm
import CmmUtils
import Hoopl
@@ -32,6 +33,7 @@ import Unique
import Data.List ( nub )
import Data.Maybe ( catMaybes )
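-- | Whether a load should be emitted as an atomic (see 'genLoad').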
type Atomic = Bool
type LlvmStatements = OrdList LlvmStatement
-- -----------------------------------------------------------------------------
@@ -228,6 +230,17 @@ genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
genCall t@(PrimTarget (MO_BSwap w)) dsts args =
genCallSimpleCast w t dsts args
genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = do
dstV <- getCmmReg (CmmLocal dst)
(v1, stmts, top) <- genLoad True addr (localRegType dst)
let stmt1 = Store v1 dstV
return (stmts `snocOL` stmt1, top)
-- TODO: implement these properly rather than calling out to RTS functions.
-- genCall t@(PrimTarget (MO_AtomicWrite width)) [] [addr, val] = undefined
-- genCall t@(PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = undefined
-- genCall t@(PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = undefined
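
A native MO_AtomicWrite might take the shape sketched below. This is only a sketch: it assumes a hypothetical AStore statement (Store extended with the ordering flags of ALoad), which this patch does not add.

genCall (PrimTarget (MO_AtomicWrite _width)) [] [addr, val] = do
  (vval, stmts1, top1) <- exprToVar val
  (iptr, stmts2, top2) <- exprToVar addr
  let pty = pLift (getVarType vval)
  (vptr, cast) <- doExpr pty $ Cast LM_Inttoptr iptr pty
  let store = AStore SyncSeqCst False vval vptr  -- hypothetical constructor
  return (stmts1 `appOL` stmts2 `snocOL` cast `snocOL` store, top1 ++ top2)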
-- Handle memcpy function specifically since llvm's intrinsic version takes
-- some extra parameters.
genCall t@(PrimTarget op) [] args'
@@ -548,7 +561,6 @@ cmmPrimOpFunctions mop = do
(MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
@@ -558,6 +570,12 @@ cmmPrimOpFunctions mop = do
MO_Touch -> unsupported
MO_UF_Conv _ -> unsupported
MO_AtomicRead _ -> unsupported
MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop
MO_Cmpxchg w -> fsLit $ cmpxchgLabel w
MO_AtomicWrite w -> fsLit $ atomicWriteLabel w
-- | Tail function calls
genJump :: CmmExpr -> [GlobalReg] -> LlvmM StmtData
@@ -849,7 +867,7 @@ exprToVarOpt opt e = case e of
-> genLit opt lit
CmmLoad e' ty
-> genLoad e' ty
-> genLoad False e' ty
-- Cmmreg in expression is the value, so must load. If you want actual
-- reg pointer, call getCmmReg directly.
@@ -1268,41 +1286,41 @@ genMachOp_slow _ _ _ = panic "genMachOp: More then 2 expressions in MachOp!"
-- | Handle CmmLoad expression.
genLoad :: CmmExpr -> CmmType -> LlvmM ExprData
genLoad :: Atomic -> CmmExpr -> CmmType -> LlvmM ExprData
-- First we try to detect a few common cases and produce better code for
-- these than the default case. We are mostly trying to detect Cmm code
-- like I32[Sp + n] and use 'getelementptr' operations instead of the
-- generic case that uses casts and pointer arithmetic.
genLoad e@(CmmReg (CmmGlobal r)) ty
= genLoad_fast e r 0 ty
genLoad atomic e@(CmmReg (CmmGlobal r)) ty
= genLoad_fast atomic e r 0 ty
genLoad e@(CmmRegOff (CmmGlobal r) n) ty
= genLoad_fast e r n ty
genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty
= genLoad_fast atomic e r n ty
genLoad e@(CmmMachOp (MO_Add _) [
genLoad atomic e@(CmmMachOp (MO_Add _) [
(CmmReg (CmmGlobal r)),
(CmmLit (CmmInt n _))])
ty
= genLoad_fast e r (fromInteger n) ty
= genLoad_fast atomic e r (fromInteger n) ty
genLoad e@(CmmMachOp (MO_Sub _) [
genLoad atomic e@(CmmMachOp (MO_Sub _) [
(CmmReg (CmmGlobal r)),
(CmmLit (CmmInt n _))])
ty
= genLoad_fast e r (negate $ fromInteger n) ty
= genLoad_fast atomic e r (negate $ fromInteger n) ty
-- generic case
genLoad e ty
genLoad atomic e ty
= do other <- getTBAAMeta otherN
genLoad_slow e ty other
genLoad_slow atomic e ty other
-- | Handle CmmLoad expression.
-- This is a special case for loading from a global register pointer
-- offset such as I32[Sp+8].
genLoad_fast :: CmmExpr -> GlobalReg -> Int -> CmmType
-> LlvmM ExprData
genLoad_fast e r n ty = do
genLoad_fast :: Atomic -> CmmExpr -> GlobalReg -> Int -> CmmType
-> LlvmM ExprData
genLoad_fast atomic e r n ty = do
dflags <- getDynFlags
(gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
meta <- getTBAARegMeta r
@@ -1315,7 +1333,7 @@ genLoad_fast e r n ty = do
case grt == ty' of
-- we're fine
True -> do
(var, s3) <- doExpr ty' (MExpr meta $ Load ptr)
(var, s3) <- doExpr ty' (MExpr meta $ loadInstr ptr)
return (var, s1 `snocOL` s2 `snocOL` s3,
[])
@@ -1323,32 +1341,34 @@
False -> do
let pty = pLift ty'
(ptr', s3) <- doExpr pty $ Cast LM_Bitcast ptr pty
(var, s4) <- doExpr ty' (MExpr meta $ Load ptr')
(var, s4) <- doExpr ty' (MExpr meta $ loadInstr ptr')
return (var, s1 `snocOL` s2 `snocOL` s3
`snocOL` s4, [])
-- If it's a bit type then we use the slow method since
-- we can't avoid casting anyway.
False -> genLoad_slow e ty meta
False -> genLoad_slow atomic e ty meta
where
loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
| otherwise = Load ptr
-- | Handle Cmm load expression.
-- Generic case. Uses casts and pointer arithmetic if needed.
genLoad_slow :: CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
genLoad_slow e ty meta = do
genLoad_slow :: Atomic -> CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
genLoad_slow atomic e ty meta = do
(iptr, stmts, tops) <- exprToVar e
dflags <- getDynFlags
case getVarType iptr of
LMPointer _ -> do
(dvar, load) <- doExpr (cmmToLlvmType ty)
(MExpr meta $ Load iptr)
(MExpr meta $ loadInstr iptr)
return (dvar, stmts `snocOL` load, tops)
i@(LMInt _) | i == llvmWord dflags -> do
let pty = LMPointer $ cmmToLlvmType ty
(ptr, cast) <- doExpr pty $ Cast LM_Inttoptr iptr pty
(dvar, load) <- doExpr (cmmToLlvmType ty)
(MExpr meta $ Load ptr)
(MExpr meta $ loadInstr ptr)
return (dvar, stmts `snocOL` cast `snocOL` load, tops)
other -> do dflags <- getDynFlags
@@ -1357,6 +1377,9 @@ genLoad_slow e ty meta = do
"Size of Ptr: " ++ show (llvmPtrBits dflags) ++
", Size of var: " ++ show (llvmWidthInBits dflags other) ++
", Var: " ++ showSDoc dflags (ppr iptr)))
where
loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
| otherwise = Load ptr
-- | Handle CmmReg expression. This will return a pointer to the stack
-- | Generating C symbol names emitted by the compiler.
module CPrim
( popCntLabel
( atomicReadLabel
, atomicWriteLabel
, atomicRMWLabel
, cmpxchgLabel
, popCntLabel
, bSwapLabel
, word2FloatLabel
) where
import CmmType
import CmmMachOp
import Outputable
popCntLabel :: Width -> String
@@ -31,3 +36,46 @@ word2FloatLabel w = "hs_word2float" ++ pprWidth w
pprWidth W32 = "32"
pprWidth W64 = "64"
pprWidth w = pprPanic "word2FloatLabel: Unsupported word width " (ppr w)
atomicRMWLabel :: Width -> AtomicMachOp -> String
atomicRMWLabel w amop = "hs_atomic_" ++ pprFunName amop ++ pprWidth w
where
pprWidth W8 = "8"
pprWidth W16 = "16"
pprWidth W32 = "32"
pprWidth W64 = "64"
pprWidth w = pprPanic "atomicRMWLabel: Unsupported word width " (ppr w)
pprFunName AMO_Add = "add"
pprFunName AMO_Sub = "sub"
pprFunName AMO_And = "and"
pprFunName AMO_Nand = "nand"
pprFunName AMO_Or = "or"
pprFunName AMO_Xor = "xor"
cmpxchgLabel :: Width -> String
cmpxchgLabel w = "hs_cmpxchg" ++ pprWidth w
where
pprWidth W8 = "8"
pprWidth W16 = "16"
pprWidth W32 = "32"
pprWidth W64 = "64"
pprWidth w = pprPanic "cmpxchgLabel: Unsupported word width " (ppr w)
atomicReadLabel :: Width -> String
atomicReadLabel w = "hs_atomicread" ++ pprWidth w
where
pprWidth W8 = "8"
pprWidth W16 = "16"
pprWidth W32 = "32"
pprWidth W64 = "64"
pprWidth w = pprPanic "atomicReadLabel: Unsupported word width " (ppr w)
atomicWriteLabel :: Width -> String
atomicWriteLabel w = "hs_atomicwrite" ++ pprWidth w
where
pprWidth W8 = "8"
pprWidth W16 = "16"
pprWidth W32 = "32"
pprWidth W64 = "64"
pprWidth w = pprPanic "atomicWriteLabel: Unsupported word width " (ppr w)
@@ -1160,6 +1160,10 @@ genCCall' dflags gcp target dest_regs args0
MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False)
MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False)
MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False)
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
@@ -654,6 +654,10 @@ outOfLineMachOp_table mop
MO_BSwap w -> fsLit $ bSwapLabel w
MO_PopCnt w -> fsLit $ popCntLabel w
MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop
MO_Cmpxchg w -> fsLit $ cmpxchgLabel w
MO_AtomicRead w -> fsLit $ atomicReadLabel w
MO_AtomicWrite w -> fsLit $ atomicWriteLabel w
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
@@ -1761,6 +1761,93 @@ genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do
where
lbl = mkCmmCodeLabel primPackageId (fsLit (word2FloatLabel width))
genCCall dflags _ (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do
Amode amode addr_code <- getAmode addr
arg <- getNewRegNat size
arg_code <- getAnyReg n
use_sse2 <- sse2Enabled
let platform = targetPlatform dflags
dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
code <- op_code dst_r arg amode
return $ addr_code `appOL` arg_code arg `appOL` code
where
-- Code for the operation
op_code :: Reg -- ^ The destination reg
-> Reg -- ^ Register containing argument
-> AddrMode -- ^ Address of location to mutate
-> NatM (OrdList Instr)
op_code dst_r arg amode = case amop of
-- In the common case where dst_r is a virtual register the
-- final move should go away, because it's the last use of arg
-- and the first use of dst_r.
AMO_Add -> return $ toOL [ LOCK
, XADD size (OpReg arg) (OpAddr amode)
, MOV size (OpReg arg) (OpReg dst_r)
]
AMO_Sub -> return $ toOL [ NEGI size (OpReg arg)
, LOCK
, XADD size (OpReg arg) (OpAddr amode)
, MOV size (OpReg arg) (OpReg dst_r)
]
AMO_And -> cmpxchg_code (\ src dst -> unitOL $ AND size src dst)
AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND size src dst
, NOT size dst
])
AMO_Or -> cmpxchg_code (\ src dst -> unitOL $ OR size src dst)
AMO_Xor -> cmpxchg_code (\ src dst -> unitOL $ XOR size src dst)
where
-- Simulate an operation that lacks a dedicated instruction by
-- retrying cmpxchg until it succeeds.
cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
-> NatM (OrdList Instr)
cmpxchg_code instrs = do
lbl <- getBlockIdNat
tmp <- getNewRegNat size
return $ toOL
[ MOV size (OpAddr amode) (OpReg eax)
, JXX ALWAYS lbl
, NEWBLOCK lbl
-- Keep old value so we can return it:
, MOV size (OpReg eax) (OpReg dst_r)
, MOV size (OpReg eax) (OpReg tmp)
]
`appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
[ LOCK
, CMPXCHG size (OpReg tmp) (OpAddr amode)
, JXX NE lbl
]
size = intSize width
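
The code above is the standard compare-and-swap retry loop. As a semantic model in plain Haskell (a sketch, not compiler code; the IORef-based cas merely stands in for LOCK CMPXCHG):

import Data.IORef

-- CAS stand-in: succeeds iff the location still holds the expected value.
cas :: Eq a => IORef a -> a -> a -> IO Bool
cas ref expected new = atomicModifyIORef' ref $ \cur ->
  if cur == expected then (new, True) else (cur, False)

-- Read, compute, try to swap; if another thread raced us, retry.
-- Returns the previous value, matching the MOV into dst_r above.
fetchModify :: IORef Int -> (Int -> Int) -> IO Int
fetchModify ref f = do
  old <- readIORef ref
  ok  <- cas ref old (f old)
  if ok then pure old else fetchModify ref f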
genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] = do
load_code <- intLoadCode (MOV (intSize width)) addr
let platform = targetPlatform dflags
use_sse2 <- sse2Enabled
return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst)))
genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
assignMem_IntCode (intSize width) addr val
genCCall dflags _ (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = do
Amode amode addr_code <- getAmode addr
newval <- getNewRegNat size
newval_code <- getAnyReg new
oldval <- getNewRegNat size
oldval_code <- getAnyReg old
use_sse2 <- sse2Enabled
let platform = targetPlatform dflags
dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
code = toOL
[ MOV size (OpReg oldval) (OpReg eax)
, LOCK
, CMPXCHG size (OpReg newval) (OpAddr amode)
, MOV size (OpReg eax) (OpReg dst_r)
]
return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
`appOL` code
where
size = intSize width
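
Mirroring the fetch-add wrapper sketched under the commit message, the casIntArray# primop that this code implements might be wrapped as follows (again assuming the primop type from primops.txt.pp):

{-# LANGUAGE MagicHash, UnboxedTuples #-}
import GHC.Exts
import GHC.IO (IO(..))

-- Returns the value found in memory; it equals old iff the swap succeeded.
casInt :: MutableByteArray# RealWorld -> Int -> Int -> Int -> IO Int
casInt mba (I# ix) (I# old) (I# new) = IO $ \s ->
  case casIntArray# mba ix old new s of
    (# s', prev #) -> (# s', I# prev #)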
genCCall _ is32Bit target dest_regs args
| is32Bit = genCCall32 target dest_regs args
| otherwise = genCCall64 target dest_regs args
@@ -2385,6 +2472,11 @@ outOfLineCmmOp mop res args
MO_PopCnt _ -> fsLit "popcnt"
MO_BSwap _ -> fsLit "bswap"
MO_AtomicRMW _ _ -> fsLit "atomicrmw"
MO_AtomicRead _ -> fsLit "atomicread"
MO_AtomicWrite _ -> fsLit "atomicwrite"
MO_Cmpxchg _ -> fsLit "cmpxchg"
MO_UF_Conv _ -> unsupported
MO_S_QuotRem {} -> unsupported
@@ -327,6 +327,10 @@ data Instr
| PREFETCH PrefetchVariant Size Operand -- prefetch Variant, addr size, address to prefetch
-- variant can be NTA, Lvl0, Lvl1, or Lvl2
| LOCK -- lock prefix
| XADD Size Operand Operand -- src (r), dst (r/m)
| CMPXCHG Size Operand Operand -- src (r), dst (r/m), eax implicit
data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2
@@ -337,6 +341,8 @@ data Operand
-- | Returns which registers are read and written as a (read, written)
-- pair.
x86_regUsageOfInstr :: Platform -> Instr -> RegUsage
x86_regUsageOfInstr platform instr
= case instr of
@@ -428,10 +434,21 @@ x86_regUsageOfInstr platform instr
-- note: might be a better way to do this
PREFETCH _ _ src -> mkRU (use_R src []) []
LOCK -> noUsage
XADD _ src dst -> usageMM src dst
CMPXCHG _ src dst -> usageRMM src dst (OpReg eax)
_other -> panic "regUsage: unrecognised instr"