Commit a0d158fd authored by Ben Gamari's avatar Ben Gamari 🐢

Encode alignment in MO_Memcpy and friends

Summary:
Alignment needs to be a compile-time constant. Previously the code
generators had to jump through hoops to ensure this was the case as the
alignment was passed as a CmmExpr in the arguments list. Now we take
care of this up front.

This fixes #8131.
Authored-by: rwbarton's avatarReid Barton <rwbarton@gmail.com>
Dusted-off-by: Ben Gamari's avatarBen Gamari <ben@smart-cactus.org>

Tests for T8131

Test Plan: Validate

Reviewers: rwbarton, austin

Reviewed By: rwbarton, austin

Subscribers: bgamari, carter, thomie

Differential Revision: https://phabricator.haskell.org/D624

GHC Trac Issues: #8131
parents d46fdf25 681973c3
......@@ -21,6 +21,7 @@ module CmmMachOp
-- CallishMachOp
, CallishMachOp(..), callishMachOpHints
, pprCallishMachOp
, machOpMemcpyishAlign
-- Atomic read-modify-write
, AtomicMachOp(..)
......@@ -565,12 +566,12 @@ data CallishMachOp
-- would the majority of use cases in ghc anyways
-- Note that these three MachOps all take 1 extra parameter than the
-- standard C lib versions. The extra (last) parameter contains
-- alignment of the pointers. Used for optimisation in backends.
| MO_Memcpy
| MO_Memset
| MO_Memmove
-- These three MachOps are parameterised by the known alignment
-- of the destination and source (for memcpy/memmove) pointers.
-- This information may be used for optimisation in backends.
| MO_Memcpy Int
| MO_Memset Int
| MO_Memmove Int
| MO_PopCnt Width
| MO_Clz Width
......@@ -600,8 +601,16 @@ pprCallishMachOp mo = text (show mo)
callishMachOpHints :: CallishMachOp -> ([ForeignHint], [ForeignHint])
callishMachOpHints op = case op of
MO_Memcpy -> ([], [AddrHint,AddrHint,NoHint,NoHint])
MO_Memset -> ([], [AddrHint,NoHint,NoHint,NoHint])
MO_Memmove -> ([], [AddrHint,AddrHint,NoHint,NoHint])
_ -> ([],[])
MO_Memcpy _ -> ([], [AddrHint,AddrHint,NoHint])
MO_Memset _ -> ([], [AddrHint,NoHint,NoHint])
MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint])
_ -> ([],[])
-- empty lists indicate NoHint
-- | The alignment of a 'memcpy'-ish operation.
machOpMemcpyishAlign :: CallishMachOp -> Maybe Int
machOpMemcpyishAlign op = case op of
MO_Memcpy align -> Just align
MO_Memset align -> Just align
MO_Memmove align -> Just align
_ -> Nothing
......@@ -975,22 +975,38 @@ machOps = listToUFM $
( "i2f64", flip MO_SF_Conv W64 )
]
callishMachOps :: UniqFM ([CmmExpr] -> (CallishMachOp, [CmmExpr]))
callishMachOps = listToUFM $
map (\(x, y) -> (mkFastString x, y)) [
( "write_barrier", MO_WriteBarrier ),
( "memcpy", MO_Memcpy ),
( "memset", MO_Memset ),
( "memmove", MO_Memmove ),
( "write_barrier", (,) MO_WriteBarrier ),
( "memcpy", memcpyLikeTweakArgs MO_Memcpy ),
( "memset", memcpyLikeTweakArgs MO_Memset ),
( "memmove", memcpyLikeTweakArgs MO_Memmove ),
("prefetch0",MO_Prefetch_Data 0),
("prefetch1",MO_Prefetch_Data 1),
("prefetch2",MO_Prefetch_Data 2),
("prefetch3",MO_Prefetch_Data 3)
("prefetch0", (,) $ MO_Prefetch_Data 0),
("prefetch1", (,) $ MO_Prefetch_Data 1),
("prefetch2", (,) $ MO_Prefetch_Data 2),
("prefetch3", (,) $ MO_Prefetch_Data 3)
-- ToDo: the rest, maybe
-- edit: which rest?
-- also: how do we tell CMM Lint how to type check callish macops?
]
where
memcpyLikeTweakArgs :: (Int -> CallishMachOp) -> [CmmExpr] -> (CallishMachOp, [CmmExpr])
memcpyLikeTweakArgs op [] = pgmError "memcpy-like function requires at least one argument"
memcpyLikeTweakArgs op args@(_:_) =
-- Force alignment with result to ensure pprPgmError fires
align `seq` (op align, args')
where
args' = init args
align = case last args of
CmmLit (CmmInt alignInteger _) -> fromInteger alignInteger
e -> pprPgmError "Non-constant alignment in memcpy-like function:" (ppr e)
-- The alignment of memcpy-ish operations must be a
-- compile-time constant. We verify this here, passing it around
-- in the MO_* constructor. In order to do this, however, we
-- must intercept the arguments in primCall.
parseSafety :: String -> P Safety
parseSafety "safe" = return PlaySafe
......@@ -1207,10 +1223,11 @@ primCall
primCall results_code name args_code
= case lookupUFM callishMachOps name of
Nothing -> fail ("unknown primitive " ++ unpackFS name)
Just p -> return $ do
Just f -> return $ do
results <- sequence results_code
args <- sequence args_code
code (emitPrimCall (map fst results) p args)
let (p, args') = f args
code (emitPrimCall (map fst results) p args')
doStore :: CmmType -> CmmParse CmmExpr -> CmmParse CmmExpr -> CmmParse ()
doStore rep addr_code val_code
......
......@@ -238,13 +238,13 @@ pprStmt stmt =
hargs = zip args arg_hints
fn_call
-- The mem primops carry an extra alignment arg, must drop it.
-- The mem primops carry an extra alignment arg.
-- We could maybe emit an alignment directive using this info.
-- We also need to cast mem primops to prevent conflicts with GCC
-- builtins (see bug #5967).
| op `elem` [MO_Memcpy, MO_Memset, MO_Memmove]
| Just _align <- machOpMemcpyishAlign op
= (ptext (sLit ";EF_(") <> fn <> char ')' <> semi) $$
pprForeignCall fn cconv hresults (init hargs)
pprForeignCall fn cconv hresults hargs
| otherwise
= pprCall fn cconv hresults hargs
......@@ -745,9 +745,9 @@ pprCallishMachOp_for_C mop
MO_F32_Exp -> ptext (sLit "expf")
MO_F32_Sqrt -> ptext (sLit "sqrtf")
MO_WriteBarrier -> ptext (sLit "write_barrier")
MO_Memcpy -> ptext (sLit "memcpy")
MO_Memset -> ptext (sLit "memset")
MO_Memmove -> ptext (sLit "memmove")
MO_Memcpy _ -> ptext (sLit "memcpy")
MO_Memset _ -> ptext (sLit "memset")
MO_Memmove _ -> ptext (sLit "memmove")
(MO_BSwap w) -> ptext (sLit $ bSwapLabel w)
(MO_PopCnt w) -> ptext (sLit $ popCntLabel w)
(MO_Clz w) -> ptext (sLit $ clzLabel w)
......
......@@ -1644,8 +1644,7 @@ doCopyByteArrayOp = emitCopyByteArray copy
-- Copy data (we assume the arrays aren't overlapping since
-- they're of different types)
copy _src _dst dst_p src_p bytes =
do dflags <- getDynFlags
emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
emitMemcpyCall dst_p src_p bytes 1
-- | Takes a source 'MutableByteArray#', an offset in the source
-- array, a destination 'MutableByteArray#', an offset into the
......@@ -1662,8 +1661,8 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy
copy src dst dst_p src_p bytes = do
dflags <- getDynFlags
[moveCall, cpyCall] <- forkAlts [
getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
getCode $ emitMemmoveCall dst_p src_p bytes 1,
getCode $ emitMemcpyCall dst_p src_p bytes 1
]
emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
......@@ -1685,7 +1684,7 @@ doCopyByteArrayToAddrOp src src_off dst_p bytes = do
-- Use memcpy (we are allowed to assume the arrays aren't overlapping)
dflags <- getDynFlags
src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
emitMemcpyCall dst_p src_p bytes 1
-- | Takes a source 'MutableByteArray#', an offset in the source array, a
-- destination 'Addr#', and the number of bytes to copy. Copies the given
......@@ -1702,7 +1701,7 @@ doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
-- Use memcpy (we are allowed to assume the arrays aren't overlapping)
dflags <- getDynFlags
dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
emitMemcpyCall dst_p src_p bytes 1
-- ----------------------------------------------------------------------------
......@@ -1716,7 +1715,7 @@ doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
doSetByteArrayOp ba off len c
= do dflags <- getDynFlags
p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
emitMemsetCall p c len (mkIntExpr dflags 1)
emitMemsetCall p c len 1
-- ----------------------------------------------------------------------------
-- Allocating arrays
......@@ -1789,7 +1788,7 @@ doCopyArrayOp = emitCopyArray copy
copy _src _dst dst_p src_p bytes =
do dflags <- getDynFlags
emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
(mkIntExpr dflags (wORD_SIZE dflags))
(wORD_SIZE dflags)
-- | Takes a source 'MutableArray#', an offset in the source array, a
......@@ -1807,9 +1806,9 @@ doCopyMutableArrayOp = emitCopyArray copy
dflags <- getDynFlags
[moveCall, cpyCall] <- forkAlts [
getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
(mkIntExpr dflags (wORD_SIZE dflags)),
(wORD_SIZE dflags),
getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
(mkIntExpr dflags (wORD_SIZE dflags))
(wORD_SIZE dflags)
]
emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
......@@ -1856,7 +1855,7 @@ doCopySmallArrayOp = emitCopySmallArray copy
copy _src _dst dst_p src_p bytes =
do dflags <- getDynFlags
emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
(mkIntExpr dflags (wORD_SIZE dflags))
(wORD_SIZE dflags)
doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
......@@ -1870,9 +1869,9 @@ doCopySmallMutableArrayOp = emitCopySmallArray copy
dflags <- getDynFlags
[moveCall, cpyCall] <- forkAlts
[ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
(mkIntExpr dflags (wORD_SIZE dflags))
(wORD_SIZE dflags)
, getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
(mkIntExpr dflags (wORD_SIZE dflags))
(wORD_SIZE dflags)
]
emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
......@@ -1937,7 +1936,7 @@ emitCloneArray info_p res_r src src_off n = do
(mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
(mkIntExpr dflags (wORD_SIZE dflags))
(wORD_SIZE dflags)
emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
......@@ -1974,7 +1973,7 @@ emitCloneSmallArray info_p res_r src src_off n = do
(mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
(mkIntExpr dflags (wORD_SIZE dflags))
(wORD_SIZE dflags)
emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
......@@ -1993,7 +1992,7 @@ emitSetCards dst_start dst_cards_start n = do
emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
(mkIntExpr dflags 1)
(cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
(mkIntExpr dflags 1) -- no alignment (1 byte)
1 -- no alignment (1 byte)
-- Convert an element index to a card index
cardCmm :: DynFlags -> CmmExpr -> CmmExpr
......@@ -2101,29 +2100,29 @@ doCasByteArray res mba idx idx_ty old new = do
-- Helpers for emitting function calls
-- | Emit a call to @memcpy@.
emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
emitMemcpyCall dst src n align = do
emitPrimCall
[ {-no results-} ]
MO_Memcpy
[ dst, src, n, align ]
(MO_Memcpy align)
[ dst, src, n ]
-- | Emit a call to @memmove@.
emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
emitMemmoveCall dst src n align = do
emitPrimCall
[ {- no results -} ]
MO_Memmove
[ dst, src, n, align ]
(MO_Memmove align)
[ dst, src, n ]
-- | Emit a call to @memset@. The second argument must fit inside an
-- unsigned char.
emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
emitMemsetCall dst c n align = do
emitPrimCall
[ {- no results -} ]
MO_Memset
[ dst, c, n, align ]
(MO_Memset align)
[ dst, c, n ]
emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitBSwapCall res x width = do
......
......@@ -24,7 +24,8 @@ import Hoopl
import DynFlags
import FastString
import ForeignCall
import Outputable
import Outputable hiding (panic, pprPanic)
import qualified Outputable
import Platform
import OrdList
import UniqSupply
......@@ -230,16 +231,13 @@ genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = do
-- Handle memcpy function specifically since llvm's intrinsic version takes
-- some extra parameters.
genCall t@(PrimTarget op) [] args'
| op == MO_Memcpy ||
op == MO_Memset ||
op == MO_Memmove = do
genCall t@(PrimTarget op) [] args
| Just align <- machOpMemcpyishAlign op = do
dflags <- getDynFlags
let (args, alignVal) = splitAlignVal args'
isVolTy = [i1]
let isVolTy = [i1]
isVolVal = [mkIntLit i1 0]
argTy | op == MO_Memset = [i8Ptr, i8, llvmWord dflags, i32] ++ isVolTy
| otherwise = [i8Ptr, i8Ptr, llvmWord dflags, i32] ++ isVolTy
argTy | MO_Memset _ <- op = [i8Ptr, i8, llvmWord dflags, i32] ++ isVolTy
| otherwise = [i8Ptr, i8Ptr, llvmWord dflags, i32] ++ isVolTy
funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
CC_Ccc LMVoid FixedArgs (tysToParams argTy) Nothing
......@@ -250,21 +248,12 @@ genCall t@(PrimTarget op) [] args'
(argVars', stmts3) <- castVars $ zip argVars argTy
stmts4 <- getTrashStmts
let arguments = argVars' ++ (alignVal:isVolVal)
let alignVal = mkIntLit i32 align
arguments = argVars' ++ (alignVal:isVolVal)
call = Expr $ Call StdCall fptr arguments []
stmts = stmts1 `appOL` stmts2 `appOL` stmts3
`appOL` stmts4 `snocOL` call
return (stmts, top1 ++ top2)
where
splitAlignVal xs = (init xs, extractLit $ last xs)
-- Fix for trac #6158. Since LLVM 3.1, opt fails when given anything other
-- than a direct constant (i.e. 'i32 8') as the alignment argument for the
-- memcpy & co llvm intrinsic functions. So we handle this directly now.
extractLit (CmmLit (CmmInt i _)) = mkIntLit i32 i
extractLit _other = trace ("WARNING: Non constant alignment value given" ++
" for memcpy! Please report to GHC developers")
mkIntLit i32 0
-- Handle all other foreign calls and prim ops.
genCall target res args = do
......@@ -534,9 +523,9 @@ cmmPrimOpFunctions mop = do
MO_F64_Cosh -> fsLit "cosh"
MO_F64_Tanh -> fsLit "tanh"
MO_Memcpy -> fsLit $ "llvm.memcpy." ++ intrinTy1
MO_Memmove -> fsLit $ "llvm.memmove." ++ intrinTy1
MO_Memset -> fsLit $ "llvm.memset." ++ intrinTy2
MO_Memcpy _ -> fsLit $ "llvm.memcpy." ++ intrinTy1
MO_Memmove _ -> fsLit $ "llvm.memmove." ++ intrinTy1
MO_Memset _ -> fsLit $ "llvm.memset." ++ intrinTy2
(MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_BSwap w) -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
......@@ -1646,6 +1635,14 @@ toIWord :: Integral a => DynFlags -> a -> LlvmVar
toIWord dflags = mkIntLit (llvmWord dflags)
-- | Error functions
panic :: String -> a
panic s = Outputable.panic $ "LlvmCodeGen.CodeGen." ++ s
pprPanic :: String -> SDoc -> a
pprPanic s d = Outputable.pprPanic ("LlvmCodeGen.CodeGen." ++ s) d
-- | Returns TBAA meta data by unique
getTBAAMeta :: Unique -> LlvmM [MetaAnnot]
getTBAAMeta u = do
......
......@@ -923,7 +923,7 @@ genCCall' _ _ (PrimTarget MO_Touch) _ _
genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _
= return $ nilOL
genCCall' dflags gcp target dest_regs args0
genCCall' dflags gcp target dest_regs args
= ASSERT(not $ any (`elem` [II16]) $ map cmmTypeSize argReps)
-- we rely on argument promotion in the codeGen
do
......@@ -978,17 +978,7 @@ genCCall' dflags gcp target dest_regs args0
map (widthInBytes . typeWidth) argReps
GCPLinux -> roundTo 16 finalStack
-- need to remove alignment information
args | PrimTarget mop <- target,
(mop == MO_Memcpy ||
mop == MO_Memset ||
mop == MO_Memmove)
= init args0
| otherwise
= args0
argReps = map (cmmExprType dflags) args0
argReps = map (cmmExprType dflags) args
roundTo a x | x `mod` a == 0 = x
| otherwise = x + a - (x `mod` a)
......@@ -1173,9 +1163,9 @@ genCCall' dflags gcp target dest_regs args0
MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
MO_Memcpy -> (fsLit "memcpy", False)
MO_Memset -> (fsLit "memset", False)
MO_Memmove -> (fsLit "memmove", False)
MO_Memcpy _ -> (fsLit "memcpy", False)
MO_Memset _ -> (fsLit "memset", False)
MO_Memmove _ -> (fsLit "memmove", False)
MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
......
......@@ -404,19 +404,8 @@ genCCall (PrimTarget MO_WriteBarrier) _ _
genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
= return $ nilOL
genCCall target dest_regs args0
= do
-- need to remove alignment information
let args | PrimTarget mop <- target,
(mop == MO_Memcpy ||
mop == MO_Memset ||
mop == MO_Memmove)
= init args0
| otherwise
= args0
-- work out the arguments, and assign them to integer regs
genCCall target dest_regs args
= do -- work out the arguments, and assign them to integer regs
argcode_and_vregs <- mapM arg_to_int_vregs args
let (argcodes, vregss) = unzip argcode_and_vregs
let vregs = concat vregss
......@@ -653,9 +642,9 @@ outOfLineMachOp_table mop
MO_UF_Conv w -> fsLit $ word2FloatLabel w
MO_Memcpy -> fsLit "memcpy"
MO_Memset -> fsLit "memset"
MO_Memmove -> fsLit "memmove"
MO_Memcpy _ -> fsLit "memcpy"
MO_Memset _ -> fsLit "memset"
MO_Memmove _ -> fsLit "memmove"
MO_BSwap w -> fsLit $ bSwapLabel w
MO_PopCnt w -> fsLit $ popCntLabel w
......
......@@ -1645,10 +1645,8 @@ genCCall
-- Unroll memcpy calls if the source and destination pointers are at
-- least DWORD aligned and the number of bytes to copy isn't too
-- large. Otherwise, call C's memcpy.
genCCall dflags is32Bit (PrimTarget MO_Memcpy) _
[dst, src,
(CmmLit (CmmInt n _)),
(CmmLit (CmmInt align _))]
genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
[dst, src, CmmLit (CmmInt n _)]
| fromInteger insns <= maxInlineMemcpyInsns dflags && align .&. 3 == 0 = do
code_dst <- getAnyReg dst
dst_r <- getNewRegNat size
......@@ -1694,11 +1692,10 @@ genCCall dflags is32Bit (PrimTarget MO_Memcpy) _
dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
(ImmInteger (n - i))
genCCall dflags _ (PrimTarget MO_Memset) _
genCCall dflags _ (PrimTarget (MO_Memset align)) _
[dst,
CmmLit (CmmInt c _),
CmmLit (CmmInt n _),
CmmLit (CmmInt align _)]
CmmLit (CmmInt n _)]
| fromInteger insns <= maxInlineMemsetInsns dflags && align .&. 3 == 0 = do
code_dst <- getAnyReg dst
dst_r <- getNewRegNat size
......@@ -2507,19 +2504,13 @@ outOfLineCmmOp mop res args
let target = ForeignTarget targetExpr
(ForeignConvention CCallConv [] [] CmmMayReturn)
stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args')
stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args)
where
-- Assume we can call these functions directly, and that they're not in a dynamic library.
-- TODO: Why is this ok? Under linux this code will be in libm.so
-- Is is because they're really implemented as a primitive instruction by the assembler?? -- BL 2009/12/31
lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
args' = case mop of
MO_Memcpy -> init args
MO_Memset -> init args
MO_Memmove -> init args
_ -> args
fn = case mop of
MO_F32_Sqrt -> fsLit "sqrtf"
MO_F32_Sin -> fsLit "sinf"
......@@ -2553,9 +2544,9 @@ outOfLineCmmOp mop res args
MO_F64_Tanh -> fsLit "tanh"
MO_F64_Pwr -> fsLit "pow"
MO_Memcpy -> fsLit "memcpy"
MO_Memset -> fsLit "memset"
MO_Memmove -> fsLit "memmove"
MO_Memcpy _ -> fsLit "memcpy"
MO_Memset _ -> fsLit "memset"
MO_Memmove _ -> fsLit "memmove"
MO_PopCnt _ -> fsLit "popcnt"
MO_BSwap _ -> fsLit "bswap"
......
......@@ -855,7 +855,7 @@
src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
bytes = WDS(n); \
\
prim %memcpy(dst_p, src_p, bytes, WDS(1)); \
prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
\
dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
setCards(dst_cards_p, dst_off, n); \
......@@ -875,9 +875,9 @@
bytes = WDS(n); \
\
if ((src) == (dst)) { \
prim %memmove(dst_p, src_p, bytes, WDS(1)); \
prim %memmove(dst_p, src_p, bytes, SIZEOF_W); \
} else { \
prim %memcpy(dst_p, src_p, bytes, WDS(1)); \
prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
} \
\
dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
......
......@@ -186,7 +186,7 @@ stg_resizzeMutableByteArrayzh ( gcptr mba, W_ new_size )
// copy over old content
prim %memcpy(BYTE_ARR_CTS(new_mba), BYTE_ARR_CTS(mba),
StgArrWords_bytes(mba), WDS(1));
StgArrWords_bytes(mba), SIZEOF_W);
return (new_mba);
}
......@@ -438,7 +438,7 @@ stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
bytes = WDS(n);
prim %memcpy(dst_p, src_p, bytes, WDS(1));
prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
return ();
}
......@@ -453,9 +453,9 @@ stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n
src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
bytes = WDS(n);
if (src == dst) {
prim %memmove(dst_p, src_p, bytes, WDS(1));
prim %memmove(dst_p, src_p, bytes, SIZEOF_W);
} else {
prim %memcpy(dst_p, src_p, bytes, WDS(1));
prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
}
return ();
......
TOP=../../..
include $(TOP)/mk/boilerplate.mk
include $(TOP)/mk/test.mk
#include "Cmm.h"
testMemcpy (W_ dst, W_ src, W_ l, W_ sz)
{
prim %memcpy(dst, src, l, sz);
return ();
}
# Tests for code generator and CMM parser
test('T8131', cmm_src, compile_fail, [''])
......@@ -6,7 +6,7 @@ callMemcpy (W_ dst, W_ src)
W_ size;
W_ alig;
size = 16;
alig = 4;
#define alig 4
if (dst != 0) {
prim %memcpy(dst, src, size, alig);
}
......
......@@ -11,23 +11,24 @@ section "rodata" { memmoveErr : bits8[] "Memmove Error Occured\n"; }
memintrinTest (W_ dummy)
{
W_ size, src, dst, off, alignV, set;
W_ size, src, dst, off, set;
bits8 set8;
// Need two versions as memset takes a word for historical reasons
// Need two versions as memset takes a word for historical reasons
// but really its a bits8. We check that setting has ben done correctly
// at the bits8 level, so need bits8 version for checking.
set = 4;
set8 = 4::bits8;
size = 1024;
alignV = 4;
// Alignment must be constant expression
#define alignV 4
("ptr" src) = foreign "C" malloc(size);
("ptr" dst) = foreign "C" malloc(size);
// Test memset
prim %memset(src, set, size, alignV);
prim %memset(src, set, size, alignV);
// Check memset worked
off = 0;
......@@ -100,6 +101,7 @@ while3_end:
return (0);
}
#undef alignV
// ---------------------------------------------------------------------
// Tests for unrolling
......@@ -113,15 +115,14 @@ while3_end:
// has ben done correctly at the bits8 level, so need bits8 version
// for checking.
#define TEST_MEMSET(ALIGN,SIZE) \
W_ size, src, dst, off, alignV, set; \
W_ size, src, dst, off, set; \
bits8 set8; \
set = 4; \
set8 = 4::bits8; \
size = SIZE; \
alignV = ALIGN; \
("ptr" src) = foreign "C" malloc(size); \
("ptr" dst) = foreign "C" malloc(size); \
prim %memset(src, set, size, alignV); \
prim %memset(src, set, size, ALIGN); \
off = 0; \
loop: \
if (off == size) { \
......@@ -164,9 +165,8 @@ testMemset4_7 (W_ dummy) { TEST_MEMSET(4,7); }
testMemset4_8 (W_ dummy) { TEST_MEMSET(4,8); }
#define TEST_MEMCPY(ALIGN,SIZE) \
W_ size, src, dst, off, alignV; \
W_ size, src, dst, off; \
size = SIZE; \
alignV = ALIGN; \
("ptr" src) = foreign "C" malloc(size); \
("ptr" dst) = foreign "C" malloc(size); \
off = 0; \
......@@ -178,7 +178,7 @@ init: \
off = off + 1; \
goto init; \
init_end: \
prim %memcpy(dst, src, size, alignV); \
prim %memcpy(dst, src, size, ALIGN); \
off = 0; \
loop: \
if (off == size) { \
......
{-# LANGUAGE MagicHash, UnboxedTuples #-}
import GHC.Prim
import GHC.IO
main = IO $ \s ->
let (# s1, p0 #) = newByteArray# 10# s
(# s2, p #) = unsafeFreezeByteArray# p0 s1
(# s3, q #) = newByteArray# 10# s2
in (# copyByteArray# p 0# q 0# 10# s, () #)
......@@ -12,4 +12,4 @@ test('T5681', normal, compile, [''])
test('T6158', [reqlib('vector'), reqlib('primitive')], compile, ['-package vector -package primitive'])
test('T7571', cmm_src, compile, [''])
test('T7575', unless(wordsize(32), skip), compile, [''])
test('T8131', [cmm_src, expect_broken(8131)], compile, [''])
test('T8131b', [cmm_src], compile, [''])
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment