diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index d812905594acccc3475f7bec31c536658f8c6170..563f6dcc4a5ae495e2f32fe16e8d1f6a9d1b0c4e 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -811,13 +811,16 @@ callishPrimOpSupported dflags op WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags)) | otherwise -> Right (genericWordQuotRem2Op dflags) - WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags)) + WordAdd2Op | (ncg && x86ish) + || llvm -> Left (MO_Add2 (wordWidth dflags)) | otherwise -> Right genericWordAdd2Op - IntAddCOp | ncg && x86ish -> Left (MO_AddIntC (wordWidth dflags)) + IntAddCOp | (ncg && x86ish) + || llvm -> Left (MO_AddIntC (wordWidth dflags)) | otherwise -> Right genericIntAddCOp - IntSubCOp | ncg && x86ish -> Left (MO_SubIntC (wordWidth dflags)) + IntSubCOp | (ncg && x86ish) + || llvm -> Left (MO_SubIntC (wordWidth dflags)) | otherwise -> Right genericIntSubCOp WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags)) @@ -828,7 +831,9 @@ callishPrimOpSupported dflags op ncg = case hscTarget dflags of HscAsm -> True _ -> False - + llvm = case hscTarget dflags of + HscLlvm -> True + _ -> False x86ish = case platformArch (targetPlatform dflags) of ArchX86 -> True ArchX86_64 -> True diff --git a/compiler/llvmGen/Llvm/AbsSyn.hs b/compiler/llvmGen/Llvm/AbsSyn.hs index b0eae2cbfe97ab5421ac0ca26c5503c7272043c1..8a53df00fe19ce7565f20781287ff10d875b8c52 100644 --- a/compiler/llvmGen/Llvm/AbsSyn.hs +++ b/compiler/llvmGen/Llvm/AbsSyn.hs @@ -208,6 +208,14 @@ data LlvmExpression -} | Extract LlvmVar LlvmVar + {- | + Extract a scalar element from a structure + * val: The structure + * idx: The index of the scalar within the structure + Corresponds to "extractvalue" instruction. + -} + | ExtractV LlvmVar Int + {- | Insert a scalar element into a vector * val: The source vector diff --git a/compiler/llvmGen/Llvm/PpLlvm.hs b/compiler/llvmGen/Llvm/PpLlvm.hs index 0b3deac7648aef9cc6dd23863cf645cea152d385..db9ef1fccfd85082ccd7db91b8d5b96be3e20ed2 100644 --- a/compiler/llvmGen/Llvm/PpLlvm.hs +++ b/compiler/llvmGen/Llvm/PpLlvm.hs @@ -239,6 +239,7 @@ ppLlvmExpression expr Cast op from to -> ppCast op from to Compare op left right -> ppCmpOp op left right Extract vec idx -> ppExtract vec idx + ExtractV struct idx -> ppExtractV struct idx Insert vec elt idx -> ppInsert vec elt idx GetElemPtr inb ptr indexes -> ppGetElementPtr inb ptr indexes Load ptr -> ppLoad ptr @@ -430,6 +431,12 @@ ppExtract vec idx = <+> ppr (getVarType vec) <+> ppName vec <> comma <+> ppr idx +ppExtractV :: LlvmVar -> Int -> SDoc +ppExtractV struct idx = + text "extractvalue" + <+> ppr (getVarType struct) <+> ppName struct <> comma + <+> ppr idx + ppInsert :: LlvmVar -> LlvmVar -> LlvmVar -> SDoc ppInsert vec elt idx = text "insertelement" diff --git a/compiler/llvmGen/Llvm/Types.hs b/compiler/llvmGen/Llvm/Types.hs index a9d81a1828838bfcb431a0cb02c252d53b5c3e50..b3b173096b0fd470152ed68ff3247caf564b4141 100644 --- a/compiler/llvmGen/Llvm/Types.hs +++ b/compiler/llvmGen/Llvm/Types.hs @@ -50,7 +50,8 @@ data LlvmType | LMVector Int LlvmType -- ^ A vector of 'LlvmType' | LMLabel -- ^ A 'LlvmVar' can represent a label (address) | LMVoid -- ^ Void type - | LMStruct [LlvmType] -- ^ Structure type + | LMStruct [LlvmType] -- ^ Packed structure type + | LMStructU [LlvmType] -- ^ Unpacked structure type | LMAlias LlvmAlias -- ^ A type alias | LMMetadata -- ^ LLVM Metadata @@ -70,6 +71,7 @@ instance Outputable LlvmType where ppr (LMLabel ) = text "label" ppr (LMVoid ) = text "void" ppr (LMStruct tys ) = text "<{" <> ppCommaJoin tys <> text "}>" + ppr (LMStructU tys ) = text "{" <> ppCommaJoin tys <> text "}" ppr (LMMetadata ) = text "metadata" ppr (LMFunction (LlvmFunctionDecl _ _ _ r varg p _)) @@ -326,6 +328,16 @@ llvmWidthInBits dflags (LMVector n ty) = n * llvmWidthInBits dflags ty llvmWidthInBits _ LMLabel = 0 llvmWidthInBits _ LMVoid = 0 llvmWidthInBits dflags (LMStruct tys) = sum $ map (llvmWidthInBits dflags) tys +llvmWidthInBits _ (LMStructU _) = + -- It's not trivial to calculate the bit width of the unpacked structs, + -- since they will be aligned depending on the specified datalayout ( + -- http://llvm.org/docs/LangRef.html#data-layout ). One way we could support + -- this could be to make the LlvmCodeGen.Ppr.moduleLayout be a data type + -- that exposes the alignment information. However, currently the only place + -- we use unpacked structs is LLVM intrinsics that return them (e.g., + -- llvm.sadd.with.overflow.*), so we don't actually need to compute their + -- bit width. + panic "llvmWidthInBits: not implemented for LMStructU" llvmWidthInBits _ (LMFunction _) = 0 llvmWidthInBits dflags (LMAlias (_,t)) = llvmWidthInBits dflags t llvmWidthInBits _ LMMetadata = panic "llvmWidthInBits: Meta-data has no runtime representation!" diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index ffe9d619f6c068c46b89b123206dd36af362ae23..15350bca7dcbf5f90e29e6fea8cdc2864e2bc3b0 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -30,6 +30,7 @@ import Platform import OrdList import UniqSupply import Unique +import Util import Data.List ( nub ) import Data.Maybe ( catMaybes ) @@ -255,6 +256,20 @@ genCall t@(PrimTarget op) [] args `appOL` stmts4 `snocOL` call return (stmts, top1 ++ top2) +-- Handle the MO_{Add,Sub}IntC separately. LLVM versions return a record from +-- which we need to extract the actual values. +genCall t@(PrimTarget (MO_AddIntC w)) [dstV, dstO] [lhs, rhs] = + genCallWithOverflow t w [dstV, dstO] [lhs, rhs] +genCall t@(PrimTarget (MO_SubIntC w)) [dstV, dstO] [lhs, rhs] = + genCallWithOverflow t w [dstV, dstO] [lhs, rhs] + +-- Similar to MO_{Add,Sub}IntC, but MO_Add2 expects the first element of the +-- return tuple to be the overflow bit and the second element to contain the +-- actual result of the addition. So we still use genCallWithOverflow but swap +-- the return registers. +genCall t@(PrimTarget (MO_Add2 w)) [dstO, dstV] [lhs, rhs] = + genCallWithOverflow t w [dstV, dstO] [lhs, rhs] + -- Handle all other foreign calls and prim ops. genCall target res args = do @@ -360,6 +375,68 @@ genCall target res args = do return (allStmts `snocOL` s2 `snocOL` s3 `appOL` retStmt, top1 ++ top2) +-- | Generate a call to an LLVM intrinsic that performs arithmetic operation +-- with overflow bit (i.e., returns a struct containing the actual result of the +-- operation and an overflow bit). This function will also extract the overflow +-- bit and zero-extend it (all the corresponding Cmm PrimOps represent the +-- overflow "bit" as a usual Int# or Word#). +genCallWithOverflow + :: ForeignTarget -> Width -> [CmmFormal] -> [CmmActual] -> LlvmM StmtData +genCallWithOverflow t@(PrimTarget op) w [dstV, dstO] [lhs, rhs] = do + -- So far this was only tested for the following three CallishMachOps. + MASSERT( (op `elem` [MO_Add2 w, MO_AddIntC w, MO_SubIntC w]) ) + let width = widthToLlvmInt w + -- This will do most of the work of generating the call to the intrinsic and + -- extracting the values from the struct. + (value, overflowBit, (stmts, top)) <- + genCallExtract t w (lhs, rhs) (width, i1) + -- value is i, but overflowBit is i1, so we need to cast (Cmm expects + -- both to be i) + (overflow, zext) <- doExpr width $ Cast LM_Zext overflowBit width + dstRegV <- getCmmReg (CmmLocal dstV) + dstRegO <- getCmmReg (CmmLocal dstO) + let storeV = Store value dstRegV + storeO = Store overflow dstRegO + return (stmts `snocOL` zext `snocOL` storeV `snocOL` storeO, top) +genCallWithOverflow _ _ _ _ = + panic "genCallExtract: wrong ForeignTarget or number of arguments" + +-- | A helper function for genCallWithOverflow that handles generating the call +-- to the LLVM intrinsic and extracting the result from the struct to LlvmVars. +genCallExtract + :: ForeignTarget -- ^ PrimOp + -> Width -- ^ Width of the operands. + -> (CmmActual, CmmActual) -- ^ Actual arguments. + -> (LlvmType, LlvmType) -- ^ LLLVM types of the returned sturct. + -> LlvmM (LlvmVar, LlvmVar, StmtData) +genCallExtract target@(PrimTarget op) w (argA, argB) (llvmTypeA, llvmTypeB) = do + let width = widthToLlvmInt w + argTy = [width, width] + retTy = LMStructU [llvmTypeA, llvmTypeB] + + -- Process the arguments. + let args_hints = zip [argA, argB] (snd $ foreignTargetHints target) + (argsV1, args1, top1) <- arg_vars args_hints ([], nilOL, []) + (argsV2, args2) <- castVars $ zip argsV1 argTy + + -- Get the function and make the call. + fname <- cmmPrimOpFunctions op + (fptr, _, top2) <- getInstrinct fname retTy argTy + -- We use StdCall for primops. See also the last case of genCall. + (retV, call) <- doExpr retTy $ Call StdCall fptr argsV2 [] + + -- This will result in a two element struct, we need to use "extractvalue" + -- to get them out of it. + (res1, ext1) <- doExpr llvmTypeA (ExtractV retV 0) + (res2, ext2) <- doExpr llvmTypeB (ExtractV retV 1) + + let stmts = args1 `appOL` args2 `snocOL` call `snocOL` ext1 `snocOL` ext2 + tops = top1 ++ top2 + return (res1, res2, (stmts, tops)) + +genCallExtract _ _ _ _ = + panic "genCallExtract: unsupported ForeignTarget" + -- Handle simple function call that only need simple type casting, of the form: -- truncate arg >>= \a -> call(a) >>= zext -- @@ -534,12 +611,16 @@ cmmPrimOpFunctions mop = do (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch" + MO_AddIntC w -> fsLit $ "llvm.sadd.with.overflow." + ++ showSDoc dflags (ppr $ widthToLlvmInt w) + MO_SubIntC w -> fsLit $ "llvm.ssub.with.overflow." + ++ showSDoc dflags (ppr $ widthToLlvmInt w) + MO_Add2 w -> fsLit $ "llvm.uadd.with.overflow." + ++ showSDoc dflags (ppr $ widthToLlvmInt w) + MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported - MO_Add2 {} -> unsupported - MO_AddIntC {} -> unsupported - MO_SubIntC {} -> unsupported MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported diff --git a/testsuite/tests/primops/should_run/T9430.hs b/testsuite/tests/primops/should_run/T9430.hs new file mode 100644 index 0000000000000000000000000000000000000000..571b6db37de7a9e4c5d70c505b5041cdcd1af832 --- /dev/null +++ b/testsuite/tests/primops/should_run/T9430.hs @@ -0,0 +1,75 @@ +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} + +module Main where + +import GHC.Exts + +checkI + :: (Int, Int) -- ^ expected results + -> (Int# -> Int# -> (# Int#, Int# #)) -- ^ primop + -> Int -- ^ first argument + -> Int -- ^ second argument + -> Maybe String -- ^ maybe error +checkI (expX, expY) op (I# a) (I# b) = + case op a b of + (# x, y #) + | I# x == expX && I# y == expY -> Nothing + | otherwise -> + Just $ + "Expected " ++ show expX ++ " and " ++ show expY + ++ " but got " ++ show (I# x) ++ " and " ++ show (I# y) +checkW + :: (Word, Word) -- ^ expected results + -> (Word# -> Word# -> (# Word#, Word# #)) -- ^ primop + -> Word -- ^ first argument + -> Word -- ^ second argument + -> Maybe String -- ^ maybe error +checkW (expX, expY) op (W# a) (W# b) = + case op a b of + (# x, y #) + | W# x == expX && W# y == expY -> Nothing + | otherwise -> + Just $ + "Expected " ++ show expX ++ " and " ++ show expY + ++ " but got " ++ show (W# x) ++ " and " ++ show (W# y) + +check :: String -> Maybe String -> IO () +check s (Just err) = error $ "Error for " ++ s ++ ": " ++ err +check _ Nothing = return () + +main :: IO () +main = do + -- First something trivial + check "addIntC# maxBound 0" $ checkI (maxBound, 0) addIntC# maxBound 0 + check "addIntC# 0 maxBound" $ checkI (maxBound, 0) addIntC# 0 maxBound + -- Overflows + check "addIntC# maxBound 1" $ checkI (minBound, 1) addIntC# maxBound 1 + check "addIntC# 1 maxBound" $ checkI (minBound, 1) addIntC# 1 maxBound + check "addIntC# maxBound 2" $ checkI (minBound + 1, 1) addIntC# maxBound 2 + check "addIntC# 2 maxBound" $ checkI (minBound + 1, 1) addIntC# 2 maxBound + check "addIntC# minBound minBound" $ + checkI (0, 1) addIntC# minBound minBound + + -- First something trivial + check "subIntC# minBound 0" $ checkI (minBound, 0) subIntC# minBound 0 + -- Overflows + check "subIntC# minBound 1" $ checkI (maxBound, 1) subIntC# minBound 1 + check "subIntC# minBound 1" $ checkI (maxBound - 1, 1) subIntC# minBound 2 + check "subIntC# 0 minBound" $ checkI (minBound, 1) subIntC# 0 minBound + check "subIntC# -1 minBound" $ checkI (maxBound, 0) subIntC# (-1) minBound + check "subIntC# minBound -1" $ + checkI (minBound + 1, 0) subIntC# minBound (-1) + + -- First something trivial (note that the order of results is different!) + check "plusWord2# maxBound 0" $ checkW (0, maxBound) plusWord2# maxBound 0 + check "plusWord2# 0 maxBound" $ checkW (0, maxBound) plusWord2# 0 maxBound + -- Overflows + check "plusWord2# maxBound 1" $ + checkW (1, minBound) plusWord2# maxBound 1 + check "plusWord2# 1 maxBound" $ + checkW (1, minBound) plusWord2# 1 maxBound + check "plusWord2# maxBound 2" $ + checkW (1, minBound + 1) plusWord2# maxBound 2 + check "plusWord2# 2 maxBound" $ + checkW (1, minBound + 1) plusWord2# 2 maxBound diff --git a/testsuite/tests/primops/should_run/all.T b/testsuite/tests/primops/should_run/all.T index a2c48fe19ac356d0718dd183df6bae72a40bc040..12f94913d3cd82b11493fd62bf3873f9921515c1 100644 --- a/testsuite/tests/primops/should_run/all.T +++ b/testsuite/tests/primops/should_run/all.T @@ -1,3 +1,4 @@ test('T6135', normal, compile_and_run, ['']) - test('T7689', normal, compile_and_run, ['']) +# The test is using unboxed tuples, so omit ghci +test('T9430', omit_ways(['ghci']), compile_and_run, [''])