Add the Float32X4# primitive type and associated primops.

This patch lays the groundwork needed for primop support for SIMD vectors. In
addition to the groundwork, we add support for the FloatX4# primitive type and
associated primops.

 * Add the FloatX4# primitive type and associated primops.
 * Add CodeGen support for Float vectors.
 * Compile vector operations to LLVM vector operations in the LLVM code
   generator.
 * Make the x86 native backend fail gracefully when encountering vector primops.
 * Only generate primop wrappers for vector primops when using LLVM.
parent 6480a35c
......@@ -103,6 +103,17 @@ data MachOp
| MO_SS_Conv Width Width -- Signed int -> Signed int
| MO_UU_Conv Width Width -- unsigned int -> unsigned int
| MO_FF_Conv Width Width -- Float -> Float
-- Vector element insertion and extraction operations
| MO_V_Insert Length Width -- Insert scalar into vector
| MO_V_Extract Length Width -- Extract scalar from vector
-- Floating point vector operations
| MO_VF_Add Length Width
| MO_VF_Sub Length Width
| MO_VF_Neg Length Width -- unary -
| MO_VF_Mul Length Width
| MO_VF_Quot Length Width
deriving (Eq, Show)
pprMachOp :: MachOp -> SDoc
......@@ -338,6 +349,15 @@ machOpResultType dflags mop tys =
MO_FS_Conv _ to -> cmmBits to
MO_SF_Conv _ to -> cmmFloat to
MO_FF_Conv _ to -> cmmFloat to
MO_V_Insert {} -> ty1
MO_V_Extract {} -> vecElemType ty1
MO_VF_Add {} -> ty1
MO_VF_Sub {} -> ty1
MO_VF_Mul {} -> ty1
MO_VF_Quot {} -> ty1
MO_VF_Neg {} -> ty1
where
(ty1:_) = tys
......@@ -405,6 +425,15 @@ machOpArgReps dflags op =
MO_FS_Conv from _ -> [from]
MO_FF_Conv from _ -> [from]
MO_V_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
MO_V_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
MO_VF_Add _ r -> [r,r]
MO_VF_Sub _ r -> [r,r]
MO_VF_Mul _ r -> [r,r]
MO_VF_Quot _ r -> [r,r]
MO_VF_Neg _ r -> [r]
-----------------------------------------------------------------------------
-- CallishMachOp
-----------------------------------------------------------------------------
......
......@@ -626,6 +626,36 @@ pprMachOp_for_C mop = case mop of
(panic $ "PprC.pprMachOp_for_C: MO_U_MulMayOflo"
++ " should have been handled earlier!")
MO_V_Insert {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_V_Insert")
(panic $ "PprC.pprMachOp_for_C: MO_V_Insert"
++ " should have been handled earlier!")
MO_V_Extract {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_V_Extract")
(panic $ "PprC.pprMachOp_for_C: MO_V_Extract"
++ " should have been handled earlier!")
MO_VF_Add {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Add")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Add"
++ " should have been handled earlier!")
MO_VF_Sub {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Sub")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Sub"
++ " should have been handled earlier!")
MO_VF_Neg {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Neg")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Neg"
++ " should have been handled earlier!")
MO_VF_Mul {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Mul")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Mul"
++ " should have been handled earlier!")
MO_VF_Quot {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Quot")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Quot"
++ " should have been handled earlier!")
signedOp :: MachOp -> Bool -- Argument type(s) are signed ints
signedOp (MO_S_Quot _) = True
signedOp (MO_S_Rem _) = True
......
This diff is collapsed.
......@@ -879,6 +879,13 @@ genMachOp env _ op [x] = case op of
MO_FF_Conv from to
-> sameConv from (widthToLlvmFloat to) LM_Fptrunc LM_Fpext
MO_VF_Neg len w ->
let ty = widthToLlvmFloat w
vecty = LMVector len ty
all0 = LMFloatLit (-0) ty
all0s = LMLitVar $ LMVectorLit (replicate len all0)
in negate vecty all0s LM_MO_FSub
-- Handle unsupported cases explicitly so we get a warning
-- of missing case when new MachOps added
MO_Add _ -> panicOp
......@@ -919,6 +926,14 @@ genMachOp env _ op [x] = case op of
MO_Shl _ -> panicOp
MO_U_Shr _ -> panicOp
MO_S_Shr _ -> panicOp
MO_V_Insert _ _ -> panicOp
MO_V_Extract _ _ -> panicOp
MO_VF_Add _ _ -> panicOp
MO_VF_Sub _ _ -> panicOp
MO_VF_Mul _ _ -> panicOp
MO_VF_Quot _ _ -> panicOp
where
dflags = getDflags env
......@@ -984,6 +999,24 @@ genMachOp_fast env opt op r n e
-- This handles all the cases not handle by the specialised genMachOp_fast.
genMachOp_slow :: LlvmEnv -> EOption -> MachOp -> [CmmExpr] -> UniqSM ExprData
-- Element extraction
genMachOp_slow env _ (MO_V_Extract {}) [val, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, vidx, stmts2, top2) <- exprToVar env1 idx
let (LMVector _ ty) = getVarType vval
(v1, s1) <- doExpr ty $ Extract vval vidx
return (env2, v1, stmts1 `appOL` stmts2 `snocOL` s1, top1 ++ top2)
-- Element insertion
genMachOp_slow env _ (MO_V_Insert {}) [val, elt, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, velt, stmts2, top2) <- exprToVar env1 elt
(env3, vidx, stmts3, top3) <- exprToVar env2 idx
let ty = getVarType vval
(v1, s1) <- doExpr ty $ Insert vval velt vidx
return (env3, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1,
top1 ++ top2 ++ top3)
-- Binary MachOp
genMachOp_slow env opt op [x, y] = case op of
......@@ -1032,6 +1065,11 @@ genMachOp_slow env opt op [x, y] = case op of
MO_Shl _ -> genBinMach LM_MO_Shl
MO_U_Shr _ -> genBinMach LM_MO_LShr
MO_S_Shr _ -> genBinMach LM_MO_AShr
MO_VF_Add _ _ -> genBinMach LM_MO_FAdd
MO_VF_Sub _ _ -> genBinMach LM_MO_FSub
MO_VF_Mul _ _ -> genBinMach LM_MO_FMul
MO_VF_Quot _ _ -> genBinMach LM_MO_FDiv
MO_Not _ -> panicOp
MO_S_Neg _ -> panicOp
......@@ -1043,6 +1081,11 @@ genMachOp_slow env opt op [x, y] = case op of
MO_UU_Conv _ _ -> panicOp
MO_FF_Conv _ _ -> panicOp
MO_V_Insert {} -> panicOp
MO_V_Extract {} -> panicOp
MO_VF_Neg {} -> panicOp
where
dflags = getDflags env
......
......@@ -602,6 +602,14 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_FS_Conv from to -> coerceFP2Int from to x
MO_SF_Conv from to -> coerceInt2FP from to x
MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm
MO_VF_Add {} -> needLlvm
MO_VF_Sub {} -> needLlvm
MO_VF_Mul {} -> needLlvm
MO_VF_Quot {} -> needLlvm
MO_VF_Neg {} -> needLlvm
_other -> pprPanic "getRegister" (pprMachOp mop)
where
triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
......@@ -694,6 +702,14 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_U_Shr rep -> shift_code rep SHR x y {-False-}
MO_S_Shr rep -> shift_code rep SAR x y {-False-}
MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm
MO_VF_Add {} -> needLlvm
MO_VF_Sub {} -> needLlvm
MO_VF_Mul {} -> needLlvm
MO_VF_Quot {} -> needLlvm
MO_VF_Neg {} -> needLlvm
_other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
where
--------------------
......@@ -884,7 +900,9 @@ getRegister' dflags _ (CmmLit lit)
code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
return (Any size code)
getRegister' _ _ other = pprPanic "getRegister(x86)" (ppr other)
getRegister' _ _ other
| isVecExpr other = needLlvm
| otherwise = pprPanic "getRegister(x86)" (ppr other)
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
......@@ -2690,3 +2708,19 @@ sse2NegCode w x = do
]
--
return (Any sz code)
isVecExpr :: CmmExpr -> Bool
isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
isVecExpr (CmmMachOp _ [e]) = isVecExpr e
isVecExpr _ = False
needLlvm :: NatM a
needLlvm =
sorry $ unlines ["The native code generator does not support vector"
,"instructions. Please use -fllvm."]
......@@ -1420,6 +1420,11 @@ typeNatAddTyFamNameKey = mkPreludeTyConUnique 162
typeNatMulTyFamNameKey = mkPreludeTyConUnique 163
typeNatExpTyFamNameKey = mkPreludeTyConUnique 164
-- SIMD vector types (Unique keys)
floatX4PrimTyConKey :: Unique
floatX4PrimTyConKey = mkPreludeTyConUnique 170
---------------- Template Haskell -------------------
-- USES TyConUniques 200-299
-----------------------------------------------------
......
......@@ -73,7 +73,10 @@ module TysPrim(
eqPrimTyCon, -- ty1 ~# ty2
-- * Any
anyTy, anyTyCon, anyTypeOfKind
anyTy, anyTyCon, anyTypeOfKind,
-- * SIMD
floatX4PrimTyCon, floatX4PrimTy
) where
#include "HsVersions.h"
......@@ -135,6 +138,8 @@ primTyCons
, constraintKindTyCon
, superKindTyCon
, anyKindTyCon
, floatX4PrimTyCon
]
mkPrimTc :: FastString -> Unique -> TyCon -> Name
......@@ -144,7 +149,7 @@ mkPrimTc fs unique tycon
(ATyCon tycon) -- Relevant TyCon
UserSyntax -- None are built-in syntax
charPrimTyConName, intPrimTyConName, int32PrimTyConName, int64PrimTyConName, wordPrimTyConName, word32PrimTyConName, word64PrimTyConName, addrPrimTyConName, floatPrimTyConName, doublePrimTyConName, statePrimTyConName, realWorldTyConName, arrayPrimTyConName, arrayArrayPrimTyConName, byteArrayPrimTyConName, mutableArrayPrimTyConName, mutableByteArrayPrimTyConName, mutableArrayArrayPrimTyConName, mutVarPrimTyConName, mVarPrimTyConName, tVarPrimTyConName, stablePtrPrimTyConName, stableNamePrimTyConName, bcoPrimTyConName, weakPrimTyConName, threadIdPrimTyConName, eqPrimTyConName :: Name
charPrimTyConName, intPrimTyConName, int32PrimTyConName, int64PrimTyConName, wordPrimTyConName, word32PrimTyConName, word64PrimTyConName, addrPrimTyConName, floatPrimTyConName, doublePrimTyConName, statePrimTyConName, realWorldTyConName, arrayPrimTyConName, arrayArrayPrimTyConName, byteArrayPrimTyConName, mutableArrayPrimTyConName, mutableByteArrayPrimTyConName, mutableArrayArrayPrimTyConName, mutVarPrimTyConName, mVarPrimTyConName, tVarPrimTyConName, stablePtrPrimTyConName, stableNamePrimTyConName, bcoPrimTyConName, weakPrimTyConName, threadIdPrimTyConName, eqPrimTyConName, floatX4PrimTyConName :: Name
charPrimTyConName = mkPrimTc (fsLit "Char#") charPrimTyConKey charPrimTyCon
intPrimTyConName = mkPrimTc (fsLit "Int#") intPrimTyConKey intPrimTyCon
int32PrimTyConName = mkPrimTc (fsLit "Int32#") int32PrimTyConKey int32PrimTyCon
......@@ -172,6 +177,7 @@ stableNamePrimTyConName = mkPrimTc (fsLit "StableName#") stableNamePrimTyC
bcoPrimTyConName = mkPrimTc (fsLit "BCO#") bcoPrimTyConKey bcoPrimTyCon
weakPrimTyConName = mkPrimTc (fsLit "Weak#") weakPrimTyConKey weakPrimTyCon
threadIdPrimTyConName = mkPrimTc (fsLit "ThreadId#") threadIdPrimTyConKey threadIdPrimTyCon
floatX4PrimTyConName = mkPrimTc (fsLit "FloatX4#") floatX4PrimTyConKey floatX4PrimTyCon
\end{code}
%************************************************************************
......@@ -729,3 +735,16 @@ anyTyCon = mkSynTyCon anyTyConName kind [kKiVar]
anyTypeOfKind :: Kind -> Type
anyTypeOfKind kind = mkNakedTyConApp anyTyCon [kind]
\end{code}
%************************************************************************
%* *
\subsection{SIMD vector type}
%* *
%************************************************************************
\begin{code}
floatX4PrimTy :: Type
floatX4PrimTy = mkTyConTy floatX4PrimTyCon
floatX4PrimTyCon :: TyCon
floatX4PrimTyCon = pcPrimTyCon0 floatX4PrimTyConName (VecRep 4 FloatElemRep)
\end{code}
......@@ -2201,6 +2201,101 @@ primop TraceMarkerOp "traceMarker#" GenPrimOp
out_of_line = True
------------------------------------------------------------------------
section "Float SIMD Vectors"
{Operations on SIMD vectors of 4 single-precision (32-bit)
floating-point numbers.}
------------------------------------------------------------------------
primtype FloatX4#
primop FloatToFloatX4Op "floatToFloatX4#" GenPrimOp
Float# -> FloatX4#
primop FloatX4PackOp "packFloatX4#" GenPrimOp
Float# -> Float# -> Float# -> Float# -> FloatX4#
primop FloatX4UnpackOp "unpackFloatX4#" GenPrimOp
FloatX4# -> (# Float#, Float#, Float#, Float# #)
primop FloatX4InsertOp "insertFloatX4#" GenPrimOp
FloatX4# -> Float# -> Int# -> FloatX4#
with can_fail = True
primop FloatX4AddOp "plusFloatX4#" Dyadic
FloatX4# -> FloatX4# -> FloatX4#
with commutable = True
primop FloatX4SubOp "minusFloatX4#" Dyadic
FloatX4# -> FloatX4# -> FloatX4#
primop FloatX4MulOp "timesFloatX4#" Dyadic
FloatX4# -> FloatX4# -> FloatX4#
with commutable = True
primop FloatX4DivOp "divideFloatX4#" Dyadic
FloatX4# -> FloatX4# -> FloatX4#
with can_fail = True
primop FloatX4NegOp "negateFloatX4#" Monadic
FloatX4# -> FloatX4#
primop IndexByteArrayOp_FloatX4 "indexFloatX4Array#" GenPrimOp
ByteArray# -> Int# -> FloatX4#
with can_fail = True
primop ReadByteArrayOp_FloatX4 "readFloatX4Array#" GenPrimOp
MutableByteArray# s -> Int# -> State# s -> (# State# s, FloatX4# #)
with has_side_effects = True
can_fail = True
primop WriteByteArrayOp_FloatX4 "writeFloatX4Array#" GenPrimOp
MutableByteArray# s -> Int# -> FloatX4# -> State# s -> State# s
with has_side_effects = True
can_fail = True
primop IndexOffAddrOp_FloatX4 "indexFloatX4OffAddr#" GenPrimOp
Addr# -> Int# -> FloatX4#
with can_fail = True
primop ReadOffAddrOp_FloatX4 "readFloatX4OffAddr#" GenPrimOp
Addr# -> Int# -> State# s -> (# State# s, FloatX4# #)
with has_side_effects = True
can_fail = True
primop WriteOffAddrOp_FloatX4 "writeFloatX4OffAddr#" GenPrimOp
Addr# -> Int# -> FloatX4# -> State# s -> State# s
with has_side_effects = True
can_fail = True
primop IndexByteArrayOp_FloatAsFloatX4 "indexFloatArrayAsFloatX4#" GenPrimOp
ByteArray# -> Int# -> FloatX4#
with can_fail = True
primop ReadByteArrayOp_FloatAsFloatX4 "readFloatArrayAsFloatX4#" GenPrimOp
MutableByteArray# s -> Int# -> State# s -> (# State# s, FloatX4# #)
with has_side_effects = True
can_fail = True
primop WriteByteArrayOp_FloatAsFloatX4 "writeFloatArrayAsFloatX4#" GenPrimOp
MutableByteArray# s -> Int# -> FloatX4# -> State# s -> State# s
with has_side_effects = True
can_fail = True
primop IndexOffAddrOp_FloatAsFloatX4 "indexFloatOffAddrAsFloatX4#" GenPrimOp
Addr# -> Int# -> FloatX4#
with can_fail = True
primop ReadOffAddrOp_FloatAsFloatX4 "readFloatOffAddrAsFloatX4#" GenPrimOp
Addr# -> Int# -> State# s -> (# State# s, FloatX4# #)
with has_side_effects = True
can_fail = True
primop WriteOffAddrOp_FloatAsFloatX4 "writeFloatOffAddrAsFloatX4#" GenPrimOp
Addr# -> Int# -> FloatX4# -> State# s -> State# s
with has_side_effects = True
can_fail = True
------------------------------------------------------------------------
--- ---
------------------------------------------------------------------------
......
......@@ -502,20 +502,27 @@ gen_latex_doc (Info defaults entries)
gen_wrappers :: Info -> String
gen_wrappers (Info _ entries)
= "{-# LANGUAGE NoImplicitPrelude, UnboxedTuples #-}\n"
= "{-# LANGUAGE CPP, NoImplicitPrelude, UnboxedTuples #-}\n"
-- Dependencies on Prelude must be explicit in libraries/base, but we
-- don't need the Prelude here so we add NoImplicitPrelude.
++ "module GHC.PrimopWrappers where\n"
++ "import qualified GHC.Prim\n"
++ "import GHC.Types (Bool)\n"
++ "import GHC.Tuple ()\n"
++ "import GHC.Prim (" ++ types ++ ")\n"
++ unlines (concatMap f specs)
++ "import GHC.Prim (" ++ concat (intersperse ", " othertycons) ++ ")\n"
++ "#if defined (__GLASGOW_HASKELL_LLVM__)\n"
++ "import GHC.Prim (" ++ concat (intersperse ", " vectycons) ++ ")\n"
++ "#endif /* defined (__GLASGOW_HASKELL_LLVM__) */\n"
++ unlines (concatMap f otherspecs)
++ "#if defined (__GLASGOW_HASKELL_LLVM__)\n"
++ unlines (concatMap f vecspecs)
++ "#endif /* defined (__GLASGOW_HASKELL_LLVM__) */\n"
where
specs = filter (not.dodgy) (filter is_primop entries)
(vecspecs, otherspecs) = partition (llvmOnlyTy . ty) specs
tycons = foldr union [] $ map (tyconsIn . ty) specs
tycons' = filter (`notElem` ["()", "Bool"]) tycons
types = concat $ intersperse ", " tycons'
(vectycons, othertycons) =
(partition llvmOnlyTyCon . filter (`notElem` ["()", "Bool"])) tycons
f spec = let args = map (\n -> "a" ++ show n) [1 .. arity (ty spec)]
src_name = wrap (name spec)
lhs = src_name ++ " " ++ unwords args
......@@ -536,6 +543,16 @@ gen_wrappers (Info _ entries)
"parAtAbs#", "parAtRel#", "parAtForNow#"
]
llvmOnlyTy :: Ty -> Bool
llvmOnlyTy (TyF ty1 ty2) = llvmOnlyTy ty1 || llvmOnlyTy ty2
llvmOnlyTy (TyApp tycon tys) = llvmOnlyTyCon tycon || any llvmOnlyTy tys
llvmOnlyTy (TyVar _) = False
llvmOnlyTy (TyUTup tys) = any llvmOnlyTy tys
llvmOnlyTyCon :: TyCon -> Bool
llvmOnlyTyCon "FloatX4#" = True
llvmOnlyTyCon _ = False
gen_primop_list :: Info -> String
gen_primop_list (Info _ entries)
= unlines (
......@@ -653,6 +670,7 @@ ppType (TyApp "Word64#" []) = "word64PrimTy"
ppType (TyApp "Addr#" []) = "addrPrimTy"
ppType (TyApp "Float#" []) = "floatPrimTy"
ppType (TyApp "Double#" []) = "doublePrimTy"
ppType (TyApp "FloatX4#" []) = "floatX4PrimTy"
ppType (TyApp "ByteArray#" []) = "byteArrayPrimTy"
ppType (TyApp "RealWorld" []) = "realWorldTy"
ppType (TyApp "ThreadId#" []) = "threadIdPrimTy"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment