SIMD primops are now generated using schemas that are polymorphic in
width and element type.

SIMD primops are now polymorphic in vector size and element type, but
only internally to the compiler. More specifically, utils/genprimopcode
has been extended so that it "knows" about SIMD vectors. This allows us
to, for example, write a single definition for the "add two vectors"
primop in primops.txt.pp and have it instantiated at many vector types.
This generates a primop in GHC.Prim for each vector type at which "add
two vectors" is instantiated, but only one data constructor for the
PrimOp data type, so the code generator is much, much simpler.
parent da5a647c
......@@ -118,6 +118,10 @@ data MachOp
| MO_VS_Rem Length Width
| MO_VS_Neg Length Width
-- Unsigned vector quotient/remainder
| MO_VU_Quot Length Width
| MO_VU_Rem Length Width
-- Floating point vector element insertion and extraction operations
| MO_VF_Insert Length Width -- Insert scalar into vector
| MO_VF_Extract Length Width -- Extract scalar from vector
......@@ -375,6 +379,9 @@ machOpResultType dflags mop tys =
MO_VS_Rem l w -> cmmVec l (cmmBits w)
MO_VS_Neg l w -> cmmVec l (cmmBits w)
MO_VU_Quot l w -> cmmVec l (cmmBits w)
MO_VU_Rem l w -> cmmVec l (cmmBits w)
MO_VF_Insert l w -> cmmVec l (cmmFloat w)
MO_VF_Extract _ w -> cmmFloat w
......@@ -461,6 +468,9 @@ machOpArgReps dflags op =
MO_VS_Rem _ r -> [r,r]
MO_VS_Neg _ r -> [r]
MO_VU_Quot _ r -> [r,r]
MO_VU_Rem _ r -> [r,r]
MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
......
......@@ -651,6 +651,15 @@ pprMachOp_for_C mop = case mop of
(panic $ "PprC.pprMachOp_for_C: MO_VS_Neg"
++ " should have been handled earlier!")
MO_VU_Quot {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VU_Quot")
(panic $ "PprC.pprMachOp_for_C: MO_VU_Quot"
++ " should have been handled earlier!")
MO_VU_Rem {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VU_Rem")
(panic $ "PprC.pprMachOp_for_C: MO_VU_Rem"
++ " should have been handled earlier!")
MO_VF_Insert {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Insert")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Insert"
......
......@@ -40,7 +40,7 @@ import FastString
import Outputable
import Util
import Control.Monad (liftM)
import Control.Monad (liftM, when)
import Data.Bits
------------------------------------------------------------------------
......@@ -380,14 +380,6 @@ emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp
emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
emitPrimOp _ res IndexOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
emitPrimOp _ res IndexOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 f32 res args
emitPrimOp _ res IndexOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
emitPrimOp _ res IndexOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 f64 res args
emitPrimOp _ res IndexOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
emitPrimOp _ res IndexOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
emitPrimOp _ res IndexOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
emitPrimOp _ res IndexOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
......@@ -407,14 +399,6 @@ emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (
emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
emitPrimOp _ res ReadOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
emitPrimOp _ res ReadOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 b32 res args
emitPrimOp _ res ReadOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
emitPrimOp _ res ReadOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 b64 res args
emitPrimOp _ res ReadOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
emitPrimOp _ res ReadOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
emitPrimOp _ res ReadOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
emitPrimOp _ res ReadOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
-- IndexXXXArray
......@@ -434,14 +418,6 @@ emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayO
emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
emitPrimOp _ res IndexByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
emitPrimOp _ res IndexByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
emitPrimOp _ res IndexByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
emitPrimOp _ res IndexByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
emitPrimOp _ res IndexByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
emitPrimOp _ res IndexByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
emitPrimOp _ res IndexByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
emitPrimOp _ res IndexByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
-- ReadXXXArray, identical to IndexXXXArray.
......@@ -461,14 +437,6 @@ emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp
emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
emitPrimOp _ res ReadByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
emitPrimOp _ res ReadByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
emitPrimOp _ res ReadByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
emitPrimOp _ res ReadByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
emitPrimOp _ res ReadByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
emitPrimOp _ res ReadByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
emitPrimOp _ res ReadByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
emitPrimOp _ res ReadByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
-- WriteXXXoffAddr
......@@ -488,14 +456,6 @@ emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (J
emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
emitPrimOp _ res WriteOffAddrOp_FloatX4 args = doWriteOffAddrOp Nothing vec4f32 res args
emitPrimOp _ res WriteOffAddrOp_FloatAsFloatX4 args = doWriteOffAddrOp Nothing f32 res args
emitPrimOp _ res WriteOffAddrOp_DoubleX2 args = doWriteOffAddrOp Nothing vec2f64 res args
emitPrimOp _ res WriteOffAddrOp_DoubleAsDoubleX2 args = doWriteOffAddrOp Nothing f64 res args
emitPrimOp _ res WriteOffAddrOp_Int32X4 args = doWriteOffAddrOp Nothing vec4b32 res args
emitPrimOp _ res WriteOffAddrOp_Int32AsInt32X4 args = doWriteOffAddrOp Nothing b32 res args
emitPrimOp _ res WriteOffAddrOp_Int64X2 args = doWriteOffAddrOp Nothing vec2b64 res args
emitPrimOp _ res WriteOffAddrOp_Int64AsInt64X2 args = doWriteOffAddrOp Nothing b64 res args
-- WriteXXXArray
......@@ -515,14 +475,6 @@ emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayO
emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
emitPrimOp _ res WriteByteArrayOp_FloatX4 args = doWriteByteArrayOp Nothing vec4f32 res args
emitPrimOp _ res WriteByteArrayOp_FloatAsFloatX4 args = doWriteByteArrayOp Nothing f32 res args
emitPrimOp _ res WriteByteArrayOp_DoubleX2 args = doWriteByteArrayOp Nothing vec2f64 res args
emitPrimOp _ res WriteByteArrayOp_DoubleAsDoubleX2 args = doWriteByteArrayOp Nothing f64 res args
emitPrimOp _ res WriteByteArrayOp_Int32X4 args = doWriteByteArrayOp Nothing vec4b32 res args
emitPrimOp _ res WriteByteArrayOp_Int32AsInt32X4 args = doWriteByteArrayOp Nothing b32 res args
emitPrimOp _ res WriteByteArrayOp_Int64X2 args = doWriteByteArrayOp Nothing vec2b64 res args
emitPrimOp _ res WriteByteArrayOp_Int64AsInt64X2 args = doWriteByteArrayOp Nothing b64 res args
-- Copying and setting byte arrays
emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
......@@ -556,78 +508,136 @@ emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
(MO_UF_Conv W64) [w]
-- SIMD vector packing and unpacking
emitPrimOp _ [res] FloatToFloatX4Op [e] =
doVecPackOp Nothing vec4f32 zero [e,e,e,e] res
-- SIMD primops
emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] =
doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
where
zero :: CmmExpr
zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
zeros :: CmmExpr
zeros = CmmLit $ CmmVec (replicate n zero)
zero :: CmmLit
zero = case vcat of
IntVec -> CmmInt 0 w
WordVec -> CmmInt 0 w
FloatVec -> CmmFloat 0 w
ty :: CmmType
ty = vecVmmType vcat n w
emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
when (length es /= n) $
panic "emitPrimOp: VecPackOp has wrong number of arguments"
doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
where
zeros :: CmmExpr
zeros = CmmLit $ CmmVec (replicate n zero)
zero :: CmmLit
zero = case vcat of
IntVec -> CmmInt 0 w
WordVec -> CmmInt 0 w
FloatVec -> CmmFloat 0 w
ty :: CmmType
ty = vecVmmType vcat n w
emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
when (length res /= n) $
panic "emitPrimOp: VecUnpackOp has wrong number of results"
doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
where
ty :: CmmType
ty = vecVmmType vcat n w
emitPrimOp _ [res] FloatX4PackOp es@[_,_,_,_] =
doVecPackOp Nothing vec4f32 zero es res
emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] =
doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
where
zero :: CmmExpr
zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
ty :: CmmType
ty = vecVmmType vcat n w
emitPrimOp _ res@[_,_,_,_] FloatX4UnpackOp [arg] =
doVecUnpackOp Nothing vec4f32 arg res
emitPrimOp _ res (VecIndexByteArrayOp vcat n w) args =
doIndexByteArrayOp Nothing ty res args
where
ty :: CmmType
ty = vecVmmType vcat n w
emitPrimOp _ [res] FloatX4InsertOp [v,e,i] =
doVecInsertOp Nothing vec4f32 v e i res
emitPrimOp _ res (VecReadByteArrayOp vcat n w) args =
doIndexByteArrayOp Nothing ty res args
where
ty :: CmmType
ty = vecVmmType vcat n w
emitPrimOp _ [res] DoubleToDoubleX2Op [e] =
doVecPackOp Nothing vec2f64 zero [e,e] res
emitPrimOp _ res (VecWriteByteArrayOp vcat n w) args =
doWriteByteArrayOp Nothing ty res args
where
zero :: CmmExpr
zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
ty :: CmmType
ty = vecVmmType vcat n w
emitPrimOp _ [res] DoubleX2PackOp es@[_,_] =
doVecPackOp Nothing vec2f64 zero es res
emitPrimOp _ res (VecIndexOffAddrOp vcat n w) args =
doIndexOffAddrOp Nothing ty res args
where
zero :: CmmExpr
zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
ty :: CmmType
ty = vecVmmType vcat n w
emitPrimOp _ res@[_,_] DoubleX2UnpackOp [arg] =
doVecUnpackOp Nothing vec2f64 arg res
emitPrimOp _ res (VecReadOffAddrOp vcat n w) args =
doIndexOffAddrOp Nothing ty res args
where
ty :: CmmType
ty = vecVmmType vcat n w
emitPrimOp _ [res] DoubleX2InsertOp [v,e,i] =
doVecInsertOp Nothing vec2f64 v e i res
emitPrimOp _ res (VecWriteOffAddrOp vcat n w) args =
doWriteOffAddrOp Nothing ty res args
where
ty :: CmmType
ty = vecVmmType vcat n w
emitPrimOp dflags [res] Int32ToInt32X4Op [e] =
doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero [e,e,e,e] res
emitPrimOp _ res (VecIndexScalarByteArrayOp vcat n w) args =
doIndexByteArrayOpAs Nothing vecty ty res args
where
zero :: CmmExpr
zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
vecty :: CmmType
vecty = vecVmmType vcat n w
emitPrimOp dflags [res] Int32X4PackOp es@[_,_,_,_] =
doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero es res
ty :: CmmType
ty = vecCmmCat vcat w
emitPrimOp _ res (VecReadScalarByteArrayOp vcat n w) args =
doIndexByteArrayOpAs Nothing vecty ty res args
where
zero :: CmmExpr
zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
vecty :: CmmType
vecty = vecVmmType vcat n w
emitPrimOp dflags res@[_,_,_,_] Int32X4UnpackOp [arg] =
doVecUnpackOp (Just (mo_s_32ToWord dflags)) vec4b32 arg res
ty :: CmmType
ty = vecCmmCat vcat w
emitPrimOp dflags [res] Int32X4InsertOp [v,e,i] =
doVecInsertOp (Just (mo_WordTo32 dflags)) vec4b32 v e i res
emitPrimOp _ res (VecWriteScalarByteArrayOp vcat _ w) args =
doWriteByteArrayOp Nothing ty res args
where
ty :: CmmType
ty = vecCmmCat vcat w
emitPrimOp _ [res] Int64ToInt64X2Op [e] =
doVecPackOp Nothing vec2b64 zero [e,e] res
emitPrimOp _ res (VecIndexScalarOffAddrOp vcat n w) args =
doIndexOffAddrOpAs Nothing vecty ty res args
where
zero :: CmmExpr
zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
vecty :: CmmType
vecty = vecVmmType vcat n w
emitPrimOp _ [res] Int64X2PackOp es@[_,_] =
doVecPackOp Nothing vec2b64 zero es res
ty :: CmmType
ty = vecCmmCat vcat w
emitPrimOp _ res (VecReadScalarOffAddrOp vcat n w) args =
doIndexOffAddrOpAs Nothing vecty ty res args
where
zero :: CmmExpr
zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
vecty :: CmmType
vecty = vecVmmType vcat n w
emitPrimOp _ res@[_,_] Int64X2UnpackOp [arg] =
doVecUnpackOp Nothing vec2b64 arg res
ty :: CmmType
ty = vecCmmCat vcat w
emitPrimOp _ [res] Int64X2InsertOp [v,e,i] =
doVecInsertOp Nothing vec2b64 v e i res
emitPrimOp _ res (VecWriteScalarOffAddrOp vcat _ w) args =
doWriteOffAddrOp Nothing ty res args
where
ty :: CmmType
ty = vecCmmCat vcat w
-- Prefetch
emitPrimOp _ res PrefetchByteArrayOp args = doPrefetchByteArrayOp res args
......@@ -944,33 +954,26 @@ translateOp _ FloatMulOp = Just (MO_F_Mul W32)
translateOp _ FloatDivOp = Just (MO_F_Quot W32)
translateOp _ FloatNegOp = Just (MO_F_Neg W32)
-- Floating point vector ops
translateOp _ FloatX4AddOp = Just (MO_VF_Add 4 W32)
translateOp _ FloatX4SubOp = Just (MO_VF_Sub 4 W32)
translateOp _ FloatX4MulOp = Just (MO_VF_Mul 4 W32)
translateOp _ FloatX4DivOp = Just (MO_VF_Quot 4 W32)
translateOp _ FloatX4NegOp = Just (MO_VF_Neg 4 W32)
translateOp _ DoubleX2AddOp = Just (MO_VF_Add 2 W64)
translateOp _ DoubleX2SubOp = Just (MO_VF_Sub 2 W64)
translateOp _ DoubleX2MulOp = Just (MO_VF_Mul 2 W64)
translateOp _ DoubleX2DivOp = Just (MO_VF_Quot 2 W64)
translateOp _ DoubleX2NegOp = Just (MO_VF_Neg 2 W64)
translateOp _ Int32X4AddOp = Just (MO_V_Add 4 W32)
translateOp _ Int32X4SubOp = Just (MO_V_Sub 4 W32)
translateOp _ Int32X4MulOp = Just (MO_V_Mul 4 W32)
translateOp _ Int32X4QuotOp = Just (MO_VS_Quot 4 W32)
translateOp _ Int32X4RemOp = Just (MO_VS_Rem 4 W32)
translateOp _ Int32X4NegOp = Just (MO_VS_Neg 4 W32)
translateOp _ Int64X2AddOp = Just (MO_V_Add 2 W64)
translateOp _ Int64X2SubOp = Just (MO_V_Sub 2 W64)
translateOp _ Int64X2MulOp = Just (MO_V_Mul 2 W64)
translateOp _ Int64X2QuotOp = Just (MO_VS_Quot 2 W64)
translateOp _ Int64X2RemOp = Just (MO_VS_Rem 2 W64)
translateOp _ Int64X2NegOp = Just (MO_VS_Neg 2 W64)
-- Vector ops
translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
-- Conversions
......@@ -1182,6 +1185,41 @@ cmmLoadIndexOffExpr dflags off ty base idx_ty idx
setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
------------------------------------------------------------------------------
-- Helpers for translating vector primops.
-- | Build the Cmm type of a SIMD vector: @n@ elements, each of the scalar
-- type that 'vecCmmCat' selects for the given element category and width.
vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
vecVmmType pocat n w = vec n (vecCmmCat pocat w)
-- | Select the scalar Cmm type constructor for a vector element category.
-- 'IntVec' and 'WordVec' both map to 'cmmBits', so the two are not
-- distinguished at the Cmm type level; 'FloatVec' maps to 'cmmFloat'.
vecCmmCat :: PrimOpVecCat -> Width -> CmmType
vecCmmCat IntVec = cmmBits
vecCmmCat WordVec = cmmBits
vecCmmCat FloatVec = cmmFloat
-- | The optional cast passed to 'doVecPackOp' / 'doVecInsertOp' for a scalar
-- that is being placed into a vector of the given category and element width.
--
-- 'IntVec' and 'WordVec' share the same @mo_WordToN@ op at W8, W16 and W32.
-- 'Nothing' (no cast) is returned for 'FloatVec', for W64 elements, and for
-- any other width via the final catch-all clause.
-- NOTE(review): presumably the scalar arrives word-sized and is narrowed to
-- the element width -- confirm against the mo_WordToN definitions.
vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
vecElemInjectCast _ FloatVec _ = Nothing
vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
vecElemInjectCast _ IntVec W64 = Nothing
vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
vecElemInjectCast _ WordVec W64 = Nothing
vecElemInjectCast _ _ _ = Nothing
-- | The optional cast passed to 'doVecUnpackOp' for a scalar that is being
-- taken out of a vector of the given category and element width.
--
-- Unlike 'vecElemInjectCast', the category matters here: 'IntVec' uses the
-- @mo_s_NToWord@ ops and 'WordVec' uses the @mo_u_NToWord@ ops at W8, W16
-- and W32. 'Nothing' (no cast) is returned for 'FloatVec', for W64 elements,
-- and for any other width via the final catch-all clause.
-- NOTE(review): presumably the @s@/@u@ variants sign- and zero-extend to
-- word size respectively -- confirm against their definitions.
vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
vecElemProjectCast _ FloatVec _ = Nothing
vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
vecElemProjectCast _ IntVec W64 = Nothing
vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
vecElemProjectCast _ WordVec W64 = Nothing
vecElemProjectCast _ _ _ = Nothing
------------------------------------------------------------------------------
-- Helpers for translating vector packing and unpacking.
......
......@@ -250,8 +250,12 @@ PRIMOP_BITS_NAMES = primop-data-decl.hs-incl \
primop-code-size.hs-incl \
primop-can-fail.hs-incl \
primop-strictness.hs-incl \
primop-fixity.hs-incl \
primop-primop-info.hs-incl
primop-fixity.hs-incl \
primop-primop-info.hs-incl \
primop-vector-uniques.hs-incl \
primop-vector-tys.hs-incl \
primop-vector-tys-exports.hs-incl \
primop-vector-tycons.hs-incl
PRIMOP_BITS_STAGE1 = $(addprefix compiler/stage1/build/,$(PRIMOP_BITS_NAMES))
PRIMOP_BITS_STAGE2 = $(addprefix compiler/stage2/build/,$(PRIMOP_BITS_NAMES))
......@@ -290,6 +294,14 @@ compiler/stage$1/build/primop-fixity.hs-incl: compiler/stage$1/build/primops.txt
"$$(genprimopcode_INPLACE)" --fixity < $$< > $$@
compiler/stage$1/build/primop-primop-info.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
"$$(genprimopcode_INPLACE)" --primop-primop-info < $$< > $$@
compiler/stage$1/build/primop-vector-uniques.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
"$$(genprimopcode_INPLACE)" --primop-vector-uniques < $$< > $$@
compiler/stage$1/build/primop-vector-tys.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
"$$(genprimopcode_INPLACE)" --primop-vector-tys < $$< > $$@
compiler/stage$1/build/primop-vector-tys-exports.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
"$$(genprimopcode_INPLACE)" --primop-vector-tys-exports < $$< > $$@
compiler/stage$1/build/primop-vector-tycons.hs-incl: compiler/stage$1/build/primops.txt $$$$(genprimopcode_INPLACE)
"$$(genprimopcode_INPLACE)" --primop-vector-tycons < $$< > $$@
# Usages aren't used any more; but the generator
# can still generate them if we want them back
......
......@@ -967,6 +967,9 @@ genMachOp _ op [x] = case op of
MO_VS_Quot _ _ -> panicOp
MO_VS_Rem _ _ -> panicOp
MO_VU_Quot _ _ -> panicOp
MO_VU_Rem _ _ -> panicOp
MO_VF_Insert _ _ -> panicOp
MO_VF_Extract _ _ -> panicOp
......@@ -1140,6 +1143,9 @@ genMachOp_slow opt op [x, y] = case op of
MO_VS_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SDiv
MO_VS_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SRem
MO_VU_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_UDiv
MO_VU_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_URem
MO_VF_Add l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FAdd
MO_VF_Sub l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FSub
......
......@@ -610,6 +610,8 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_VS_Quot {} -> needLlvm
MO_VS_Rem {} -> needLlvm
MO_VS_Neg {} -> needLlvm
MO_VU_Quot {} -> needLlvm
MO_VU_Rem {} -> needLlvm
MO_VF_Insert {} -> needLlvm
MO_VF_Extract {} -> needLlvm
MO_VF_Add {} -> needLlvm
......
......@@ -1474,15 +1474,6 @@ typeNatMulTyFamNameKey = mkPreludeTyConUnique 163
typeNatExpTyFamNameKey = mkPreludeTyConUnique 164
typeNatLeqTyFamNameKey = mkPreludeTyConUnique 165
-- SIMD vector types (Unique keys)
floatX4PrimTyConKey, doubleX2PrimTyConKey, int32X4PrimTyConKey,
int64X2PrimTyConKey :: Unique
floatX4PrimTyConKey = mkPreludeTyConUnique 170
doubleX2PrimTyConKey = mkPreludeTyConUnique 171
int32X4PrimTyConKey = mkPreludeTyConUnique 172
int64X2PrimTyConKey = mkPreludeTyConUnique 173
ntTyConKey:: Unique
ntTyConKey = mkPreludeTyConUnique 174
coercibleTyConKey :: Unique
......@@ -1492,6 +1483,12 @@ coercibleTyConKey = mkPreludeTyConUnique 175
-- USES TyConUniques 200-299
-----------------------------------------------------
----------------------- SIMD ------------------------
-- USES TyConUniques 300-399
-----------------------------------------------------
#include "primop-vector-uniques.hs-incl"
unitTyConKey :: Unique
unitTyConKey = mkTupleTyConUnique BoxedTuple 0
\end{code}
......
......@@ -5,7 +5,7 @@
\begin{code}
module PrimOp (
PrimOp(..), allThePrimOps,
PrimOp(..), PrimOpVecCat(..), allThePrimOps,
primOpType, primOpSig,
primOpTag, maxPrimOpTag, primOpOcc,
......@@ -25,6 +25,7 @@ module PrimOp (
import TysPrim
import TysWiredIn
import CmmType
import Demand
import Var ( TyVar )
import OccName ( OccName, pprOccName, mkVarOccFS )
......@@ -64,6 +65,7 @@ primOpTag op = iBox (tagOf_PrimOp op)
-- supplies
-- tagOf_PrimOp :: PrimOp -> FastInt
#include "primop-tag.hs-incl"
tagOf_PrimOp _ = error "tagOf_PrimOp: unknown primop"
instance Eq PrimOp where
......@@ -82,6 +84,12 @@ instance Outputable PrimOp where
ppr op = pprPrimOp op
\end{code}
\begin{code}
-- | Element category of a SIMD vector primop. The vector primop
-- constructors carry this alongside the vector length and element width,
-- so a single constructor covers every vector type it is instantiated at.
data PrimOpVecCat = IntVec
| WordVec
| FloatVec
\end{code}
An @Enum@-derived list would be better; meanwhile... (ToDo)
\begin{code}
......@@ -173,6 +181,7 @@ else, notably a type, can be constructed) for each @PrimOp@.
\begin{code}
primOpInfo :: PrimOp -> PrimOpInfo
#include "primop-primop-info.hs-incl"
primOpInfo _ = error "primOpInfo: unknown primop"
\end{code}
Here are a load of comments from the old primOp info:
......
......@@ -76,11 +76,8 @@ module TysPrim(
-- * Any
anyTy, anyTyCon, anyTypeOfKind,
-- * SIMD
floatX4PrimTyCon, floatX4PrimTy,
doubleX2PrimTyCon, doubleX2PrimTy,
int32X4PrimTyCon, int32X4PrimTy,
int64X2PrimTyCon, int64X2PrimTy
-- * SIMD
#include "primop-vector-tys-exports.hs-incl"
) where
#include "HsVersions.h"
......@@ -144,10 +141,7 @@ primTyCons
, superKindTyCon
, anyKindTyCon
, floatX4PrimTyCon
, doubleX2PrimTyCon
, int32X4PrimTyCon
, int64X2PrimTyCon
#include "primop-vector-tycons.hs-incl"
]
mkPrimTc :: FastString -> Unique -> TyCon -> Name
......@@ -157,7 +151,7 @@ mkPrimTc fs unique tycon
(ATyCon tycon) -- Relevant TyCon
UserSyntax -- None are built-in syntax
charPrimTyConName, intPrimTyConName, int32PrimTyConName, int64PrimTyConName, wordPrimTyConName, word32PrimTyConName, word64PrimTyConName, addrPrimTyConName, floatPrimTyConName, doublePrimTyConName, statePrimTyConName, realWorldTyConName, arrayPrimTyConName, arrayArrayPrimTyConName, byteArrayPrimTyConName, mutableArrayPrimTyConName, mutableByteArrayPrimTyConName, mutableArrayArrayPrimTyConName, mutVarPrimTyConName, mVarPrimTyConName, tVarPrimTyConName, stablePtrPrimTyConName, stableNamePrimTyConName, bcoPrimTyConName, weakPrimTyConName, threadIdPrimTyConName, eqPrimTyConName, eqReprPrimTyConName, floatX4PrimTyConName, doubleX2PrimTyConName, int32X4PrimTyConName, int64X2PrimTyConName :: Name
charPrimTyConName, intPrimTyConName, int32PrimTyConName, int64PrimTyConName, wordPrimTyConName, word32PrimTyConName, word64PrimTyConName, addrPrimTyConName, floatPrimTyConName, doublePrimTyConName, statePrimTyConName, realWorldTyConName, arrayPrimTyConName, arrayArrayPrimTyConName, byteArrayPrimTyConName, mutableArrayPrimTyConName, mutableByteArrayPrimTyConName, mutableArrayArrayPrimTyConName, mutVarPrimTyConName, mVarPrimTyConName, tVarPrimTyConName, stablePtrPrimTyConName, stableNamePrimTyConName, bcoPrimTyConName, weakPrimTyConName, threadIdPrimTyConName, eqPrimTyConName, eqReprPrimTyConName :: Name
charPrimTyConName = mkPrimTc (fsLit "Char#") charPrimTyConKey charPrimTyCon
intPrimTyConName = mkPrimTc (fsLit "Int#") intPrimTyConKey intPrimTyCon
int32PrimTyConName = mkPrimTc (fsLit "Int32#") int32PrimTyConKey int32PrimTyCon
......@@ -186,10 +180,6 @@ stableNamePrimTyConName = mkPrimTc (fsLit "StableName#") stableNamePrimTyC
bcoPrimTyConName = mkPrimTc (fsLit "BCO#") bcoPrimTyConKey bcoPrimTyCon
weakPrimTyConName = mkPrimTc (fsLit "Weak#") weakPrimTyConKey weakPrimTyCon
threadIdPrimTyConName = mkPrimTc (fsLit "ThreadId#") threadIdPrimTyConKey threadIdPrimTyCon
floatX4PrimTyConName = mkPrimTc (fsLit "FloatX4#") floatX4PrimTyConKey floatX4PrimTyCon
doubleX2PrimTyConName = mkPrimTc (fsLit "DoubleX2#") doubleX2PrimTyConKey doubleX2PrimTyCon
int32X4PrimTyConName = mkPrimTc (fsLit "Int32X4#") int32X4PrimTyConKey int32X4PrimTyCon
int64X2PrimTyConName = mkPrimTc (fsLit "Int64X2#") int64X2PrimTyConKey int64X2PrimTyCon
\end{code}
%************************************************************************
......@@ -766,28 +756,10 @@ anyTypeOfKind kind = TyConApp anyTyCon [kind]
%************************************************************************
%* *
\subsection{SIMD vector type}
\subsection{SIMD vector types}
%* *