Add support for passing SSE vectors in registers.

This patch adds support for 6 XMM registers on x86-64 which overlap with the F
and D registers and may hold 128-bit wide SIMD vectors. Because there is not a
good way to attach type information to STG registers, we aggressively bitcast in
the LLVM back-end.
parent 18114408
...@@ -70,7 +70,9 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) ...@@ -70,7 +70,9 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
assign_regs assts (r:rs) regs | isVecType ty = vec assign_regs assts (r:rs) regs | isVecType ty = vec
| isFloatType ty = float | isFloatType ty = float
| otherwise = int | otherwise = int
where vec = (assts, (r:rs)) where vec = case (w, regs) of
(W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
_ -> (assts, (r:rs))
float = case (w, regs) of float = case (w, regs) of
(W32, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss)) (W32, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss))
(W32, (vs, f:fs, ds, ls, ss)) (W32, (vs, f:fs, ds, ls, ss))
......
...@@ -336,6 +336,9 @@ data GlobalReg ...@@ -336,6 +336,9 @@ data GlobalReg
| LongReg -- long int registers (64-bit, really) | LongReg -- long int registers (64-bit, really)
{-# UNPACK #-} !Int -- its number {-# UNPACK #-} !Int -- its number
| XmmReg -- 128-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
-- STG registers -- STG registers
| Sp -- Stack ptr; points to last occupied stack location. | Sp -- Stack ptr; points to last occupied stack location.
| SpLim -- Stack limit | SpLim -- Stack limit
...@@ -371,6 +374,7 @@ instance Eq GlobalReg where ...@@ -371,6 +374,7 @@ instance Eq GlobalReg where
FloatReg i == FloatReg j = i==j FloatReg i == FloatReg j = i==j
DoubleReg i == DoubleReg j = i==j DoubleReg i == DoubleReg j = i==j
LongReg i == LongReg j = i==j LongReg i == LongReg j = i==j
XmmReg i == XmmReg j = i==j
Sp == Sp = True Sp == Sp = True
SpLim == SpLim = True SpLim == SpLim = True
Hp == Hp = True Hp == Hp = True
...@@ -392,6 +396,7 @@ instance Ord GlobalReg where ...@@ -392,6 +396,7 @@ instance Ord GlobalReg where
compare (FloatReg i) (FloatReg j) = compare i j compare (FloatReg i) (FloatReg j) = compare i j
compare (DoubleReg i) (DoubleReg j) = compare i j compare (DoubleReg i) (DoubleReg j) = compare i j
compare (LongReg i) (LongReg j) = compare i j compare (LongReg i) (LongReg j) = compare i j
compare (XmmReg i) (XmmReg j) = compare i j
compare Sp Sp = EQ compare Sp Sp = EQ
compare SpLim SpLim = EQ compare SpLim SpLim = EQ
compare Hp Hp = EQ compare Hp Hp = EQ
...@@ -413,6 +418,8 @@ instance Ord GlobalReg where ...@@ -413,6 +418,8 @@ instance Ord GlobalReg where
compare _ (DoubleReg _) = GT compare _ (DoubleReg _) = GT
compare (LongReg _) _ = LT compare (LongReg _) _ = LT
compare _ (LongReg _) = GT compare _ (LongReg _) = GT
compare (XmmReg _) _ = LT
compare _ (XmmReg _) = GT
compare Sp _ = LT compare Sp _ = LT
compare _ Sp = GT compare _ Sp = GT
compare SpLim _ = LT compare SpLim _ = LT
...@@ -455,6 +462,8 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags ...@@ -455,6 +462,8 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
globalRegType _ (FloatReg _) = cmmFloat W32 globalRegType _ (FloatReg _) = cmmFloat W32
globalRegType _ (DoubleReg _) = cmmFloat W64 globalRegType _ (DoubleReg _) = cmmFloat W64
globalRegType _ (LongReg _) = cmmBits W64 globalRegType _ (LongReg _) = cmmBits W64
globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
globalRegType dflags Hp = gcWord dflags globalRegType dflags Hp = gcWord dflags
-- The initialiser for all -- The initialiser for all
-- dynamically allocated closures -- dynamically allocated closures
...@@ -465,4 +474,5 @@ isArgReg (VanillaReg {}) = True ...@@ -465,4 +474,5 @@ isArgReg (VanillaReg {}) = True
isArgReg (FloatReg {}) = True isArgReg (FloatReg {}) = True
isArgReg (DoubleReg {}) = True isArgReg (DoubleReg {}) = True
isArgReg (LongReg {}) = True isArgReg (LongReg {}) = True
isArgReg (XmmReg {}) = True
isArgReg _ = False isArgReg _ = False
...@@ -118,6 +118,10 @@ data MachOp ...@@ -118,6 +118,10 @@ data MachOp
| MO_VS_Rem Length Width | MO_VS_Rem Length Width
| MO_VS_Neg Length Width | MO_VS_Neg Length Width
-- Floting point vector element insertion and extraction operations
| MO_VF_Insert Length Width -- Insert scalar into vector
| MO_VF_Extract Length Width -- Extract scalar from vector
-- Floating point vector operations -- Floating point vector operations
| MO_VF_Add Length Width | MO_VF_Add Length Width
| MO_VF_Sub Length Width | MO_VF_Sub Length Width
...@@ -360,22 +364,25 @@ machOpResultType dflags mop tys = ...@@ -360,22 +364,25 @@ machOpResultType dflags mop tys =
MO_SF_Conv _ to -> cmmFloat to MO_SF_Conv _ to -> cmmFloat to
MO_FF_Conv _ to -> cmmFloat to MO_FF_Conv _ to -> cmmFloat to
MO_V_Insert {} -> ty1 MO_V_Insert l w -> cmmVec l (cmmBits w)
MO_V_Extract {} -> vecElemType ty1 MO_V_Extract _ w -> cmmBits w
MO_V_Add {} -> ty1 MO_V_Add l w -> cmmVec l (cmmBits w)
MO_V_Sub {} -> ty1 MO_V_Sub l w -> cmmVec l (cmmBits w)
MO_V_Mul {} -> ty1 MO_V_Mul l w -> cmmVec l (cmmBits w)
MO_VS_Quot {} -> ty1 MO_VS_Quot l w -> cmmVec l (cmmBits w)
MO_VS_Rem {} -> ty1 MO_VS_Rem l w -> cmmVec l (cmmBits w)
MO_VS_Neg {} -> ty1 MO_VS_Neg l w -> cmmVec l (cmmBits w)
MO_VF_Add {} -> ty1 MO_VF_Insert l w -> cmmVec l (cmmFloat w)
MO_VF_Sub {} -> ty1 MO_VF_Extract _ w -> cmmFloat w
MO_VF_Mul {} -> ty1
MO_VF_Quot {} -> ty1 MO_VF_Add l w -> cmmVec l (cmmFloat w)
MO_VF_Neg {} -> ty1 MO_VF_Sub l w -> cmmVec l (cmmFloat w)
MO_VF_Mul l w -> cmmVec l (cmmFloat w)
MO_VF_Quot l w -> cmmVec l (cmmFloat w)
MO_VF_Neg l w -> cmmVec l (cmmFloat w)
where where
(ty1:_) = tys (ty1:_) = tys
...@@ -443,8 +450,8 @@ machOpArgReps dflags op = ...@@ -443,8 +450,8 @@ machOpArgReps dflags op =
MO_FS_Conv from _ -> [from] MO_FS_Conv from _ -> [from]
MO_FF_Conv from _ -> [from] MO_FF_Conv from _ -> [from]
MO_V_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags] MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth dflags]
MO_V_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags] MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth dflags]
MO_V_Add _ r -> [r,r] MO_V_Add _ r -> [r,r]
MO_V_Sub _ r -> [r,r] MO_V_Sub _ r -> [r,r]
...@@ -454,6 +461,9 @@ machOpArgReps dflags op = ...@@ -454,6 +461,9 @@ machOpArgReps dflags op =
MO_VS_Rem _ r -> [r,r] MO_VS_Rem _ r -> [r,r]
MO_VS_Neg _ r -> [r] MO_VS_Neg _ r -> [r]
MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
MO_VF_Add _ r -> [r,r] MO_VF_Add _ r -> [r,r]
MO_VF_Sub _ r -> [r,r] MO_VF_Sub _ r -> [r,r]
MO_VF_Mul _ r -> [r,r] MO_VF_Mul _ r -> [r,r]
......
...@@ -661,6 +661,15 @@ pprMachOp_for_C mop = case mop of ...@@ -661,6 +661,15 @@ pprMachOp_for_C mop = case mop of
(panic $ "PprC.pprMachOp_for_C: MO_VS_Neg" (panic $ "PprC.pprMachOp_for_C: MO_VS_Neg"
++ " should have been handled earlier!") ++ " should have been handled earlier!")
MO_VF_Insert {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Insert")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Insert"
++ " should have been handled earlier!")
MO_VF_Extract {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Extract")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Extract"
++ " should have been handled earlier!")
MO_VF_Add {} -> pprTrace "offending mop:" MO_VF_Add {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Add") (ptext $ sLit "MO_VF_Add")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Add" (panic $ "PprC.pprMachOp_for_C: MO_VF_Add"
......
...@@ -255,6 +255,7 @@ pprGlobalReg gr ...@@ -255,6 +255,7 @@ pprGlobalReg gr
FloatReg n -> char 'F' <> int n FloatReg n -> char 'F' <> int n
DoubleReg n -> char 'D' <> int n DoubleReg n -> char 'D' <> int n
LongReg n -> char 'L' <> int n LongReg n -> char 'L' <> int n
XmmReg n -> ptext (sLit "XMM") <> int n
Sp -> ptext (sLit "Sp") Sp -> ptext (sLit "Sp")
SpLim -> ptext (sLit "SpLim") SpLim -> ptext (sLit "SpLim")
Hp -> ptext (sLit "Hp") Hp -> ptext (sLit "Hp")
......
...@@ -49,6 +49,13 @@ baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags ...@@ -49,6 +49,13 @@ baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags
baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags
baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags
baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")") baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")")
baseRegOffset dflags (XmmReg 1) = oFFSET_StgRegTable_rXMM1 dflags
baseRegOffset dflags (XmmReg 2) = oFFSET_StgRegTable_rXMM2 dflags
baseRegOffset dflags (XmmReg 3) = oFFSET_StgRegTable_rXMM3 dflags
baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags
baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags
baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags
baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags
baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags
baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags
......
...@@ -1183,8 +1183,11 @@ doVecPackOp maybe_pre_write_cast ty z es res = do ...@@ -1183,8 +1183,11 @@ doVecPackOp maybe_pre_write_cast ty z es res = do
vecPack src (e : es) i = do vecPack src (e : es) i = do
dst <- newTemp ty dst <- newTemp ty
emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid) if isFloatType (vecElemType ty)
[CmmReg (CmmLocal src), cast e, iLit]) then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
[CmmReg (CmmLocal src), cast e, iLit])
else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
[CmmReg (CmmLocal src), cast e, iLit])
vecPack dst es (i + 1) vecPack dst es (i + 1)
where where
-- vector indices are always 32-bits -- vector indices are always 32-bits
...@@ -1214,8 +1217,11 @@ doVecUnpackOp maybe_post_read_cast ty e res = ...@@ -1214,8 +1217,11 @@ doVecUnpackOp maybe_post_read_cast ty e res =
return () return ()
vecUnpack (r : rs) i = do vecUnpack (r : rs) i = do
emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid) if isFloatType (vecElemType ty)
[e, iLit])) then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
[e, iLit]))
else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
[e, iLit]))
vecUnpack rs (i + 1) vecUnpack rs (i + 1)
where where
-- vector indices are always 32-bits -- vector indices are always 32-bits
...@@ -1244,7 +1250,9 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do ...@@ -1244,7 +1250,9 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do
-- vector indices are always 32-bits -- vector indices are always 32-bits
let idx' :: CmmExpr let idx' :: CmmExpr
idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx] idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx']) if isFloatType (vecElemType ty)
then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
where where
cast :: CmmExpr -> CmmExpr cast :: CmmExpr -> CmmExpr
cast val = case maybe_pre_write_cast of cast val = case maybe_pre_write_cast of
......
...@@ -131,11 +131,12 @@ llvmFunArgs :: DynFlags -> LiveGlobalRegs -> [LlvmVar] ...@@ -131,11 +131,12 @@ llvmFunArgs :: DynFlags -> LiveGlobalRegs -> [LlvmVar]
llvmFunArgs dflags live = llvmFunArgs dflags live =
map (lmGlobalRegArg dflags) (filter isPassed (activeStgRegs platform)) map (lmGlobalRegArg dflags) (filter isPassed (activeStgRegs platform))
where platform = targetPlatform dflags where platform = targetPlatform dflags
isLive r = not (isFloat r) || r `elem` alwaysLive || r `elem` live isLive r = not (isSSE r) || r `elem` alwaysLive || r `elem` live
isPassed r = not (isFloat r) || isLive r isPassed r = not (isSSE r) || isLive r
isFloat (FloatReg _) = True isSSE (FloatReg _) = True
isFloat (DoubleReg _) = True isSSE (DoubleReg _) = True
isFloat _ = False isSSE (XmmReg _) = True
isSSE _ = False
-- | Llvm standard fun attributes -- | Llvm standard fun attributes
llvmStdFunAttrs :: [LlvmFuncAttr] llvmStdFunAttrs :: [LlvmFuncAttr]
......
...@@ -470,6 +470,7 @@ castVar dflags v t ...@@ -470,6 +470,7 @@ castVar dflags v t
(vt, _) | isInt vt && isPointer t -> LM_Inttoptr (vt, _) | isInt vt && isPointer t -> LM_Inttoptr
(vt, _) | isPointer vt && isInt t -> LM_Ptrtoint (vt, _) | isPointer vt && isInt t -> LM_Ptrtoint
(vt, _) | isPointer vt && isPointer t -> LM_Bitcast (vt, _) | isPointer vt && isPointer t -> LM_Bitcast
(vt, _) | isVector vt && isVector t -> LM_Bitcast
(vt, _) -> panic $ "castVars: Can't cast this type (" (vt, _) -> panic $ "castVars: Can't cast this type ("
++ show vt ++ ") to (" ++ show t ++ ")" ++ show vt ++ ") to (" ++ show t ++ ")"
...@@ -582,16 +583,21 @@ genAssign env reg val = do ...@@ -582,16 +583,21 @@ genAssign env reg val = do
let stmts = stmts1 `appOL` stmts2 let stmts = stmts1 `appOL` stmts2
let ty = (pLower . getVarType) vreg let ty = (pLower . getVarType) vreg
case isPointer ty && getVarType vval == llvmWord dflags of case ty of
-- Some registers are pointer types, so need to cast value to pointer -- Some registers are pointer types, so need to cast value to pointer
True -> do LMPointer _ | getVarType vval == llvmWord dflags -> do
(v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty (v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
let s2 = Store v vreg let s2 = Store v vreg
return (env2, stmts `snocOL` s1 `snocOL` s2, top1 ++ top2) return (env2, stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
False -> do LMVector _ _ -> do
let s1 = Store vval vreg (v, s1) <- doExpr ty $ Cast LM_Bitcast vval ty
return (env2, stmts `snocOL` s1, top1 ++ top2) let s2 = Store v vreg
return (env2, stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
_ -> do
let s1 = Store vval vreg
return (env2, stmts `snocOL` s1, top1 ++ top2)
-- | CmmStore operation -- | CmmStore operation
...@@ -884,14 +890,14 @@ genMachOp env _ op [x] = case op of ...@@ -884,14 +890,14 @@ genMachOp env _ op [x] = case op of
vecty = LMVector len ty vecty = LMVector len ty
all0 = LMIntLit (-0) ty all0 = LMIntLit (-0) ty
all0s = LMLitVar $ LMVectorLit (replicate len all0) all0s = LMLitVar $ LMVectorLit (replicate len all0)
in negate vecty all0s LM_MO_Sub in negateVec vecty all0s LM_MO_Sub
MO_VF_Neg len w -> MO_VF_Neg len w ->
let ty = widthToLlvmFloat w let ty = widthToLlvmFloat w
vecty = LMVector len ty vecty = LMVector len ty
all0 = LMFloatLit (-0) ty all0 = LMFloatLit (-0) ty
all0s = LMLitVar $ LMVectorLit (replicate len all0) all0s = LMLitVar $ LMVectorLit (replicate len all0)
in negate vecty all0s LM_MO_FSub in negateVec vecty all0s LM_MO_FSub
-- Handle unsupported cases explicitly so we get a warning -- Handle unsupported cases explicitly so we get a warning
-- of missing case when new MachOps added -- of missing case when new MachOps added
...@@ -943,6 +949,9 @@ genMachOp env _ op [x] = case op of ...@@ -943,6 +949,9 @@ genMachOp env _ op [x] = case op of
MO_VS_Quot _ _ -> panicOp MO_VS_Quot _ _ -> panicOp
MO_VS_Rem _ _ -> panicOp MO_VS_Rem _ _ -> panicOp
MO_VF_Insert _ _ -> panicOp
MO_VF_Extract _ _ -> panicOp
MO_VF_Add _ _ -> panicOp MO_VF_Add _ _ -> panicOp
MO_VF_Sub _ _ -> panicOp MO_VF_Sub _ _ -> panicOp
...@@ -957,6 +966,12 @@ genMachOp env _ op [x] = case op of ...@@ -957,6 +966,12 @@ genMachOp env _ op [x] = case op of
(v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx (v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx
return (env', v1, stmts `snocOL` s1, top) return (env', v1, stmts `snocOL` s1, top)
negateVec ty v2 negOp = do
(env', vx, stmts1, top) <- exprToVar env x
([vx'], stmts2) <- castVars dflags [(vx, ty)]
(v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx'
return (env', v1, stmts1 `appOL` stmts2 `snocOL` s1, top)
fiConv ty convOp = do fiConv ty convOp = do
(env', vx, stmts, top) <- exprToVar env x (env', vx, stmts, top) <- exprToVar env x
(v1, s1) <- doExpr ty $ Cast convOp vx ty (v1, s1) <- doExpr ty $ Cast convOp vx ty
...@@ -1014,22 +1029,50 @@ genMachOp_fast env opt op r n e ...@@ -1014,22 +1029,50 @@ genMachOp_fast env opt op r n e
genMachOp_slow :: LlvmEnv -> EOption -> MachOp -> [CmmExpr] -> UniqSM ExprData genMachOp_slow :: LlvmEnv -> EOption -> MachOp -> [CmmExpr] -> UniqSM ExprData
-- Element extraction -- Element extraction
genMachOp_slow env _ (MO_V_Extract {}) [val, idx] = do genMachOp_slow env _ (MO_V_Extract l w) [val, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, vidx, stmts2, top2) <- exprToVar env1 idx
([vval'], stmts3) <- castVars dflags [(vval, LMVector l ty)]
(v1, s1) <- doExpr ty $ Extract vval' vidx
return (env2, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1, top1 ++ top2)
where
dflags = getDflags env
ty = widthToLlvmInt w
genMachOp_slow env _ (MO_VF_Extract l w) [val, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val (env1, vval, stmts1, top1) <- exprToVar env val
(env2, vidx, stmts2, top2) <- exprToVar env1 idx (env2, vidx, stmts2, top2) <- exprToVar env1 idx
let (LMVector _ ty) = getVarType vval ([vval'], stmts3) <- castVars dflags [(vval, LMVector l ty)]
(v1, s1) <- doExpr ty $ Extract vval vidx (v1, s1) <- doExpr ty $ Extract vval' vidx
return (env2, v1, stmts1 `appOL` stmts2 `snocOL` s1, top1 ++ top2) return (env2, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1, top1 ++ top2)
where
dflags = getDflags env
ty = widthToLlvmFloat w
-- Element insertion -- Element insertion
genMachOp_slow env _ (MO_V_Insert {}) [val, elt, idx] = do genMachOp_slow env _ (MO_V_Insert l w) [val, elt, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, velt, stmts2, top2) <- exprToVar env1 elt
(env3, vidx, stmts3, top3) <- exprToVar env2 idx
([vval'], stmts4) <- castVars dflags [(vval, ty)]
(v1, s1) <- doExpr ty $ Insert vval' velt vidx
return (env3, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL` s1,
top1 ++ top2 ++ top3)
where
dflags = getDflags env
ty = LMVector l (widthToLlvmInt w)
genMachOp_slow env _ (MO_VF_Insert l w) [val, elt, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val (env1, vval, stmts1, top1) <- exprToVar env val
(env2, velt, stmts2, top2) <- exprToVar env1 elt (env2, velt, stmts2, top2) <- exprToVar env1 elt
(env3, vidx, stmts3, top3) <- exprToVar env2 idx (env3, vidx, stmts3, top3) <- exprToVar env2 idx
let ty = getVarType vval ([vval'], stmts4) <- castVars dflags [(vval, ty)]
(v1, s1) <- doExpr ty $ Insert vval velt vidx (v1, s1) <- doExpr ty $ Insert vval' velt vidx
return (env3, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1, return (env3, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL` s1,
top1 ++ top2 ++ top3) top1 ++ top2 ++ top3)
where
dflags = getDflags env
ty = LMVector l (widthToLlvmFloat w)
-- Binary MachOp -- Binary MachOp
genMachOp_slow env opt op [x, y] = case op of genMachOp_slow env opt op [x, y] = case op of
...@@ -1080,17 +1123,17 @@ genMachOp_slow env opt op [x, y] = case op of ...@@ -1080,17 +1123,17 @@ genMachOp_slow env opt op [x, y] = case op of
MO_U_Shr _ -> genBinMach LM_MO_LShr MO_U_Shr _ -> genBinMach LM_MO_LShr
MO_S_Shr _ -> genBinMach LM_MO_AShr MO_S_Shr _ -> genBinMach LM_MO_AShr
MO_V_Add _ _ -> genBinMach LM_MO_Add MO_V_Add l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Add
MO_V_Sub _ _ -> genBinMach LM_MO_Sub MO_V_Sub l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Sub
MO_V_Mul _ _ -> genBinMach LM_MO_Mul MO_V_Mul l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Mul
MO_VS_Quot _ _ -> genBinMach LM_MO_SDiv MO_VS_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SDiv
MO_VS_Rem _ _ -> genBinMach LM_MO_SRem MO_VS_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SRem
MO_VF_Add _ _ -> genBinMach LM_MO_FAdd MO_VF_Add l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FAdd
MO_VF_Sub _ _ -> genBinMach LM_MO_FSub MO_VF_Sub l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FSub
MO_VF_Mul _ _ -> genBinMach LM_MO_FMul MO_VF_Mul l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FMul
MO_VF_Quot _ _ -> genBinMach LM_MO_FDiv MO_VF_Quot l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FDiv
MO_Not _ -> panicOp MO_Not _ -> panicOp
MO_S_Neg _ -> panicOp MO_S_Neg _ -> panicOp
...@@ -1107,6 +1150,9 @@ genMachOp_slow env opt op [x, y] = case op of ...@@ -1107,6 +1150,9 @@ genMachOp_slow env opt op [x, y] = case op of
MO_VS_Neg {} -> panicOp MO_VS_Neg {} -> panicOp
MO_VF_Insert {} -> panicOp
MO_VF_Extract {} -> panicOp
MO_VF_Neg {} -> panicOp MO_VF_Neg {} -> panicOp
where where
...@@ -1134,6 +1180,14 @@ genMachOp_slow env opt op [x, y] = case op of ...@@ -1134,6 +1180,14 @@ genMachOp_slow env opt op [x, y] = case op of
`snocOL` dy `snocOL` s1 `snocOL` dy `snocOL` s1
return (env2, v1, allStmts, top1 ++ top2) return (env2, v1, allStmts, top1 ++ top2)
binCastLlvmOp ty binOp = do
(env1, vx, stmts1, top1) <- exprToVar env x
(env2, vy, stmts2, top2) <- exprToVar env1 y
([vx', vy'], stmts3) <- castVars dflags [(vx, ty), (vy, ty)]
(v1, s1) <- doExpr ty $ binOp vx' vy'
return (env2, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1,
top1 ++ top2)
-- | Need to use EOption here as Cmm expects word size results from -- | Need to use EOption here as Cmm expects word size results from
-- comparisons while LLVM return i1. Need to extend to llvmWord type -- comparisons while LLVM return i1. Need to extend to llvmWord type
-- if expected. See Note [Literals and branch conditions]. -- if expected. See Note [Literals and branch conditions].
...@@ -1152,6 +1206,8 @@ genMachOp_slow env opt op [x, y] = case op of ...@@ -1152,6 +1206,8 @@ genMachOp_slow env opt op [x, y] = case op of
genBinMach op = binLlvmOp getVarType (LlvmOp op) genBinMach op = binLlvmOp getVarType (LlvmOp op)
genCastBinMach ty op = binCastLlvmOp ty (LlvmOp op)
-- | Detect if overflow will occur in signed multiply of the two -- | Detect if overflow will occur in signed multiply of the two
-- CmmExpr's. This is the LLVM assembly equivalent of the NCG -- CmmExpr's. This is the LLVM assembly equivalent of the NCG
-- implementation. Its much longer due to type information/safety. -- implementation. Its much longer due to type information/safety.
...@@ -1427,10 +1483,11 @@ funEpilogue env live = do ...@@ -1427,10 +1483,11 @@ funEpilogue env live = do
dflags = getDflags env dflags = getDflags env
platform = targetPlatform dflags platform = targetPlatform dflags
isLive r = r `elem` alwaysLive || r `elem` live isLive r = r `elem` alwaysLive || r `elem` live
isPassed r = not (isFloat r) || isLive r isPassed r = not (isSSE r) || isLive r
isFloat (FloatReg _) = True isSSE (FloatReg _) = True
isFloat (DoubleReg _) = True isSSE (DoubleReg _) = True
isFloat _ = False isSSE (XmmReg _) = True
isSSE _ = False
loadExpr r | isLive r = do loadExpr r | isLive r = do
let reg = lmGlobalRegVar dflags r let reg = lmGlobalRegVar dflags r
(v,s) <- doExpr (pLower $ getVarType reg) $ Load reg (v,s) <- doExpr (pLower $ getVarType reg) $ Load reg
......
...@@ -55,6 +55,12 @@ lmGlobalReg dflags suf reg ...@@ -55,6 +55,12 @@ lmGlobalReg dflags suf reg
DoubleReg 4 -> doubleGlobal $ "D4" ++ suf DoubleReg 4 -> doubleGlobal $ "D4" ++ suf
DoubleReg 5 -> doubleGlobal $ "D5" ++ suf DoubleReg 5 -> doubleGlobal $ "D5" ++ suf
DoubleReg 6 -> doubleGlobal $ "D6" ++ suf DoubleReg 6 -> doubleGlobal $ "D6" ++ suf
XmmReg 1 -> xmmGlobal $ "XMM1" ++ suf
XmmReg 2 -> xmmGlobal $ "XMM2" ++ suf
XmmReg 3 -> xmmGlobal $ "XMM3" ++ suf
XmmReg 4 -> xmmGlobal $ "XMM4" ++ suf
XmmReg 5 -> xmmGlobal $ "XMM5" ++ suf
XmmReg 6 -> xmmGlobal $ "XMM6" ++ suf
_other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg) _other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg)
++ ") not supported!" ++ ") not supported!"
-- LongReg, HpLim, CCSS, CurrentTSO, CurrentNusery, HpAlloc -- LongReg, HpLim, CCSS, CurrentTSO, CurrentNusery, HpAlloc
...@@ -64,6 +70,7 @@ lmGlobalReg dflags suf reg ...@@ -64,6 +70,7 @@ lmGlobalReg dflags suf reg
ptrGlobal name = LMNLocalVar (fsLit name) (llvmWordPtr dflags) ptrGlobal name = LMNLocalVar (fsLit name) (llvmWordPtr dflags)
floatGlobal name = LMNLocalVar (fsLit name) LMFloat floatGlobal name = LMNLocalVar (fsLit name) LMFloat
doubleGlobal name = LMNLocalVar (fsLit name) LMDouble doubleGlobal name = LMNLocalVar (fsLit name) LMDouble
xmmGlobal name = LMNLocalVar (fsLit name) (LMVector 4 (LMInt 32))
-- | A list of STG Registers that should always be considered alive -- | A list of STG Registers that should always be considered alive
alwaysLive :: [GlobalReg] alwaysLive :: [GlobalReg]
......
...@@ -602,19 +602,21 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps ...@@ -602,19 +602,21 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_FS_Conv from to -> coerceFP2Int from to x MO_FS_Conv from to -> coerceFP2Int from to x
MO_SF_Conv from to -> coerceInt2FP from to x MO_SF_Conv from to -> coerceInt2FP from to x
MO_V_Insert {} -> needLlvm MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm MO_V_Extract {} -> needLlvm
MO_V_Add {} -> needLlvm MO_V_Add {} -> needLlvm
MO_V_Sub {} -> needLlvm MO_V_Sub {} -> needLlvm
MO_V_Mul {} -> needLlvm MO_V_Mul {} -> needLlvm
MO_VS_Quot {} -> needLlvm MO_VS_Quot {} -> needLlvm
MO_VS_Rem {} -> needLlvm MO_VS_Rem {} -> needLlvm
MO_VS_Neg {} -> needLlvm MO_VS_Neg {} -> needLlvm
MO_VF_Add {} -> needLlvm MO_VF_Insert {} -> needLlvm
MO_VF_Sub {} -> needLlvm MO_VF_Extract {} -> needLlvm
MO_VF_Mul {} -> needLlvm MO_VF_Add {} -> needLlvm
MO_VF_Quot {} -> needLlvm MO_VF_Sub {} -> needLlvm
MO_VF_Neg {} -> needLlvm MO_VF_Mul {} -> needLlvm
MO_VF_Quot {} -> needLlvm
MO_VF_Neg {} -> needLlvm
_other -> pprPanic "getRegister" (pprMachOp mop) _other -> pprPanic "getRegister" (pprMachOp mop)
where where
...@@ -708,19 +710,21 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps ...@@ -708,19 +710,21 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_U_Shr rep -> shift_code rep SHR x y {-False-} MO_U_Shr rep -> shift_code rep SHR x y {-False-}
MO_S_Shr rep -> shift_code rep SAR x y {-False-} MO_S_Shr rep -> shift_code rep SAR x y {-False-}
MO_V_Insert {} -> needLlvm MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm MO_V_Extract {} -> needLlvm
MO_V_Add {} -> needLlvm MO_V_Add {} -> needLlvm
MO_V_Sub {} -> needLlvm MO_V_Sub {} -> needLlvm
MO_V_Mul {} -> needLlvm MO_V_Mul {} -> needLlvm
MO_VS_Quot {} -> needLlvm MO_VS_Quot {} -> needLlvm
MO_VS_Rem {} -> needLlvm MO_VS_Rem {} -> needLlvm
MO_VS_Neg {} -> needLlvm MO_VS_Neg {} -> needLlvm