Add support for passing SSE vectors in registers.

This patch adds support for 6 XMM registers on x86-64 which overlap with the F
and D registers and may hold 128-bit wide SIMD vectors. Because there is not a
good way to attach type information to STG registers, we aggressively bitcast in
the LLVM back-end.
parent 18114408
......@@ -70,7 +70,9 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
assign_regs assts (r:rs) regs | isVecType ty = vec
| isFloatType ty = float
| otherwise = int
where vec = (assts, (r:rs))
where vec = case (w, regs) of
(W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
_ -> (assts, (r:rs))
float = case (w, regs) of
(W32, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss))
(W32, (vs, f:fs, ds, ls, ss))
......
......@@ -336,6 +336,9 @@ data GlobalReg
| LongReg -- long int registers (64-bit, really)
{-# UNPACK #-} !Int -- its number
| XmmReg -- 128-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
-- STG registers
| Sp -- Stack ptr; points to last occupied stack location.
| SpLim -- Stack limit
......@@ -371,6 +374,7 @@ instance Eq GlobalReg where
FloatReg i == FloatReg j = i==j
DoubleReg i == DoubleReg j = i==j
LongReg i == LongReg j = i==j
XmmReg i == XmmReg j = i==j
Sp == Sp = True
SpLim == SpLim = True
Hp == Hp = True
......@@ -392,6 +396,7 @@ instance Ord GlobalReg where
compare (FloatReg i) (FloatReg j) = compare i j
compare (DoubleReg i) (DoubleReg j) = compare i j
compare (LongReg i) (LongReg j) = compare i j
compare (XmmReg i) (XmmReg j) = compare i j
compare Sp Sp = EQ
compare SpLim SpLim = EQ
compare Hp Hp = EQ
......@@ -413,6 +418,8 @@ instance Ord GlobalReg where
compare _ (DoubleReg _) = GT
compare (LongReg _) _ = LT
compare _ (LongReg _) = GT
compare (XmmReg _) _ = LT
compare _ (XmmReg _) = GT
compare Sp _ = LT
compare _ Sp = GT
compare SpLim _ = LT
......@@ -455,6 +462,8 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
globalRegType _ (FloatReg _) = cmmFloat W32
globalRegType _ (DoubleReg _) = cmmFloat W64
globalRegType _ (LongReg _) = cmmBits W64
globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
globalRegType dflags Hp = gcWord dflags
-- The initialiser for all
-- dynamically allocated closures
......@@ -465,4 +474,5 @@ isArgReg (VanillaReg {}) = True
isArgReg (FloatReg {}) = True
isArgReg (DoubleReg {}) = True
isArgReg (LongReg {}) = True
isArgReg (XmmReg {}) = True
isArgReg _ = False
......@@ -118,6 +118,10 @@ data MachOp
| MO_VS_Rem Length Width
| MO_VS_Neg Length Width
-- Floting point vector element insertion and extraction operations
| MO_VF_Insert Length Width -- Insert scalar into vector
| MO_VF_Extract Length Width -- Extract scalar from vector
-- Floating point vector operations
| MO_VF_Add Length Width
| MO_VF_Sub Length Width
......@@ -360,22 +364,25 @@ machOpResultType dflags mop tys =
MO_SF_Conv _ to -> cmmFloat to
MO_FF_Conv _ to -> cmmFloat to
MO_V_Insert {} -> ty1
MO_V_Extract {} -> vecElemType ty1
MO_V_Add {} -> ty1
MO_V_Sub {} -> ty1
MO_V_Mul {} -> ty1
MO_VS_Quot {} -> ty1
MO_VS_Rem {} -> ty1
MO_VS_Neg {} -> ty1
MO_VF_Add {} -> ty1
MO_VF_Sub {} -> ty1
MO_VF_Mul {} -> ty1
MO_VF_Quot {} -> ty1
MO_VF_Neg {} -> ty1
MO_V_Insert l w -> cmmVec l (cmmBits w)
MO_V_Extract _ w -> cmmBits w
MO_V_Add l w -> cmmVec l (cmmBits w)
MO_V_Sub l w -> cmmVec l (cmmBits w)
MO_V_Mul l w -> cmmVec l (cmmBits w)
MO_VS_Quot l w -> cmmVec l (cmmBits w)
MO_VS_Rem l w -> cmmVec l (cmmBits w)
MO_VS_Neg l w -> cmmVec l (cmmBits w)
MO_VF_Insert l w -> cmmVec l (cmmFloat w)
MO_VF_Extract _ w -> cmmFloat w
MO_VF_Add l w -> cmmVec l (cmmFloat w)
MO_VF_Sub l w -> cmmVec l (cmmFloat w)
MO_VF_Mul l w -> cmmVec l (cmmFloat w)
MO_VF_Quot l w -> cmmVec l (cmmFloat w)
MO_VF_Neg l w -> cmmVec l (cmmFloat w)
where
(ty1:_) = tys
......@@ -443,8 +450,8 @@ machOpArgReps dflags op =
MO_FS_Conv from _ -> [from]
MO_FF_Conv from _ -> [from]
MO_V_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
MO_V_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth dflags]
MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth dflags]
MO_V_Add _ r -> [r,r]
MO_V_Sub _ r -> [r,r]
......@@ -454,6 +461,9 @@ machOpArgReps dflags op =
MO_VS_Rem _ r -> [r,r]
MO_VS_Neg _ r -> [r]
MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
MO_VF_Add _ r -> [r,r]
MO_VF_Sub _ r -> [r,r]
MO_VF_Mul _ r -> [r,r]
......
......@@ -661,6 +661,15 @@ pprMachOp_for_C mop = case mop of
(panic $ "PprC.pprMachOp_for_C: MO_VS_Neg"
++ " should have been handled earlier!")
MO_VF_Insert {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Insert")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Insert"
++ " should have been handled earlier!")
MO_VF_Extract {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Extract")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Extract"
++ " should have been handled earlier!")
MO_VF_Add {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Add")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Add"
......
......@@ -255,6 +255,7 @@ pprGlobalReg gr
FloatReg n -> char 'F' <> int n
DoubleReg n -> char 'D' <> int n
LongReg n -> char 'L' <> int n
XmmReg n -> ptext (sLit "XMM") <> int n
Sp -> ptext (sLit "Sp")
SpLim -> ptext (sLit "SpLim")
Hp -> ptext (sLit "Hp")
......
......@@ -49,6 +49,13 @@ baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags
baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags
baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags
baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")")
baseRegOffset dflags (XmmReg 1) = oFFSET_StgRegTable_rXMM1 dflags
baseRegOffset dflags (XmmReg 2) = oFFSET_StgRegTable_rXMM2 dflags
baseRegOffset dflags (XmmReg 3) = oFFSET_StgRegTable_rXMM3 dflags
baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags
baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags
baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags
baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags
baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags
baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags
......
......@@ -1183,8 +1183,11 @@ doVecPackOp maybe_pre_write_cast ty z es res = do
vecPack src (e : es) i = do
dst <- newTemp ty
emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
[CmmReg (CmmLocal src), cast e, iLit])
if isFloatType (vecElemType ty)
then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
[CmmReg (CmmLocal src), cast e, iLit])
else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
[CmmReg (CmmLocal src), cast e, iLit])
vecPack dst es (i + 1)
where
-- vector indices are always 32-bits
......@@ -1214,8 +1217,11 @@ doVecUnpackOp maybe_post_read_cast ty e res =
return ()
vecUnpack (r : rs) i = do
emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
[e, iLit]))
if isFloatType (vecElemType ty)
then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
[e, iLit]))
else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
[e, iLit]))
vecUnpack rs (i + 1)
where
-- vector indices are always 32-bits
......@@ -1244,7 +1250,9 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do
-- vector indices are always 32-bits
let idx' :: CmmExpr
idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
if isFloatType (vecElemType ty)
then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
where
cast :: CmmExpr -> CmmExpr
cast val = case maybe_pre_write_cast of
......
......@@ -131,11 +131,12 @@ llvmFunArgs :: DynFlags -> LiveGlobalRegs -> [LlvmVar]
llvmFunArgs dflags live =
map (lmGlobalRegArg dflags) (filter isPassed (activeStgRegs platform))
where platform = targetPlatform dflags
isLive r = not (isFloat r) || r `elem` alwaysLive || r `elem` live
isPassed r = not (isFloat r) || isLive r
isFloat (FloatReg _) = True
isFloat (DoubleReg _) = True
isFloat _ = False
isLive r = not (isSSE r) || r `elem` alwaysLive || r `elem` live
isPassed r = not (isSSE r) || isLive r
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
isSSE _ = False
-- | Llvm standard fun attributes
llvmStdFunAttrs :: [LlvmFuncAttr]
......
......@@ -470,6 +470,7 @@ castVar dflags v t
(vt, _) | isInt vt && isPointer t -> LM_Inttoptr
(vt, _) | isPointer vt && isInt t -> LM_Ptrtoint
(vt, _) | isPointer vt && isPointer t -> LM_Bitcast
(vt, _) | isVector vt && isVector t -> LM_Bitcast
(vt, _) -> panic $ "castVars: Can't cast this type ("
++ show vt ++ ") to (" ++ show t ++ ")"
......@@ -582,16 +583,21 @@ genAssign env reg val = do
let stmts = stmts1 `appOL` stmts2
let ty = (pLower . getVarType) vreg
case isPointer ty && getVarType vval == llvmWord dflags of
-- Some registers are pointer types, so need to cast value to pointer
True -> do
(v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
let s2 = Store v vreg
return (env2, stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
case ty of
-- Some registers are pointer types, so need to cast value to pointer
LMPointer _ | getVarType vval == llvmWord dflags -> do
(v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
let s2 = Store v vreg
return (env2, stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
False -> do
let s1 = Store vval vreg
return (env2, stmts `snocOL` s1, top1 ++ top2)
LMVector _ _ -> do
(v, s1) <- doExpr ty $ Cast LM_Bitcast vval ty
let s2 = Store v vreg
return (env2, stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
_ -> do
let s1 = Store vval vreg
return (env2, stmts `snocOL` s1, top1 ++ top2)
-- | CmmStore operation
......@@ -884,14 +890,14 @@ genMachOp env _ op [x] = case op of
vecty = LMVector len ty
all0 = LMIntLit (-0) ty
all0s = LMLitVar $ LMVectorLit (replicate len all0)
in negate vecty all0s LM_MO_Sub
in negateVec vecty all0s LM_MO_Sub
MO_VF_Neg len w ->
let ty = widthToLlvmFloat w
vecty = LMVector len ty
all0 = LMFloatLit (-0) ty
all0s = LMLitVar $ LMVectorLit (replicate len all0)
in negate vecty all0s LM_MO_FSub
in negateVec vecty all0s LM_MO_FSub
-- Handle unsupported cases explicitly so we get a warning
-- of missing case when new MachOps added
......@@ -943,6 +949,9 @@ genMachOp env _ op [x] = case op of
MO_VS_Quot _ _ -> panicOp
MO_VS_Rem _ _ -> panicOp
MO_VF_Insert _ _ -> panicOp
MO_VF_Extract _ _ -> panicOp
MO_VF_Add _ _ -> panicOp
MO_VF_Sub _ _ -> panicOp
......@@ -957,6 +966,12 @@ genMachOp env _ op [x] = case op of
(v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx
return (env', v1, stmts `snocOL` s1, top)
negateVec ty v2 negOp = do
(env', vx, stmts1, top) <- exprToVar env x
([vx'], stmts2) <- castVars dflags [(vx, ty)]
(v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx'
return (env', v1, stmts1 `appOL` stmts2 `snocOL` s1, top)
fiConv ty convOp = do
(env', vx, stmts, top) <- exprToVar env x
(v1, s1) <- doExpr ty $ Cast convOp vx ty
......@@ -1014,22 +1029,50 @@ genMachOp_fast env opt op r n e
genMachOp_slow :: LlvmEnv -> EOption -> MachOp -> [CmmExpr] -> UniqSM ExprData
-- Element extraction
genMachOp_slow env _ (MO_V_Extract {}) [val, idx] = do
genMachOp_slow env _ (MO_V_Extract l w) [val, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, vidx, stmts2, top2) <- exprToVar env1 idx
([vval'], stmts3) <- castVars dflags [(vval, LMVector l ty)]
(v1, s1) <- doExpr ty $ Extract vval' vidx
return (env2, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1, top1 ++ top2)
where
dflags = getDflags env
ty = widthToLlvmInt w
genMachOp_slow env _ (MO_VF_Extract l w) [val, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, vidx, stmts2, top2) <- exprToVar env1 idx
let (LMVector _ ty) = getVarType vval
(v1, s1) <- doExpr ty $ Extract vval vidx
return (env2, v1, stmts1 `appOL` stmts2 `snocOL` s1, top1 ++ top2)
([vval'], stmts3) <- castVars dflags [(vval, LMVector l ty)]
(v1, s1) <- doExpr ty $ Extract vval' vidx
return (env2, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1, top1 ++ top2)
where
dflags = getDflags env
ty = widthToLlvmFloat w
-- Element insertion
genMachOp_slow env _ (MO_V_Insert {}) [val, elt, idx] = do
genMachOp_slow env _ (MO_V_Insert l w) [val, elt, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, velt, stmts2, top2) <- exprToVar env1 elt
(env3, vidx, stmts3, top3) <- exprToVar env2 idx
([vval'], stmts4) <- castVars dflags [(vval, ty)]
(v1, s1) <- doExpr ty $ Insert vval' velt vidx
return (env3, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL` s1,
top1 ++ top2 ++ top3)
where
dflags = getDflags env
ty = LMVector l (widthToLlvmInt w)
genMachOp_slow env _ (MO_VF_Insert l w) [val, elt, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, velt, stmts2, top2) <- exprToVar env1 elt
(env3, vidx, stmts3, top3) <- exprToVar env2 idx
let ty = getVarType vval
(v1, s1) <- doExpr ty $ Insert vval velt vidx
return (env3, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1,
([vval'], stmts4) <- castVars dflags [(vval, ty)]
(v1, s1) <- doExpr ty $ Insert vval' velt vidx
return (env3, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL` s1,
top1 ++ top2 ++ top3)
where
dflags = getDflags env
ty = LMVector l (widthToLlvmFloat w)
-- Binary MachOp
genMachOp_slow env opt op [x, y] = case op of
......@@ -1080,17 +1123,17 @@ genMachOp_slow env opt op [x, y] = case op of
MO_U_Shr _ -> genBinMach LM_MO_LShr
MO_S_Shr _ -> genBinMach LM_MO_AShr
MO_V_Add _ _ -> genBinMach LM_MO_Add
MO_V_Sub _ _ -> genBinMach LM_MO_Sub
MO_V_Mul _ _ -> genBinMach LM_MO_Mul
MO_V_Add l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Add
MO_V_Sub l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Sub
MO_V_Mul l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Mul
MO_VS_Quot _ _ -> genBinMach LM_MO_SDiv
MO_VS_Rem _ _ -> genBinMach LM_MO_SRem
MO_VS_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SDiv
MO_VS_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SRem
MO_VF_Add _ _ -> genBinMach LM_MO_FAdd
MO_VF_Sub _ _ -> genBinMach LM_MO_FSub
MO_VF_Mul _ _ -> genBinMach LM_MO_FMul
MO_VF_Quot _ _ -> genBinMach LM_MO_FDiv
MO_VF_Add l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FAdd
MO_VF_Sub l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FSub
MO_VF_Mul l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FMul
MO_VF_Quot l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FDiv
MO_Not _ -> panicOp
MO_S_Neg _ -> panicOp
......@@ -1107,6 +1150,9 @@ genMachOp_slow env opt op [x, y] = case op of
MO_VS_Neg {} -> panicOp
MO_VF_Insert {} -> panicOp
MO_VF_Extract {} -> panicOp
MO_VF_Neg {} -> panicOp
where
......@@ -1134,6 +1180,14 @@ genMachOp_slow env opt op [x, y] = case op of
`snocOL` dy `snocOL` s1
return (env2, v1, allStmts, top1 ++ top2)
binCastLlvmOp ty binOp = do
(env1, vx, stmts1, top1) <- exprToVar env x
(env2, vy, stmts2, top2) <- exprToVar env1 y
([vx', vy'], stmts3) <- castVars dflags [(vx, ty), (vy, ty)]
(v1, s1) <- doExpr ty $ binOp vx' vy'
return (env2, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1,
top1 ++ top2)
-- | Need to use EOption here as Cmm expects word size results from
-- comparisons while LLVM return i1. Need to extend to llvmWord type
-- if expected. See Note [Literals and branch conditions].
......@@ -1152,6 +1206,8 @@ genMachOp_slow env opt op [x, y] = case op of
genBinMach op = binLlvmOp getVarType (LlvmOp op)
genCastBinMach ty op = binCastLlvmOp ty (LlvmOp op)
-- | Detect if overflow will occur in signed multiply of the two
-- CmmExpr's. This is the LLVM assembly equivalent of the NCG
-- implementation. Its much longer due to type information/safety.
......@@ -1427,10 +1483,11 @@ funEpilogue env live = do
dflags = getDflags env
platform = targetPlatform dflags
isLive r = r `elem` alwaysLive || r `elem` live
isPassed r = not (isFloat r) || isLive r
isFloat (FloatReg _) = True
isFloat (DoubleReg _) = True
isFloat _ = False
isPassed r = not (isSSE r) || isLive r
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
isSSE _ = False
loadExpr r | isLive r = do
let reg = lmGlobalRegVar dflags r
(v,s) <- doExpr (pLower $ getVarType reg) $ Load reg
......
......@@ -55,6 +55,12 @@ lmGlobalReg dflags suf reg
DoubleReg 4 -> doubleGlobal $ "D4" ++ suf
DoubleReg 5 -> doubleGlobal $ "D5" ++ suf
DoubleReg 6 -> doubleGlobal $ "D6" ++ suf
XmmReg 1 -> xmmGlobal $ "XMM1" ++ suf
XmmReg 2 -> xmmGlobal $ "XMM2" ++ suf
XmmReg 3 -> xmmGlobal $ "XMM3" ++ suf
XmmReg 4 -> xmmGlobal $ "XMM4" ++ suf
XmmReg 5 -> xmmGlobal $ "XMM5" ++ suf
XmmReg 6 -> xmmGlobal $ "XMM6" ++ suf
_other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg)
++ ") not supported!"
-- LongReg, HpLim, CCSS, CurrentTSO, CurrentNusery, HpAlloc
......@@ -64,6 +70,7 @@ lmGlobalReg dflags suf reg
ptrGlobal name = LMNLocalVar (fsLit name) (llvmWordPtr dflags)
floatGlobal name = LMNLocalVar (fsLit name) LMFloat
doubleGlobal name = LMNLocalVar (fsLit name) LMDouble
xmmGlobal name = LMNLocalVar (fsLit name) (LMVector 4 (LMInt 32))
-- | A list of STG Registers that should always be considered alive
alwaysLive :: [GlobalReg]
......
......@@ -602,19 +602,21 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_FS_Conv from to -> coerceFP2Int from to x
MO_SF_Conv from to -> coerceInt2FP from to x
MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm
MO_V_Add {} -> needLlvm
MO_V_Sub {} -> needLlvm
MO_V_Mul {} -> needLlvm
MO_VS_Quot {} -> needLlvm
MO_VS_Rem {} -> needLlvm
MO_VS_Neg {} -> needLlvm
MO_VF_Add {} -> needLlvm
MO_VF_Sub {} -> needLlvm
MO_VF_Mul {} -> needLlvm
MO_VF_Quot {} -> needLlvm
MO_VF_Neg {} -> needLlvm
MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm
MO_V_Add {} -> needLlvm
MO_V_Sub {} -> needLlvm
MO_V_Mul {} -> needLlvm
MO_VS_Quot {} -> needLlvm
MO_VS_Rem {} -> needLlvm
MO_VS_Neg {} -> needLlvm
MO_VF_Insert {} -> needLlvm
MO_VF_Extract {} -> needLlvm
MO_VF_Add {} -> needLlvm
MO_VF_Sub {} -> needLlvm
MO_VF_Mul {} -> needLlvm
MO_VF_Quot {} -> needLlvm
MO_VF_Neg {} -> needLlvm
_other -> pprPanic "getRegister" (pprMachOp mop)
where
......@@ -708,19 +710,21 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_U_Shr rep -> shift_code rep SHR x y {-False-}
MO_S_Shr rep -> shift_code rep SAR x y {-False-}
MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm
MO_V_Add {} -> needLlvm
MO_V_Sub {} -> needLlvm
MO_V_Mul {} -> needLlvm
MO_VS_Quot {} -> needLlvm
MO_VS_Rem {} -> needLlvm
MO_VS_Neg {} -> needLlvm
MO_VF_Add {} -> needLlvm
MO_VF_Sub {} -> needLlvm
MO_VF_Mul {} -> needLlvm
MO_VF_Quot {} -> needLlvm
MO_VF_Neg {} -> needLlvm
MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm
MO_V_Add {} -> needLlvm
MO_V_Sub {} -> needLlvm
MO_V_Mul {} -> needLlvm
MO_VS_Quot {} -> needLlvm
MO_VS_Rem {} -> needLlvm
MO_VS_Neg {} -> needLlvm
MO_VF_Insert {} -> needLlvm
MO_VF_Extract {} -> needLlvm
MO_VF_Add {} -> needLlvm
MO_VF_Sub {} -> needLlvm
MO_VF_Mul {} -> needLlvm
MO_VF_Quot {} -> needLlvm
MO_VF_Neg {} -> needLlvm
_other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
where
......@@ -2722,21 +2726,23 @@ sse2NegCode w x = do
return (Any sz code)
isVecExpr :: CmmExpr -> Bool
isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
isVecExpr (CmmMachOp _ [e]) = isVecExpr e
isVecExpr _ = False
isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
isVecExpr (CmmMachOp _ [e]) = isVecExpr e
isVecExpr _ = False
needLlvm :: NatM a
needLlvm =
......
......@@ -388,36 +388,54 @@ activeStgRegs = [
#ifdef REG_D1
,DoubleReg 1
#endif
#ifdef REG_XMM1
,XmmReg 1
#endif
#ifdef REG_F2
,FloatReg 2
#endif
#ifdef REG_D2
,DoubleReg 2
#endif
#ifdef REG_XMM2
,XmmReg 2
#endif
#ifdef REG_F3
,FloatReg 3
#endif
#ifdef REG_D3
,DoubleReg 3
#endif
#ifdef REG_XMM3
,XmmReg 3
#endif
#ifdef REG_F4
,FloatReg 4
#endif
#ifdef REG_D4
,DoubleReg 4
#endif
#ifdef REG_XMM4
,XmmReg 4
#endif
#ifdef REG_F5
,FloatReg 5
#endif
#ifdef REG_D5
,DoubleReg 5
#endif
#ifdef REG_XMM5
,XmmReg 5
#endif
#ifdef REG_F6
,FloatReg 6
#endif
#ifdef REG_D6
,DoubleReg 6
#endif
#ifdef REG_XMM6
,XmmReg 6
#endif
#else /* MAX_REAL_SSE_REG == 0 */
#ifdef REG_F1
,FloatReg 1
......@@ -569,6 +587,14 @@ globalRegMaybe (DoubleReg 6) =
Just (RealRegSingle REG_D6)
# endif
# endif
#if MAX_REAL_SSE_REG != 0
globalRegMaybe (XmmReg 1) = Just (RealRegSingle REG_XMM1)
globalRegMaybe (XmmReg 2) = Just (RealRegSingle REG_XMM2)
globalRegMaybe (XmmReg 3) = Just (RealRegSingle REG_XMM3)
globalRegMaybe (XmmReg 4) = Just (RealRegSingle REG_XMM4)
globalRegMaybe (XmmReg 5) = Just (RealRegSingle REG_XMM5)
globalRegMaybe (XmmReg 6) = Just (RealRegSingle REG_XMM6)
# endif
# ifdef REG_Sp
globalRegMaybe Sp = Just (RealRegSingle REG_Sp)
# endif
......
......@@ -167,12 +167,12 @@
#define REG_D5 xmm5
#define REG_D6 xmm6
#define REG_SSE1 xmm1
#define REG_SSE2 xmm2
#define REG_SSE3 xmm3
#define REG_SSE4 xmm4
#define REG_SSE5 xmm5
#define REG_SSE6 xmm6
#define REG_XMM1 xmm1
#define REG_XMM2 xmm2
#define REG_XMM3 xmm3
#define REG_XMM4 xmm4
#define REG_XMM5 xmm5
#define REG_XMM6 xmm6
#if !defined(mingw32_HOST_OS)
#define CALLER_SAVES_R3
......@@ -199,13 +199,13 @@
#define CALLER_SAVES_D6
#endif
#define CALLER_SAVES_SSE1
#define CALLER_SAVES_SSE2
#define CALLER_SAVES_SSE3
#define CALLER_SAVES_SSE4
#define CALLER_SAVES_SSE5
#define CALLER_SAVES_XMM1
#define CALLER_SAVES_XMM2
#define CALLER_SAVES_XMM3
#define CALLER_SAVES_XMM4
#define CALLER_SAVES_XMM5
#if !defined(mingw32_HOST_OS)
#define CALLER_SAVES_SSE6
#define CALLER_SAVES_XMM6
#endif
#define MAX_REAL_VANILLA_REG 6
......
......@@ -81,6 +81,12 @@ typedef struct {
StgDouble rD4;
StgDouble rD5;
StgDouble rD6;
StgWord128 rXMM1;
StgWord128 rXMM2;
StgWord128 rXMM3;
StgWord128 rXMM4;
StgWord128 rXMM5;
StgWord128 rXMM6;
StgWord64 rL1;
StgPtr rSp;
StgPtr rSpLim;
......@@ -270,6 +276,42 @@ GLOBAL_REG_DECL(StgDouble,D6,REG_D6)
#define D6 (BaseReg->rD6)