Add support for passing SSE vectors in registers.

This patch adds support for 6 XMM registers on x86-64 which overlap with the F
and D registers and may hold 128-bit wide SIMD vectors. Because there is not a
good way to attach type information to STG registers, we aggressively bitcast in
the LLVM back-end.
parent 18114408
......@@ -70,7 +70,9 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
assign_regs assts (r:rs) regs | isVecType ty = vec
| isFloatType ty = float
| otherwise = int
where vec = (assts, (r:rs))
where vec = case (w, regs) of
(W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
_ -> (assts, (r:rs))
float = case (w, regs) of
(W32, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss))
(W32, (vs, f:fs, ds, ls, ss))
......
......@@ -336,6 +336,9 @@ data GlobalReg
| LongReg -- long int registers (64-bit, really)
{-# UNPACK #-} !Int -- its number
| XmmReg -- 128-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
-- STG registers
| Sp -- Stack ptr; points to last occupied stack location.
| SpLim -- Stack limit
......@@ -371,6 +374,7 @@ instance Eq GlobalReg where
FloatReg i == FloatReg j = i==j
DoubleReg i == DoubleReg j = i==j
LongReg i == LongReg j = i==j
XmmReg i == XmmReg j = i==j
Sp == Sp = True
SpLim == SpLim = True
Hp == Hp = True
......@@ -392,6 +396,7 @@ instance Ord GlobalReg where
compare (FloatReg i) (FloatReg j) = compare i j
compare (DoubleReg i) (DoubleReg j) = compare i j
compare (LongReg i) (LongReg j) = compare i j
compare (XmmReg i) (XmmReg j) = compare i j
compare Sp Sp = EQ
compare SpLim SpLim = EQ
compare Hp Hp = EQ
......@@ -413,6 +418,8 @@ instance Ord GlobalReg where
compare _ (DoubleReg _) = GT
compare (LongReg _) _ = LT
compare _ (LongReg _) = GT
compare (XmmReg _) _ = LT
compare _ (XmmReg _) = GT
compare Sp _ = LT
compare _ Sp = GT
compare SpLim _ = LT
......@@ -455,6 +462,8 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
globalRegType _ (FloatReg _) = cmmFloat W32
globalRegType _ (DoubleReg _) = cmmFloat W64
globalRegType _ (LongReg _) = cmmBits W64
globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
globalRegType dflags Hp = gcWord dflags
-- The initialiser for all
-- dynamically allocated closures
......@@ -465,4 +474,5 @@ isArgReg (VanillaReg {}) = True
isArgReg (FloatReg {}) = True
isArgReg (DoubleReg {}) = True
isArgReg (LongReg {}) = True
isArgReg (XmmReg {}) = True
isArgReg _ = False
......@@ -118,6 +118,10 @@ data MachOp
| MO_VS_Rem Length Width
| MO_VS_Neg Length Width
-- Floting point vector element insertion and extraction operations
| MO_VF_Insert Length Width -- Insert scalar into vector
| MO_VF_Extract Length Width -- Extract scalar from vector
-- Floating point vector operations
| MO_VF_Add Length Width
| MO_VF_Sub Length Width
......@@ -360,22 +364,25 @@ machOpResultType dflags mop tys =
MO_SF_Conv _ to -> cmmFloat to
MO_FF_Conv _ to -> cmmFloat to
MO_V_Insert {} -> ty1
MO_V_Extract {} -> vecElemType ty1
MO_V_Insert l w -> cmmVec l (cmmBits w)
MO_V_Extract _ w -> cmmBits w
MO_V_Add l w -> cmmVec l (cmmBits w)
MO_V_Sub l w -> cmmVec l (cmmBits w)
MO_V_Mul l w -> cmmVec l (cmmBits w)
MO_V_Add {} -> ty1
MO_V_Sub {} -> ty1
MO_V_Mul {} -> ty1
MO_VS_Quot l w -> cmmVec l (cmmBits w)
MO_VS_Rem l w -> cmmVec l (cmmBits w)
MO_VS_Neg l w -> cmmVec l (cmmBits w)
MO_VS_Quot {} -> ty1
MO_VS_Rem {} -> ty1
MO_VS_Neg {} -> ty1
MO_VF_Insert l w -> cmmVec l (cmmFloat w)
MO_VF_Extract _ w -> cmmFloat w
MO_VF_Add {} -> ty1
MO_VF_Sub {} -> ty1
MO_VF_Mul {} -> ty1
MO_VF_Quot {} -> ty1
MO_VF_Neg {} -> ty1
MO_VF_Add l w -> cmmVec l (cmmFloat w)
MO_VF_Sub l w -> cmmVec l (cmmFloat w)
MO_VF_Mul l w -> cmmVec l (cmmFloat w)
MO_VF_Quot l w -> cmmVec l (cmmFloat w)
MO_VF_Neg l w -> cmmVec l (cmmFloat w)
where
(ty1:_) = tys
......@@ -443,8 +450,8 @@ machOpArgReps dflags op =
MO_FS_Conv from _ -> [from]
MO_FF_Conv from _ -> [from]
MO_V_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
MO_V_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth dflags]
MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth dflags]
MO_V_Add _ r -> [r,r]
MO_V_Sub _ r -> [r,r]
......@@ -454,6 +461,9 @@ machOpArgReps dflags op =
MO_VS_Rem _ r -> [r,r]
MO_VS_Neg _ r -> [r]
MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
MO_VF_Add _ r -> [r,r]
MO_VF_Sub _ r -> [r,r]
MO_VF_Mul _ r -> [r,r]
......
......@@ -661,6 +661,15 @@ pprMachOp_for_C mop = case mop of
(panic $ "PprC.pprMachOp_for_C: MO_VS_Neg"
++ " should have been handled earlier!")
MO_VF_Insert {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Insert")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Insert"
++ " should have been handled earlier!")
MO_VF_Extract {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Extract")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Extract"
++ " should have been handled earlier!")
MO_VF_Add {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Add")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Add"
......
......@@ -255,6 +255,7 @@ pprGlobalReg gr
FloatReg n -> char 'F' <> int n
DoubleReg n -> char 'D' <> int n
LongReg n -> char 'L' <> int n
XmmReg n -> ptext (sLit "XMM") <> int n
Sp -> ptext (sLit "Sp")
SpLim -> ptext (sLit "SpLim")
Hp -> ptext (sLit "Hp")
......
......@@ -49,6 +49,13 @@ baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags
baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags
baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags
baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")")
baseRegOffset dflags (XmmReg 1) = oFFSET_StgRegTable_rXMM1 dflags
baseRegOffset dflags (XmmReg 2) = oFFSET_StgRegTable_rXMM2 dflags
baseRegOffset dflags (XmmReg 3) = oFFSET_StgRegTable_rXMM3 dflags
baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags
baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags
baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags
baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags
baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags
baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags
......
......@@ -1183,7 +1183,10 @@ doVecPackOp maybe_pre_write_cast ty z es res = do
vecPack src (e : es) i = do
dst <- newTemp ty
emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
if isFloatType (vecElemType ty)
then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
[CmmReg (CmmLocal src), cast e, iLit])
else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
[CmmReg (CmmLocal src), cast e, iLit])
vecPack dst es (i + 1)
where
......@@ -1214,7 +1217,10 @@ doVecUnpackOp maybe_post_read_cast ty e res =
return ()
vecUnpack (r : rs) i = do
emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
if isFloatType (vecElemType ty)
then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
[e, iLit]))
else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
[e, iLit]))
vecUnpack rs (i + 1)
where
......@@ -1244,7 +1250,9 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do
-- vector indices are always 32-bits
let idx' :: CmmExpr
idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
if isFloatType (vecElemType ty)
then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
where
cast :: CmmExpr -> CmmExpr
cast val = case maybe_pre_write_cast of
......
......@@ -131,11 +131,12 @@ llvmFunArgs :: DynFlags -> LiveGlobalRegs -> [LlvmVar]
llvmFunArgs dflags live =
map (lmGlobalRegArg dflags) (filter isPassed (activeStgRegs platform))
where platform = targetPlatform dflags
isLive r = not (isFloat r) || r `elem` alwaysLive || r `elem` live
isPassed r = not (isFloat r) || isLive r
isFloat (FloatReg _) = True
isFloat (DoubleReg _) = True
isFloat _ = False
isLive r = not (isSSE r) || r `elem` alwaysLive || r `elem` live
isPassed r = not (isSSE r) || isLive r
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
isSSE _ = False
-- | Llvm standard fun attributes
llvmStdFunAttrs :: [LlvmFuncAttr]
......
......@@ -470,6 +470,7 @@ castVar dflags v t
(vt, _) | isInt vt && isPointer t -> LM_Inttoptr
(vt, _) | isPointer vt && isInt t -> LM_Ptrtoint
(vt, _) | isPointer vt && isPointer t -> LM_Bitcast
(vt, _) | isVector vt && isVector t -> LM_Bitcast
(vt, _) -> panic $ "castVars: Can't cast this type ("
++ show vt ++ ") to (" ++ show t ++ ")"
......@@ -582,14 +583,19 @@ genAssign env reg val = do
let stmts = stmts1 `appOL` stmts2
let ty = (pLower . getVarType) vreg
case isPointer ty && getVarType vval == llvmWord dflags of
case ty of
-- Some registers are pointer types, so need to cast value to pointer
True -> do
LMPointer _ | getVarType vval == llvmWord dflags -> do
(v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
let s2 = Store v vreg
return (env2, stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
False -> do
LMVector _ _ -> do
(v, s1) <- doExpr ty $ Cast LM_Bitcast vval ty
let s2 = Store v vreg
return (env2, stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
_ -> do
let s1 = Store vval vreg
return (env2, stmts `snocOL` s1, top1 ++ top2)
......@@ -884,14 +890,14 @@ genMachOp env _ op [x] = case op of
vecty = LMVector len ty
all0 = LMIntLit (-0) ty
all0s = LMLitVar $ LMVectorLit (replicate len all0)
in negate vecty all0s LM_MO_Sub
in negateVec vecty all0s LM_MO_Sub
MO_VF_Neg len w ->
let ty = widthToLlvmFloat w
vecty = LMVector len ty
all0 = LMFloatLit (-0) ty
all0s = LMLitVar $ LMVectorLit (replicate len all0)
in negate vecty all0s LM_MO_FSub
in negateVec vecty all0s LM_MO_FSub
-- Handle unsupported cases explicitly so we get a warning
-- of missing case when new MachOps added
......@@ -944,6 +950,9 @@ genMachOp env _ op [x] = case op of
MO_VS_Quot _ _ -> panicOp
MO_VS_Rem _ _ -> panicOp
MO_VF_Insert _ _ -> panicOp
MO_VF_Extract _ _ -> panicOp
MO_VF_Add _ _ -> panicOp
MO_VF_Sub _ _ -> panicOp
MO_VF_Mul _ _ -> panicOp
......@@ -957,6 +966,12 @@ genMachOp env _ op [x] = case op of
(v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx
return (env', v1, stmts `snocOL` s1, top)
negateVec ty v2 negOp = do
(env', vx, stmts1, top) <- exprToVar env x
([vx'], stmts2) <- castVars dflags [(vx, ty)]
(v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx'
return (env', v1, stmts1 `appOL` stmts2 `snocOL` s1, top)
fiConv ty convOp = do
(env', vx, stmts, top) <- exprToVar env x
(v1, s1) <- doExpr ty $ Cast convOp vx ty
......@@ -1014,22 +1029,50 @@ genMachOp_fast env opt op r n e
genMachOp_slow :: LlvmEnv -> EOption -> MachOp -> [CmmExpr] -> UniqSM ExprData
-- Element extraction
genMachOp_slow env _ (MO_V_Extract {}) [val, idx] = do
genMachOp_slow env _ (MO_V_Extract l w) [val, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, vidx, stmts2, top2) <- exprToVar env1 idx
let (LMVector _ ty) = getVarType vval
(v1, s1) <- doExpr ty $ Extract vval vidx
return (env2, v1, stmts1 `appOL` stmts2 `snocOL` s1, top1 ++ top2)
([vval'], stmts3) <- castVars dflags [(vval, LMVector l ty)]
(v1, s1) <- doExpr ty $ Extract vval' vidx
return (env2, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1, top1 ++ top2)
where
dflags = getDflags env
ty = widthToLlvmInt w
genMachOp_slow env _ (MO_VF_Extract l w) [val, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, vidx, stmts2, top2) <- exprToVar env1 idx
([vval'], stmts3) <- castVars dflags [(vval, LMVector l ty)]
(v1, s1) <- doExpr ty $ Extract vval' vidx
return (env2, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1, top1 ++ top2)
where
dflags = getDflags env
ty = widthToLlvmFloat w
-- Element insertion
genMachOp_slow env _ (MO_V_Insert {}) [val, elt, idx] = do
genMachOp_slow env _ (MO_V_Insert l w) [val, elt, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, velt, stmts2, top2) <- exprToVar env1 elt
(env3, vidx, stmts3, top3) <- exprToVar env2 idx
let ty = getVarType vval
(v1, s1) <- doExpr ty $ Insert vval velt vidx
return (env3, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1,
([vval'], stmts4) <- castVars dflags [(vval, ty)]
(v1, s1) <- doExpr ty $ Insert vval' velt vidx
return (env3, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL` s1,
top1 ++ top2 ++ top3)
where
dflags = getDflags env
ty = LMVector l (widthToLlvmInt w)
genMachOp_slow env _ (MO_VF_Insert l w) [val, elt, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, velt, stmts2, top2) <- exprToVar env1 elt
(env3, vidx, stmts3, top3) <- exprToVar env2 idx
([vval'], stmts4) <- castVars dflags [(vval, ty)]
(v1, s1) <- doExpr ty $ Insert vval' velt vidx
return (env3, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL` s1,
top1 ++ top2 ++ top3)
where
dflags = getDflags env
ty = LMVector l (widthToLlvmFloat w)
-- Binary MachOp
genMachOp_slow env opt op [x, y] = case op of
......@@ -1080,17 +1123,17 @@ genMachOp_slow env opt op [x, y] = case op of
MO_U_Shr _ -> genBinMach LM_MO_LShr
MO_S_Shr _ -> genBinMach LM_MO_AShr
MO_V_Add _ _ -> genBinMach LM_MO_Add
MO_V_Sub _ _ -> genBinMach LM_MO_Sub
MO_V_Mul _ _ -> genBinMach LM_MO_Mul
MO_V_Add l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Add
MO_V_Sub l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Sub
MO_V_Mul l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Mul
MO_VS_Quot _ _ -> genBinMach LM_MO_SDiv
MO_VS_Rem _ _ -> genBinMach LM_MO_SRem
MO_VS_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SDiv
MO_VS_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SRem
MO_VF_Add _ _ -> genBinMach LM_MO_FAdd
MO_VF_Sub _ _ -> genBinMach LM_MO_FSub
MO_VF_Mul _ _ -> genBinMach LM_MO_FMul
MO_VF_Quot _ _ -> genBinMach LM_MO_FDiv
MO_VF_Add l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FAdd
MO_VF_Sub l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FSub
MO_VF_Mul l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FMul
MO_VF_Quot l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FDiv
MO_Not _ -> panicOp
MO_S_Neg _ -> panicOp
......@@ -1107,6 +1150,9 @@ genMachOp_slow env opt op [x, y] = case op of
MO_VS_Neg {} -> panicOp
MO_VF_Insert {} -> panicOp
MO_VF_Extract {} -> panicOp
MO_VF_Neg {} -> panicOp
where
......@@ -1134,6 +1180,14 @@ genMachOp_slow env opt op [x, y] = case op of
`snocOL` dy `snocOL` s1
return (env2, v1, allStmts, top1 ++ top2)
binCastLlvmOp ty binOp = do
(env1, vx, stmts1, top1) <- exprToVar env x
(env2, vy, stmts2, top2) <- exprToVar env1 y
([vx', vy'], stmts3) <- castVars dflags [(vx, ty), (vy, ty)]
(v1, s1) <- doExpr ty $ binOp vx' vy'
return (env2, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1,
top1 ++ top2)
-- | Need to use EOption here as Cmm expects word size results from
-- comparisons while LLVM return i1. Need to extend to llvmWord type
-- if expected. See Note [Literals and branch conditions].
......@@ -1152,6 +1206,8 @@ genMachOp_slow env opt op [x, y] = case op of
genBinMach op = binLlvmOp getVarType (LlvmOp op)
genCastBinMach ty op = binCastLlvmOp ty (LlvmOp op)
-- | Detect if overflow will occur in signed multiply of the two
-- CmmExpr's. This is the LLVM assembly equivalent of the NCG
-- implementation. Its much longer due to type information/safety.
......@@ -1427,10 +1483,11 @@ funEpilogue env live = do
dflags = getDflags env
platform = targetPlatform dflags
isLive r = r `elem` alwaysLive || r `elem` live
isPassed r = not (isFloat r) || isLive r
isFloat (FloatReg _) = True
isFloat (DoubleReg _) = True
isFloat _ = False
isPassed r = not (isSSE r) || isLive r
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
isSSE _ = False
loadExpr r | isLive r = do
let reg = lmGlobalRegVar dflags r
(v,s) <- doExpr (pLower $ getVarType reg) $ Load reg
......
......@@ -55,6 +55,12 @@ lmGlobalReg dflags suf reg
DoubleReg 4 -> doubleGlobal $ "D4" ++ suf
DoubleReg 5 -> doubleGlobal $ "D5" ++ suf
DoubleReg 6 -> doubleGlobal $ "D6" ++ suf
XmmReg 1 -> xmmGlobal $ "XMM1" ++ suf
XmmReg 2 -> xmmGlobal $ "XMM2" ++ suf
XmmReg 3 -> xmmGlobal $ "XMM3" ++ suf
XmmReg 4 -> xmmGlobal $ "XMM4" ++ suf
XmmReg 5 -> xmmGlobal $ "XMM5" ++ suf
XmmReg 6 -> xmmGlobal $ "XMM6" ++ suf
_other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg)
++ ") not supported!"
-- LongReg, HpLim, CCSS, CurrentTSO, CurrentNusery, HpAlloc
......@@ -64,6 +70,7 @@ lmGlobalReg dflags suf reg
ptrGlobal name = LMNLocalVar (fsLit name) (llvmWordPtr dflags)
floatGlobal name = LMNLocalVar (fsLit name) LMFloat
doubleGlobal name = LMNLocalVar (fsLit name) LMDouble
xmmGlobal name = LMNLocalVar (fsLit name) (LMVector 4 (LMInt 32))
-- | A list of STG Registers that should always be considered alive
alwaysLive :: [GlobalReg]
......
......@@ -610,6 +610,8 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_VS_Quot {} -> needLlvm
MO_VS_Rem {} -> needLlvm
MO_VS_Neg {} -> needLlvm
MO_VF_Insert {} -> needLlvm
MO_VF_Extract {} -> needLlvm
MO_VF_Add {} -> needLlvm
MO_VF_Sub {} -> needLlvm
MO_VF_Mul {} -> needLlvm
......@@ -716,6 +718,8 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_VS_Quot {} -> needLlvm
MO_VS_Rem {} -> needLlvm
MO_VS_Neg {} -> needLlvm
MO_VF_Insert {} -> needLlvm
MO_VF_Extract {} -> needLlvm
MO_VF_Add {} -> needLlvm
MO_VF_Sub {} -> needLlvm
MO_VF_Mul {} -> needLlvm
......@@ -2730,6 +2734,8 @@ isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
......
......@@ -388,36 +388,54 @@ activeStgRegs = [
#ifdef REG_D1
,DoubleReg 1
#endif
#ifdef REG_XMM1
,XmmReg 1
#endif
#ifdef REG_F2
,FloatReg 2
#endif
#ifdef REG_D2
,DoubleReg 2
#endif
#ifdef REG_XMM2
,XmmReg 2
#endif
#ifdef REG_F3
,FloatReg 3
#endif
#ifdef REG_D3
,DoubleReg 3
#endif
#ifdef REG_XMM3
,XmmReg 3
#endif
#ifdef REG_F4
,FloatReg 4
#endif
#ifdef REG_D4
,DoubleReg 4
#endif
#ifdef REG_XMM4
,XmmReg 4
#endif
#ifdef REG_F5
,FloatReg 5
#endif
#ifdef REG_D5
,DoubleReg 5
#endif
#ifdef REG_XMM5
,XmmReg 5
#endif
#ifdef REG_F6
,FloatReg 6
#endif
#ifdef REG_D6
,DoubleReg 6
#endif
#ifdef REG_XMM6
,XmmReg 6
#endif
#else /* MAX_REAL_SSE_REG == 0 */
#ifdef REG_F1
,FloatReg 1
......@@ -569,6 +587,14 @@ globalRegMaybe (DoubleReg 6) =
Just (RealRegSingle REG_D6)
# endif
# endif
#if MAX_REAL_SSE_REG != 0
globalRegMaybe (XmmReg 1) = Just (RealRegSingle REG_XMM1)
globalRegMaybe (XmmReg 2) = Just (RealRegSingle REG_XMM2)
globalRegMaybe (XmmReg 3) = Just (RealRegSingle REG_XMM3)
globalRegMaybe (XmmReg 4) = Just (RealRegSingle REG_XMM4)
globalRegMaybe (XmmReg 5) = Just (RealRegSingle REG_XMM5)
globalRegMaybe (XmmReg 6) = Just (RealRegSingle REG_XMM6)
# endif
# ifdef REG_Sp
globalRegMaybe Sp = Just (RealRegSingle REG_Sp)
# endif
......
......@@ -167,12 +167,12 @@
#define REG_D5 xmm5
#define REG_D6 xmm6
#define REG_SSE1 xmm1
#define REG_SSE2 xmm2
#define REG_SSE3 xmm3
#define REG_SSE4 xmm4
#define REG_SSE5 xmm5
#define REG_SSE6 xmm6
#define REG_XMM1 xmm1
#define REG_XMM2 xmm2
#define REG_XMM3 xmm3
#define REG_XMM4 xmm4
#define REG_XMM5 xmm5
#define REG_XMM6 xmm6
#if !defined(mingw32_HOST_OS)
#define CALLER_SAVES_R3
......@@ -199,13 +199,13 @@
#define CALLER_SAVES_D6
#endif
#define CALLER_SAVES_SSE1
#define CALLER_SAVES_SSE2
#define CALLER_SAVES_SSE3
#define CALLER_SAVES_SSE4
#define CALLER_SAVES_SSE5
#define CALLER_SAVES_XMM1
#define CALLER_SAVES_XMM2
#define CALLER_SAVES_XMM3
#define CALLER_SAVES_XMM4
#define CALLER_SAVES_XMM5
#if !defined(mingw32_HOST_OS)
#define CALLER_SAVES_SSE6
#define CALLER_SAVES_XMM6
#endif
#define MAX_REAL_VANILLA_REG 6
......
......@@ -81,6 +81,12 @@ typedef struct {
StgDouble rD4;
StgDouble rD5;
StgDouble rD6;
StgWord128 rXMM1;
StgWord128 rXMM2;
StgWord128 rXMM3;
StgWord128 rXMM4;
StgWord128 rXMM5;
StgWord128 rXMM6;
StgWord64 rL1;
StgPtr rSp;
StgPtr rSpLim;
......@@ -270,6 +276,42 @@ GLOBAL_REG_DECL(StgDouble,D6,REG_D6)
#define D6 (BaseReg->rD6)
#endif
#if defined(REG_XMM1) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord128,XMM1,REG_XMM1)
#else
#define XMM1 (BaseReg->rXMM1)
#endif
#if defined(REG_XMM2) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord128,XMM2,REG_XMM2)
#else
#define XMM2 (BaseReg->rXMM2)
#endif
#if defined(REG_XMM3) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord128,XMM3,REG_XMM3)
#else
#define XMM3 (BaseReg->rXMM3)
#endif
#if defined(REG_XMM4) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord128,XMM4,REG_XMM4)
#else
#define XMM4 (BaseReg->rXMM4)
#endif
#if defined(REG_XMM5) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord128,XMM5,REG_XMM5)
#else
#define XMM5 (BaseReg->rXMM5)
#endif
#if defined(REG_XMM6) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord128,XMM6,REG_XMM6)
#else
#define XMM6 (BaseReg->rXMM6)
#endif
#if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord64,L1,REG_L1)
#else
......
......@@ -83,6 +83,8 @@ typedef unsigned long long int StgWord64;
#error cannot find a way to define StgInt64
#endif
typedef struct { StgWord64 h; StgWord64 l; } StgWord128;
/*
* Define the standard word size we'll use on this machine: make it
* big enough to hold a pointer.
......
......@@ -307,6 +307,12 @@ wanteds = concat
,fieldOffset Both "StgRegTable" "rD4"
,fieldOffset Both "StgRegTable" "rD5"
,fieldOffset Both "StgRegTable" "rD6"
,fieldOffset Both "StgRegTable" "rXMM1"
,fieldOffset Both "StgRegTable" "rXMM2"
,fieldOffset Both "StgRegTable" "rXMM3"
,fieldOffset Both "StgRegTable" "rXMM4"
,fieldOffset Both "StgRegTable" "rXMM5"
,fieldOffset Both "StgRegTable" "rXMM6"
,fieldOffset Both "StgRegTable" "rL1"
,fieldOffset Both "StgRegTable" "rSp"
,fieldOffset Both "StgRegTable" "rSpLim"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment