Skip to content
Snippets Groups Projects
Commit acd79558 authored by Abhiroop Sarkar's avatar Abhiroop Sarkar Committed by Marge Bot
Browse files

Add support for SIMD operations in the NCG


This adds support for constructing vector types from Float#, Double# etc
and performing arithmetic operations on them

Cleaned-Up-By: default avatarBen Gamari <ben@well-typed.com>
parent df3e5b74
No related branches found
No related tags found
No related merge requests found
Showing
with 300 additions and 124 deletions
......@@ -64,13 +64,20 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
assign_regs assts (r:rs) regs | isVecType ty = vec
| isFloatType ty = float
| otherwise = int
where vec = case (w, regs) of
(W128, (vs, fs, ds, ls, s:ss))
| passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
(W256, (vs, fs, ds, ls, s:ss))
| passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
(W512, (vs, fs, ds, ls, s:ss))
| passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
where vec = case regs of
(vs, fs, ds, ls, s:ss)
| passVectorInReg w dflags
-> let elt_ty = vecElemType ty
reg_ty = if isFloatType elt_ty
then Float else Integer
reg_class = case w of
W128 -> XmmReg
W256 -> YmmReg
W512 -> ZmmReg
_ -> panic "CmmCallConv.assignArgumentsPos: Invalid vector width"
in k (RegisterParam
(reg_class s (vecLength ty) (typeWidth elt_ty) reg_ty),
(vs, fs, ds, ls, ss))
_ -> (assts, (r:rs))
float = case (w, regs) of
(W32, (vs, fs, ds, ls, s:ss))
......@@ -89,6 +96,7 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
(_, (vs, fs, ds, l:ls, ss)) | widthInBits w > widthInBits (wordWidth dflags)
-> k (RegisterParam l, (vs, fs, ds, ls, ss))
_ -> (assts, (r:rs))
k (asst, regs') = assign_regs ((r, asst) : assts) rs regs'
ty = arg_ty r
w = typeWidth ty
......@@ -202,11 +210,13 @@ nodeOnly = ([VanillaReg 1], [], [], [], [])
-- only use this functionality in hand-written C-- code in the RTS.
realArgRegsCover :: DynFlags -> [GlobalReg]
realArgRegsCover dflags
| passFloatArgsInXmm dflags = map ($VGcPtr) (realVanillaRegs dflags) ++
realLongRegs dflags ++
map XmmReg (realXmmRegNos dflags)
| otherwise = map ($VGcPtr) (realVanillaRegs dflags) ++
realFloatRegs dflags ++
realDoubleRegs dflags ++
realLongRegs dflags ++
map XmmReg (realXmmRegNos dflags)
| passFloatArgsInXmm dflags
= map ($VGcPtr) (realVanillaRegs dflags) ++
realLongRegs dflags ++
map (\x -> XmmReg x 2 W64 Integer) (realXmmRegNos dflags)
| otherwise
= map ($VGcPtr) (realVanillaRegs dflags) ++
realFloatRegs dflags ++
realDoubleRegs dflags ++
realLongRegs dflags ++
map (\x -> XmmReg x 2 W64 Integer) (realXmmRegNos dflags)
......@@ -14,6 +14,7 @@ module CmmExpr
, currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg
, node, baseReg
, VGcPtr(..)
, GlobalVecRegTy(..)
, DefinerOfRegs, UserOfRegs
, foldRegsDefd, foldRegsUsed
......@@ -41,6 +42,7 @@ import Outputable (panic)
import Unique
import Data.Set (Set)
import Data.Monoid ((<>))
import qualified Data.Set as Set
import BasicTypes (Alignment, mkAlignment, alignmentOf)
......@@ -392,6 +394,7 @@ data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show )
-----------------------------------------------------------------------------
{-
Note [Overlapping global registers]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The backend might not faithfully implement the abstraction of the STG
machine with independent registers for different values of type
......@@ -413,6 +416,26 @@ on a particular platform. The instance Eq GlobalReg is syntactic
equality of STG registers and does not take overlap into
account. However it is still used in UserOfRegs/DefinerOfRegs and
there are likely still bugs there, beware!
Note [SIMD registers]
~~~~~~~~~~~~~~~~~~~~~
GHC's treatment of SIMD registers is heavily modelled after the x86_64
architecture. Namely we have 128- (XMM), 256- (YMM), and 512-bit (ZMM)
registers. Furthermore, we treat each possible format in these registers as a
distinct register which overlaps with the others. For instance, we XMM1 as a
2xI64 register is distinct from but overlaps with (in the sense defined in Note
[Overlapping global registers]) its use as a 4xI32 register.
This model makes it easier to fit SIMD registers into the NCG, which generally
expects that each global register has a single, known CmmType.
In the future we could consider further refactoring this to eliminate the
XMM, YMM, and ZMM register names (which are quite x86-specific) and instead just
having a set of NxM-bit vector registers (e.g. Vec2x64A, Vec2x64B, ...,
Vec4x32A, ..., Vec4x64A).
-}
data GlobalReg
......@@ -432,12 +455,15 @@ data GlobalReg
| XmmReg -- 128-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
!Length !Width !GlobalVecRegTy
| YmmReg -- 256-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
!Length !Width !GlobalVecRegTy
| ZmmReg -- 512-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
!Length !Width !GlobalVecRegTy
-- STG registers
| Sp -- Stack ptr; points to last occupied stack location.
......@@ -478,17 +504,17 @@ data GlobalReg
deriving( Show )
data GlobalVecRegTy = Integer | Float
deriving (Show, Eq, Ord)
instance Eq GlobalReg where
VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes
FloatReg i == FloatReg j = i==j
DoubleReg i == DoubleReg j = i==j
LongReg i == LongReg j = i==j
-- NOTE: XMM, YMM, ZMM registers actually are the same registers
-- at least with respect to store at YMM i and then read from XMM i
-- and similarly for ZMM etc.
XmmReg i == XmmReg j = i==j
YmmReg i == YmmReg j = i==j
ZmmReg i == ZmmReg j = i==j
XmmReg i l w grt == XmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt'
YmmReg i l w grt == YmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt'
ZmmReg i l w grt == ZmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt'
Sp == Sp = True
SpLim == SpLim = True
Hp == Hp = True
......@@ -512,9 +538,21 @@ instance Ord GlobalReg where
compare (FloatReg i) (FloatReg j) = compare i j
compare (DoubleReg i) (DoubleReg j) = compare i j
compare (LongReg i) (LongReg j) = compare i j
compare (XmmReg i) (XmmReg j) = compare i j
compare (YmmReg i) (YmmReg j) = compare i j
compare (ZmmReg i) (ZmmReg j) = compare i j
compare (XmmReg i l w grt)
(XmmReg j l' w' grt') = compare i j
<> compare l l'
<> compare w w'
<> compare grt grt'
compare (YmmReg i l w grt)
(YmmReg j l' w' grt') = compare i j
<> compare l l'
<> compare w w'
<> compare grt grt'
compare (ZmmReg i l w grt)
(ZmmReg j l' w' grt') = compare i j
<> compare l l'
<> compare w w'
<> compare grt grt'
compare Sp Sp = EQ
compare SpLim SpLim = EQ
compare Hp Hp = EQ
......@@ -538,12 +576,12 @@ instance Ord GlobalReg where
compare _ (DoubleReg _) = GT
compare (LongReg _) _ = LT
compare _ (LongReg _) = GT
compare (XmmReg _) _ = LT
compare _ (XmmReg _) = GT
compare (YmmReg _) _ = LT
compare _ (YmmReg _) = GT
compare (ZmmReg _) _ = LT
compare _ (ZmmReg _) = GT
compare (XmmReg _ _ _ _) _ = LT
compare _ (XmmReg _ _ _ _) = GT
compare (YmmReg _ _ _ _) _ = LT
compare _ (YmmReg _ _ _ _) = GT
compare (ZmmReg _ _ _ _) _ = LT
compare _ (ZmmReg _ _ _ _) = GT
compare Sp _ = LT
compare _ Sp = GT
compare SpLim _ = LT
......@@ -596,12 +634,15 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
globalRegType _ (FloatReg _) = cmmFloat W32
globalRegType _ (DoubleReg _) = cmmFloat W64
globalRegType _ (LongReg _) = cmmBits W64
-- TODO: improve the internal model of SIMD/vectorized registers
-- the right design SHOULd improve handling of float and double code too.
-- see remarks in "NOTE [SIMD Design for the future]"" in StgCmmPrim
globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32)
globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32)
globalRegType _ (XmmReg _ l w ty) = case ty of
Integer -> cmmVec l (cmmBits w)
Float -> cmmVec l (cmmFloat w)
globalRegType _ (YmmReg _ l w ty) = case ty of
Integer -> cmmVec l (cmmBits w)
Float -> cmmVec l (cmmFloat w)
globalRegType _ (ZmmReg _ l w ty) = case ty of
Integer -> cmmVec l (cmmBits w)
Float -> cmmVec l (cmmFloat w)
globalRegType dflags Hp = gcWord dflags
-- The initialiser for all
......
......@@ -148,9 +148,13 @@ lintCmmMiddle node = case node of
dflags <- getDynFlags
erep <- lintCmmExpr expr
let reg_ty = cmmRegType dflags reg
if (erep `cmmEqType_ignoring_ptrhood` reg_ty)
then return ()
else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
case isVecCatType reg_ty of
True -> if ((typeWidth reg_ty) == (typeWidth erep))
then return ()
else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
_ -> if (erep `cmmEqType_ignoring_ptrhood` reg_ty)
then return ()
else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
CmmStore l r -> do
_ <- lintCmmExpr l
......
......@@ -136,8 +136,9 @@ data MachOp
| MO_VU_Rem Length Width
-- Floting point vector element insertion and extraction operations
| MO_VF_Insert Length Width -- Insert scalar into vector
| MO_VF_Extract Length Width -- Extract scalar from vector
| MO_VF_Broadcast Length Width -- Broadcast a scalar into a vector
| MO_VF_Insert Length Width -- Insert scalar into vector
| MO_VF_Extract Length Width -- Extract scalar from vector
-- Floating point vector operations
| MO_VF_Add Length Width
......@@ -430,6 +431,7 @@ machOpResultType dflags mop tys =
MO_VU_Quot l w -> cmmVec l (cmmBits w)
MO_VU_Rem l w -> cmmVec l (cmmBits w)
MO_VF_Broadcast l w -> cmmVec l (cmmFloat w)
MO_VF_Insert l w -> cmmVec l (cmmFloat w)
MO_VF_Extract _ w -> cmmFloat w
......@@ -522,16 +524,21 @@ machOpArgReps dflags op =
MO_VU_Quot _ r -> [r,r]
MO_VU_Rem _ r -> [r,r]
MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
-- offset is always W32 as mentioned in StgCmmPrim.hs
MO_VF_Broadcast l r -> [vecwidth l r, r]
MO_VF_Insert l r -> [vecwidth l r, r, W32]
MO_VF_Extract l r -> [vecwidth l r, W32]
MO_VF_Add _ r -> [r,r]
MO_VF_Sub _ r -> [r,r]
MO_VF_Mul _ r -> [r,r]
MO_VF_Quot _ r -> [r,r]
MO_VF_Neg _ r -> [r]
-- NOTE: The below is owing to the fact that floats use the SSE registers
MO_VF_Add l w -> [vecwidth l w, vecwidth l w]
MO_VF_Sub l w -> [vecwidth l w, vecwidth l w]
MO_VF_Mul l w -> [vecwidth l w, vecwidth l w]
MO_VF_Quot l w -> [vecwidth l w, vecwidth l w]
MO_VF_Neg l w -> [vecwidth l w]
MO_AlignmentCheck _ r -> [r]
where
vecwidth l w = widthFromBytes (l*widthInBytes w)
-----------------------------------------------------------------------------
-- CallishMachOp
......
......@@ -6,6 +6,7 @@ module CmmType
, typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
, isFloatType, isGcPtrType, isBitsType
, isWord32, isWord64, isFloat64, isFloat32
, isVecCatType
, Width(..)
, widthInBits, widthInBytes, widthInLog, widthFromBytes
......@@ -133,7 +134,7 @@ cInt :: DynFlags -> CmmType
cInt dflags = cmmBits (cIntWidth dflags)
------------ Predicates ----------------
isFloatType, isGcPtrType, isBitsType :: CmmType -> Bool
isFloatType, isGcPtrType, isBitsType, isVecCatType :: CmmType -> Bool
isFloatType (CmmType FloatCat _) = True
isFloatType _other = False
......@@ -143,6 +144,9 @@ isGcPtrType _other = False
isBitsType (CmmType BitsCat _) = True
isBitsType _ = False
isVecCatType (CmmType (VecCat _ _) _) = True
isVecCatType _other = False
isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
-- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise)
-- isFloat32 and 64 are obvious
......
......@@ -713,6 +713,10 @@ pprMachOp_for_C mop = case mop of
(panic $ "PprC.pprMachOp_for_C: MO_VU_Rem"
++ " should have been handled earlier!")
MO_VF_Broadcast {} -> pprTrace "offending mop:"
(text "MO_VF_Broadcast")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Broadcast"
++ " should have been handled earlier!")
MO_VF_Insert {} -> pprTrace "offending mop:"
(text "MO_VF_Insert")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Insert"
......
......@@ -261,9 +261,9 @@ pprGlobalReg gr
FloatReg n -> char 'F' <> int n
DoubleReg n -> char 'D' <> int n
LongReg n -> char 'L' <> int n
XmmReg n -> text "XMM" <> int n
YmmReg n -> text "YMM" <> int n
ZmmReg n -> text "ZMM" <> int n
XmmReg n _ _ _ -> text "XMM" <> int n
YmmReg n _ _ _ -> text "YMM" <> int n
ZmmReg n _ _ _ -> text "ZMM" <> int n
Sp -> text "Sp"
SpLim -> text "SpLim"
Hp -> text "Hp"
......
......@@ -57,27 +57,27 @@ baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags
baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags
baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags
baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")")
baseRegOffset dflags (XmmReg 1) = oFFSET_StgRegTable_rXMM1 dflags
baseRegOffset dflags (XmmReg 2) = oFFSET_StgRegTable_rXMM2 dflags
baseRegOffset dflags (XmmReg 3) = oFFSET_StgRegTable_rXMM3 dflags
baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags
baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags
baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags
baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
baseRegOffset dflags (YmmReg 1) = oFFSET_StgRegTable_rYMM1 dflags
baseRegOffset dflags (YmmReg 2) = oFFSET_StgRegTable_rYMM2 dflags
baseRegOffset dflags (YmmReg 3) = oFFSET_StgRegTable_rYMM3 dflags
baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags
baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags
baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags
baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")")
baseRegOffset dflags (ZmmReg 1) = oFFSET_StgRegTable_rZMM1 dflags
baseRegOffset dflags (ZmmReg 2) = oFFSET_StgRegTable_rZMM2 dflags
baseRegOffset dflags (ZmmReg 3) = oFFSET_StgRegTable_rZMM3 dflags
baseRegOffset dflags (ZmmReg 4) = oFFSET_StgRegTable_rZMM4 dflags
baseRegOffset dflags (ZmmReg 5) = oFFSET_StgRegTable_rZMM5 dflags
baseRegOffset dflags (ZmmReg 6) = oFFSET_StgRegTable_rZMM6 dflags
baseRegOffset _ (ZmmReg n) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")")
baseRegOffset dflags (XmmReg 1 _ _ _) = oFFSET_StgRegTable_rXMM1 dflags
baseRegOffset dflags (XmmReg 2 _ _ _) = oFFSET_StgRegTable_rXMM2 dflags
baseRegOffset dflags (XmmReg 3 _ _ _) = oFFSET_StgRegTable_rXMM3 dflags
baseRegOffset dflags (XmmReg 4 _ _ _) = oFFSET_StgRegTable_rXMM4 dflags
baseRegOffset dflags (XmmReg 5 _ _ _) = oFFSET_StgRegTable_rXMM5 dflags
baseRegOffset dflags (XmmReg 6 _ _ _) = oFFSET_StgRegTable_rXMM6 dflags
baseRegOffset _ (XmmReg n _ _ _) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
baseRegOffset dflags (YmmReg 1 _ _ _) = oFFSET_StgRegTable_rYMM1 dflags
baseRegOffset dflags (YmmReg 2 _ _ _) = oFFSET_StgRegTable_rYMM2 dflags
baseRegOffset dflags (YmmReg 3 _ _ _) = oFFSET_StgRegTable_rYMM3 dflags
baseRegOffset dflags (YmmReg 4 _ _ _) = oFFSET_StgRegTable_rYMM4 dflags
baseRegOffset dflags (YmmReg 5 _ _ _) = oFFSET_StgRegTable_rYMM5 dflags
baseRegOffset dflags (YmmReg 6 _ _ _) = oFFSET_StgRegTable_rYMM6 dflags
baseRegOffset _ (YmmReg n _ _ _) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")")
baseRegOffset dflags (ZmmReg 1 _ _ _) = oFFSET_StgRegTable_rZMM1 dflags
baseRegOffset dflags (ZmmReg 2 _ _ _) = oFFSET_StgRegTable_rZMM2 dflags
baseRegOffset dflags (ZmmReg 3 _ _ _) = oFFSET_StgRegTable_rZMM3 dflags
baseRegOffset dflags (ZmmReg 4 _ _ _) = oFFSET_StgRegTable_rZMM4 dflags
baseRegOffset dflags (ZmmReg 5 _ _ _) = oFFSET_StgRegTable_rZMM5 dflags
baseRegOffset dflags (ZmmReg 6 _ _ _) = oFFSET_StgRegTable_rZMM6 dflags
baseRegOffset _ (ZmmReg n _ _ _) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")")
baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags
baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags
baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags
......
......@@ -669,7 +669,7 @@ emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
-- SIMD primops
emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
checkVecCompatibility dflags vcat n w
doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
doVecBroadcastOp (vecElemInjectCast dflags vcat w) ty zeros e res
where
zeros :: CmmExpr
zeros = CmmLit $ CmmVec (replicate n zero)
......@@ -1765,9 +1765,8 @@ vecElemProjectCast _ _ _ = Nothing
checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
checkVecCompatibility dflags vcat l w = do
when (hscTarget dflags /= HscLlvm) $ do
sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
,"Please use -fllvm."]
when (hscTarget dflags /= HscLlvm && hscTarget dflags /= HscAsm) $ do
sorry "SIMD vector instructions not supported for the C backend or GHCi"
check vecWidth vcat l w
where
check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
......@@ -1792,6 +1791,38 @@ checkVecCompatibility dflags vcat l w = do
------------------------------------------------------------------------------
-- Helpers for translating vector packing and unpacking.
doVecBroadcastOp :: Maybe MachOp -- Cast from element to vector component
-> CmmType -- Type of vector
-> CmmExpr -- Initial vector
-> CmmExpr -- Elements
-> CmmFormal -- Destination for result
-> FCode ()
doVecBroadcastOp maybe_pre_write_cast ty z es res = do
dst <- newTemp ty
emitAssign (CmmLocal dst) z
vecBroadcast dst es 0
where
vecBroadcast :: CmmFormal -> CmmExpr -> Int -> FCode ()
vecBroadcast src e _ = do
dst <- newTemp ty
if isFloatType (vecElemType ty)
then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Broadcast len wid)
[CmmReg (CmmLocal src), cast e])
--TODO : Add the MachOp MO_V_Broadcast
else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
[CmmReg (CmmLocal src), cast e])
emitAssign (CmmLocal res) (CmmReg (CmmLocal dst))
cast :: CmmExpr -> CmmExpr
cast val = case maybe_pre_write_cast of
Nothing -> val
Just cast -> CmmMachOp cast [val]
len :: Length
len = vecLength ty
wid :: Width
wid = typeWidth (vecElemType ty)
doVecPackOp :: Maybe MachOp -- Cast from element to vector component
-> CmmType -- Type of vector
......@@ -1809,16 +1840,16 @@ doVecPackOp maybe_pre_write_cast ty z es res = do
emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
vecPack src (e : es) i = do
dst <- newTemp ty
if isFloatType (vecElemType ty)
then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
[CmmReg (CmmLocal src), cast e, iLit])
else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
[CmmReg (CmmLocal src), cast e, iLit])
vecPack dst es (i + 1)
dst <- newTemp ty
if isFloatType (vecElemType ty)
then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
[CmmReg (CmmLocal src), cast e, iLit])
else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
[CmmReg (CmmLocal src), cast e, iLit])
vecPack dst es (i + 1)
where
-- vector indices are always 32-bits
iLit = CmmLit (CmmInt (toInteger i) W32)
iLit = CmmLit (CmmInt ((toInteger i) * 16) W32)
cast :: CmmExpr -> CmmExpr
cast val = case maybe_pre_write_cast of
......
......@@ -152,12 +152,12 @@ llvmFunArgs dflags live =
where platform = targetPlatform dflags
isLive r = not (isSSE r) || r `elem` alwaysLive || r `elem` live
isPassed r = not (isSSE r) || isLive r
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
isSSE (YmmReg _) = True
isSSE (ZmmReg _) = True
isSSE _ = False
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _ _ _ _ ) = True
isSSE (YmmReg _ _ _ _ ) = True
isSSE (ZmmReg _ _ _ _ ) = True
isSSE _ = False
-- | Llvm standard fun attributes
llvmStdFunAttrs :: [LlvmFuncAttr]
......
......@@ -1287,6 +1287,7 @@ genMachOp _ op [x] = case op of
MO_VU_Quot _ _ -> panicOp
MO_VU_Rem _ _ -> panicOp
MO_VF_Broadcast _ _ -> panicOp
MO_VF_Insert _ _ -> panicOp
MO_VF_Extract _ _ -> panicOp
......@@ -1483,6 +1484,7 @@ genMachOp_slow opt op [x, y] = case op of
MO_VS_Neg {} -> panicOp
MO_VF_Broadcast {} -> panicOp
MO_VF_Insert {} -> panicOp
MO_VF_Extract {} -> panicOp
......@@ -1844,9 +1846,9 @@ funEpilogue live = do
let liveRegs = alwaysLive ++ live
isSSE (FloatReg _) = True
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
isSSE (YmmReg _) = True
isSSE (ZmmReg _) = True
isSSE (XmmReg _ _ _ _) = True
isSSE (YmmReg _ _ _ _) = True
isSSE (ZmmReg _ _ _ _) = True
isSSE _ = False
-- Set to value or "undef" depending on whether the register is
......
......@@ -60,24 +60,24 @@ lmGlobalReg dflags suf reg
DoubleReg 4 -> doubleGlobal $ "D4" ++ suf
DoubleReg 5 -> doubleGlobal $ "D5" ++ suf
DoubleReg 6 -> doubleGlobal $ "D6" ++ suf
XmmReg 1 -> xmmGlobal $ "XMM1" ++ suf
XmmReg 2 -> xmmGlobal $ "XMM2" ++ suf
XmmReg 3 -> xmmGlobal $ "XMM3" ++ suf
XmmReg 4 -> xmmGlobal $ "XMM4" ++ suf
XmmReg 5 -> xmmGlobal $ "XMM5" ++ suf
XmmReg 6 -> xmmGlobal $ "XMM6" ++ suf
YmmReg 1 -> ymmGlobal $ "YMM1" ++ suf
YmmReg 2 -> ymmGlobal $ "YMM2" ++ suf
YmmReg 3 -> ymmGlobal $ "YMM3" ++ suf
YmmReg 4 -> ymmGlobal $ "YMM4" ++ suf
YmmReg 5 -> ymmGlobal $ "YMM5" ++ suf
YmmReg 6 -> ymmGlobal $ "YMM6" ++ suf
ZmmReg 1 -> zmmGlobal $ "ZMM1" ++ suf
ZmmReg 2 -> zmmGlobal $ "ZMM2" ++ suf
ZmmReg 3 -> zmmGlobal $ "ZMM3" ++ suf
ZmmReg 4 -> zmmGlobal $ "ZMM4" ++ suf
ZmmReg 5 -> zmmGlobal $ "ZMM5" ++ suf
ZmmReg 6 -> zmmGlobal $ "ZMM6" ++ suf
XmmReg 1 _ _ _ -> xmmGlobal $ "XMM1" ++ suf
XmmReg 2 _ _ _ -> xmmGlobal $ "XMM2" ++ suf
XmmReg 3 _ _ _ -> xmmGlobal $ "XMM3" ++ suf
XmmReg 4 _ _ _ -> xmmGlobal $ "XMM4" ++ suf
XmmReg 5 _ _ _ -> xmmGlobal $ "XMM5" ++ suf
XmmReg 6 _ _ _ -> xmmGlobal $ "XMM6" ++ suf
YmmReg 1 _ _ _ -> ymmGlobal $ "YMM1" ++ suf
YmmReg 2 _ _ _ -> ymmGlobal $ "YMM2" ++ suf
YmmReg 3 _ _ _ -> ymmGlobal $ "YMM3" ++ suf
YmmReg 4 _ _ _ -> ymmGlobal $ "YMM4" ++ suf
YmmReg 5 _ _ _ -> ymmGlobal $ "YMM5" ++ suf
YmmReg 6 _ _ _ -> ymmGlobal $ "YMM6" ++ suf
ZmmReg 1 _ _ _ -> zmmGlobal $ "ZMM1" ++ suf
ZmmReg 2 _ _ _ -> zmmGlobal $ "ZMM2" ++ suf
ZmmReg 3 _ _ _ -> zmmGlobal $ "ZMM3" ++ suf
ZmmReg 4 _ _ _ -> zmmGlobal $ "ZMM4" ++ suf
ZmmReg 5 _ _ _ -> zmmGlobal $ "ZMM5" ++ suf
ZmmReg 6 _ _ _ -> zmmGlobal $ "ZMM6" ++ suf
MachSp -> wordGlobal $ "MachSp" ++ suf
_other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg)
++ ") not supported!"
......
......@@ -219,6 +219,7 @@ module DynFlags (
-- * SSE and AVX
isSseEnabled,
isSse2Enabled,
isSse4_1Enabled,
isSse4_2Enabled,
isBmiEnabled,
isBmi2Enabled,
......@@ -5908,6 +5909,8 @@ isSse2Enabled dflags = case platformArch (targetPlatform dflags) of
ArchX86 -> True
_ -> False
isSse4_1Enabled :: DynFlags -> Bool
isSse4_1Enabled dflags = sseVersion dflags >= Just SSE4
isSse4_2Enabled :: DynFlags -> Bool
isSse4_2Enabled dflags = sseVersion dflags >= Just SSE42
......
......@@ -10,9 +10,11 @@
--
module Format (
Format(..),
ScalarFormat(..),
intFormat,
floatFormat,
isFloatFormat,
isVecFormat,
cmmTypeFormat,
formatToWidth,
formatInBytes
......@@ -25,6 +27,29 @@ import GhcPrelude
import Cmm
import Outputable
-- Note [GHC's data format representations]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- GHC has severals types that represent various aspects of data format.
-- These include:
--
-- * 'CmmType.CmmType': The data classification used throughout the C--
-- pipeline. This is a pair of a CmmCat and a Width.
--
-- * 'CmmType.CmmCat': What the bits in a C-- value mean (e.g. a pointer, integer, or floating-point value)
--
-- * 'CmmType.Width': The width of a C-- value.
--
-- * 'CmmType.Length': The width (measured in number of scalars) of a vector value.
--
-- * 'Format.Format': The data format representation used by much of the backend.
--
-- * 'Format.ScalarFormat': The format of a 'Format.VecFormat'\'s scalar.
--
-- * 'RegClass.RegClass': Whether a register is an integer, float-point, or vector register
--
-- It looks very like the old MachRep, but it's now of purely local
-- significance, here in the native code generator. You can change it
-- without global consequences.
......@@ -47,8 +72,16 @@ data Format
| II64
| FF32
| FF64
| VecFormat !Length !ScalarFormat !Width
deriving (Show, Eq)
data ScalarFormat = FmtInt8
| FmtInt16
| FmtInt32
| FmtInt64
| FmtFloat
| FmtDouble
deriving (Show, Eq)
-- | Get the integer format of this width.
intFormat :: Width -> Format
......@@ -81,13 +114,33 @@ isFloatFormat format
FF64 -> True
_ -> False
-- | Check if a format represents a vector
isVecFormat :: Format -> Bool
isVecFormat (VecFormat {}) = True
isVecFormat _ = False
-- | Convert a Cmm type to a Format.
cmmTypeFormat :: CmmType -> Format
cmmTypeFormat ty
| isFloatType ty = floatFormat (typeWidth ty)
| isVecType ty = vecFormat ty
| otherwise = intFormat (typeWidth ty)
vecFormat :: CmmType -> Format
vecFormat ty =
let l = vecLength ty
elemTy = vecElemType ty
in if isFloatType elemTy
then case typeWidth elemTy of
W32 -> VecFormat l FmtFloat W32
W64 -> VecFormat l FmtDouble W64
_ -> pprPanic "Incorrect vector element width" (ppr elemTy)
else case typeWidth elemTy of
W8 -> VecFormat l FmtInt8 W8
W16 -> VecFormat l FmtInt16 W16
W32 -> VecFormat l FmtInt32 W32
W64 -> VecFormat l FmtInt64 W64
_ -> pprPanic "Incorrect vector element width" (ppr elemTy)
-- | Get the Width of a Format.
formatToWidth :: Format -> Width
......@@ -99,7 +152,7 @@ formatToWidth format
II64 -> W64
FF32 -> W32
FF64 -> W64
VecFormat l _ w -> widthFromBytes (l*widthInBytes w)
formatInBytes :: Format -> Int
formatInBytes = widthInBytes . formatToWidth
......@@ -250,7 +250,6 @@ getNewRegNat rep
dflags <- getDynFlags
return (RegVirtual $ targetMkVirtualReg (targetPlatform dflags) u rep)
getNewRegPairNat :: Format -> NatM (Reg,Reg)
getNewRegPairNat rep
= do u <- getUniqueNat
......
......@@ -1909,6 +1909,8 @@ genCCall' dflags gcp target dest_regs args
FF32 -> (1, 1, 4, fprs)
FF64 -> (2, 1, 8, fprs)
II64 -> panic "genCCall' passArguments II64"
VecFormat {}
-> panic "genCCall' passArguments vector format"
GCP32ELF ->
case cmmTypeFormat rep of
......@@ -1919,6 +1921,8 @@ genCCall' dflags gcp target dest_regs args
FF32 -> (0, 1, 4, fprs)
FF64 -> (0, 1, 8, fprs)
II64 -> panic "genCCall' passArguments II64"
VecFormat {}
-> panic "genCCall' passArguments vector format"
GCP64ELF _ ->
case cmmTypeFormat rep of
II8 -> (1, 0, 8, gprs)
......@@ -1930,6 +1934,8 @@ genCCall' dflags gcp target dest_regs args
-- the FPRs.
FF32 -> (1, 1, 8, fprs)
FF64 -> (1, 1, 8, fprs)
VecFormat {}
-> panic "genCCall' passArguments vector format"
moveResult reduceToFF32 =
case dest_regs of
......
......@@ -29,7 +29,7 @@ import BlockId
import CLabel
import PprCmmExpr ()
import Unique ( pprUniqueAlways, getUnique )
import Unique ( getUnique )
import GHC.Platform
import FastString
import Outputable
......@@ -168,10 +168,7 @@ pprReg r
= case r of
RegReal (RealRegSingle i) -> ppr_reg_no i
RegReal (RealRegPair{}) -> panic "PPC.pprReg: no reg pairs on this arch"
RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u
RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u
RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u
RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u
RegVirtual v -> ppr v
where
ppr_reg_no :: Int -> SDoc
......@@ -190,7 +187,8 @@ pprFormat x
II32 -> sLit "w"
II64 -> sLit "d"
FF32 -> sLit "fs"
FF64 -> sLit "fd")
FF64 -> sLit "fd"
VecFormat _ _ _ -> panic "PPC.Ppr.pprFormat: VecFormat")
pprCond :: Cond -> SDoc
......@@ -375,6 +373,7 @@ pprInstr (LD fmt reg addr) = hcat [
II64 -> sLit "d"
FF32 -> sLit "fs"
FF64 -> sLit "fd"
VecFormat _ _ _ -> panic "PPC.Ppr.pprInstr: VecFormat"
),
case addr of AddrRegImm _ _ -> empty
AddrRegReg _ _ -> char 'x',
......@@ -414,6 +413,7 @@ pprInstr (LA fmt reg addr) = hcat [
II64 -> sLit "d"
FF32 -> sLit "fs"
FF64 -> sLit "fd"
VecFormat _ _ _ -> panic "PPC.Ppr.pprInstr: VecFormat"
),
case addr of AddrRegImm _ _ -> empty
AddrRegReg _ _ -> char 'x',
......
......@@ -56,6 +56,7 @@ data VirtualReg
| VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
| VirtualRegF {-# UNPACK #-} !Unique
| VirtualRegD {-# UNPACK #-} !Unique
| VirtualRegVec {-# UNPACK #-} !Unique
deriving (Eq, Show)
......@@ -69,6 +70,7 @@ instance Ord VirtualReg where
compare (VirtualRegHi a) (VirtualRegHi b) = nonDetCmpUnique a b
compare (VirtualRegF a) (VirtualRegF b) = nonDetCmpUnique a b
compare (VirtualRegD a) (VirtualRegD b) = nonDetCmpUnique a b
compare (VirtualRegVec a) (VirtualRegVec b) = nonDetCmpUnique a b
compare VirtualRegI{} _ = LT
compare _ VirtualRegI{} = GT
......@@ -76,7 +78,8 @@ instance Ord VirtualReg where
compare _ VirtualRegHi{} = GT
compare VirtualRegF{} _ = LT
compare _ VirtualRegF{} = GT
compare VirtualRegVec{} _ = LT
compare _ VirtualRegVec{} = GT
instance Uniquable VirtualReg where
......@@ -86,6 +89,7 @@ instance Uniquable VirtualReg where
VirtualRegHi u -> u
VirtualRegF u -> u
VirtualRegD u -> u
VirtualRegVec u -> u
instance Outputable VirtualReg where
ppr reg
......@@ -95,8 +99,9 @@ instance Outputable VirtualReg where
-- this code is kinda wrong on x86
-- because float and double occupy the same register set
-- namely SSE2 register xmm0 .. xmm15
VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u
VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u
VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u
VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u
VirtualRegVec u -> text "%vVec_" <> pprUniqueAlways u
......@@ -107,6 +112,7 @@ renameVirtualReg u r
VirtualRegHi _ -> VirtualRegHi u
VirtualRegF _ -> VirtualRegF u
VirtualRegD _ -> VirtualRegD u
VirtualRegVec _ -> VirtualRegVec u
classOfVirtualReg :: VirtualReg -> RegClass
......@@ -116,6 +122,8 @@ classOfVirtualReg vr
VirtualRegHi{} -> RcInteger
VirtualRegF{} -> RcFloat
VirtualRegD{} -> RcDouble
-- Below is an awful, largely x86-specific hack
VirtualRegVec{} -> RcDouble
......
......@@ -195,7 +195,6 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclu
-- Specification Code ----------------------------------------------------------
--
-- The trivColorable function for each particular architecture should
......
......@@ -2,6 +2,7 @@
module RegAlloc.Linear.FreeRegs (
FR(..),
allFreeRegs,
maxSpillSlots
)
......@@ -69,6 +70,10 @@ instance FR SPARC.FreeRegs where
frInitFreeRegs = SPARC.initFreeRegs
frReleaseReg = SPARC.releaseReg
-- | For debugging output.
allFreeRegs :: FR freeRegs => Platform -> freeRegs -> [RealReg]
allFreeRegs plat fr = foldMap (\rcls -> frGetFreeRegs plat rcls fr) allRegClasses
maxSpillSlots :: DynFlags -> Int
maxSpillSlots dflags
= case platformArch (targetPlatform dflags) of
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment