Pass 512-bit-wide vectors in registers.

parent 26a960c6
......@@ -68,6 +68,7 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
where vec = case (w, regs) of
(W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
(W256, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
(W512, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
_ -> (assts, (r:rs))
float = case (w, regs) of
(W32, (vs, fs, ds, ls, s:ss))
......
......@@ -346,6 +346,9 @@ data GlobalReg
| YmmReg -- 256-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
| ZmmReg -- 512-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
-- STG registers
| Sp -- Stack ptr; points to last occupied stack location.
| SpLim -- Stack limit
......@@ -383,6 +386,7 @@ instance Eq GlobalReg where
LongReg i == LongReg j = i==j
XmmReg i == XmmReg j = i==j
YmmReg i == YmmReg j = i==j
ZmmReg i == ZmmReg j = i==j
Sp == Sp = True
SpLim == SpLim = True
Hp == Hp = True
......@@ -406,6 +410,7 @@ instance Ord GlobalReg where
compare (LongReg i) (LongReg j) = compare i j
compare (XmmReg i) (XmmReg j) = compare i j
compare (YmmReg i) (YmmReg j) = compare i j
compare (ZmmReg i) (ZmmReg j) = compare i j
compare Sp Sp = EQ
compare SpLim SpLim = EQ
compare Hp Hp = EQ
......@@ -431,6 +436,8 @@ instance Ord GlobalReg where
compare _ (XmmReg _) = GT
compare (YmmReg _) _ = LT
compare _ (YmmReg _) = GT
compare (ZmmReg _) _ = LT
compare _ (ZmmReg _) = GT
compare Sp _ = LT
compare _ Sp = GT
compare SpLim _ = LT
......@@ -475,6 +482,7 @@ globalRegType _ (DoubleReg _) = cmmFloat W64
globalRegType _ (LongReg _) = cmmBits W64
globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32)
globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32)
globalRegType dflags Hp = gcWord dflags
-- The initialiser for all
......@@ -488,4 +496,5 @@ isArgReg (DoubleReg {}) = True
isArgReg (LongReg {}) = True
isArgReg (XmmReg {}) = True
isArgReg (YmmReg {}) = True
isArgReg (ZmmReg {}) = True
isArgReg _ = False
......@@ -257,6 +257,7 @@ pprGlobalReg gr
LongReg n -> char 'L' <> int n
XmmReg n -> ptext (sLit "XMM") <> int n
YmmReg n -> ptext (sLit "YMM") <> int n
ZmmReg n -> ptext (sLit "ZMM") <> int n
Sp -> ptext (sLit "Sp")
SpLim -> ptext (sLit "SpLim")
Hp -> ptext (sLit "Hp")
......
......@@ -63,6 +63,13 @@ baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags
baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags
baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags
baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")")
baseRegOffset dflags (ZmmReg 1) = oFFSET_StgRegTable_rZMM1 dflags
baseRegOffset dflags (ZmmReg 2) = oFFSET_StgRegTable_rZMM2 dflags
baseRegOffset dflags (ZmmReg 3) = oFFSET_StgRegTable_rZMM3 dflags
baseRegOffset dflags (ZmmReg 4) = oFFSET_StgRegTable_rZMM4 dflags
baseRegOffset dflags (ZmmReg 5) = oFFSET_StgRegTable_rZMM5 dflags
baseRegOffset dflags (ZmmReg 6) = oFFSET_StgRegTable_rZMM6 dflags
baseRegOffset _ (ZmmReg n) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")")
baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags
baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags
baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags
......
......@@ -156,6 +156,7 @@ llvmFunArgs dflags live =
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
isSSE (YmmReg _) = True
isSSE (ZmmReg _) = True
isSSE _ = False
-- | Llvm standard fun attributes
......
......@@ -1534,6 +1534,7 @@ funEpilogue live = do
isSSE (DoubleReg _) = True
isSSE (XmmReg _) = True
isSSE (YmmReg _) = True
isSSE (ZmmReg _) = True
isSSE _ = False
-- Set to value or "undef" depending on whether the register is
......
......@@ -68,6 +68,12 @@ lmGlobalReg dflags suf reg
YmmReg 4 -> ymmGlobal $ "YMM4" ++ suf
YmmReg 5 -> ymmGlobal $ "YMM5" ++ suf
YmmReg 6 -> ymmGlobal $ "YMM6" ++ suf
ZmmReg 1 -> zmmGlobal $ "ZMM1" ++ suf
ZmmReg 2 -> zmmGlobal $ "ZMM2" ++ suf
ZmmReg 3 -> zmmGlobal $ "ZMM3" ++ suf
ZmmReg 4 -> zmmGlobal $ "ZMM4" ++ suf
ZmmReg 5 -> zmmGlobal $ "ZMM5" ++ suf
ZmmReg 6 -> zmmGlobal $ "ZMM6" ++ suf
_other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg)
++ ") not supported!"
-- LongReg, HpLim, CCSS, CurrentTSO, CurrentNursery, HpAlloc
......@@ -79,6 +85,7 @@ lmGlobalReg dflags suf reg
doubleGlobal name = LMNLocalVar (fsLit name) LMDouble
-- Local LLVM variables for the SIMD vector registers. Each helper wraps the
-- given name with 'fsLit' and gives it a vector type of 32-bit integer lanes:
-- 4 lanes for XMM, 8 lanes for YMM and 16 lanes for ZMM.
xmmGlobal name = LMNLocalVar (fsLit name) (LMVector 4 (LMInt 32))
ymmGlobal name = LMNLocalVar (fsLit name) (LMVector 8 (LMInt 32))
zmmGlobal name = LMNLocalVar (fsLit name) (LMVector 16 (LMInt 32))
-- | A list of STG Registers that should always be considered alive
alwaysLive :: [GlobalReg]
......
......@@ -82,6 +82,23 @@ import Reg
# define ymm14 54
# define ymm15 55
# define zmm0 56
# define zmm1 57
# define zmm2 58
# define zmm3 59
# define zmm4 60
# define zmm5 61
# define zmm6 62
# define zmm7 63
# define zmm8 64
# define zmm9 65
# define zmm10 66
# define zmm11 67
# define zmm12 68
# define zmm13 69
# define zmm14 70
# define zmm15 71
#elif MACHREGS_powerpc
# define r0 0
......@@ -411,6 +428,9 @@ activeStgRegs = [
#ifdef REG_YMM1
,YmmReg 1
#endif
#ifdef REG_ZMM1
,ZmmReg 1
#endif
#ifdef REG_F2
,FloatReg 2
#endif
......@@ -423,6 +443,9 @@ activeStgRegs = [
#ifdef REG_YMM2
,YmmReg 2
#endif
#ifdef REG_ZMM2
,ZmmReg 2
#endif
#ifdef REG_F3
,FloatReg 3
#endif
......@@ -435,6 +458,9 @@ activeStgRegs = [
#ifdef REG_YMM3
,YmmReg 3
#endif
#ifdef REG_ZMM3
,ZmmReg 3
#endif
#ifdef REG_F4
,FloatReg 4
#endif
......@@ -447,6 +473,9 @@ activeStgRegs = [
#ifdef REG_YMM4
,YmmReg 4
#endif
#ifdef REG_ZMM4
,ZmmReg 4
#endif
#ifdef REG_F5
,FloatReg 5
#endif
......@@ -459,6 +488,9 @@ activeStgRegs = [
#ifdef REG_YMM5
,YmmReg 5
#endif
#ifdef REG_ZMM5
,ZmmReg 5
#endif
#ifdef REG_F6
,FloatReg 6
#endif
......@@ -471,6 +503,9 @@ activeStgRegs = [
#ifdef REG_YMM6
,YmmReg 6
#endif
#ifdef REG_ZMM6
,ZmmReg 6
#endif
#else /* MAX_REAL_XMM_REG == 0 */
#ifdef REG_F1
,FloatReg 1
......@@ -662,6 +697,26 @@ globalRegMaybe (YmmReg 5) = Just (RealRegSingle REG_YMM5)
globalRegMaybe (YmmReg 6) = Just (RealRegSingle REG_YMM6)
# endif
# endif
# if MAX_REAL_ZMM_REG != 0
# ifdef REG_ZMM1
globalRegMaybe (ZmmReg 1) = Just (RealRegSingle REG_ZMM1)
# endif
# ifdef REG_ZMM2
globalRegMaybe (ZmmReg 2) = Just (RealRegSingle REG_ZMM2)
# endif
# ifdef REG_ZMM3
globalRegMaybe (ZmmReg 3) = Just (RealRegSingle REG_ZMM3)
# endif
# ifdef REG_ZMM4
globalRegMaybe (ZmmReg 4) = Just (RealRegSingle REG_ZMM4)
# endif
# ifdef REG_ZMM5
globalRegMaybe (ZmmReg 5) = Just (RealRegSingle REG_ZMM5)
# endif
# ifdef REG_ZMM6
globalRegMaybe (ZmmReg 6) = Just (RealRegSingle REG_ZMM6)
# endif
# endif
# ifdef REG_Sp
globalRegMaybe Sp = Just (RealRegSingle REG_Sp)
# endif
......
......@@ -113,12 +113,18 @@
#define REG_YMM3 ymm2
#define REG_YMM4 ymm3
/* ZMM register mapping for this platform section: the 1-based names
 * REG_ZMM1..REG_ZMM4 are assigned to the 0-based names zmm0..zmm3, so
 * REG_ZMMn refers to zmm(n-1) here. */
#define REG_ZMM1 zmm0
#define REG_ZMM2 zmm1
#define REG_ZMM3 zmm2
#define REG_ZMM4 zmm3
#define MAX_REAL_VANILLA_REG 1 /* always, since it defines the entry conv */
#define MAX_REAL_FLOAT_REG 0
#define MAX_REAL_DOUBLE_REG 0
#define MAX_REAL_LONG_REG 0
#define MAX_REAL_XMM_REG 4
#define MAX_REAL_YMM_REG 4
#define MAX_REAL_ZMM_REG 4
/* -----------------------------------------------------------------------------
The x86-64 register mapping
......@@ -192,6 +198,13 @@
#define REG_YMM5 ymm5
#define REG_YMM6 ymm6
/* ZMM register mapping for this platform section: REG_ZMM1..REG_ZMM6 are
 * assigned to the like-numbered names zmm1..zmm6 (zmm0 is not assigned to
 * any REG_ZMMn here). */
#define REG_ZMM1 zmm1
#define REG_ZMM2 zmm2
#define REG_ZMM3 zmm3
#define REG_ZMM4 zmm4
#define REG_ZMM5 zmm5
#define REG_ZMM6 zmm6
#if !defined(mingw32_HOST_OS)
#define CALLER_SAVES_R3
#define CALLER_SAVES_R4
......@@ -235,12 +248,22 @@
#define CALLER_SAVES_YMM6
#endif
/* CALLER_SAVES_ZMMn flags for the ZMM registers. ZMM1..ZMM5 are flagged
 * unconditionally; ZMM6 is flagged only when mingw32_HOST_OS is not defined.
 * NOTE(review): the Windows exclusion presumably reflects a calling
 * convention in which the underlying register is preserved by the callee --
 * confirm against the platform ABI. */
#define CALLER_SAVES_ZMM1
#define CALLER_SAVES_ZMM2
#define CALLER_SAVES_ZMM3
#define CALLER_SAVES_ZMM4
#define CALLER_SAVES_ZMM5
#if !defined(mingw32_HOST_OS)
/* Not defined on mingw32 hosts. */
#define CALLER_SAVES_ZMM6
#endif
#define MAX_REAL_VANILLA_REG 6
#define MAX_REAL_FLOAT_REG 6
#define MAX_REAL_DOUBLE_REG 6
#define MAX_REAL_LONG_REG 0
#define MAX_REAL_XMM_REG 6
#define MAX_REAL_YMM_REG 6
#define MAX_REAL_ZMM_REG 6
/* -----------------------------------------------------------------------------
The PowerPC register mapping
......
......@@ -93,6 +93,12 @@ typedef struct {
StgWord256 rYMM4;
StgWord256 rYMM5;
StgWord256 rYMM6;
StgWord512 rZMM1;
StgWord512 rZMM2;
StgWord512 rZMM3;
StgWord512 rZMM4;
StgWord512 rZMM5;
StgWord512 rZMM6;
StgWord64 rL1;
StgPtr rSp;
StgPtr rSpLim;
......@@ -354,6 +360,42 @@ GLOBAL_REG_DECL(StgWord256,YMM6,REG_YMM6)
#define YMM6 (BaseReg->rYMM6)
#endif
/*
 * Accessors for the six 512-bit vector registers ZMM1..ZMM6.
 *
 * Each register follows the same two-way scheme:
 *   - if REG_ZMMn is defined and NO_GLOBAL_REG_DECLS is not, the name ZMMn
 *     is introduced via GLOBAL_REG_DECL with type StgWord512
 *     (NOTE(review): GLOBAL_REG_DECL is defined outside this view;
 *     presumably it binds the name to the machine register -- confirm);
 *   - otherwise ZMMn is a macro that expands to the rZMMn field reached
 *     through BaseReg.
 */
#if defined(REG_ZMM1) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord512,ZMM1,REG_ZMM1)
#else
/* Fallback: read/write the rZMM1 slot via BaseReg. */
#define ZMM1 (BaseReg->rZMM1)
#endif
#if defined(REG_ZMM2) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord512,ZMM2,REG_ZMM2)
#else
/* Fallback: read/write the rZMM2 slot via BaseReg. */
#define ZMM2 (BaseReg->rZMM2)
#endif
#if defined(REG_ZMM3) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord512,ZMM3,REG_ZMM3)
#else
/* Fallback: read/write the rZMM3 slot via BaseReg. */
#define ZMM3 (BaseReg->rZMM3)
#endif
#if defined(REG_ZMM4) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord512,ZMM4,REG_ZMM4)
#else
/* Fallback: read/write the rZMM4 slot via BaseReg. */
#define ZMM4 (BaseReg->rZMM4)
#endif
#if defined(REG_ZMM5) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord512,ZMM5,REG_ZMM5)
#else
/* Fallback: read/write the rZMM5 slot via BaseReg. */
#define ZMM5 (BaseReg->rZMM5)
#endif
#if defined(REG_ZMM6) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord512,ZMM6,REG_ZMM6)
#else
/* Fallback: read/write the rZMM6 slot via BaseReg. */
#define ZMM6 (BaseReg->rZMM6)
#endif
#if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord64,L1,REG_L1)
#else
......
......@@ -87,6 +87,8 @@ typedef struct { StgWord64 h; StgWord64 l; } StgWord128;
/* Wide word built from two StgWord128 members, h and l
 * (presumably the high and low halves -- confirm against users). */
typedef struct {
    StgWord128 h;
    StgWord128 l;
} StgWord256;

/* Wide word built from two StgWord256 members, h and l
 * (presumably the high and low halves -- confirm against users). */
typedef struct {
    StgWord256 h;
    StgWord256 l;
} StgWord512;
/*
* Define the standard word size we'll use on this machine: make it
* big enough to hold a pointer.
......
......@@ -319,6 +319,12 @@ wanteds = concat
,fieldOffset Both "StgRegTable" "rYMM4"
,fieldOffset Both "StgRegTable" "rYMM5"
,fieldOffset Both "StgRegTable" "rYMM6"
,fieldOffset Both "StgRegTable" "rZMM1"
,fieldOffset Both "StgRegTable" "rZMM2"
,fieldOffset Both "StgRegTable" "rZMM3"
,fieldOffset Both "StgRegTable" "rZMM4"
,fieldOffset Both "StgRegTable" "rZMM5"
,fieldOffset Both "StgRegTable" "rZMM6"
,fieldOffset Both "StgRegTable" "rL1"
,fieldOffset Both "StgRegTable" "rSp"
,fieldOffset Both "StgRegTable" "rSpLim"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment