Add support for 512-bit-wide vectors.

parent 49f4c12e
...@@ -48,6 +48,7 @@ data ArgRep = P -- GC Ptr ...@@ -48,6 +48,7 @@ data ArgRep = P -- GC Ptr
| D -- Double | D -- Double
| V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc. | V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc.
| V32 -- 32-byte (256-bit) vectors of Float/Double/Int8/Word32/etc. | V32 -- 32-byte (256-bit) vectors of Float/Double/Int8/Word32/etc.
| V64 -- 64-byte (512-bit) vectors of Float/Double/Int8/Word32/etc.
instance Outputable ArgRep where ppr = text . argRepString instance Outputable ArgRep where ppr = text . argRepString
argRepString :: ArgRep -> String argRepString :: ArgRep -> String
...@@ -59,6 +60,7 @@ argRepString F = "F" ...@@ -59,6 +60,7 @@ argRepString F = "F"
argRepString D = "D" argRepString D = "D"
argRepString V16 = "V16" argRepString V16 = "V16"
argRepString V32 = "V32" argRepString V32 = "V32"
argRepString V64 = "V64"
toArgRep :: PrimRep -> ArgRep toArgRep :: PrimRep -> ArgRep
toArgRep VoidRep = V toArgRep VoidRep = V
...@@ -73,6 +75,7 @@ toArgRep DoubleRep = D ...@@ -73,6 +75,7 @@ toArgRep DoubleRep = D
toArgRep (VecRep len elem) = case len*primElemRepSizeB elem of toArgRep (VecRep len elem) = case len*primElemRepSizeB elem of
16 -> V16 16 -> V16
32 -> V32 32 -> V32
64 -> V64
_ -> error "toArgRep: bad vector primrep" _ -> error "toArgRep: bad vector primrep"
isNonV :: ArgRep -> Bool isNonV :: ArgRep -> Bool
...@@ -88,6 +91,7 @@ argRepSizeW dflags D = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags ...@@ -88,6 +91,7 @@ argRepSizeW dflags D = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags
argRepSizeW _ V = 0 argRepSizeW _ V = 0
argRepSizeW dflags V16 = 16 `quot` wORD_SIZE dflags argRepSizeW dflags V16 = 16 `quot` wORD_SIZE dflags
argRepSizeW dflags V32 = 32 `quot` wORD_SIZE dflags argRepSizeW dflags V32 = 32 `quot` wORD_SIZE dflags
argRepSizeW dflags V64 = 64 `quot` wORD_SIZE dflags
idArgRep :: Id -> ArgRep idArgRep :: Id -> ArgRep
idArgRep = toArgRep . idPrimRep idArgRep = toArgRep . idPrimRep
...@@ -137,4 +141,5 @@ slowCallPattern (D: _) = (fsLit "stg_ap_d", 1) ...@@ -137,4 +141,5 @@ slowCallPattern (D: _) = (fsLit "stg_ap_d", 1)
slowCallPattern (L: _) = (fsLit "stg_ap_l", 1) slowCallPattern (L: _) = (fsLit "stg_ap_l", 1)
slowCallPattern (V16: _) = (fsLit "stg_ap_v16", 1) slowCallPattern (V16: _) = (fsLit "stg_ap_v16", 1)
slowCallPattern (V32: _) = (fsLit "stg_ap_v32", 1) slowCallPattern (V32: _) = (fsLit "stg_ap_v32", 1)
slowCallPattern (V64: _) = (fsLit "stg_ap_v64", 1)
slowCallPattern [] = (fsLit "stg_ap_0", 0) slowCallPattern [] = (fsLit "stg_ap_0", 0)
...@@ -386,6 +386,7 @@ stdPattern reps ...@@ -386,6 +386,7 @@ stdPattern reps
[L] -> Just ARG_L [L] -> Just ARG_L
[V16] -> Just ARG_V16 [V16] -> Just ARG_V16
[V32] -> Just ARG_V32 [V32] -> Just ARG_V32
[V64] -> Just ARG_V64
[N,N] -> Just ARG_NN [N,N] -> Just ARG_NN
[N,P] -> Just ARG_NP [N,P] -> Just ARG_NP
......
...@@ -463,6 +463,7 @@ push_alts F = bci_PUSH_ALTS_F ...@@ -463,6 +463,7 @@ push_alts F = bci_PUSH_ALTS_F
push_alts D = bci_PUSH_ALTS_D push_alts D = bci_PUSH_ALTS_D
push_alts V16 = error "push_alts: vector" push_alts V16 = error "push_alts: vector"
push_alts V32 = error "push_alts: vector" push_alts V32 = error "push_alts: vector"
push_alts V64 = error "push_alts: vector"
return_ubx :: ArgRep -> Word16 return_ubx :: ArgRep -> Word16
return_ubx V = bci_RETURN_V return_ubx V = bci_RETURN_V
...@@ -473,6 +474,7 @@ return_ubx F = bci_RETURN_F ...@@ -473,6 +474,7 @@ return_ubx F = bci_RETURN_F
return_ubx D = bci_RETURN_D return_ubx D = bci_RETURN_D
return_ubx V16 = error "return_ubx: vector" return_ubx V16 = error "return_ubx: vector"
return_ubx V32 = error "return_ubx: vector" return_ubx V32 = error "return_ubx: vector"
return_ubx V64 = error "return_ubx: vector"
-- Make lists of host-sized words for literals, so that when the -- Make lists of host-sized words for literals, so that when the
-- words are placed in memory at increasing addresses, the -- words are placed in memory at increasing addresses, the
......
...@@ -100,6 +100,7 @@ ...@@ -100,6 +100,7 @@
#define L_ bits64 #define L_ bits64
#define V16_ bits128 #define V16_ bits128
#define V32_ bits256 #define V32_ bits256
#define V64_ bits512
#define SIZEOF_StgDouble 8 #define SIZEOF_StgDouble 8
#define SIZEOF_StgWord64 8 #define SIZEOF_StgWord64 8
......
...@@ -35,22 +35,23 @@ ...@@ -35,22 +35,23 @@
#define ARG_L 8 #define ARG_L 8
#define ARG_V16 9 #define ARG_V16 9
#define ARG_V32 10 #define ARG_V32 10
#define ARG_NN 11 #define ARG_V64 11
#define ARG_NP 12 #define ARG_NN 12
#define ARG_PN 13 #define ARG_NP 13
#define ARG_PP 14 #define ARG_PN 14
#define ARG_NNN 15 #define ARG_PP 15
#define ARG_NNP 16 #define ARG_NNN 16
#define ARG_NPN 17 #define ARG_NNP 17
#define ARG_NPP 18 #define ARG_NPN 18
#define ARG_PNN 19 #define ARG_NPP 19
#define ARG_PNP 20 #define ARG_PNN 20
#define ARG_PPN 21 #define ARG_PNP 21
#define ARG_PPP 22 #define ARG_PPN 22
#define ARG_PPPP 23 #define ARG_PPP 23
#define ARG_PPPPP 24 #define ARG_PPPP 24
#define ARG_PPPPPP 25 #define ARG_PPPPP 25
#define ARG_PPPPPPP 26 #define ARG_PPPPPP 26
#define ARG_PPPPPPPP 27 #define ARG_PPPPPPP 27
#define ARG_PPPPPPPP 28
#endif /* RTS_STORAGE_FUNTYPES_H */ #endif /* RTS_STORAGE_FUNTYPES_H */
...@@ -226,6 +226,7 @@ RTS_RET(stg_ap_d); ...@@ -226,6 +226,7 @@ RTS_RET(stg_ap_d);
RTS_RET(stg_ap_l); RTS_RET(stg_ap_l);
RTS_RET(stg_ap_v16); RTS_RET(stg_ap_v16);
RTS_RET(stg_ap_v32); RTS_RET(stg_ap_v32);
RTS_RET(stg_ap_v64);
RTS_RET(stg_ap_n); RTS_RET(stg_ap_n);
RTS_RET(stg_ap_p); RTS_RET(stg_ap_p);
RTS_RET(stg_ap_pv); RTS_RET(stg_ap_pv);
...@@ -244,6 +245,7 @@ RTS_FUN_DECL(stg_ap_d_fast); ...@@ -244,6 +245,7 @@ RTS_FUN_DECL(stg_ap_d_fast);
RTS_FUN_DECL(stg_ap_l_fast); RTS_FUN_DECL(stg_ap_l_fast);
RTS_FUN_DECL(stg_ap_v16_fast); RTS_FUN_DECL(stg_ap_v16_fast);
RTS_FUN_DECL(stg_ap_v32_fast); RTS_FUN_DECL(stg_ap_v32_fast);
RTS_FUN_DECL(stg_ap_v64_fast);
RTS_FUN_DECL(stg_ap_n_fast); RTS_FUN_DECL(stg_ap_n_fast);
RTS_FUN_DECL(stg_ap_p_fast); RTS_FUN_DECL(stg_ap_p_fast);
RTS_FUN_DECL(stg_ap_pv_fast); RTS_FUN_DECL(stg_ap_pv_fast);
......
...@@ -884,6 +884,7 @@ typedef struct _RtsSymbolVal { ...@@ -884,6 +884,7 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(stg_ap_l_ret) \ SymI_HasProto(stg_ap_l_ret) \
SymI_HasProto(stg_ap_v16_ret) \ SymI_HasProto(stg_ap_v16_ret) \
SymI_HasProto(stg_ap_v32_ret) \ SymI_HasProto(stg_ap_v32_ret) \
SymI_HasProto(stg_ap_v64_ret) \
SymI_HasProto(stg_ap_n_ret) \ SymI_HasProto(stg_ap_n_ret) \
SymI_HasProto(stg_ap_p_ret) \ SymI_HasProto(stg_ap_p_ret) \
SymI_HasProto(stg_ap_pv_ret) \ SymI_HasProto(stg_ap_pv_ret) \
...@@ -1254,6 +1255,7 @@ typedef struct _RtsSymbolVal { ...@@ -1254,6 +1255,7 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(stg_ap_l_info) \ SymI_HasProto(stg_ap_l_info) \
SymI_HasProto(stg_ap_v16_info) \ SymI_HasProto(stg_ap_v16_info) \
SymI_HasProto(stg_ap_v32_info) \ SymI_HasProto(stg_ap_v32_info) \
SymI_HasProto(stg_ap_v64_info) \
SymI_HasProto(stg_ap_n_info) \ SymI_HasProto(stg_ap_n_info) \
SymI_HasProto(stg_ap_p_info) \ SymI_HasProto(stg_ap_p_info) \
SymI_HasProto(stg_ap_pv_info) \ SymI_HasProto(stg_ap_pv_info) \
...@@ -1271,6 +1273,7 @@ typedef struct _RtsSymbolVal { ...@@ -1271,6 +1273,7 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(stg_ap_l_fast) \ SymI_HasProto(stg_ap_l_fast) \
SymI_HasProto(stg_ap_v16_fast) \ SymI_HasProto(stg_ap_v16_fast) \
SymI_HasProto(stg_ap_v32_fast) \ SymI_HasProto(stg_ap_v32_fast) \
SymI_HasProto(stg_ap_v64_fast) \
SymI_HasProto(stg_ap_n_fast) \ SymI_HasProto(stg_ap_n_fast) \
SymI_HasProto(stg_ap_p_fast) \ SymI_HasProto(stg_ap_p_fast) \
SymI_HasProto(stg_ap_pv_fast) \ SymI_HasProto(stg_ap_pv_fast) \
......
...@@ -34,6 +34,7 @@ data ArgRep ...@@ -34,6 +34,7 @@ data ArgRep
| L -- long (64-bit) | L -- long (64-bit)
| V16 -- 16-byte (128-bit) vectors | V16 -- 16-byte (128-bit) vectors
| V32 -- 32-byte (256-bit) vectors | V32 -- 32-byte (256-bit) vectors
| V64 -- 64-byte (512-bit) vectors
-- size of a value in *words* -- size of a value in *words*
argSize :: ArgRep -> Int argSize :: ArgRep -> Int
...@@ -45,6 +46,7 @@ argSize D = (SIZEOF_DOUBLE `quot` SIZEOF_VOID_P :: Int) ...@@ -45,6 +46,7 @@ argSize D = (SIZEOF_DOUBLE `quot` SIZEOF_VOID_P :: Int)
argSize L = (8 `quot` SIZEOF_VOID_P :: Int) argSize L = (8 `quot` SIZEOF_VOID_P :: Int)
argSize V16 = (16 `quot` SIZEOF_VOID_P :: Int) argSize V16 = (16 `quot` SIZEOF_VOID_P :: Int)
argSize V32 = (32 `quot` SIZEOF_VOID_P :: Int) argSize V32 = (32 `quot` SIZEOF_VOID_P :: Int)
argSize V64 = (64 `quot` SIZEOF_VOID_P :: Int)
showArg :: ArgRep -> String showArg :: ArgRep -> String
showArg N = "n" showArg N = "n"
...@@ -55,6 +57,7 @@ showArg D = "d" ...@@ -55,6 +57,7 @@ showArg D = "d"
showArg L = "l" showArg L = "l"
showArg V16 = "v16" showArg V16 = "v16"
showArg V32 = "v32" showArg V32 = "v32"
showArg V64 = "v64"
-- is a value a pointer? -- is a value a pointer?
isPtr :: ArgRep -> Bool isPtr :: ArgRep -> Bool
...@@ -508,6 +511,7 @@ argRep L = text "L_" ...@@ -508,6 +511,7 @@ argRep L = text "L_"
argRep P = text "gcptr" argRep P = text "gcptr"
argRep V16 = text "V16_" argRep V16 = text "V16_"
argRep V32 = text "V32_" argRep V32 = text "V32_"
argRep V64 = text "V64_"
argRep _ = text "W_" argRep _ = text "W_"
genApply regstatus args = genApply regstatus args =
...@@ -859,6 +863,7 @@ applyTypes = [ ...@@ -859,6 +863,7 @@ applyTypes = [
[L], [L],
[V16], [V16],
[V32], [V32],
[V64],
[N], [N],
[P], [P],
[P,V], [P,V],
...@@ -888,6 +893,7 @@ stackApplyTypes = [ ...@@ -888,6 +893,7 @@ stackApplyTypes = [
[L], [L],
[V16], [V16],
[V32], [V32],
[V64],
[N,N], [N,N],
[N,P], [N,P],
[P,N], [P,N],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment