By default, only pass 128-bit SIMD vectors in registers on X86-64.

LLVM's GHC calling convention only allows 128-bit SIMD vectors to be passed in
machine registers on X86-64. This may change in LLVM 3.4; the hidden flag
-fllvm-pass-vectors-in-regs causes all SIMD vector widths to be passed in
registers on both X86-64 and on X86-32.
parent 7dda67b9
......@@ -66,9 +66,12 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
| isFloatType ty = float
| otherwise = int
where vec = case (w, regs) of
(W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
(W256, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
(W512, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
(W128, (vs, fs, ds, ls, s:ss))
| passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
(W256, (vs, fs, ds, ls, s:ss))
| passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
(W512, (vs, fs, ds, ls, s:ss))
| passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
_ -> (assts, (r:rs))
float = case (w, regs) of
(W32, (vs, fs, ds, ls, s:ss))
......@@ -100,6 +103,20 @@ passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of
ArchX86_64 -> True
_ -> False
-- On X86_64, we always pass 128-bit-wide vectors in registers. On 32-bit X86
-- and for all larger vector sizes on X86_64, LLVM's GHC calling convention
-- doesn't currently passing vectors in registers. The patch to update the GHC
-- calling convention to support passing SIMD vectors in registers is small and
-- well-contained, so it may make it into LLVM 3.4. The hidden
-- -fllvm-pass-vectors-in-regs flag will generate LLVM code that attempts to
-- pass vectors in registers, but it must only be used with a version of LLVM
-- that has an updated GHC calling convention.
passVectorInReg :: Width -> DynFlags -> Bool
passVectorInReg W128 dflags = case platformArch (targetPlatform dflags) of
ArchX86_64 -> True
_ -> gopt Opt_LlvmPassVectorsInRegisters dflags
passVectorInReg _ dflags = gopt Opt_LlvmPassVectorsInRegisters dflags
assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a]
-> (
ByteOff -- bytes of stack args
......
......@@ -309,6 +309,7 @@ data GeneralFlag
| Opt_RegsIterative -- do iterative coalescing graph coloring register allocation
| Opt_PedanticBottoms -- Be picky about how we treat bottom
| Opt_LlvmTBAA -- Use LLVM TBAA infastructure for improving AA (hidden flag)
| Opt_LlvmPassVectorsInRegisters -- Pass SIMD vectors in registers (requires a patched LLVM) (hidden flag)
| Opt_IrrefutableTuples
| Opt_CmmSink
| Opt_CmmElimCommonBlocks
......@@ -2611,6 +2612,7 @@ fFlags = [
( "regs-graph", Opt_RegsGraph, nop ),
( "regs-iterative", Opt_RegsIterative, nop ),
( "llvm-tbaa", Opt_LlvmTBAA, nop), -- hidden flag
( "llvm-pass-vectors-in-regs", Opt_LlvmPassVectorsInRegisters, nop), -- hidden flag
( "irrefutable-tuples", Opt_IrrefutableTuples, nop ),
( "cmm-sink", Opt_CmmSink, nop ),
( "cmm-elim-common-blocks", Opt_CmmElimCommonBlocks, nop ),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment