Commit 96daec08 authored by dterei's avatar dterei

Better documentation for stack alignment design

parent 93e08909
......@@ -1677,10 +1677,13 @@ genCCall32 target dest_regs args =
++ show (length args) ++ ")"
_ -> do
let
-- Align stack to 16n for calls, assuming a starting stack
-- alignment of 16n - word_size on procedure entry, which we
-- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
raw_arg_size = sum sizes + 4
raw_arg_size = sum sizes + wORD_SIZE
arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size
tot_arg_size = raw_arg_size + arg_pad_size - 4
tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE
delta0 <- getDeltaNat
setDeltaNat (delta0 - arg_pad_size)
......@@ -1820,14 +1823,17 @@ genCCall64 :: CmmCallTarget -- function to call
-> NatM InstrBlock
genCCall64 target dest_regs args =
case (target, dest_regs) of
(CmmPrim op, []) ->
-- void return type prim op
outOfLineCmmOp op Nothing args
(CmmPrim op, [res]) ->
-- we only cope with a single result for foreign calls
outOfLineCmmOp op (Just res) args
_ -> do
-- load up the register arguments
-- load up the register arguments
(stack_args, aregs, fregs, load_args_code)
<- load_args args allArgRegs allFPArgRegs nilOL
......@@ -1836,33 +1842,24 @@ genCCall64 target dest_regs args =
int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
-- for annotating the call instruction with
sse_regs = length fp_regs_used
tot_arg_size = arg_size * length stack_args
-- On entry to the called function, %rsp should be aligned
-- on a 16-byte boundary +8 (i.e. the first stack arg
-- above the return address is 16-byte aligned). In STG
-- land %rsp is kept 8-byte aligned (see StgCRun.c), so we
-- just need to make sure we pad by eight bytes after
-- pushing a multiple of 16-bytes of args to get the
-- correct alignment. If we push an odd number of eight byte
-- arguments then no padding is needed.
-- Urg, this is hard. We need to feed the delta back into
-- the arg pushing code.
-- Align stack to 16n for calls, assuming a starting stack
-- alignment of 16n - word_size on procedure entry, which we
-- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
(real_size, adjust_rsp) <-
if (tot_arg_size + 8) `rem` 16 == 0
if (tot_arg_size + wORD_SIZE) `rem` 16 == 0
then return (tot_arg_size, nilOL)
else do -- we need to adjust...
delta <- getDeltaNat
setDeltaNat (delta-8)
return (tot_arg_size+8, toOL [
SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
DELTA (delta-8)
])
setDeltaNat (delta - wORD_SIZE)
return (tot_arg_size + wORD_SIZE, toOL [
SUB II64 (OpImm (ImmInt wORD_SIZE)) (OpReg rsp),
DELTA (delta - wORD_SIZE) ])
-- push the stack args, right to left
-- push the stack args, right to left
push_code <- push_args (reverse stack_args) nilOL
delta <- getDeltaNat
......@@ -1893,9 +1890,9 @@ genCCall64 target dest_regs args =
let call = callinsns `appOL`
toOL (
-- Deallocate parameters after call for ccall;
-- stdcall has callee do it, but is not supported on
-- x86_64 target (see #3336)
-- Deallocate parameters after call for ccall;
-- stdcall has callee do it, but is not supported on
-- x86_64 target (see #3336)
(if real_size==0 then [] else
[ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
++
......
......@@ -120,6 +120,43 @@ StgFunPtr StgReturn(void)
#define STG_GLOBAL ".global "
#endif
/*
* Note [Stack Alignment on X86]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* On X86 (both 32bit and 64bit) we keep the stack aligned on function calls at
* a 16-byte boundary. This is done because on a number of architectures the
* ABI requires this (x64, Mac OSX 32bit/64bit) as well as interfacing with
* other libraries through the FFI.
*
* As part of this arrangement we must maintain the stack at a 16-byte boundary
* - word_size-bytes (so 16n - 4 for i386 and 16n - 8 for x64) on entry to a
* procedure since both GCC and LLVM expect this. This is because the stack
* should have been 16-byte boundary aligned and then a call made which pushes
* a return address onto the stack (so word_size more space used). In STG code
* we only jump to other STG procedures, so we maintain the 16n - word_size
* alignment for these jumps.
*
* This gives us binary compatibility with LLVM and GCC as well as dealing
* with the FFI. Previously we just maintained a 16n byte alignment for
* procedure entry and calls, which led to bugs (see #4211 and #5250).
*
* To change this convention you need to change the code here, and in
* compiler/nativeGen/X86/CodeGen.hs::GenCCall, and maybe the adjustor
* code for thunks in rts/AdjustorAsm.s, rts/Adjustor.c.
*
* A quick way to see if this is wrong is to compile this code:
*
* main = System.Exit.exitWith ExitSuccess
*
* And run it with +RTS -sstderr. The stats code in the RTS, in
* particular statsPrintf(), relies on the stack alignment because
* it saves the %xmm regs on the stack, so it'll fall over if the
* stack isn't aligned, and calling exitWith from Haskell invokes
* shutdownHaskellAndExit using a C call.
*
*/
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
......@@ -180,7 +217,7 @@ StgRunIsImplementedInAssembler(void)
: : "i" (RESERVED_C_STACK_BYTES + 16)
// + 16 to make room for the 4 registers we have to save
// + 12 because we need to align %esp to a 16-byte boundary (#5250)
// See Note [Stack Alignment on X86]
);
}
......@@ -260,46 +297,9 @@ StgRunIsImplementedInAssembler(void)
"retq"
: : "i"(RESERVED_C_STACK_BYTES + 48 /*stack frame size*/));
/*
The x86_64 ABI specifies that on entry to a procedure, %rsp is
aligned on a 16-byte boundary + 8. That is, the first
argument on the stack after the return address will be
16-byte aligned.
We maintain the 16+8 stack alignment throughout the STG code.
When we call STG_RUN the stack will be aligned to 16+8. We used
to subtract an extra 8 bytes so that %rsp would be 16 byte
aligned at all times in STG land. This worked fine for the
native code generator which knew that the stack was already
aligned on 16 bytes when it generated calls to C functions.
This arrangement caused problems for the LLVM backend. The LLVM
code generator would assume that on entry to each function the
stack is aligned to 16+8 as required by the ABI. However, since
we only enter STG functions by jumping to them with tail calls,
the stack was actually aligned to a 16-byte boundary. The LLVM
backend had its own mangler that would post-process the
assembly code to fix up the stack manipulation code to maintain
the correct alignment (see #4211).
Therefore, we now keep the stack aligned to 16+8 while in
STG land so that LLVM generates correct code without any
mangling. The native code generator can handle this alignment
just fine by making sure the stack is aligned to a 16-byte
boundary before it makes a C-call.
A quick way to see if this is wrong is to compile this code:
main = System.Exit.exitWith ExitSuccess
And run it with +RTS -sstderr. The stats code in the RTS, in
particular statsPrintf(), relies on the stack alignment because
it saves the %xmm regs on the stack, so it'll fall over if the
stack isn't aligned, and calling exitWith from Haskell invokes
shutdownHaskellAndExit using a C call.
*/
/*
* See Note [Stack Alignment on X86]
*/
}
#endif /* x86-64 */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment