Commit 04db0e9f authored by Simon Marlow's avatar Simon Marlow

pass arguments to unknown function calls in registers

We now have more stg_ap entry points: stg_ap_*_fast, which take
arguments in registers according to the platform calling convention.
This is faster if the function being called is evaluated and has the
right arity, which is the common case (see the eval/apply paper for
measurements).  

We still need the stg_ap_*_info entry points for stack-based
application, such as an overflows when a function is applied to too
many argumnets.  The stg_ap_*_fast functions actually just check for
an evaluated function, and if they don't find one, push the args on
the stack and invoke stg_ap_*_info.  (this might be slightly slower in
some cases, but not the common case).
parent 174c7f29
......@@ -80,6 +80,8 @@ module CLabel (
mkRtsCodeLabelFS,
mkRtsDataLabelFS,
mkRtsApFastLabel,
mkForeignLabel,
mkCCLabel, mkCCSLabel,
......@@ -259,6 +261,8 @@ data RtsLabelInfo
| RtsDataFS FastString -- misc rts data bits, eg CHARLIKE_closure
| RtsCodeFS FastString -- misc rts code
| RtsApFast LitString -- _fast versions of generic apply
| RtsSlowTickyCtr String
deriving (Eq, Ord)
......@@ -393,6 +397,8 @@ mkRtsRetLabelFS str = RtsLabel (RtsRetFS str)
mkRtsCodeLabelFS str = RtsLabel (RtsCodeFS str)
mkRtsDataLabelFS str = RtsLabel (RtsDataFS str)
mkRtsApFastLabel str = RtsLabel (RtsApFast str)
mkRtsSlowTickyCtrLabel :: String -> CLabel
mkRtsSlowTickyCtrLabel pat = RtsLabel (RtsSlowTickyCtr pat)
......@@ -520,6 +526,7 @@ labelType (RtsLabel (RtsInfoFS _)) = DataLabel
labelType (RtsLabel (RtsEntryFS _)) = CodeLabel
labelType (RtsLabel (RtsRetInfoFS _)) = DataLabel
labelType (RtsLabel (RtsRetFS _)) = CodeLabel
labelType (RtsLabel (RtsApFast _)) = CodeLabel
labelType (CaseLabel _ CaseReturnInfo) = DataLabel
labelType (CaseLabel _ _) = CodeLabel
labelType (ModuleInitLabel _ _ _) = CodeLabel
......@@ -676,6 +683,8 @@ pprCLbl (RtsLabel (RtsData str)) = ptext str
pprCLbl (RtsLabel (RtsCodeFS str)) = ftext str
pprCLbl (RtsLabel (RtsDataFS str)) = ftext str
pprCLbl (RtsLabel (RtsApFast str)) = ptext str <> ptext SLIT("_fast")
pprCLbl (RtsLabel (RtsSelectorInfoTable upd_reqd offset))
= hcat [ptext SLIT("stg_sel_"), text (show offset),
ptext (if upd_reqd
......
......@@ -206,17 +206,20 @@ mkRegLiveness regs ptrs nptrs
-- For a slow call, we must take a bunch of arguments and intersperse
-- some stg_ap_<pattern>_ret_info return addresses.
constructSlowCall :: [(CgRep,CmmExpr)] -> (CLabel, [(CgRep,CmmExpr)])
constructSlowCall
:: [(CgRep,CmmExpr)]
-> (CLabel, -- RTS entry point for call
[(CgRep,CmmExpr)], -- args to pass to the entry point
[(CgRep,CmmExpr)]) -- stuff to save on the stack
-- don't forget the zero case
constructSlowCall []
= (stg_ap_0, [])
where
stg_ap_0 = enterRtsRetLabel SLIT("stg_ap_0")
= (mkRtsApFastLabel SLIT("stg_ap_0"), [], [])
constructSlowCall amodes
= (stg_ap_pat, these ++ slowArgs rest)
= (stg_ap_pat, these, rest)
where
stg_ap_pat = enterRtsRetLabel arg_pat
stg_ap_pat = mkRtsApFastLabel arg_pat
(arg_pat, these, rest) = matchSlowPattern amodes
enterRtsRetLabel arg_pat
......
......@@ -149,56 +149,34 @@ performTailCall fun_info arg_amodes pending_assts
-- A slow function call via the RTS apply routines
-- Node must definitely point to the thing
SlowCall -> do
{ let (apply_lbl, new_amodes) = constructSlowCall arg_amodes
-- Fill in all the arguments on the stack
; (final_sp,stk_assts) <- mkStkAmodes sp new_amodes
; emitSimultaneously (node_asst `plusStmts` stk_assts
`plusStmts` pending_assts)
; when (not (null arg_amodes)) $ do
{ when (not (null arg_amodes)) $ do
{ if (isKnownFun lf_info)
then tickyKnownCallTooFewArgs
else tickyUnknownCall
; tickySlowCallPat (map fst arg_amodes)
}
; tickySlowCallPat (map fst arg_amodes)
}
; doFinalJump (final_sp + 1)
-- Add one, because the stg_ap functions
-- expect there to be a free slot on the stk
False (jumpToLbl apply_lbl)
; let (apply_lbl, args, extra_args)
= constructSlowCall arg_amodes
; directCall sp apply_lbl args extra_args
(node_asst `plusStmts` pending_assts)
}
-- A direct function call (possibly with some left-over arguments)
DirectEntry lbl arity -> do
{ let
-- The args beyond the arity go straight on the stack
(arity_args, extra_stk_args) = splitAt arity arg_amodes
-- First chunk of args go in registers
(reg_arg_amodes, stk_args) = assignCallRegs arity_args
-- Any "extra" arguments are placed in frames on the
-- stack after the other arguments.
slow_stk_args = slowArgs extra_stk_args
reg_assts = assignToRegs reg_arg_amodes
; if null slow_stk_args
{ if arity == length arg_amodes
then tickyKnownCallExact
else do tickyKnownCallExtraArgs
tickySlowCallPat (map fst extra_stk_args)
tickySlowCallPat (map fst (drop arity arg_amodes))
; (final_sp, stk_assts) <- mkStkAmodes sp
(stk_args ++ slow_stk_args)
; emitSimultaneously (opt_node_asst `plusStmts`
reg_assts `plusStmts`
stk_assts `plusStmts`
pending_assts)
; doFinalJump final_sp False (jumpToLbl lbl) }
; let
-- The args beyond the arity go straight on the stack
(arity_args, extra_args) = splitAt arity arg_amodes
; directCall sp lbl arity_args extra_args
(opt_node_asst `plusStmts` pending_assts)
}
}
where
fun_name = idName (cgIdInfoId fun_info)
......@@ -206,6 +184,25 @@ performTailCall fun_info arg_amodes pending_assts
directCall sp lbl args extra_args assts = do
let
-- First chunk of args go in registers
(reg_arg_amodes, stk_args) = assignCallRegs args
-- Any "extra" arguments are placed in frames on the
-- stack after the other arguments.
slow_stk_args = slowArgs extra_args
reg_assts = assignToRegs reg_arg_amodes
--
(final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
emitSimultaneously (reg_assts `plusStmts`
stk_assts `plusStmts`
assts)
doFinalJump final_sp False (jumpToLbl lbl)
-- -----------------------------------------------------------------------------
-- The final clean-up before we do a jump at the end of a basic block.
-- This code is shared by tail-calls and returns.
......
......@@ -379,7 +379,6 @@ RTS_ENTRY(stg_ap_7_upd_entry);
/* standard application routines (see also rts/gen_apply.py,
* and compiler/codeGen/CgStackery.lhs).
*/
RTS_RET_INFO(stg_ap_0_info);
RTS_RET_INFO(stg_ap_v_info);
RTS_RET_INFO(stg_ap_f_info);
RTS_RET_INFO(stg_ap_d_info);
......@@ -395,7 +394,6 @@ RTS_RET_INFO(stg_ap_pppp_info);
RTS_RET_INFO(stg_ap_ppppp_info);
RTS_RET_INFO(stg_ap_pppppp_info);
RTS_ENTRY(stg_ap_0_ret);
RTS_ENTRY(stg_ap_v_ret);
RTS_ENTRY(stg_ap_f_ret);
RTS_ENTRY(stg_ap_d_ret);
......@@ -411,6 +409,22 @@ RTS_ENTRY(stg_ap_pppp_ret);
RTS_ENTRY(stg_ap_ppppp_ret);
RTS_ENTRY(stg_ap_pppppp_ret);
RTS_FUN(stg_ap_0_fast);
RTS_FUN(stg_ap_v_fast);
RTS_FUN(stg_ap_f_fast);
RTS_FUN(stg_ap_d_fast);
RTS_FUN(stg_ap_l_fast);
RTS_FUN(stg_ap_n_fast);
RTS_FUN(stg_ap_p_fast);
RTS_FUN(stg_ap_pv_fast);
RTS_FUN(stg_ap_pp_fast);
RTS_FUN(stg_ap_ppv_fast);
RTS_FUN(stg_ap_ppp_fast);
RTS_FUN(stg_ap_pppv_fast);
RTS_FUN(stg_ap_pppp_fast);
RTS_FUN(stg_ap_ppppp_fast);
RTS_FUN(stg_ap_pppppp_fast);
/* standard GC & stack check entry points, all defined in HeapStackCheck.hc */
RTS_RET_INFO(stg_enter_info);
......
......@@ -15,28 +15,13 @@
/* ----------------------------------------------------------------------------
* Evaluate a closure and return it.
*
* stg_ap_0_info <--- Sp
*
* NOTE: this needs to be a polymorphic return point, because we can't
* be sure that the thing being evaluated is not a function.
* There isn't an info table / return address version of stg_ap_0, because
* everything being returned is guaranteed evaluated, so it would be a no-op.
*/
#if MAX_VECTORED_RTN > 8
#error MAX_VECTORED_RTN has changed: please modify stg_ap_0 too.
#endif
STRING(stg_ap_0_ret_str,"stg_ap_0_ret... ")
INFO_TABLE_RET( stg_ap_0,
0/*framsize*/, 0/*bitmap*/, RET_SMALL,
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0) )
stg_ap_0_fast
{
// fn is in R1, no args on the stack
......@@ -45,11 +30,10 @@ INFO_TABLE_RET( stg_ap_0,
foreign "C" printClosure(R1 "ptr") [R1]);
IF_DEBUG(sanity,
foreign "C" checkStackChunk(Sp+WDS(1) "ptr",
foreign "C" checkStackChunk(Sp "ptr",
CurrentTSO + TSO_OFFSET_StgTSO_stack +
WDS(StgTSO_stack_size(CurrentTSO)) "ptr") [R1]);
Sp_adj(1);
ENTER();
}
......
......@@ -102,10 +102,9 @@ blockAsyncExceptionszh_fast
Sp(0) = stg_unblockAsyncExceptionszh_ret_info;
}
}
Sp_adj(-1);
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_v();
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
unblockAsyncExceptionszh_fast
......@@ -130,10 +129,9 @@ unblockAsyncExceptionszh_fast
Sp(0) = stg_blockAsyncExceptionszh_ret_info;
}
}
Sp_adj(-1);
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_v();
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
......@@ -307,10 +305,9 @@ catchzh_fast
TICK_CATCHF_PUSHED();
/* Apply R1 to the realworld token */
Sp_adj(-1);
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_v();
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
/* -----------------------------------------------------------------------------
......@@ -372,8 +369,7 @@ retry_pop_stack:
"ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr");
StgTSO_trec(CurrentTSO) = trec;
R1 = StgAtomicallyFrame_code(Sp);
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
}
......
......@@ -402,7 +402,6 @@ typedef struct _RtsSymbolVal {
#define RTS_RET_SYMBOLS \
SymX(stg_enter_ret) \
SymX(stg_gc_fun_ret) \
SymX(stg_ap_0_ret) \
SymX(stg_ap_v_ret) \
SymX(stg_ap_f_ret) \
SymX(stg_ap_d_ret) \
......@@ -617,7 +616,6 @@ typedef struct _RtsSymbolVal {
SymX(stg_MUT_ARR_PTRS_FROZEN_info) \
SymX(stg_MUT_ARR_PTRS_FROZEN0_info) \
SymX(stg_WEAK_info) \
SymX(stg_ap_0_info) \
SymX(stg_ap_v_info) \
SymX(stg_ap_f_info) \
SymX(stg_ap_d_info) \
......@@ -632,6 +630,21 @@ typedef struct _RtsSymbolVal {
SymX(stg_ap_pppp_info) \
SymX(stg_ap_ppppp_info) \
SymX(stg_ap_pppppp_info) \
SymX(stg_ap_0_fast) \
SymX(stg_ap_v_fast) \
SymX(stg_ap_f_fast) \
SymX(stg_ap_d_fast) \
SymX(stg_ap_l_fast) \
SymX(stg_ap_n_fast) \
SymX(stg_ap_p_fast) \
SymX(stg_ap_pv_fast) \
SymX(stg_ap_pp_fast) \
SymX(stg_ap_ppv_fast) \
SymX(stg_ap_ppp_fast) \
SymX(stg_ap_pppv_fast) \
SymX(stg_ap_pppp_fast) \
SymX(stg_ap_ppppp_fast) \
SymX(stg_ap_pppppp_fast) \
SymX(stg_ap_1_upd_info) \
SymX(stg_ap_2_upd_info) \
SymX(stg_ap_3_upd_info) \
......
......@@ -996,8 +996,7 @@ INFO_TABLE_RET(stg_catch_retry_frame,
R1 = StgCatchRetryFrame_first_code(frame);
StgCatchRetryFrame_first_code_trec(frame) = new_trec;
}
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
}
......@@ -1061,8 +1060,7 @@ INFO_TABLE_RET(stg_atomically_frame,
"ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
StgTSO_trec(CurrentTSO) = trec;
R1 = StgAtomicallyFrame_code(frame);
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
}
......@@ -1097,8 +1095,7 @@ INFO_TABLE_RET(stg_atomically_waiting_frame,
StgTSO_trec(CurrentTSO) = trec;
StgHeader_info(frame) = stg_atomically_frame_info;
R1 = StgAtomicallyFrame_code(frame);
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
}
......@@ -1193,8 +1190,7 @@ atomicallyzh_fast
StgTSO_trec(CurrentTSO) = new_trec;
/* Apply R1 to the realworld token */
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
......@@ -1214,8 +1210,7 @@ catchSTMzh_fast
StgCatchSTMFrame_handler(frame) = R2;
/* Apply R1 to the realworld token */
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
......@@ -1248,8 +1243,7 @@ catchRetryzh_fast
StgCatchRetryFrame_first_code_trec(frame) = new_trec;
/* Apply R1 to the realworld token */
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
......@@ -1281,8 +1275,7 @@ retry_pop_stack:
StgTSO_trec(CurrentTSO) = trec;
StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
R1 = StgCatchRetryFrame_alt_code(frame);
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
} else {
// Retry in the alternative code: propagate
W_ other_trec;
......@@ -1306,8 +1299,7 @@ retry_pop_stack:
StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
StgTSO_trec(CurrentTSO) = trec;
R1 = StgCatchRetryFrame_first_code(frame);
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
}
}
......@@ -1332,8 +1324,7 @@ retry_pop_stack:
StgTSO_trec(CurrentTSO) = trec;
R1 = StgAtomicallyFrame_code(frame);
Sp = frame;
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
}
......
......@@ -153,8 +153,7 @@ INFO_TABLE(stg_ap_1_upd,1,1,THUNK_1_0,"stg_ap_1_upd_info","stg_ap_1_upd_info")
PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame;
Sp_adj(-1); // for stg_ap_0_ret
jump RET_LBL(stg_ap_0);
jump stg_ap_0_fast;
}
INFO_TABLE(stg_ap_2_upd,2,0,THUNK_2_0,"stg_ap_2_upd_info","stg_ap_2_upd_info")
......@@ -168,7 +167,7 @@ INFO_TABLE(stg_ap_2_upd,2,0,THUNK_2_0,"stg_ap_2_upd_info","stg_ap_2_upd_info")
W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame - WDS(1);
Sp_adj(-1); // for stg_ap_0_ret
Sp_adj(-1); // for stg_ap_*_ret
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_p();
jump RET_LBL(stg_ap_p);
......@@ -186,7 +185,7 @@ INFO_TABLE(stg_ap_3_upd,3,0,THUNK,"stg_ap_3_upd_info","stg_ap_3_upd_info")
W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame - WDS(2);
Sp_adj(-1); // for stg_ap_0_ret
Sp_adj(-1); // for stg_ap_*_ret
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_pp();
jump RET_LBL(stg_ap_pp);
......@@ -205,7 +204,7 @@ INFO_TABLE(stg_ap_4_upd,4,0,THUNK,"stg_ap_4_upd_info","stg_ap_4_upd_info")
W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame - WDS(3);
Sp_adj(-1); // for stg_ap_0_ret
Sp_adj(-1); // for stg_ap_*_ret
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_ppp();
jump RET_LBL(stg_ap_ppp);
......@@ -225,7 +224,7 @@ INFO_TABLE(stg_ap_5_upd,5,0,THUNK,"stg_ap_5_upd_info","stg_ap_5_upd_info")
W_[Sp-SIZEOF_StgUpdateFrame-WDS(4)] = StgThunk_payload(R1,1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame - WDS(4);
Sp_adj(-1); // for stg_ap_0_ret
Sp_adj(-1); // for stg_ap_*_ret
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_pppp();
jump RET_LBL(stg_ap_pppp);
......@@ -246,7 +245,7 @@ INFO_TABLE(stg_ap_6_upd,6,0,THUNK,"stg_ap_6_upd_info","stg_ap_6_upd_info")
W_[Sp-SIZEOF_StgUpdateFrame-WDS(5)] = StgThunk_payload(R1,1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame - WDS(5);
Sp_adj(-1); // for stg_ap_0_ret
Sp_adj(-1); // for stg_ap_*_ret
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_ppppp();
jump RET_LBL(stg_ap_ppppp);
......@@ -268,7 +267,7 @@ INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info")
W_[Sp-SIZEOF_StgUpdateFrame-WDS(6)] = StgThunk_payload(R1,1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame - WDS(6);
Sp_adj(-1); // for stg_ap_0_ret
Sp_adj(-1); // for stg_ap_*_ret
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_pppppp();
jump RET_LBL(stg_ap_pppppp);
......
......@@ -83,9 +83,14 @@ longRegs n = [ "L" ++ show m | m <- [1..n] ]
loadRegArgs :: RegStatus -> Int -> [ArgRep] -> (Doc,Int)
loadRegArgs regstatus sp args
= (vcat (map (uncurry assign_stk_to_reg) reg_locs), sp')
where
(reg_locs, _leftovers, sp') = assignRegs regstatus sp args
= (loadRegOffs reg_locs, sp')
where (reg_locs, _, sp') = assignRegs regstatus sp args
loadRegOffs :: [(Reg,Int)] -> Doc
loadRegOffs = vcat . map (uncurry assign_stk_to_reg)
saveRegOffs :: [(Reg,Int)] -> Doc
saveRegOffs = vcat . map (uncurry assign_reg_to_stk)
-- a bit like assignRegs in CgRetConv.lhs
assignRegs
......@@ -163,10 +168,15 @@ mkApplyName args
mkApplyRetName args
= mkApplyName args <> text "_ret"
mkApplyFastName args
= mkApplyName args <> text "_fast"
mkApplyInfoName args
= mkApplyName args <> text "_info"
genMkPAP regstatus macro jump ticker disamb stack_apply
genMkPAP regstatus macro jump ticker disamb
no_load_regs -- don't load argumnet regs before jumping
args_in_regs -- arguments are already in regs
is_pap args all_args_size fun_info_label
= smaller_arity_cases
$$ exact_arity_case
......@@ -175,45 +185,104 @@ genMkPAP regstatus macro jump ticker disamb stack_apply
where
n_args = length args
-- offset of args on the stack, see large comment above.
arg_sp_offset = 1
-- offset of arguments on the stack at slow apply calls.
stk_args_slow_offset = 1
stk_args_offset
| args_in_regs = 0
| otherwise = stk_args_slow_offset
-- The SMALLER ARITY cases:
-- if (arity == 1) {
-- Sp[0] = Sp[1];
-- Sp[1] = (W_)&stg_ap_1_info;
-- JMP_(GET_ENTRY(R1.cl));
smaller_arity_cases = vcat [ smaller_arity i | i <- [1..n_args-1] ]
smaller_arity arity
= text "if (arity == " <> int arity <> text ") {" $$
let
(reg_doc, sp')
| stack_apply = (empty, arg_sp_offset)
| otherwise = loadRegArgs regstatus arg_sp_offset these_args
in
nest 4 (vcat [
text "TICK_SLOW_CALL_" <> text ticker <> text "_TOO_MANY();",
reg_doc,
vcat [ shuffle_down j | j <- [sp'..these_args_size] ],
loadSpWordOff "W_" these_args_size <> text " = " <>
mkApplyInfoName rest_args <> semi,
text "Sp_adj(" <> int (sp' - 1) <> text ");",
-- load up regs for the call, if necessary
load_regs,
-- If we have more args in registers than are required
-- for the call, then we must save some on the stack,
-- and set up the stack for the follow-up call.
-- If the extra arguments are on the stack, then we must
-- instead shuffle them down to make room for the info
-- table for the follow-on call.
if overflow_regs
then save_extra_regs
else shuffle_extra_args,
-- for a PAP, we have to arrange that the stack contains a
-- return address in the even that stg_PAP_entry fails its
-- heap check. See stg_PAP_entry in Apply.hc for details.
if is_pap
then text "R2 = " <> mkApplyInfoName these_args <> semi
then text "R2 = " <> mkApplyInfoName this_call_args <> semi
else empty,
text "jump " <> text jump <> semi
]) $$
text "}"
where
(these_args, rest_args) = splitAt arity args
these_args_size = sum (map argSize these_args)
-- offsets in case we need to save regs:
(reg_locs, _, _)
= assignRegs regstatus stk_args_offset args
-- register assignment for *this function call*
(reg_locs', reg_call_leftovers, reg_call_sp_stk_args)
= assignRegs regstatus stk_args_offset (take arity args)
load_regs
| no_load_regs || args_in_regs = empty
| otherwise = loadRegOffs reg_locs'
(this_call_args, rest_args) = splitAt arity args
-- the offset of the stack args from initial Sp
sp_stk_args
| args_in_regs = stk_args_offset
| no_load_regs = stk_args_offset
| otherwise = reg_call_sp_stk_args
-- the stack args themselves
this_call_stack_args
| args_in_regs = reg_call_leftovers -- sp offsets are wrong
| no_load_regs = this_call_args
| otherwise = reg_call_leftovers
stack_args_size = sum (map argSize this_call_stack_args)
shuffle_down i =
overflow_regs = args_in_regs && length reg_locs > length reg_locs'
save_extra_regs
= -- we have extra arguments in registers to save
let
extra_reg_locs = drop (length reg_locs') (reverse reg_locs)
adj_reg_locs = [ (reg, off - adj + 1) |
(reg,off) <- extra_reg_locs ]
adj = case extra_reg_locs of
(reg, fst_off):_ -> fst_off
size = snd (last adj_reg_locs)
in
text "Sp_adj(" <> int (-size - 1) <> text ");" $$
saveRegOffs adj_reg_locs $$
loadSpWordOff "W_" 0 <> text " = " <>
mkApplyInfoName rest_args <> semi
shuffle_extra_args
= vcat (map shuffle_down
[sp_stk_args .. sp_stk_args+stack_args_size-1]) $$
loadSpWordOff "W_" (sp_stk_args+stack_args_size-1