Commit 04db0e9f authored by Simon Marlow's avatar Simon Marlow

pass arguments to unknown function calls in registers

We now have more stg_ap entry points: stg_ap_*_fast, which take
arguments in registers according to the platform calling convention.
This is faster if the function being called is evaluated and has the
right arity, which is the common case (see the eval/apply paper for
measurements).  

We still need the stg_ap_*_info entry points for stack-based
application, such as an overflows when a function is applied to too
many argumnets.  The stg_ap_*_fast functions actually just check for
an evaluated function, and if they don't find one, push the args on
the stack and invoke stg_ap_*_info.  (this might be slightly slower in
some cases, but not the common case).
parent 174c7f29
......@@ -80,6 +80,8 @@ module CLabel (
mkRtsCodeLabelFS,
mkRtsDataLabelFS,
mkRtsApFastLabel,
mkForeignLabel,
mkCCLabel, mkCCSLabel,
......@@ -259,6 +261,8 @@ data RtsLabelInfo
| RtsDataFS FastString -- misc rts data bits, eg CHARLIKE_closure
| RtsCodeFS FastString -- misc rts code
| RtsApFast LitString -- _fast versions of generic apply
| RtsSlowTickyCtr String
deriving (Eq, Ord)
......@@ -393,6 +397,8 @@ mkRtsRetLabelFS str = RtsLabel (RtsRetFS str)
mkRtsCodeLabelFS str = RtsLabel (RtsCodeFS str)
mkRtsDataLabelFS str = RtsLabel (RtsDataFS str)
mkRtsApFastLabel str = RtsLabel (RtsApFast str)
mkRtsSlowTickyCtrLabel :: String -> CLabel
mkRtsSlowTickyCtrLabel pat = RtsLabel (RtsSlowTickyCtr pat)
......@@ -520,6 +526,7 @@ labelType (RtsLabel (RtsInfoFS _)) = DataLabel
labelType (RtsLabel (RtsEntryFS _)) = CodeLabel
labelType (RtsLabel (RtsRetInfoFS _)) = DataLabel
labelType (RtsLabel (RtsRetFS _)) = CodeLabel
labelType (RtsLabel (RtsApFast _)) = CodeLabel
labelType (CaseLabel _ CaseReturnInfo) = DataLabel
labelType (CaseLabel _ _) = CodeLabel
labelType (ModuleInitLabel _ _ _) = CodeLabel
......@@ -676,6 +683,8 @@ pprCLbl (RtsLabel (RtsData str)) = ptext str
pprCLbl (RtsLabel (RtsCodeFS str)) = ftext str
pprCLbl (RtsLabel (RtsDataFS str)) = ftext str
pprCLbl (RtsLabel (RtsApFast str)) = ptext str <> ptext SLIT("_fast")
pprCLbl (RtsLabel (RtsSelectorInfoTable upd_reqd offset))
= hcat [ptext SLIT("stg_sel_"), text (show offset),
ptext (if upd_reqd
......
......@@ -206,17 +206,20 @@ mkRegLiveness regs ptrs nptrs
-- For a slow call, we must take a bunch of arguments and intersperse
-- some stg_ap_<pattern>_ret_info return addresses.
constructSlowCall :: [(CgRep,CmmExpr)] -> (CLabel, [(CgRep,CmmExpr)])
constructSlowCall
:: [(CgRep,CmmExpr)]
-> (CLabel, -- RTS entry point for call
[(CgRep,CmmExpr)], -- args to pass to the entry point
[(CgRep,CmmExpr)]) -- stuff to save on the stack
-- don't forget the zero case
constructSlowCall []
= (stg_ap_0, [])
where
stg_ap_0 = enterRtsRetLabel SLIT("stg_ap_0")
= (mkRtsApFastLabel SLIT("stg_ap_0"), [], [])
constructSlowCall amodes
= (stg_ap_pat, these ++ slowArgs rest)
= (stg_ap_pat, these, rest)
where
stg_ap_pat = enterRtsRetLabel arg_pat
stg_ap_pat = mkRtsApFastLabel arg_pat
(arg_pat, these, rest) = matchSlowPattern amodes
enterRtsRetLabel arg_pat
......
......@@ -149,56 +149,34 @@ performTailCall fun_info arg_amodes pending_assts
-- A slow function call via the RTS apply routines
-- Node must definitely point to the thing
SlowCall -> do
{ let (apply_lbl, new_amodes) = constructSlowCall arg_amodes
-- Fill in all the arguments on the stack
; (final_sp,stk_assts) <- mkStkAmodes sp new_amodes
; emitSimultaneously (node_asst `plusStmts` stk_assts
`plusStmts` pending_assts)
; when (not (null arg_amodes)) $ do
{ when (not (null arg_amodes)) $ do
{ if (isKnownFun lf_info)
then tickyKnownCallTooFewArgs
else tickyUnknownCall
; tickySlowCallPat (map fst arg_amodes)
}
; tickySlowCallPat (map fst arg_amodes)
}
; doFinalJump (final_sp + 1)
-- Add one, because the stg_ap functions
-- expect there to be a free slot on the stk
False (jumpToLbl apply_lbl)
; let (apply_lbl, args, extra_args)
= constructSlowCall arg_amodes
; directCall sp apply_lbl args extra_args
(node_asst `plusStmts` pending_assts)
}
-- A direct function call (possibly with some left-over arguments)
DirectEntry lbl arity -> do
{ let
-- The args beyond the arity go straight on the stack
(arity_args, extra_stk_args) = splitAt arity arg_amodes
-- First chunk of args go in registers
(reg_arg_amodes, stk_args) = assignCallRegs arity_args
-- Any "extra" arguments are placed in frames on the
-- stack after the other arguments.
slow_stk_args = slowArgs extra_stk_args
reg_assts = assignToRegs reg_arg_amodes
; if null slow_stk_args
{ if arity == length arg_amodes
then tickyKnownCallExact
else do tickyKnownCallExtraArgs
tickySlowCallPat (map fst extra_stk_args)
tickySlowCallPat (map fst (drop arity arg_amodes))
; (final_sp, stk_assts) <- mkStkAmodes sp
(stk_args ++ slow_stk_args)
; emitSimultaneously (opt_node_asst `plusStmts`
reg_assts `plusStmts`
stk_assts `plusStmts`
pending_assts)
; doFinalJump final_sp False (jumpToLbl lbl) }
; let
-- The args beyond the arity go straight on the stack
(arity_args, extra_args) = splitAt arity arg_amodes
; directCall sp lbl arity_args extra_args
(opt_node_asst `plusStmts` pending_assts)
}
}
where
fun_name = idName (cgIdInfoId fun_info)
......@@ -206,6 +184,25 @@ performTailCall fun_info arg_amodes pending_assts
directCall sp lbl args extra_args assts = do
let
-- First chunk of args go in registers
(reg_arg_amodes, stk_args) = assignCallRegs args
-- Any "extra" arguments are placed in frames on the
-- stack after the other arguments.
slow_stk_args = slowArgs extra_args
reg_assts = assignToRegs reg_arg_amodes
--
(final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
emitSimultaneously (reg_assts `plusStmts`
stk_assts `plusStmts`
assts)
doFinalJump final_sp False (jumpToLbl lbl)
-- -----------------------------------------------------------------------------
-- The final clean-up before we do a jump at the end of a basic block.
-- This code is shared by tail-calls and returns.
......
......@@ -379,7 +379,6 @@ RTS_ENTRY(stg_ap_7_upd_entry);
/* standard application routines (see also rts/gen_apply.py,
* and compiler/codeGen/CgStackery.lhs).
*/
RTS_RET_INFO(stg_ap_0_info);
RTS_RET_INFO(stg_ap_v_info);
RTS_RET_INFO(stg_ap_f_info);
RTS_RET_INFO(stg_ap_d_info);
......@@ -395,7 +394,6 @@ RTS_RET_INFO(stg_ap_pppp_info);
RTS_RET_INFO(stg_ap_ppppp_info);
RTS_RET_INFO(stg_ap_pppppp_info);
RTS_ENTRY(stg_ap_0_ret);
RTS_ENTRY(stg_ap_v_ret);
RTS_ENTRY(stg_ap_f_ret);
RTS_ENTRY(stg_ap_d_ret);
......@@ -411,6 +409,22 @@ RTS_ENTRY(stg_ap_pppp_ret);
RTS_ENTRY(stg_ap_ppppp_ret);
RTS_ENTRY(stg_ap_pppppp_ret);
RTS_FUN(stg_ap_0_fast);
RTS_FUN(stg_ap_v_fast);
RTS_FUN(stg_ap_f_fast);
RTS_FUN(stg_ap_d_fast);
RTS_FUN(stg_ap_l_fast);
RTS_FUN(stg_ap_n_fast);
RTS_FUN(stg_ap_p_fast);
RTS_FUN(stg_ap_pv_fast);
RTS_FUN(stg_ap_pp_fast);
RTS_FUN(stg_ap_ppv_fast);
RTS_FUN(stg_ap_ppp_fast);
RTS_FUN(stg_ap_pppv_fast);
RTS_FUN(stg_ap_pppp_fast);
RTS_FUN(stg_ap_ppppp_fast);
RTS_FUN(stg_ap_pppppp_fast);
/* standard GC & stack check entry points, all defined in HeapStackCheck.hc */
RTS_RET_INFO(stg_enter_info);
......
......@@ -15,28 +15,13 @@
/* ----------------------------------------------------------------------------
* Evaluate a closure and return it.
*
* stg_ap_0_info <--- Sp
*
* NOTE: this needs to be a polymorphic return point, because we can't
* be sure that the thing being evaluated is not a function.
* There isn't an info table / return address version of stg_ap_0, because
* everything being returned is guaranteed evaluated, so it would be a no-op.
*/
#if MAX_VECTORED_RTN > 8
#error MAX_VECTORED_RTN has changed: please modify stg_ap_0 too.
#endif
STRING(stg_ap_0_ret_str,"stg_ap_0_ret... ")
INFO_TABLE_RET( stg_ap_0,
0/*framsize*/, 0/*bitmap*/, RET_SMALL,
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0),
RET_LBL(stg_ap_0) )
stg_ap_0_fast
{
// fn is in R1, no args on the stack
......@@ -45,11 +30,10 @@ INFO_TABLE_RET( stg_ap_0,
foreign "C" printClosure(R1 "ptr") [R1]);
IF_DEBUG(sanity,
foreign "C" checkStackChunk(Sp+WDS(1) "ptr",
foreign "C" checkStackChunk(Sp "ptr",
CurrentTSO + TSO_OFFSET_StgTSO_stack +
WDS(StgTSO_stack_size(CurrentTSO)) "ptr") [R1]);
Sp_adj(1);
ENTER();
}
......
......@@ -102,10 +102,9 @@ blockAsyncExceptionszh_fast
Sp(0) = stg_unblockAsyncExceptionszh_ret_info;
}
}
Sp_adj(-1);
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_v();
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
unblockAsyncExceptionszh_fast
......@@ -130,10 +129,9 @@ unblockAsyncExceptionszh_fast
Sp(0) = stg_blockAsyncExceptionszh_ret_info;
}
}
Sp_adj(-1);
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_v();
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
......@@ -307,10 +305,9 @@ catchzh_fast
TICK_CATCHF_PUSHED();
/* Apply R1 to the realworld token */
Sp_adj(-1);
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_v();
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
/* -----------------------------------------------------------------------------
......@@ -372,8 +369,7 @@ retry_pop_stack:
"ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr");
StgTSO_trec(CurrentTSO) = trec;
R1 = StgAtomicallyFrame_code(Sp);
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
}
......
......@@ -402,7 +402,6 @@ typedef struct _RtsSymbolVal {
#define RTS_RET_SYMBOLS \
SymX(stg_enter_ret) \
SymX(stg_gc_fun_ret) \
SymX(stg_ap_0_ret) \
SymX(stg_ap_v_ret) \
SymX(stg_ap_f_ret) \
SymX(stg_ap_d_ret) \
......@@ -617,7 +616,6 @@ typedef struct _RtsSymbolVal {
SymX(stg_MUT_ARR_PTRS_FROZEN_info) \
SymX(stg_MUT_ARR_PTRS_FROZEN0_info) \
SymX(stg_WEAK_info) \
SymX(stg_ap_0_info) \
SymX(stg_ap_v_info) \
SymX(stg_ap_f_info) \
SymX(stg_ap_d_info) \
......@@ -632,6 +630,21 @@ typedef struct _RtsSymbolVal {
SymX(stg_ap_pppp_info) \
SymX(stg_ap_ppppp_info) \
SymX(stg_ap_pppppp_info) \
SymX(stg_ap_0_fast) \
SymX(stg_ap_v_fast) \
SymX(stg_ap_f_fast) \
SymX(stg_ap_d_fast) \
SymX(stg_ap_l_fast) \
SymX(stg_ap_n_fast) \
SymX(stg_ap_p_fast) \
SymX(stg_ap_pv_fast) \
SymX(stg_ap_pp_fast) \
SymX(stg_ap_ppv_fast) \
SymX(stg_ap_ppp_fast) \
SymX(stg_ap_pppv_fast) \
SymX(stg_ap_pppp_fast) \
SymX(stg_ap_ppppp_fast) \
SymX(stg_ap_pppppp_fast) \
SymX(stg_ap_1_upd_info) \
SymX(stg_ap_2_upd_info) \
SymX(stg_ap_3_upd_info) \
......
......@@ -996,8 +996,7 @@ INFO_TABLE_RET(stg_catch_retry_frame,
R1 = StgCatchRetryFrame_first_code(frame);
StgCatchRetryFrame_first_code_trec(frame) = new_trec;
}
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
}
......@@ -1061,8 +1060,7 @@ INFO_TABLE_RET(stg_atomically_frame,
"ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
StgTSO_trec(CurrentTSO) = trec;
R1 = StgAtomicallyFrame_code(frame);
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
}
......@@ -1097,8 +1095,7 @@ INFO_TABLE_RET(stg_atomically_waiting_frame,
StgTSO_trec(CurrentTSO) = trec;
StgHeader_info(frame) = stg_atomically_frame_info;
R1 = StgAtomicallyFrame_code(frame);
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
}
......@@ -1193,8 +1190,7 @@ atomicallyzh_fast
StgTSO_trec(CurrentTSO) = new_trec;
/* Apply R1 to the realworld token */
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
......@@ -1214,8 +1210,7 @@ catchSTMzh_fast
StgCatchSTMFrame_handler(frame) = R2;
/* Apply R1 to the realworld token */
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
......@@ -1248,8 +1243,7 @@ catchRetryzh_fast
StgCatchRetryFrame_first_code_trec(frame) = new_trec;
/* Apply R1 to the realworld token */
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
......@@ -1281,8 +1275,7 @@ retry_pop_stack:
StgTSO_trec(CurrentTSO) = trec;
StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
R1 = StgCatchRetryFrame_alt_code(frame);
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
} else {
// Retry in the alternative code: propagate
W_ other_trec;
......@@ -1306,8 +1299,7 @@ retry_pop_stack:
StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
StgTSO_trec(CurrentTSO) = trec;
R1 = StgCatchRetryFrame_first_code(frame);
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
}
}
......@@ -1332,8 +1324,7 @@ retry_pop_stack:
StgTSO_trec(CurrentTSO) = trec;
R1 = StgAtomicallyFrame_code(frame);
Sp = frame;
Sp_adj(-1);
jump RET_LBL(stg_ap_v);
jump stg_ap_v_fast;
}
}
......
......@@ -153,8 +153,7 @@ INFO_TABLE(stg_ap_1_upd,1,1,THUNK_1_0,"stg_ap_1_upd_info","stg_ap_1_upd_info")
PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame;
Sp_adj(-1); // for stg_ap_0_ret
jump RET_LBL(stg_ap_0);
jump stg_ap_0_fast;
}
INFO_TABLE(stg_ap_2_upd,2,0,THUNK_2_0,"stg_ap_2_upd_info","stg_ap_2_upd_info")
......@@ -168,7 +167,7 @@ INFO_TABLE(stg_ap_2_upd,2,0,THUNK_2_0,"stg_ap_2_upd_info","stg_ap_2_upd_info")
W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame - WDS(1);
Sp_adj(-1); // for stg_ap_0_ret
Sp_adj(-1); // for stg_ap_*_ret
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_p();
jump RET_LBL(stg_ap_p);
......@@ -186,7 +185,7 @@ INFO_TABLE(stg_ap_3_upd,3,0,THUNK,"stg_ap_3_upd_info","stg_ap_3_upd_info")
W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame - WDS(2);
Sp_adj(-1); // for stg_ap_0_ret
Sp_adj(-1); // for stg_ap_*_ret
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_pp();
jump RET_LBL(stg_ap_pp);
......@@ -205,7 +204,7 @@ INFO_TABLE(stg_ap_4_upd,4,0,THUNK,"stg_ap_4_upd_info","stg_ap_4_upd_info")
W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame - WDS(3);
Sp_adj(-1); // for stg_ap_0_ret
Sp_adj(-1); // for stg_ap_*_ret
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_ppp();
jump RET_LBL(stg_ap_ppp);
......@@ -225,7 +224,7 @@ INFO_TABLE(stg_ap_5_upd,5,0,THUNK,"stg_ap_5_upd_info","stg_ap_5_upd_info")
W_[Sp-SIZEOF_StgUpdateFrame-WDS(4)] = StgThunk_payload(R1,1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame - WDS(4);
Sp_adj(-1); // for stg_ap_0_ret
Sp_adj(-1); // for stg_ap_*_ret
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_pppp();
jump RET_LBL(stg_ap_pppp);
......@@ -246,7 +245,7 @@ INFO_TABLE(stg_ap_6_upd,6,0,THUNK,"stg_ap_6_upd_info","stg_ap_6_upd_info")
W_[Sp-SIZEOF_StgUpdateFrame-WDS(5)] = StgThunk_payload(R1,1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame - WDS(5);
Sp_adj(-1); // for stg_ap_0_ret
Sp_adj(-1); // for stg_ap_*_ret
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_ppppp();
jump RET_LBL(stg_ap_ppppp);
......@@ -268,7 +267,7 @@ INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info")
W_[Sp-SIZEOF_StgUpdateFrame-WDS(6)] = StgThunk_payload(R1,1);
R1 = StgThunk_payload(R1,0);
Sp = Sp - SIZEOF_StgUpdateFrame - WDS(6);
Sp_adj(-1); // for stg_ap_0_ret
Sp_adj(-1); // for stg_ap_*_ret
TICK_UNKNOWN_CALL();
TICK_SLOW_CALL_pppppp();
jump RET_LBL(stg_ap_pppppp);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment