CgTailCall.lhs 16.1 KB
Newer Older
1
%
2
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
%
%********************************************************
%*							*
\section[CgTailCall]{Tail calls: converting @StgApps@}
%*							*
%********************************************************

\begin{code}
#include "HsVersions.h"

module CgTailCall (
	cgTailCall,
	performReturn,
	mkStaticAlgReturnCode, mkDynamicAlgReturnCode,
	mkPrimReturnCode,
18
19

	tailCallBusiness
20
21
    ) where

22
IMP_Ubiq(){-uitous-}
23
24
25
26

import CgMonad
import AbsCSyn

27
28
29
30
31
import AbsCUtils	( mkAbstractCs, mkAbsCStmts, getAmodeRep )
import CgBindery	( getArgAmodes, getCAddrMode, getCAddrModeAndInfo )
import CgRetConv	( dataReturnConvPrim, dataReturnConvAlg,
			  ctrlReturnConvAlg, CtrlReturnConvention(..),
			  DataReturnConvention(..)
32
33
			)
import CgStackery	( adjustRealSps, mkStkAmodes )
34
35
36
import CgUsages		( getSpARelOffset )
import CLabel		( mkStdUpdCodePtrVecLabel, mkConUpdCodePtrVecLabel )
import ClosureInfo	( nodeMustPointToIt,
37
38
			  getEntryConvention, EntryConvention(..),
			  LambdaFormInfo
39
			)
40
import CmdLineOpts	( opt_DoSemiTagging )
41
import HeapOffs		( zeroOff, SYN_IE(VirtualSpAOffset) )
42
43
import Id		( idType, dataConTyCon, dataConTag,
			  fIRST_TAG
44
			)
45
46
47
import Literal		( mkMachInt )
import Maybes		( assocMaybe )
import PrimRep		( PrimRep(..) )
48
import StgSyn		( SYN_IE(StgArg), GenStgArg(..), SYN_IE(StgLiveVars) )
49
50
import Type		( isPrimType )
import Util		( zipWithEqual, panic, assertPanic )
51
52
53
54
55
56
57
58
59
\end{code}

%************************************************************************
%*									*
\subsection[tailcall-doc]{Documentation}
%*									*
%************************************************************************

\begin{code}
60
cgTailCall :: StgArg -> [StgArg] -> StgLiveVars -> Code
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
\end{code}

Here's the code we generate for a tail call.  (NB there may be no
arguments, in which case this boils down to just entering a variable.)

\begin{itemize}
\item	Adjust the stack ptr to \tr{tailSp + #args}.
\item	Put args in the top locations of the resulting stack.
\item	Make Node point to the function closure.
\item	Enter the function closure.
\end{itemize}

Things to be careful about:
\begin{itemize}
\item	Don't overwrite stack locations before you have finished with
	them (remember you need the function and the as-yet-unmoved
	arguments).
\item	Preferably, generate no code to replace x by x on the stack (a
	common situation in tail-recursion).
\item	Adjust the stack high water mark appropriately.
\end{itemize}

Literals are similar to constructors; they return by putting
themselves in an appropriate register and returning to the address on
top of the B stack.

\begin{code}
88
cgTailCall (StgLitArg lit) [] live_vars
89
90
91
92
93
94
95
96
  = performPrimReturn (CLit lit) live_vars
\end{code}

Treat unboxed locals exactly like literals (above) except use the addr
mode for the local instead of (CLit lit) in the assignment.

Case for unboxed @Ids@ first:
\begin{code}
97
98
cgTailCall atom@(StgVarArg fun) [] live_vars
  | isPrimType (idType fun)
99
100
101
102
103
104
  = getCAddrMode fun `thenFC` \ amode ->
    performPrimReturn amode live_vars
\end{code}

The general case (@fun@ is boxed):
\begin{code}
105
cgTailCall (StgVarArg fun) args live_vars = performTailCall fun args live_vars
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
\end{code}

%************************************************************************
%*									*
\subsection[return-and-tail-call]{Return and tail call}
%*									*
%************************************************************************

ADR-HACK

  A quick bit of hacking to try to solve my void#-leaking blues...

  I think I'm getting bitten by this stuff because code like

  \begin{pseudocode}
	  case ds.s12 :: IoWorld of {
	      -- lvs: [ds.s12]; rhs lvs: []; uniq: c0
	    IoWorld ds.s13# -> ds.s13#;
	  } :: Universe#
  \end{pseudocode}

  causes me to try to allocate a register to return the result in.  The
  hope is that the following will avoid such problems (and that Will
  will do this in a cleaner way when he hits the same problem).

KCAH-RDA

\begin{code}
performPrimReturn :: CAddrMode	-- The thing to return
135
		  -> StgLiveVars
136
137
138
139
		  -> Code

performPrimReturn amode live_vars
  = let
140
	kind = getAmodeRep amode
141
142
143
	ret_reg = dataReturnConvPrim kind

	assign_possibly = case kind of
144
	  VoidRep -> AbsCNop
145
146
147
148
149
	  kind -> (CAssign (CReg ret_reg) amode)
    in
    performReturn assign_possibly mkPrimReturnCode live_vars

mkPrimReturnCode :: Sequel -> Code
150
151
152
153
mkPrimReturnCode (UpdateCode _)	= panic "mkPrimReturnCode: Upd"
mkPrimReturnCode sequel		= sequelToAmode sequel	`thenFC` \ dest_amode ->
				  absC (CReturn dest_amode DirectReturn)
				  -- Direct, no vectoring
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168

-- All constructor arguments in registers; Node and InfoPtr are set.
-- All that remains is
--	(a) to set TagReg, if necessary
--	(b) to set InfoPtr to the info ptr, if necessary
--	(c) to do the right sort of jump.

mkStaticAlgReturnCode :: Id		-- The constructor
		      -> Maybe CLabel	-- The info ptr, if it isn't already set
		      -> Sequel		-- where to return to
		      -> Code

mkStaticAlgReturnCode con maybe_info_lbl sequel
  =	-- Generate profiling code if necessary
    (case return_convention of
169
170
	VectoredReturn sz -> profCtrC SLIT("VEC_RETURN") [mkIntCLit sz]
	other		  -> nopC
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
    )					`thenC`

	-- Set tag if necessary
	-- This is done by a macro, because if we are short of registers
	-- we don't set TagReg; instead the continuation gets the tag
	-- by indexing off the info ptr
    (case return_convention of

	UnvectoredReturn no_of_constrs
	 | no_of_constrs > 1
		-> absC (CMacroStmt SET_TAG [mkIntCLit zero_indexed_tag])

	other	-> nopC
    )					`thenC`

	-- Generate the right jump or return
    (case sequel of
	UpdateCode _ ->	-- Ha!	We know the constructor,
			-- so we can go direct to the correct
			-- update code for that constructor

				-- Set the info pointer, and jump
			set_info_ptr		`thenC`
194
    			absC (CJump (CLbl update_label CodePtrRep))
195
196
197
198
199
200
201
202
203
204

	CaseAlts _ (Just (alts, _)) ->	-- Ho! We know the constructor so
					-- we can go right to the alternative

			-- No need to set info ptr when returning to a
			-- known join point. After all, the code at
			-- the destination knows what constructor it
			-- is going to handle.

			case assocMaybe alts tag of
205
			   Just (alt_absC, join_lbl) -> absC (CJump (CLbl join_lbl CodePtrRep))
206
207
208
209
210
211
212
213
214
215
216
217
			   Nothing		     -> panic "mkStaticAlgReturnCode: default"
				-- The Nothing case should never happen; it's the subject
				-- of a wad of special-case code in cgReturnCon

	other ->	-- OnStack, or (CaseAlts) ret_amode Nothing)
			-- Set the info pointer, and jump
		    set_info_ptr		`thenC`
		    sequelToAmode sequel	`thenFC` \ ret_amode ->
		    absC (CReturn ret_amode return_info)
    )

  where
218
219
    tag		      = dataConTag   con
    tycon	      = dataConTyCon con
220
221
    return_convention = ctrlReturnConvAlg tycon
    zero_indexed_tag  = tag - fIRST_TAG	      -- Adjust tag to be zero-indexed
222
					      -- cf AbsCUtils.mkAlgAltsCSwitch
223

224
225
    update_label
      = case (dataReturnConvAlg con) of
226
227
	  ReturnInHeap   -> mkStdUpdCodePtrVecLabel tycon tag
	  ReturnInRegs _ -> mkConUpdCodePtrVecLabel tycon tag
228
229
230
231
232
233
234

    return_info = case return_convention of
			UnvectoredReturn _ -> DirectReturn
			VectoredReturn _   -> StaticVectoredReturn zero_indexed_tag

    set_info_ptr = case maybe_info_lbl of
			Nothing	      -> nopC
235
			Just info_lbl -> absC (CAssign (CReg infoptr) (CLbl info_lbl DataPtrRep))
236
237
238
239
240
241


mkDynamicAlgReturnCode :: TyCon -> CAddrMode -> Sequel -> Code

mkDynamicAlgReturnCode tycon dyn_tag sequel
  = case ctrlReturnConvAlg tycon of
242
	VectoredReturn sz ->
243

244
		profCtrC SLIT("VEC_RETURN") [mkIntCLit sz] `thenC`
245
		sequelToAmode sequel		`thenFC` \ ret_addr ->
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
		absC (CReturn ret_addr (DynamicVectoredReturn dyn_tag))

	UnvectoredReturn no_of_constrs ->

		-- Set tag if necessary
		-- This is done by a macro, because if we are short of registers
		-- we don't set TagReg; instead the continuation gets the tag
		-- by indexing off the info ptr
		(if no_of_constrs > 1 then
			absC (CMacroStmt SET_TAG [dyn_tag])
		else
			nopC
		)			`thenC`


		sequelToAmode sequel		`thenFC` \ ret_addr ->
		-- Generate the right jump or return
		absC (CReturn ret_addr DirectReturn)
\end{code}

\begin{code}
performReturn :: AbstractC	    -- Simultaneous assignments to perform
	      -> (Sequel -> Code)   -- The code to execute to actually do
				    -- the return, given an addressing mode
				    -- for the return address
271
	      -> StgLiveVars
272
273
274
275
276
277
	      -> Code

performReturn sim_assts finish_code live_vars
  = getEndOfBlockInfo	`thenFC` \ (EndOfBlockInfo args_spa args_spb sequel) ->

	-- Do the simultaneous assignments,
278
    doSimAssts args_spa live_vars sim_assts	`thenC`
279
280
281
282
283
284
285
286
287
288
289

	-- Adjust stack pointers
    adjustRealSps args_spa args_spb	`thenC`

	-- Do the return
    finish_code sequel		-- "sequel" is `robust' in that it doesn't
				-- depend on stk-ptr values
\end{code}

\begin{code}
performTailCall :: Id			-- Function
290
291
		-> [StgArg]	-- Args
		-> StgLiveVars
292
293
294
295
296
297
		-> Code

performTailCall fun args live_vars
  =	-- Get all the info we have about the function and args and go on to
	-- the business end
    getCAddrModeAndInfo fun	`thenFC` \ (fun_amode, lf_info) ->
298
    getArgAmodes args		`thenFC` \ arg_amodes ->
299
300
301
302
303
304
305
306
307

    tailCallBusiness
		fun fun_amode lf_info arg_amodes
		live_vars AbsCNop {- No pending assignments -}


tailCallBusiness :: Id -> CAddrMode	-- Function and its amode
		 -> LambdaFormInfo	-- Info about the function
		 -> [CAddrMode]		-- Arguments
308
		 -> StgLiveVars	-- Live in continuation
309
310
311
312
313
314
315
316
317

		 -> AbstractC		-- Pending simultaneous assignments
					-- *** GUARANTEED to contain only stack assignments.
					--     In ptic, we don't need to look in here to
					--     discover all live regs

		 -> Code

tailCallBusiness fun fun_amode lf_info arg_amodes live_vars pending_assts
318
  = nodeMustPointToIt lf_info			`thenFC` \ node_points ->
319
    getEntryConvention fun lf_info
320
	(map getAmodeRep arg_amodes)		`thenFC` \ entry_conv ->
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338

    getEndOfBlockInfo	`thenFC` \ (EndOfBlockInfo args_spa args_spb sequel) ->

    let
	node_asst
	  = if node_points then
		CAssign (CReg node) fun_amode
	    else
		AbsCNop

	(arg_regs, finish_code)
	  = case entry_conv of
	      ViaNode			  ->
		([],
		     mkAbstractCs [
			CCallProfCtrMacro SLIT("ENT_VIA_NODE") [],
			CAssign (CReg infoptr)

339
340
				(CMacroExpr DataPtrRep INFO_PTR [CReg node]),
			CJump (CMacroExpr CodePtrRep ENTRY_CODE [CReg infoptr])
341
		     ])
342
343
	      StdEntry lbl Nothing	  -> ([], CJump (CLbl lbl CodePtrRep))
	      StdEntry lbl (Just itbl)	  -> ([], CAssign (CReg infoptr) (CLbl itbl DataPtrRep)
344
						     `mkAbsCStmts`
345
						  CJump (CLbl lbl CodePtrRep))
346
	      DirectEntry lbl arity regs  ->
347
		(regs,	 CJump (CLbl lbl CodePtrRep))
348
349
350

	no_of_args = length arg_amodes

351
352
353
354
355
	(reg_arg_amodes, stk_arg_amodes) = splitAt (length arg_regs) arg_amodes
	    -- We get some stk_arg_amodes if (a) no regs, or (b) args beyond arity

	reg_arg_assts
	  = mkAbstractCs (zipWithEqual "assign_to_reg2" assign_to_reg arg_regs reg_arg_amodes)
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372

	assign_to_reg reg_id amode = CAssign (CReg reg_id) amode
    in
    case fun_amode of
      CJoinPoint join_spa join_spb ->  -- Ha!  A let-no-escape thingy

	  ASSERT(not (args_spa > join_spa) || (args_spb > join_spb))
	      -- If ASSERTion fails: Oops: the join point has *lower*
	      -- stack ptrs than the continuation Note that we take
	      -- the SpB point without the return address here.	 The
	      -- return address is put on by the let-no-escapey thing
	      -- when it finishes.

	  mkStkAmodes join_spa join_spb stk_arg_amodes
		      `thenFC` \ (final_spa, final_spb, stk_arg_assts) ->

		-- Do the simultaneous assignments,
373
	  doSimAssts join_spa live_vars
374
375
376
377
378
379
380
381
382
383
384
385
386
		(mkAbstractCs [pending_assts, reg_arg_assts, stk_arg_assts])
			`thenC`

		-- Adjust stack ptrs
	  adjustRealSps final_spa final_spb	`thenC`

		-- Jump to join point
	  absC finish_code

      _ -> -- else: not a let-no-escape (the common case)

		-- Make instruction to save return address
	    loadRetAddrIntoRetReg sequel	`thenFC` \ ret_asst ->
387

388
389
390
391
392
393
394
395
	    mkStkAmodes args_spa args_spb stk_arg_amodes
						`thenFC`
			    \ (final_spa, final_spb, stk_arg_assts) ->

		-- The B-stack space for the pushed return addess, with any args pushed
		-- on top, is recorded in final_spb.

		-- Do the simultaneous assignments,
396
	    doSimAssts args_spa live_vars
397
398
399
400
401
402
403
404
		(mkAbstractCs [pending_assts, node_asst, ret_asst,
			       reg_arg_assts, stk_arg_assts])
						`thenC`

		-- Final adjustment of stack pointers
	    adjustRealSps final_spa final_spb	`thenC`

		-- Now decide about semi-tagging
405
406
407
	    let
		semi_tagging_on = opt_DoSemiTagging
	    in
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
	    case (semi_tagging_on, arg_amodes, node_points, sequel) of

	--
	-- *************** The semi-tagging case ***************
	--
	      (	  True,		   [],		True,	     CaseAlts _ (Just (st_alts, maybe_deflt_join_details))) ->

		-- Whoppee!  Semi-tagging rules OK!
		-- (a) semi-tagging is switched on
		-- (b) there are no arguments,
		-- (c) Node points to the closure
		-- (d) we have a case-alternative sequel with
		--	some visible alternatives

		-- Why is test (c) necessary?
		-- Usually Node will point to it at this point, because we're
		-- scrutinsing something which is either a thunk or a
		-- constructor.
		-- But not always!  The example I came across is when we have
		-- a top-level Double:
		--	lit.3 = D# 3.000
		--	... (case lit.3 of ...) ...
		-- Here, lit.3 is built as a re-entrant thing, which you must enter.
		-- (OK, the simplifier should have eliminated this, but it's
		--  easy to deal with the case anyway.)
		let
		    join_details_to_code (load_regs_and_profiling_code, join_lbl)
			= load_regs_and_profiling_code		`mkAbsCStmts`
436
			  CJump (CLbl join_lbl CodePtrRep)
437
438
439
440
441
442

		    semi_tagged_alts = [ (mkMachInt (toInteger (tag - fIRST_TAG)),
					  join_details_to_code join_details)
				       | (tag, join_details) <- st_alts
				       ]

443
		    enter_jump
444
		      -- Enter Node (we know infoptr will have the info ptr in it)!
445
446
		      = mkAbstractCs [
			CCallProfCtrMacro SLIT("RET_SEMI_FAILED")
447
448
					[CMacroExpr IntRep INFO_TAG [CReg infoptr]],
			CJump (CMacroExpr CodePtrRep ENTRY_CODE [CReg infoptr]) ]
449
450
451
452
		in
			-- Final switch
		absC (mkAbstractCs [
			    CAssign (CReg infoptr)
453
				    (CVal (NodeRel zeroOff) DataPtrRep),
454
455
456

			    case maybe_deflt_join_details of
				Nothing ->
457
				    CSwitch (CMacroExpr IntRep INFO_TAG [CReg infoptr])
458
459
460
					(semi_tagged_alts)
					(enter_jump)
				Just (_, details) ->
461
				    CSwitch (CMacroExpr IntRep EVAL_TAG [CReg infoptr])
462
463
				     [(mkMachInt 0, enter_jump)]
				     (CSwitch
464
					 (CMacroExpr IntRep INFO_TAG [CReg infoptr])
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
					 (semi_tagged_alts)
					 (join_details_to_code details))
		])

	--
	-- *************** The non-semi-tagging case ***************
	--
	      other -> absC finish_code
\end{code}

\begin{code}
loadRetAddrIntoRetReg :: Sequel -> FCode AbstractC

loadRetAddrIntoRetReg InRetReg
  = returnFC AbsCNop  -- Return address already there

loadRetAddrIntoRetReg sequel
  = sequelToAmode sequel      `thenFC` \ amode ->
    returnFC (CAssign (CReg RetReg) amode)

\end{code}

%************************************************************************
%*									*
\subsection[doSimAssts]{@doSimAssts@}
%*									*
%************************************************************************

@doSimAssts@ happens at the end of every block of code.
They are separate because we sometimes do some jiggery-pokery in between.

\begin{code}
doSimAssts :: VirtualSpAOffset	-- tail_spa: SpA as seen by continuation
498
	   -> StgLiveVars	-- Live in continuation
499
500
501
	   -> AbstractC
	   -> Code

502
doSimAssts tail_spa live_vars sim_assts
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
  =	-- Do the simultaneous assignments
    absC (CSimultaneous sim_assts)	`thenC`

	-- Stub any unstubbed slots; the only live variables are indicated in
	-- the end-of-block info in the monad
    nukeDeadBindings live_vars		`thenC`
    getUnstubbedAStackSlots tail_spa	`thenFC` \ a_slots ->
	-- Passing in tail_spa here should actually be redundant, because
	-- the stack should be trimmed (by nukeDeadBindings) to
	-- exactly the tail_spa position anyhow.

	-- Emit code to stub dead regs; this only generates actual
	-- machine instructions in in the DEBUG version
	-- *** NOT DONE YET ***

    (if (null a_slots)
     then nopC
     else profCtrC SLIT("A_STK_STUB") [mkIntCLit (length a_slots)]	`thenC`
	  mapCs stub_A_slot a_slots
    )
  where
    stub_A_slot :: VirtualSpAOffset -> Code
    stub_A_slot offset = getSpARelOffset offset		`thenFC` \ spa_rel ->
526
			 absC (CAssign	(CVal spa_rel PtrRep)
527
528
					(CReg StkStubReg))
\end{code}