%
% (c) The University of Glasgow 2006
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%
% Code generation for tail calls.

\begin{code}
{-# OPTIONS -fno-warn-tabs #-}
-- The above warning suppression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and
-- detab the module (please do the detabbing in a separate patch). See
--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
-- for details

module CgTailCall (
	cgTailCall, performTailCall,
	performReturn, performPrimReturn,
	returnUnboxedTuple, ccallReturnUnboxedTuple,
	pushUnboxedTuple,
	tailCallPrimOp,
        tailCallPrimCall,

	pushReturnAddress
    ) where

#include "HsVersions.h"

import CgMonad
import CgBindery
import CgInfoTbls
import CgCallConv
import CgStackery
import CgHeapery
import CgUtils
import CgTicky
import ClosureInfo
import OldCmm
import OldCmmUtils
import CLabel
import Type
import Id
import StgSyn
import PrimOp
import DynFlags
import Outputable
import Util

import Control.Monad
import Data.Maybe

-----------------------------------------------------------------------------
-- Tail Calls

cgTailCall :: Id -> [StgArg] -> Code

-- Here's the code we generate for a tail call.  (NB there may be no
-- arguments, in which case this boils down to just entering a variable.)
-- 
--    *	Put args in the top locations of the stack.
--    *	Adjust the stack ptr
--    *	Make R1 point to the function closure if necessary.
--    *	Perform the call.
--
-- Things to be careful about:
--
--    *	Don't overwrite stack locations before you have finished with
-- 	them (remember you need the function and the as-yet-unmoved
-- 	arguments).
--    *	Preferably, generate no code to replace x by x on the stack (a
-- 	common situation in tail-recursion).
--    *	Adjust the stack high water mark appropriately.
-- 
-- Treat unboxed locals exactly like literals (above) except use the addr
-- mode for the local instead of (CLit lit) in the assignment.
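--
-- As a rough illustration only (the slot numbers, names, and instruction
-- forms below are made up; the real output depends on the calling
-- convention and stack layout computed here), a call  f x y  might
-- compile to something like:
--
--	Sp[k]   = y_amode		-- args into the top stack locations
--	Sp[k+1] = x_amode
--	Sp      = Sp + adjustment	-- adjust the stack pointer
--	R1      = f_closure		-- make R1 (Node) point to the closure
--	jump f_entry			-- perform the call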

cgTailCall fun args
  = do	{ fun_info <- getCgIdInfo fun

	; if isUnLiftedType (idType fun)
	  then 	-- Primitive return
		ASSERT( null args )
	    do	{ fun_amode <- idInfoToAmode fun_info
		; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode } 

	  else -- Normal case, fun is boxed
	    do  { arg_amodes <- getArgAmodes args
		; performTailCall fun_info arg_amodes noStmts }
	}
		
-- -----------------------------------------------------------------------------
-- The guts of a tail-call

performTailCall 
	:: CgIdInfo		-- The function
	-> [(CgRep,CmmExpr)]	-- Args
	-> CmmStmts		-- Pending simultaneous assignments
				--  *** GUARANTEED to contain only stack assignments.
	-> Code

performTailCall fun_info arg_amodes pending_assts
  | Just join_sp <- maybeLetNoEscape fun_info
  = 	   -- A let-no-escape is slightly different, because we
	   -- arrange the stack arguments into pointers and non-pointers
	   -- to make the heap check easier.  The tail-call sequence
	   -- is very similar to returning an unboxed tuple, so we
	   -- share some code.
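	   -- (Schematically, and purely as an illustration: a
	   -- let-no-escape join point j, called as  j x y,  compiles to
	   -- pushes of x and y at the join point's fixed Sp followed by
	   -- a jump to j's return-point label, with no closure entry.)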
     do	{ dflags <- getDynFlags
        ; (final_sp, arg_assts, live) <- pushUnboxedTuple join_sp arg_amodes
	; emitSimultaneously (pending_assts `plusStmts` arg_assts)
	; let lbl = enterReturnPtLabel dflags (idUnique (cgIdInfoId fun_info))
	; doFinalJump final_sp True $ jumpToLbl lbl (Just live) }

  | otherwise
  = do 	{ fun_amode <- idInfoToAmode fun_info
	; dflags <- getDynFlags
	; let assignSt  = CmmAssign nodeReg fun_amode
              node_asst = oneStmt assignSt
              node_live = Just [node]
	      (opt_node_asst, opt_node_live)
                      | nodeMustPointToIt dflags lf_info = (node_asst, node_live)
                      | otherwise                 = (noStmts, Just [])
	; EndOfBlockInfo sp _ <- getEndOfBlockInfo

	; case (getCallMethod dflags fun_name fun_has_cafs lf_info (length arg_amodes)) of

	    -- Node must always point to things we enter
	    EnterIt -> do
		{ emitSimultaneously (node_asst `plusStmts` pending_assts) 
		; let target       = entryCode dflags (closureInfoPtr (CmmReg nodeReg))
                      enterClosure = stmtC (CmmJump target node_live)
                      -- If this is a scrutinee
                      -- let's check if the closure is a constructor
                      -- so we can directly jump to the alternatives switch
                      -- statement.
                      jumpInstr = getEndOfBlockInfo >>=
                                  maybeSwitchOnCons dflags enterClosure
		; doFinalJump sp False jumpInstr }
    
	    -- A function, but we have zero arguments.  It is already in WHNF,
	    -- so we can just return it.  
	    -- As with any return, Node must point to it.
	    ReturnIt -> do
		{ emitSimultaneously (node_asst `plusStmts` pending_assts)
		; doFinalJump sp False $ emitReturnInstr node_live }
    
	    -- A real constructor.  Don't bother entering it, 
	    -- just do the right sort of return instead.
	    -- As with any return, Node must point to it.
	    ReturnCon _ -> do
		{ emitSimultaneously (node_asst `plusStmts` pending_assts)
		; doFinalJump sp False $ emitReturnInstr node_live }

	    JumpToIt lbl -> do
		{ emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
		; doFinalJump sp False $ jumpToLbl lbl opt_node_live }
    
	    -- A slow function call via the RTS apply routines
	    -- Node must definitely point to the thing
	    SlowCall -> do 
		{  when (not (null arg_amodes)) $ do
		   { if (isKnownFun lf_info)
			then tickyKnownCallTooFewArgs
			else tickyUnknownCall
		   ; tickySlowCallPat (map fst arg_amodes)
		   }

		; let (apply_lbl, args, extra_args)
			= constructSlowCall arg_amodes

		; directCall sp apply_lbl args extra_args node_live
			(node_asst `plusStmts` pending_assts)

		}
    
	    -- A direct function call (possibly with some left-over arguments)
	    DirectEntry lbl arity -> do
		{ if arity == length arg_amodes
			then tickyKnownCallExact
			else do tickyKnownCallExtraArgs
				tickySlowCallPat (map fst (drop arity arg_amodes))

 		; let
		     -- The args beyond the arity go straight on the stack
		     (arity_args, extra_args) = splitAt arity arg_amodes

		; directCall sp lbl arity_args extra_args opt_node_live
			(opt_node_asst `plusStmts` pending_assts)
	        }
	}
  where
    fun_id    = cgIdInfoId fun_info
    fun_name  = idName fun_id
    lf_info   = cgIdInfoLF fun_info
    fun_has_cafs = idCafInfo fun_id
    untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg))
    -- Test if closure is a constructor
    maybeSwitchOnCons dflags enterClosure eob
              | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob,
                not (dopt Opt_SccProfilingOn dflags)
                -- we can't shortcut when profiling is on, because we have
                -- to enter a closure to mark it as "used" for LDV profiling
              = do { is_constr <- newLabelC
                   -- Is the pointer tagged?
                   -- Yes, jump to switch statement
                   ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg)) 
                                is_constr)
                   -- No, enter the closure.
                   ; enterClosure
                   ; labelC is_constr
                   ; stmtC (CmmJump (entryCode dflags $
                               CmmLit (CmmLabel lbl)) (Just [node]))
                   }
{-
              -- This is a scrutinee for a case expression
              -- so let's see if we can directly inspect the closure
              | EndOfBlockInfo _ (CaseAlts lbl _ _ _) <- eob
              = do { no_cons <- newLabelC
                   -- Both the NCG and gcc optimize away the temp
                   ; z <- newTemp  wordRep
                   ; stmtC (CmmAssign z tag_expr)
                   ; let tag = CmmReg z
                   -- Is the closure a cons?
                   ; stmtC (CmmCondBranch (cond1 tag) no_cons)
                   ; stmtC (CmmCondBranch (cond2 tag) no_cons)
                   -- Yes, jump to switch statement
                   ; stmtC (CmmJump (CmmLit (CmmLabel lbl)))
                   ; labelC no_cons
                   -- No, enter the closure.
                   ; enterClosure
                   }
-}
              -- No case expression involved, enter the closure.
              | otherwise
              = do { stmtC untag_node
                   ; enterClosure
                   }
        where
          --cond1 tag  = cmmULtWord tag lowCons
          -- More efficient than the above?
{-
          tag_expr   = cmmGetClosureType (CmmReg nodeReg)
          cond1 tag  = cmmEqWord tag (CmmLit (mkIntCLit 0))
          cond2 tag  = cmmUGtWord tag highCons
          lowCons    = CmmLit (mkIntCLit 1)
            -- CONSTR
          highCons   = CmmLit (mkIntCLit 8)
            -- CONSTR_NOCAF_STATIC (from ClosureType.h)
-}

directCall :: VirtualSpOffset -> CLabel -> [(CgRep, CmmExpr)]
           -> [(CgRep, CmmExpr)] -> Maybe [GlobalReg] -> CmmStmts
           -> Code
directCall sp lbl args extra_args live_node assts = do
  dflags <- getDynFlags
  let
	-- The first chunk of args goes in registers
	(reg_arg_amodes, stk_args) = assignCallRegs dflags args

	-- Any "extra" arguments are placed in frames on the
	-- stack after the other arguments.
	slow_stk_args = slowArgs dflags extra_args

	reg_assts = assignToRegs reg_arg_amodes
        live_args = map snd reg_arg_amodes
        live_regs = Just $ (fromMaybe [] live_node) ++ live_args
  --
  (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
  emitSimultaneously $ reg_assts `plusStmts` stk_assts `plusStmts` assts
  doFinalJump final_sp False $ jumpToLbl lbl live_regs

-- -----------------------------------------------------------------------------
-- The final clean-up before we do a jump at the end of a basic block.
-- This code is shared by tail-calls and returns.

doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code 
doFinalJump final_sp is_let_no_escape jump_code
  = do	{ -- Adjust the high-water mark if necessary
	  adjustStackHW final_sp

	-- Push a return address if necessary (after the assignments
	-- above, in case we clobber a live stack location)
	--
	-- DONT push the return address when we're about to jump to a
	-- let-no-escape: the final tail call in the let-no-escape
	-- will do this.
	; eob <- getEndOfBlockInfo
	; whenC (not is_let_no_escape) (pushReturnAddress eob)
	    -- Final adjustment of Sp/Hp
	; adjustSpAndHp final_sp
	    -- and do the jump
	; jump_code }

-- ----------------------------------------------------------------------------
-- A general return (just a special case of doFinalJump, above)

performReturn :: Code	-- The code to execute to actually do the return
	      -> Code

performReturn finish_code
  = do  { EndOfBlockInfo args_sp _sequel <- getEndOfBlockInfo
	; doFinalJump args_sp False finish_code }

-- ----------------------------------------------------------------------------
-- Primitive Returns
-- Just load the return value into the right register, and return.

performPrimReturn :: CgRep -> CmmExpr -> Code

-- non-void return value
performPrimReturn rep amode | not (isVoidArg rep)
  = do { stmtC (CmmAssign ret_reg amode)
       ; performReturn $ emitReturnInstr live_regs }
  where
    -- careful here as 'dataReturnConvPrim' will panic if given a Void rep
    ret_reg@(CmmGlobal r) = dataReturnConvPrim rep
    live_regs = Just [r]

-- void return value
performPrimReturn _ _
  = performReturn $ emitReturnInstr (Just [])

-- ---------------------------------------------------------------------------
-- Unboxed tuple returns
-- These are a bit like a normal tail call, except that:
--
--   - The tail-call target is an info table on the stack
--
--   - We separate stack arguments into pointers and non-pointers,
--     to make it easier to leave things in a sane state for a heap check.
--     This is OK because we can never partially-apply an unboxed tuple,
--     unlike a function.  The same technique is used when calling
--     let-no-escape functions, because they also can't be partially
--     applied.
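--
-- A rough sketch, purely illustrative (the real assignment is made by
-- assignReturnRegs and the pointer/non-pointer split below): a return
-- whose components don't all fit in registers might produce
--
--	R1      = x		-- register-sized components in regs first
--	Sp[k]   = p		-- left-over pointers pushed next
--	Sp[k+1] = i		-- then left-over non-pointers
--	jump [Sp]		-- return to the info table on the stack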
returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
returnUnboxedTuple amodes
  = do 	{ (EndOfBlockInfo args_sp _sequel) <- getEndOfBlockInfo
	; tickyUnboxedTupleReturn (length amodes)
	; (final_sp, assts, live_regs) <- pushUnboxedTuple args_sp amodes
	; emitSimultaneously assts
	; doFinalJump final_sp False $ emitReturnInstr (Just live_regs) }

pushUnboxedTuple :: VirtualSpOffset		-- Sp at which to start pushing
		 -> [(CgRep, CmmExpr)]		-- amodes of the components
		 -> FCode (VirtualSpOffset,	-- final Sp
			   CmmStmts,		-- assignments (regs+stack)
                           [GlobalReg])         -- registers used (liveness)

pushUnboxedTuple sp [] 
  = return (sp, noStmts, [])
pushUnboxedTuple sp amodes
  = do	{ dflags <- getDynFlags
        ; let	(reg_arg_amodes, stk_arg_amodes) = assignReturnRegs dflags amodes
                live_regs = map snd reg_arg_amodes
	
		-- separate the rest of the args into pointers and non-pointers
		(ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
		reg_arg_assts = assignToRegs reg_arg_amodes
		
	    -- push ptrs, then nonptrs, on the stack
	; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
	; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args

	; returnFC (final_sp,
	  	    reg_arg_assts `plusStmts` ptr_assts `plusStmts` nptr_assts,
                    live_regs) }
-- -----------------------------------------------------------------------------
-- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
-- we want to do things in a slightly different order to normal:
-- 
-- 		- push return address
-- 		- adjust stack pointer
-- 		- r = call(args...)
-- 		- assign regs for unboxed tuple (usually just R1 = r)
-- 		- return to continuation
-- 
-- The return address (i.e. stack frame) must be on the stack before
-- doing the call in case the call ends up in the garbage collector.
-- 
-- Sadly, the information about the continuation is lost after we push it
-- (in order to avoid pushing it again), so we end up doing a needless
-- indirect jump (ToDo).

ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
ccallReturnUnboxedTuple amodes before_jump
  = do 	{ eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo

	-- Push a return address if necessary
	; pushReturnAddress eob
	; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
	    (do	{ adjustSpAndHp args_sp
		; before_jump
  		; returnUnboxedTuple amodes })
    }

-- -----------------------------------------------------------------------------
-- Calling an out-of-line primop

tailCallPrimOp :: PrimOp -> [StgArg] -> Code
tailCallPrimOp op
 = tailCallPrim (mkRtsPrimOpLabel op)

tailCallPrimCall :: PrimCall -> [StgArg] -> Code
tailCallPrimCall primcall
 = tailCallPrim (mkPrimCallLabel primcall)

tailCallPrim :: CLabel -> [StgArg] -> Code
tailCallPrim lbl args
 = do	{	-- We're going to perform a normal-looking tail call, 
		-- except that *all* the arguments will be in registers.
		-- Hence the ASSERT( null leftovers )
	  arg_amodes <- getArgAmodes args
	; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
              live_regs = Just $ map snd arg_regs
	      jump_to_primop = jumpToLbl lbl live_regs

	; ASSERT(null leftovers) -- no stack-resident args
 	  emitSimultaneously (assignToRegs arg_regs)

	; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
	; doFinalJump args_sp False jump_to_primop }

-- -----------------------------------------------------------------------------
-- Return Addresses

-- We always push the return address just before performing a tail call
-- or return.  The reason we leave it until then is because the stack
-- slot that the return address is to go into might contain something
-- useful.
-- 
-- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
-- case expression and the return address is still to be pushed.
-- 
-- There are cases where it doesn't look necessary to push the return
-- address: for example, just before doing a return to a known
-- continuation.  However, the continuation will expect to find the
-- return address on the stack in case it needs to do a heap check.
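--
-- (Illustrative only: for a scrutinee with CaseAlts return label L_alts
-- and args_sp = k, pushReturnAddress stores L_alts into the stack slot
-- at offset k, i.e. schematically  Sp[k] = L_alts.)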

pushReturnAddress :: EndOfBlockInfo -> Code

pushReturnAddress (EndOfBlockInfo args_sp (CaseAlts lbl _ _))
  = do	{ sp_rel <- getSpRelOffset args_sp
	; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }

pushReturnAddress _ = nopC

-- -----------------------------------------------------------------------------
-- Misc.

-- Passes no argument to the destination procedure
jumpToLbl :: CLabel -> Maybe [GlobalReg] -> Code
jumpToLbl lbl live = stmtC $ CmmJump (CmmLit $ CmmLabel lbl) live

assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
assignToRegs reg_args 
  = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
	    | (expr, reg_id) <- reg_args ] 
\end{code}


%************************************************************************
%*									*
\subsection[CgStackery-adjust]{Adjusting the stack pointers}
%*									*
%************************************************************************

This function adjusts the stack and heap pointers just before a tail
call or return.  The stack pointer is adjusted to its final position
(i.e. to point to the last argument for a tail call, or the activation
record for a return).  The heap pointer may be moved backwards, in
cases where we overallocated at the beginning of the basic block (see
CgCase.lhs for discussion).
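
For example (figures purely illustrative): if the worst-case branch
would have allocated 10 words but the branch actually taken allocated
only 4, the real Hp is 6 words ahead of the virtual Hp, so we move Hp
back by 6 words and retract the corresponding ticky allocation count.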

These functions {\em do not} deal with high-water-mark adjustment.
That's done by functions which allocate stack space.

\begin{code}
adjustSpAndHp :: VirtualSpOffset 	-- New offset for Arg stack ptr
	      -> Code
adjustSpAndHp newRealSp 
  = do	{ -- Adjust stack, if necessary.
	  -- NB: the conditional on the monad-carried realSp
	  --     is out of line (via codeOnly), to avoid a black hole
	; new_sp <- getSpRelOffset newRealSp
	; checkedAbsC (CmmAssign spReg new_sp)	-- Will generate no code in the case
	; setRealSp newRealSp			-- where realSp==newRealSp

	  -- Adjust heap.  The virtual heap pointer may be less than the real Hp
	  -- because the latter was advanced to deal with the worst-case branch
	  -- of the code, and we may be in a better-case branch.  In that case,
 	  -- move the real Hp *back* and retract some ticky allocation count.
	; hp_usg <- getHpUsage
	; let rHp = realHp hp_usg
	      vHp = virtHp hp_usg
	; new_hp <- getHpRelOffset vHp
	; checkedAbsC (CmmAssign hpReg new_hp)	-- Generates nothing when vHp==rHp
	; tickyAllocHeap (vHp - rHp)		-- ...ditto
	; setRealHp vHp
	}
\end{code}