StgCmmExpr.hs 24.8 KB
Newer Older
1
2
3
4
5
6
7
8
-----------------------------------------------------------------------------
--
-- Stg to C-- code generation: expressions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

Ian Lynagh's avatar
Ian Lynagh committed
9
10
11
12
13
14
15
{-# OPTIONS -fno-warn-tabs #-}
-- The above warning suppression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and
-- detab the module (please do the detabbing in a separate patch). See
--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
-- for details

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
module StgCmmExpr ( cgExpr ) where

#define FAST_STRING_NOT_NEEDED
#include "HsVersions.h"

import {-# SOURCE #-} StgCmmBind ( cgBind )

import StgCmmMonad
import StgCmmHeap
import StgCmmEnv
import StgCmmCon
import StgCmmProf
import StgCmmLayout
import StgCmmPrim
import StgCmmHpc
import StgCmmTicky
import StgCmmUtils
import StgCmmClosure

import StgSyn

37
import MkGraph
38
import BlockId
39
import Cmm
40
41
import CoreSyn
import DataCon
42
import ForeignCall
43
import Id
44
import PrimOp
45
import TyCon
46
import Type
47
import CostCentre	( CostCentreStack, currentCCS )
48
import Control.Monad (when)
49
50
51
52
import Maybes
import Util
import FastString
import Outputable
53
import UniqSupply
54
55
56
57
58

------------------------------------------------------------------------
--		cgExpr: the main function
------------------------------------------------------------------------

59
-- | Generate Cmm code for an STG expression: the main entry point of
-- this module.  Dispatches on the expression form; each alternative
-- delegates to a specialised code generator defined below.
cgExpr  :: StgExpr -> FCode ReturnKind

cgExpr (StgApp fun args)     = cgIdApp fun args

{- seq# a s ==> a -}
-- seq# in a tail position is a no-op apart from forcing its argument,
-- so we just enter/return 'a'.
cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
  cgIdApp a []

cgExpr (StgOpApp op args ty) = cgOpApp op args ty
cgExpr (StgConApp con args)  = cgConApp con args

-- Profiling and HPC wrappers: emit the bookkeeping, then the body.
cgExpr (StgSCC cc tick push expr) = do { emitSetCCC cc tick push; cgExpr expr }
cgExpr (StgTick m n expr) = do { emit (mkTickBox m n); cgExpr expr }

cgExpr (StgLit lit)       = do cmm_lit <- cgLit lit
                               emitReturn [CmmLit cmm_lit]

cgExpr (StgLet binds expr)             = do { cgBind binds;     cgExpr expr }

-- A let-no-escape becomes a join point: allocate a fresh label for the
-- continuation, compile the bindings as out-of-line blocks that branch
-- back to it, then compile the body.  See "Let no escape" below.
cgExpr (StgLetNoEscape _ _ binds expr) =
  do { us <- newUniqSupply
     ; let join_id = mkBlockId (uniqFromSupply us)
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id
     ; return r }

cgExpr (StgCase expr _live_vars _save_vars bndr _srt alt_type alts) =
  cgCase expr bndr alt_type alts

-- Lambdas should have been lambda-lifted away before we get here.
cgExpr (StgLam {}) = panic "cgExpr: StgLam"

------------------------------------------------------------------------
--		Let no escape
------------------------------------------------------------------------

{- Generating code for a let-no-escape binding, aka join point is very
93
very similar to what we do for a case expression.  The duality is
94
95
96
97
98
99
100
101
102
103
104
105
106
between
	let-no-escape x = b
	in e
and
	case e of ... -> b

That is, the RHS of 'x' (ie 'b') will execute *later*, just like
the alternative of the case; it needs to be compiled in an environment
in which all volatile bindings are forgotten, and the free vars are
bound only to stable things like stack locations..  The 'e' part will
execute *next*, just like the scrutinee of a case. -}

-------------------------
107
108
109
110
111
112
113
114
115
116
117
118
119
120
-- | Compile the bindings of a let-no-escape expression.  Each
-- right-hand side is emitted as an out-of-line block that finishes by
-- branching to @join_id@ (see 'cgLetNoEscapeRhs'), and the binder is
-- added to the environment so that applications of it compile to
-- direct jumps.
cgLneBinds :: BlockId -> StgBinding -> FCode ()
cgLneBinds join_id (StgNonRec bndr rhs)
  = do  { local_cc <- saveCurrentCostCentre
                -- See Note [Saving the current cost centre]
        ; info <- cgLetNoEscapeRhs join_id local_cc bndr rhs 
        ; addBindC (cg_id info) info }

cgLneBinds join_id (StgRec pairs)
  = do  { local_cc <- saveCurrentCostCentre
                -- See Note [Saving the current cost centre]
        ; new_bindings <- fixC (\ new_bindings -> do
                -- fixC ties the knot: the recursive RHSs are compiled
                -- in an environment that already contains the bindings
                -- being produced
                { addBindsC new_bindings
                ; listFCs [ cgLetNoEscapeRhs join_id local_cc b e 
                          | (b,e) <- pairs ] })
        ; addBindsC new_bindings }
121

122

123
-------------------------
124
125
126
-- | Compile the right-hand side of one let-no-escape binding.  The
-- generated code is placed out of line under the binding's own block
-- id and finishes by branching to the join point of the enclosing
-- let-no-escape.
cgLetNoEscapeRhs
    :: BlockId          -- ^ join point for successor of let-no-escape
    -> Maybe LocalReg   -- ^ saved cost centre, if any
    -> Id               -- ^ the binder
    -> StgRhs           -- ^ its right-hand side
    -> FCode CgIdInfo

cgLetNoEscapeRhs join_id local_cc bndr rhs =
  do { (info, rhs_body) <- getCodeR $ cgLetNoEscapeRhsBody local_cc bndr rhs
     ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
           -- the CgIdInfo built for a let-no-escape binder always
           -- carries a block id (it comes from lneIdInfo)
     ; emitOutOfLine bid $ rhs_body <*> mkBranch join_id
     ; return info
     }

138
139
140
141
142
-- | Compile the body of a let-no-escape RHS: either a closure RHS
-- (arguments bound to registers, then the body) or a constructor RHS.
cgLetNoEscapeRhsBody
    :: Maybe LocalReg   -- ^ saved cost centre, if any
    -> Id
    -> StgRhs
    -> FCode CgIdInfo
cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body)
  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
  = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args)
        -- For a constructor RHS we want to generate a single chunk of
        -- code which can be jumped to from many places, which will
        -- return the constructor. It's easy; just behave as if it
        -- was an StgRhsClosure with a ConApp inside!

-------------------------
-- | Compile a let-no-escape binding much as if it were a case
-- alternative: bind the arguments to registers, compile the body under
-- an alternative-style heap check, and return an id-info recording the
-- argument registers so call sites can jump here directly.
cgLetNoEscapeClosure
    :: Id               -- ^ binder
    -> Maybe LocalReg   -- ^ slot for saved current cost centre
    -> CostCentreStack  -- ^ XXX: *** NOT USED *** why not?
    -> [NonVoid Id]     -- ^ args (as in @\\ args -> body@)
    -> StgExpr          -- ^ body (as in above)
    -> FCode CgIdInfo

cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  = do  { arg_regs <- forkProc $ do
                { restoreCurrentCostCentre cc_slot
                ; arg_regs <- bindArgsToRegs args
                ; _ <- altHeapCheck arg_regs (cgExpr body)
                        -- Using altHeapCheck just reduces
                        -- instructions to save on stack
                ; return arg_regs }
        ; return $ lneIdInfo bndr arg_regs }
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281


------------------------------------------------------------------------
--		Case expressions
------------------------------------------------------------------------

{- Note [Compiling case expressions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It is quite interesting to decide whether to put a heap-check at the
start of each alternative.  Of course we certainly have to do so if
the case forces an evaluation, or if there is a primitive op which can
trigger GC.

A more interesting situation is this (a Plan-B situation)

	!P!;
	...P...
	case x# of
	  0#      -> !Q!; ...Q...
	  default -> !R!; ...R...

where !x! indicates a possible heap-check point. The heap checks
in the alternatives *can* be omitted, in which case the topmost
heapcheck will take their worst case into account.

In favour of omitting !Q!, !R!:

 - *May* save a heap overflow test,
   if ...P... allocates anything.  

 - We can use relative addressing from a single Hp to 
   get at all the closures so allocated.

 - No need to save volatile vars etc across heap checks
   in !Q!, !R!

Against omitting !Q!, !R!

  - May put a heap-check into the inner loop.  Suppose 
	the main loop is P -> R -> P -> R...
	Q is the loop exit, and only it does allocation.
    This only hurts us if P does no allocation.  If P allocates,
    then there is a heap check in the inner loop anyway.

  - May do more allocation than reqd.  This sometimes bites us
    badly.  For example, nfib (ha!) allocates about 30\% more space if the
    worst-casing is done, because many many calls to nfib are leaf calls
    which don't need to allocate anything. 

    We can un-allocate, but that costs an instruction

Neither problem hurts us if there is only one alternative.

Suppose the inner loop is P->R->P->R etc.  Then here is
how many heap checks we get in the *inner loop* under various
conditions

  Alooc	  Heap check in branches (!Q!, !R!)?
  P Q R	     yes     no (absorb to !P!)
--------------------------------------
  n n n	     0		0
  n y n	     0		1
  n . y	     1		1
  y . y	     2		1
  y . n	     1		1

Best choices: absorb heap checks from Q and R into !P! iff
  a) P itself does some allocation
or
  b) P does allocation, or there is exactly one alternative

We adopt (b) because that is more likely to put the heap check at the
entry to a function, when not many things are live.  After a bunch of
single-branch cases, we may have lots of things live

Hence: two basic plans for

	case e of r { alts }

------ Plan A: the general case ---------

	...save current cost centre...

	...code for e, 
	   with sequel (SetLocals r)

        ...restore current cost centre...
	...code for alts...
	...alts do their own heap checks

------ Plan B: special case when ---------
  (i)  e does not allocate or call GC
  (ii) either upstream code performs allocation
       or there is just one alternative

  Then heap allocation in the (single) case branch
  is absorbed by the upstream check.
  Very common example: primops on unboxed values

	...code for e,
	   with sequel (SetLocals r)...

	...code for alts...
	...no heap check...
-}



-------------------------------------
-- | How case alternatives should handle heap checks.
-- See Note [Compiling case expressions] above.
data GcPlan
  = GcInAlts            -- Put a GC check at the start of the case
        [LocalReg]      -- alternatives, which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        -- primitive op which does no GC.  Absorb the allocation
                        -- of the case alternative(s) into the upstream check

-------------------------------------
287
-- | Compile a case expression.  Several special cases (enumeration
-- primops, trivial variable scrutinees, seq#) are handled before the
-- general Plan A / Plan B machinery; see
-- Note [Compiling case expressions] above.
cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind

cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
  | isEnumerationTyCon tycon -- Note [case on bool]
  = do { tag_expr <- do_enum_primop op args

       -- If the binder is not dead, convert the tag to a constructor
       -- and assign it.
       ; when (not (isDeadBinder bndr)) $ do
            { tmp_reg <- bindArgToReg (NonVoid bndr)
            ; emitAssign (CmmLocal tmp_reg)
                         (tagToClosure tycon tag_expr) }

       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
                                              (NonVoid bndr) alts
       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
       ; return AssignedDirectly
       }
  where
    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
    do_enum_primop TagToEnumOp [arg]  -- No code!
      = getArgAmode (NonVoid arg)
    do_enum_primop primop args
      = do tmp <- newTemp bWord
           cgPrimOp [tmp] primop args
           return (CmmReg (CmmLocal tmp))

{-
Note [case on bool]

This special case handles code like

  case a <# b of
    True ->
    False ->

If we let the ordinary case code handle it, we'll get something like

 tmp1 = a < b
 tmp2 = Bool_closure_tbl[tmp1]
 if (tmp2 & 7 != 0) then ... // normal tagged case

but this junk won't optimise away.  What we really want is just an
inline comparison:

 if (a < b) then ...

So we add a special case to generate

 tmp1 = a < b
 if (tmp1 == 0) then ...

and later optimisations will further improve this.

We should really change all these primops to return Int# instead, that
would make this special case go away.
-}

  -- Note [ticket #3132]: we might be looking at a case of a lifted Id
  -- that was cast to an unlifted type.  The Id will always be bottom,
  -- but we don't want the code generator to fall over here.  If we
  -- just emit an assignment here, the assignment will be
  -- type-incorrect Cmm.  Hence, we emit the usual enter/return code,
  -- (and because bottom must be untagged, it will be entered and the
  -- program will crash).
  -- The Sequel is a type-correct assignment, albeit bogus.
  -- The (dead) continuation loops; it would be better to invoke some kind
  -- of panic function here.
  --
  -- However, we also want to allow an assignment to be generated
  -- in the case when the types are compatible, because this allows
  -- some slightly-dodgy but occasionally-useful casts to be used,
  -- such as in RtClosureInspect where we cast an HValue to a MutVar#
  -- so we can print out the contents of the MutVar#.  If we generate
  -- code that enters the HValue, then we'll get a runtime panic, because
  -- the HValue really is a MutVar#.  The types are compatible though,
  -- so we can just generate an assignment.
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnLiftedType (idType v)
  || reps_compatible
  = -- assignment suffices for unlifted types
    do { when (not reps_compatible) $
           panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
       ; v_info <- getCgIdInfo v
       ; emitAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info)
       ; _ <- bindArgsToRegs [NonVoid bndr]
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    reps_compatible = idPrimRep v == idPrimRep bndr

cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = -- fail at run-time, not compile-time; see Note [ticket #3132] above
    do { mb_cc <- maybeSaveCostCentre True
       ; _ <- withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       ; l <- newLabelC
       ; emitLabel l
       ; emit (mkBranch l)      -- a self-loop: this continuation is dead code
       ; return AssignedDirectly
       }

{-
case seq# a s of v
  (# s', a' #) -> e

==>

case a of v
  (# s', a' #) -> e

(taking advantage of the fact that the return convention for (# State#, a #)
is the same as the return convention for just 'a')
-}

cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
  = -- handle seq#, same return convention as vanilla 'a'.
    cgCase (StgApp a []) bndr alt_type alts

cgCase scrut bndr alt_type alts
  = -- the general case
    do { up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map idToReg ret_bndrs
             simple_scrut = isSimpleScrut scrut alt_type
             -- Choose between Plan A (GC checks in the alternatives)
             -- and Plan B (absorb the alternatives' allocation into
             -- the upstream check); see Note [Compiling case expressions]
             do_gc  | not simple_scrut = True
                    | isSingleton alts = False
                    | up_hp_usg > 0    = False
                    | otherwise        = True
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       ; ret_kind <- withSequel (AssignTo alt_regs False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }

431
432
433
434
435
436
437
438
439
440

-----------------
-- | Save the current cost centre only when the scrutinee is simple;
-- otherwise no save is needed and we return Nothing.
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
maybeSaveCostCentre simple_scrut =
  if simple_scrut
    then saveCurrentCostCentre
    else return Nothing


-----------------
isSimpleScrut :: StgExpr -> AltType -> Bool
-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
--     when it does, you'll deeply mess up allocation
isSimpleScrut (StgOpApp op _ _) _          = isSimpleOp op
isSimpleScrut (StgLit _)       _           = True  -- case 1# of { 0# -> ..; ... }
isSimpleScrut (StgApp _ [])    (PrimAlt _) = True  -- case x# of { 0# -> ..; ... }
isSimpleScrut _                _           = False

450
451
452
453
isSimpleOp :: StgOp -> Bool
-- True iff the op cannot block or allocate
isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) = not (playSafe safe)
        -- unsafe foreign calls are the only simple foreign calls
isSimpleOp (StgPrimOp op)                              = not (primOpOutOfLine op)
isSimpleOp (StgPrimCallOp _)                           = False
455

456
-----------------
457
-- | The binders of a case that are assigned by the evaluation of the
-- scrutinee.  Only non-void binders come back.
chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
chooseReturnBndrs bndr (PrimAlt _) _alts
  = nonVoidIds [bndr]

chooseReturnBndrs _bndr (UbxTupAlt _) [(_, ids, _, _)]
  = nonVoidIds ids      -- 'bndr' is not assigned!

chooseReturnBndrs bndr (AlgAlt _) _alts
  = nonVoidIds [bndr]   -- Only 'bndr' is assigned

chooseReturnBndrs bndr PolyAlt _alts
  = nonVoidIds [bndr]   -- Only 'bndr' is assigned

chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
        -- UbxTupAlt has only one alternative

-------------------------------------
477
478
-- | Compile the alternatives of a case, given the GC plan and the way
-- the scrutinee's result was returned.  The case binders have already
-- been assigned by this point.
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
       -> FCode ReturnKind
-- At this point the result of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)

cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)
        -- Here bndrs are *already* in scope, so don't rebind them

cgAlts gc_plan bndr (PrimAlt _) alts
  = do  { tagged_cmms <- cgAltRhss gc_plan bndr alts

        ; let bndr_reg = CmmLocal (idToReg bndr)
              (DEFAULT,deflt) = head tagged_cmms
                -- PrimAlts always have a DEFAULT case
                -- and it always comes first

              tagged_cmms' = [(lit,code)
                             | (LitAlt lit, code) <- tagged_cmms]
        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
        ; return AssignedDirectly }

cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do  { (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

        ; let fam_sz   = tyConFamilySize tycon
              bndr_reg = CmmLocal (idToReg bndr)

                    -- Is the constructor tag in the node reg?
        ; if isSmallFamily fam_sz
          then do
                let   -- Yes, bndr_reg has constr. tag in ls bits
                   tag_expr  = cmmConstrTag1 (CmmReg bndr_reg)
                   branches' = [(tag+1,branch) | (tag,branch) <- branches]
                emitSwitch tag_expr branches' mb_deflt 1 fam_sz
                return AssignedDirectly

           else         -- No, get tag from info table
                do dflags <- getDynFlags
                   let -- Note that ptr _always_ has tag 1
                       -- when the family size is big enough
                       untagged_ptr = cmmRegOffB bndr_reg (-1)
                       tag_expr = getConstrTag dflags (untagged_ptr)
                   emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
                   return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
        -- UbxTupAlt and PolyAlt have only one alternative

527

528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
-- Note [alg-alt heap check]
--
-- In an algebraic case with more than one alternative, we will have
-- code like
--
-- L0:
--   x = R1
--   goto L1
-- L1:
--   if (x & 7 >= 2) then goto L2 else goto L3
-- L2:
--   Hp = Hp + 16
--   if (Hp > HpLim) then goto L4
--   ...
-- L4:
--   call gc() returns to L5
-- L5:
--   x = R1
--   goto L1

548
-------------------
549
-- | Compile the right-hand sides of an algebraic case, returning the
-- optional DEFAULT code and the (zero-based constructor tag, code)
-- branches for the switch.
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
             -> FCode ( Maybe CmmAGraph
                      , [(ConTagZ, CmmAGraph)] )
cgAlgAltRhss gc_plan bndr alts
  = do { tagged_cmms <- cgAltRhss gc_plan bndr alts

       ; let { mb_deflt = case tagged_cmms of
                           ((DEFAULT,rhs) : _) -> Just rhs
                           _other              -> Nothing
                            -- DEFAULT is always first, if present

              ; branches = [ (dataConTagZ con, cmm)
                           | (DataAlt con, cmm) <- tagged_cmms ]
              }

       ; return (mb_deflt, branches)
       }


568
-------------------
569
-- | Compile each alternative's RHS in its own fork of the code
-- generator state, binding the constructor arguments (if any) out of
-- the case binder's register first.
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
          -> FCode [(AltCon, CmmAGraph)]
cgAltRhss gc_plan bndr alts
  = forkAlts (map cg_alt alts)
  where
    base_reg = idToReg bndr
    cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph)
    cg_alt (con, bndrs, _uses, rhs)
      = getCodeR                  $
        maybeAltHeapCheck gc_plan $
        do { _ <- bindConArgs con base_reg bndrs
           ; _ <- cgExpr rhs
           ; return con }
582

583
584
585
586
587
588
-- | Wrap alternative code in a heap check when the GC plan asks for
-- one, choosing the heap-check variant that matches how the scrutinee
-- returned.
maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
maybeAltHeapCheck (gc_plan, ret_kind) code =
  case gc_plan of
    NoGcInAlts     -> code
    GcInAlts regs  ->
      case ret_kind of
        AssignedDirectly    -> altHeapCheck regs code
        ReturnedTo lret off -> altHeapCheckReturnsTo regs lret off code
589
590
591
592
593

-----------------------------------------------------------------------------
-- 	Tail calls
-----------------------------------------------------------------------------

594
-- | Compile a saturated constructor application in tail position.
cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
cgConApp con stg_args
  | isUnboxedTupleCon con       -- Unboxed tuple: assign and return
  = do { arg_exprs <- getNonVoidArgAmodes stg_args
       ; tickyUnboxedTupleReturn (length arg_exprs)
       ; emitReturn arg_exprs }

  | otherwise   --  Boxed constructors; allocate and return
  = ASSERT( stg_args `lengthIs` dataConRepRepArity con )
    -- renamed 'init' -> 'init_code' to avoid shadowing Prelude.init
    do  { (idinfo, init_code) <- buildDynCon (dataConWorkId con) currentCCS con stg_args
                -- The first "con" says that the name bound to this closure
                -- is "con", which is a bit of a fudge, but it only affects profiling
        ; emit init_code
        ; emitReturn [idInfoToAmode idinfo] }

610

611
-- | Compile an application of an identifier: either a direct jump to a
-- let-no-escape join point, or an ordinary tail call.
cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id [] | isVoidId fun_id = emitReturn []
cgIdApp fun_id args
  = do  { fun_info <- getCgIdInfo fun_id
        ; case maybeLetNoEscape fun_info of
            Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args
            Nothing                 -> cgTailCall fun_id fun_info args }
618

619
-- | Jump to a let-no-escape join point: assign the arguments into the
-- join point's registers and branch.
cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode ReturnKind
cgLneJump blk_id lne_regs args  -- Join point; discard sequel
  = do  { adjustHpBackwards -- always do this before a tail-call
        ; cmm_args <- getNonVoidArgAmodes args
        ; emitMultiAssign lne_regs cmm_args
        ; emit (mkBranch blk_id)
        ; return AssignedDirectly }
626
    
627
-- | Compile an ordinary tail call, dispatching on the call method
-- chosen by 'getCallMethod' (return-in-WHNF, enter, slow call via the
-- RTS apply routines, or a direct entry-point call).
cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode ReturnKind
cgTailCall fun_id fun_info args = do
    dflags <- getDynFlags
    case (getCallMethod dflags fun_name (idCafInfo fun_id) lf_info (length args)) of

            -- A value in WHNF, so we can just return it.
        ReturnIt -> emitReturn [fun]    -- ToDo: does ReturnIt guarantee tagged?

        EnterIt -> ASSERT( null args )  -- Discarding arguments
                   emitEnter fun

        SlowCall -> do      -- A slow function call via the RTS apply routines
                { tickySlowCall lf_info args
                ; emitComment $ mkFastString "slowCall"
                ; slowCall fun args }

        -- A direct function call (possibly with some left-over arguments)
        DirectEntry lbl arity -> do
                { tickyDirectCall arity args
                  -- if the closure's entry convention requires R1 to
                  -- point at it, pass the function itself as an extra arg
                ; if node_points dflags
                     then directCall NativeNodeCall   lbl arity (fun_arg:args)
                     else directCall NativeDirectCall lbl arity args }

        JumpToIt {} -> panic "cgTailCall"       -- ???

  where
    fun_arg     = StgVarArg fun_id
    fun_name    = idName            fun_id
    fun         = idInfoToAmode     fun_info
    lf_info     = cgIdInfoLF        fun_info
    node_points dflags = nodeMustPointToIt dflags lf_info
658
659


660
-- | Generate code to enter (evaluate) a closure.  The shape of the
-- generated code depends on the current sequel: for a 'Return' sequel
-- we tail-call the entry code directly (no tag test); for an
-- 'AssignTo' sequel we emit a tag test so that an already-tagged
-- closure is not entered at all, and report the continuation label via
-- @ReturnedTo@ so the enclosing case can reuse it.
emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { dflags <- getDynFlags
  ; adjustHpBackwards
  ; sequel <- getSequel
  ; updfr_off <- getUpdFrameOff
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return _ -> do
        { let entry = entryCode dflags $ closureInfoPtr $ CmmReg nodeReg
        ; emit $ mkForeignJump dflags NativeNodeCall entry
                    [cmmUntag fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lcall else goto Lret
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
       { lret <- newLabelC
       ; let (off, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs
       ; lcall <- newLabelC
         -- NB: updfr_off is the value fetched at the top of this
         -- function; nothing in between changes the update frame, so
         -- the previous duplicate re-fetch has been dropped.
       ; let area = Young lret
       ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
                                          [fun] updfr_off (0,[])
         -- refer to fun via nodeReg after the copyout, to avoid having
         -- both live simultaneously; this sometimes enables fun to be
         -- inlined in the RHS of the R1 assignment.
       ; let entry = entryCode dflags (closureInfoPtr (CmmReg nodeReg))
             the_call = toCall entry (Just lret) updfr_off off outArgs regs
       ; emit $
           copyout <*>
           mkCbranch (cmmIsTagged (CmmReg nodeReg)) lret lcall <*>
           outOfLine lcall the_call <*>
           mkLabel lret <*>
           copyin
       ; return (ReturnedTo lret off)
       }
  }