StgCmmHeap.hs 24.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmHeap (
10
11
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset, hpRel,
12

13
        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
14

15
16
        mkVirtHeapOffsets, mkVirtConstrOffsets,
        mkStaticClosureFields, mkStaticClosure,
17

18
        allocDynClosure, allocDynClosureCmm,
19
        emitSetDynHdr
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
    ) where

#include "HsVersions.h"

import StgSyn
import CLabel
import StgCmmLayout
import StgCmmUtils
import StgCmmMonad
import StgCmmProf
import StgCmmTicky
import StgCmmGran
import StgCmmClosure
import StgCmmEnv

35
import MkGraph
36

37
import Hoopl
38
import SMRep
39
import Cmm
40
41
42
import CmmUtils
import CostCentre
import Outputable
43
import IdInfo( CafInfo(..), mayHaveCafRefs )
44
import Module
45
import DynFlags
46
import FastString( mkFastString, fsLit )
47
import Util
48

49
50
import Control.Monad (when)

51
-----------------------------------------------------------
52
--              Initialise dynamic heap objects
53
54
55
-----------------------------------------------------------

allocDynClosure
        :: CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr      -- Cost centre to stick in the object
        -> CmmExpr      -- Cost centre to blame for this alloc
                        -- (usually the same; sometimes "OVERHEAD")
        -> [(NonVoid StgArg, VirtualHpOffset)]
                        -- Fields, each paired with its offset from the
                        -- start of the object (ie the info ptr has
                        -- offset zero).  No void args in here.
        -> FCode CmmExpr
                        -- Returns Hp+n
66

67
68
69
70
71
allocDynClosureCmm
        :: CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr
        -> CmmExpr
        -> [(CmmExpr, VirtualHpOffset)]
        -> FCode CmmExpr        -- Returns Hp+n

72
-- allocDynClosure allocates the thing in the heap,
73
-- and modifies the virtual Hp to account for this.
74
75
76
-- The second return value is the graph that sets the value of the
-- returned LocalReg, which should point to the closure after executing
-- the graph.
77

78
79
80
81
82
83
84
85
86
87
-- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
-- only valid until Hp is changed.  The caller should assign the
-- result to a LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time.  This can be quite
-- significant - see test T4801.
88
89


Simon Marlow's avatar
Simon Marlow committed
90
-- Turn every (non-void) argument into its Cmm amode with getArgAmode,
-- re-pair the amodes with the unchanged offsets, and hand the work over
-- to allocDynClosureCmm.
allocDynClosure info_tbl lf_info use_cc _blame_cc args_w_offsets = do
    amodes <- mapM getArgAmode stg_args         -- No void args
    allocDynClosureCmm info_tbl lf_info use_cc _blame_cc
                       (zip amodes field_offsets)
  where
    (stg_args, field_offsets) = unzip args_w_offsets

Simon Marlow's avatar
Simon Marlow committed
97
-- Emit the stores that build a dynamic closure just above the current
-- virtual heap pointer, advance the virtual heap pointer past it, and
-- return an Hp-relative expression for the start of the new object.
-- (_blame_cc is accepted but not used here.)
allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
    old_virt_hp <- getVirtHp

    -- Say what we are about to do.
    tickyDynAlloc closure_rep lf_info
    profDynAlloc closure_rep use_cc

    -- The virtual Hp points to the last allocated word, ie one word
    -- *before* the info-ptr word of the new object, so the object's
    -- first word lives at the next virtual offset.
    let obj_offset = old_virt_hp + 1

    -- Allocate the object: header first, then the payload fields.
    obj_addr <- getHpRelOffset obj_offset
    emitComment (mkFastString "allocDynClosure")
    emitSetDynHdr obj_addr info_ptr_lit use_cc
    hpStore obj_addr field_vals field_offs

    -- Bump the virtual heap pointer by the size of the closure.
    dflags <- getDynFlags
    setVirtHp (old_virt_hp + heapClosureSize dflags closure_rep)

    -- The result is an expression relative to Hp for the object start.
    getHpRelOffset obj_offset
  where
    closure_rep              = cit_rep info_tbl
    info_ptr_lit             = CmmLit (CmmLabel (cit_lbl info_tbl))
    (field_vals, field_offs) = unzip amodes_w_offsets
127
128

-- Store the header of a dynamic closure at consecutive word offsets
-- starting at offset 0 from 'base': the info pointer first, followed by
-- whatever words dynProfHdr supplies for the given cost-centre stack.
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs = do
    dflags <- getDynFlags
    let hdr_words :: [CmmExpr]
        hdr_words = info_ptr : dynProfHdr dflags ccs
            -- ToDo: Gransim stuff
            -- ToDo: Parallel stuff
            -- No ticky header
    hpStore base hdr_words [0..]
138
139
140
141

-- For each pair (val, off) taken position-wise from the two lists, emit
-- a store of val into base[off], where off is a word offset.  Pairing
-- stops as soon as either list runs out.
hpStore :: CmmExpr -> [CmmExpr] -> [VirtualHpOffset] -> FCode ()
hpStore base vals offs = do
    dflags <- getDynFlags
    emit $ catAGraphs
        [ mkStore (cmmOffsetW dflags base off) val
        | (val, off) <- zip vals offs ]
145
146
147


-----------------------------------------------------------
148
--              Layout of static closures
149
150
151
152
153
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.

154
mkStaticClosureFields
        :: DynFlags
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields dflags info_tbl ccs caf_refs payload
  = mkStaticClosure dflags (cit_lbl info_tbl) ccs payload
                    caf_padding link_words saved_info_words
  where
    zero_word    = mkIntCLit dflags 0
    has_caf_refs = mayHaveCafRefs caf_refs

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- The static_link and saved_info fields must always be in the
    -- same place, which is why the test below is isThunkRep rather
    -- than closureUpdReqd.
    is_caf = isThunkRep (cit_rep info_tbl)

    -- A CAF carries no payload; one zero word occupies the
    -- indirectee slot instead.
    caf_padding
        | is_caf    = ASSERT(null payload) [zero_word]
        | otherwise = []

    -- Every CAF has a static link word; other closures get one only
    -- when staticClosureNeedsLink says so.
    link_words
        | is_caf                                       = [link_value]
        | staticClosureNeedsLink has_caf_refs info_tbl = [link_value]
        | otherwise                                    = []

    saved_info_words = if is_caf then [zero_word] else []

    -- For a static constructor which has NoCafRefs, the static link
    -- field is set to a non-zero value so the garbage collector will
    -- ignore it.
    link_value = mkIntCLit dflags (if has_caf_refs then 0 else 1)
201
202


203
-- Lay out the words of a static closure, in order: the info label, the
-- variable header words, the payload (each literal padded by
-- padLitToWord), then the caller-supplied padding, static link field
-- and saved info field.
mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
  = concat
      [ [CmmLabel info_lbl]
        -- variable header words
      , staticGranHdr
      , staticParHdr
      , staticProfHdr dflags ccs
      , staticTickyHdr
        -- body of the closure
      , concatMap (padLitToWord dflags) payload
      , padding
      , static_link_field
      , saved_info_field
      ]
218

219
220
-- JD: Simon had elided this padding, but without it the C back end asserts
-- failure. Maybe it's a bad assertion, and this padding is indeed unnecessary?
221
222
223
-- Follow a literal with zero-valued literals until the two together
-- span wORD_SIZE bytes.  Nothing is added when the literal is already
-- at least that wide.
padLitToWord :: DynFlags -> CmmLit -> [CmmLit]
padLitToWord dflags lit = lit : zeroFill bytes_short
  where
    lit_bytes   = widthInBytes (typeWidth (cmmLitType dflags lit))
    bytes_short = wORD_SIZE dflags - lit_bytes :: Int

    -- Cover n bytes with zero literals.  Each step emits the narrowest
    -- width that removes the lowest set bit of the remaining count,
    -- falling back to 64-bit zeros once the count is a multiple of 8.
    zeroFill :: Int -> [CmmLit]
    zeroFill n
      | n <= 0    = []
      | otherwise = CmmInt 0 zero_width : zeroFill (n - zero_bytes)
      where
        (zero_width, zero_bytes)
          | n `rem` 2 /= 0 = (W8,  1)
          | n `rem` 4 /= 0 = (W16, 2)
          | n `rem` 8 /= 0 = (W32, 4)
          | otherwise      = (W64, 8)

232
-----------------------------------------------------------
233
--              Heap overflow checking
234
235
236
237
238
239
240
241
242
243
244
245
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
246
        r = gc_1p( r )
247
248
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.
249

250
  * A series of canned entry points like
251
        gcfun_2p( f, x, y )
252
253
254
255
256
257
258
259
260
    where f is a function closure of arity 2
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
261
262
263
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
264
265
266
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
     Note the tail call to the garbage collector;
267
     it should do no register shuffling
268
269
270

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
271
272
273
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
274
          L: HpAlloc = 8
275
276
             call gc()  -- Needs an info table
             goto T }
277
278

* altHeapCheck: Immediately following an eval
279
280
  Started as
        case f x y of r { (p,q) -> rhs }
281
282
283
  (a) With a canned sequence for the results of f
       (which is the very common case since
       all boxed cases return just one pointer
284
285
286
287
288
289
           ...
           r = f( x, y )
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...
290

291
292
        L: r = gc_1p( r )
           goto K }
293

294
295
296
297
        Here, the info table needed by the call
        to gc_1p should be the *same* as the
        one for the call to f; the C-- optimiser
        spots this sharing opportunity)
298
299
300

   (b) No canned sequence for results of f
       Note second info table
301
302
303
304
305
306
           ...
           (r1,r2,r3) = call f( x, y )
        K:
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...
307

308
309
        L: call gc()    -- Extra info table here
           goto K
310
311
312

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
313
       case branch *not* following eval,
314
315
316
       or let-no-escape
  Exactly the same as the previous case:

317
318
319
320
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...
321

322
323
        L: call gc()
           goto K
324
325
326
327
328
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

329
330
331
332
333
334
entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()
entryHeapCheck cl_info nodeSet arity args code = do
    dflags    <- getDynFlags
    updfr_sz  <- getUpdFrameOff

    -- The failure path runs the GC sequence and then branches back to
    -- this label, which is emitted just before the checks.
    retry_lbl <- newLabelC
    emitLabel retry_lbl
    heapCheck True True (enterGC dflags updfr_sz <*> mkBranch retry_lbl) code
  where
    arg_exprs = [ CmmReg (CmmLocal r) | r <- args ]

    -- The closure being entered: the given local register if there is
    -- one, otherwise the label of the static closure.
    closure = maybe (CmmLit (CmmLabel (staticClosureLabel cl_info)))
                    (CmmReg . CmmLocal)
                    nodeSet

    gc_fun_entry   = CmmReg (CmmGlobal GCFun)
    gc_enter_entry = CmmReg (CmmGlobal GCEnter1)

    -- True exactly when the closure's function info carries an ArgGen
    -- argument descriptor; such functions take the slow path below.
    generic_arg_pattern = case closureFunInfo cl_info of
        Just (_, ArgGen _) -> True
        _                  -> False

    {- Thunks:          jump stg_gc_enter_1

       Function (fast): call (NativeNode) stg_gc_fun(fun, args)

       Function (slow): R1 = fun
                        call (slow) stg_gc_fun(args)
        XXX: this is a bit naughty, we should really pass R1 as an
        argument and use a special calling convention.
    -}
    enterGC dflags upd
        | arity == 0                    -- thunk
        = mkJump dflags gc_enter_entry [closure] upd

        | not generic_arg_pattern       -- fast function
        = mkJump dflags gc_fun_entry (closure : arg_exprs) upd

        | otherwise                     -- slow function
        = mkAssign nodeReg closure <*>
          mkForeignJump dflags Slow gc_fun_entry arg_exprs upd
375
376
377
378
379
380
381
382
383

{-
    -- This code is slightly outdated now and we could easily keep the above
    -- GC methods. However, there may be some performance gains to be made by
    -- using more specialised GC entry points. Since the semi generic GCFun
    -- entry needs to check the node and figure out what registers to save...
    -- if we provided and used more specialised GC entry points then these
    -- runtime decisions could be turned into compile time decisions.

384
385
    args'     = case fun of Just f  -> f : args
                            Nothing -> args
386
    arg_exprs = map (CmmReg . CmmLocal) args'
387
    gc_call updfr_sz
388
        | arity == 0 = mkJumpGC (CmmReg (CmmGlobal GCEnter1)) arg_exprs updfr_sz
389
390
391
392
393
394
        | otherwise =
            case gc_lbl args' of
                Just _lbl -> panic "StgCmmHeap.entryHeapCheck: not finished"
                            -- mkJumpGC (CmmLit (CmmLabel (mkRtsCodeLabel lbl)))
                            --         arg_exprs updfr_sz
                Nothing  -> mkCall generic_gc (GC, GC) [] [] updfr_sz
395

396
    gc_lbl :: [LocalReg] -> Maybe FastString
397
    gc_lbl [reg]
398
399
400
401
402
        | isGcPtrType ty  = Just (sLit "stg_gc_unpt_r1") -- "stg_gc_fun_1p"
        | isFloatType ty  = case width of
                              W32 -> Just (sLit "stg_gc_f1")
                              W64 -> Just (sLit "stg_gc_d1")
                              _other -> Nothing
403
404
405
        | width == wordWidth dflags = Just (mkGcLabel "stg_gc_unbx_r1")
        | width == W64              = Just (mkGcLabel "stg_gc_l1")
        | otherwise                 = Nothing
406
407
408
        where
          ty = localRegType reg
          width = typeWidth ty
409
410
411

    gc_lbl regs = gc_lbl_ptrs (map (isGcPtrType . localRegType) regs)

412
    gc_lbl_ptrs :: [Bool] -> Maybe FastString
413
    -- JD: TEMPORARY -- UNTIL THESE FUNCTIONS EXIST...
414
415
416
    --gc_lbl_ptrs [True,True]      = Just (sLit "stg_gc_fun_2p")
    --gc_lbl_ptrs [True,True,True] = Just (sLit "stg_gc_fun_3p")
    gc_lbl_ptrs _ = Nothing
417
418
419
-}


420
421
-- ------------------------------------------------------------
-- A heap/stack check in a case alternative
422

423
424
425
426
427
428
429
430
431
432
433
434
435
436
437

-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (if there are multiple alts it is
-- always a canned GC point).

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--           then we do mkJumpReturnsTo
--           else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--           then generate the continuation and do mkCallReturnsTo
--           else we do a normal call to stg_gc_noregs

438
-- Heap check for a case alternative: altOrNoEscapeHeapCheck with the
-- yield check switched off.
altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck = altOrNoEscapeHeapCheck False

-- Shared worker for altHeapCheck and noEscapeHeapCheck.  The Bool says
-- whether a yield check is wanted.  When cannedGCEntryPoint has an entry
-- point for the live registers, a fresh return point is generated and
-- the canned GC call returns to it; otherwise genericGC is used.
altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
    dflags <- getDynFlags
    maybe (genericGC checkYield code)
          (cannedWithFreshCont dflags)
          (cannedGCEntryPoint dflags regs)
  where
    cannedWithFreshCont dflags gc = do
        ret_lbl  <- newLabelC
        cont_lbl <- newLabelC
        let (ret_off, copy_in) =
                copyInOflow dflags NativeReturn (Young ret_lbl) regs
        -- Out of line: the return point runs the copy-in code and then
        -- rejoins the main code at cont_lbl.
        emitOutOfLine ret_lbl (copy_in <*> mkBranch cont_lbl)
        emitLabel cont_lbl
        cannedGCReturnsTo checkYield False gc regs ret_lbl ret_off code
453
454
455

-- Heap check for a case alternative whose return point (label and byte
-- offset) already exists.  Uses the canned GC entry point for the live
-- registers when there is one, and genericGC otherwise; no yield check
-- is requested either way.
altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code = do
    dflags <- getDynFlags
    maybe (genericGC False code)
          (\gc -> cannedGCReturnsTo False True gc regs lret off code)
          (cannedGCEntryPoint dflags regs)

-- noEscapeHeapCheck shares its implementation with altHeapCheck (which
-- is more efficient); the only difference is that it asks for the yield
-- check.  It cannot be optimized away in the non-allocating case
-- because it may occur in a loop.
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck = altOrNoEscapeHeapCheck True
466

467
-- Heap check whose failure path transfers to the canned GC entry point
-- 'gc', passing the live registers and returning to 'lret'.  The second
-- Bool says whether the continuation is already on the stack: if so the
-- transfer is built with mkJumpReturnsTo, otherwise with
-- mkCallReturnsTo.
cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code = do
    dflags   <- getDynFlags
    updfr_sz <- getUpdFrameOff
    let -- The registers are passed explicitly; see Note [stg_gc arguments]
        live_args = [ CmmReg (CmmLocal r) | r <- regs ]
        gc_graph
          | cont_on_stack
          = mkJumpReturnsTo dflags gc GC live_args lret off updfr_sz
          | otherwise
          = mkCallReturnsTo dflags gc GC live_args lret off updfr_sz (0,[])
    heapCheck False checkYield gc_graph code
481

482
483
-- Heap check that falls back on the generic collector: on failure, call
-- generic_gc (no arguments, no results) and branch back to a label
-- emitted just before the check.  The Bool is passed on to heapCheck as
-- its yield-check flag.
genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code = do
    updfr_sz  <- getUpdFrameOff
    retry_lbl <- newLabelC
    emitLabel retry_lbl
    gc_call   <- mkCall generic_gc (GC, GC) [] [] updfr_sz (0,[])
    heapCheck False checkYield (gc_call <*> mkBranch retry_lbl) code
489

490
491
-- Pick the canned GC entry point matching the live registers, if there
-- is one.  Only the empty list and singleton lists are catered for; a
-- single register is classified by its type and width.
cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs = fmap mkGcLabel entry_name
  where
    entry_name :: Maybe String
    entry_name = case regs of
        []    -> Just "stg_gc_noregs"
        [reg] -> forType (localRegType reg)
        _     -> Nothing

    forType ty
        | isGcPtrType ty        = Just "stg_gc_unpt_r1"
        | isFloatType ty        = float_entry
        | w == wordWidth dflags = Just "stg_gc_unbx_r1"
        | w == W64              = Just "stg_gc_l1"
        | otherwise             = Nothing
      where
        w = typeWidth ty
        float_entry = case w of
            W32 -> Just "stg_gc_f1"
            W64 -> Just "stg_gc_d1"
            _   -> Nothing
508

509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound to a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.

526

527
528
-- | The generic GC procedure (the @stg_gc_noregs@ entry point); it
-- takes no params and returns no results.
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"
530
531

-- | Build the expression used to call a garbage collector entry point:
-- a literal holding the Cmm code label of the given name in the RTS
-- package.
mkGcLabel :: String -> CmmExpr
mkGcLabel = CmmLit . CmmLabel . mkCmmCodeLabel rtsPackageId . fsLit
534
535

-------------------------------
536
537
-- Run 'code' under getHeapUsage, preceded by the stack/heap/yield
-- checks for the amount of heap that getHeapUsage reports.  The two
-- Bools say whether to check the stack and whether to check for
-- yielding; 'do_gc' is the graph to run when a check fails.
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code = getHeapUsage checked
  where
    -- The checks are wrapped in codeOnly so that they are emitted
    -- lazily: the conditionals on hp_hw must not be forced here, or
    -- they would cause a black hole.
    checked hp_hw = do
        codeOnly (do_checks checkStack checkYield hp_hw do_gc)
        tickyAllocHeap hp_hw
        doGranAllocate hp_hw
        setRealHp hp_hw
        code
546

547
-- Emit the checks in this order: the stack check (if requested), then
-- either the heap bump plus heap check (when some heap is needed) or
-- the yield check (when none is), and finally the out-of-line failure
-- block that all of them branch to.
do_checks :: Bool       -- Should we check the stack?
          -> Bool       -- Should we check for preemption?
          -> WordOff    -- Heap headroom
          -> CmmAGraph  -- What to do on failure
          -> FCode ()
do_checks checkStack checkYield alloc do_gc = do
  dflags <- getDynFlags
  gc_lbl <- newLabelC
  let
    -- The headroom converted from words to bytes
    bytes_wanted = mkIntExpr dflags (alloc * wORD_SIZE dflags)
    hp_after     = cmmOffsetExprB dflags (CmmReg hpReg) bytes_wanted
    hp_lim       = CmmReg (CmmGlobal HpLim)

    -- Sp overflow if (Sp - CmmHighStackMark < SpLim)
    stack_full = CmmMachOp (mo_wordULt dflags)
                   [ CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
                               [CmmReg spReg, CmmLit CmmHighStackMark]
                   , CmmReg spLimReg ]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by the time this is tested)
    -- HpLim points to the LAST WORD of valid allocation space.
    heap_full = CmmMachOp (mo_wordUGt dflags) [CmmReg hpReg, hp_lim]

    -- Yielding if HpLim == 0
    must_yield = CmmMachOp (mo_wordEq dflags)
                           [hp_lim, CmmLit (zeroCLit dflags)]

    -- On heap-check failure, HpAlloc is set to the requested byte count
    record_request = mkAssign (CmmGlobal HpAlloc) bytes_wanted

    yield_enabled = not (dopt Opt_OmitYields dflags) && checkYield

  when checkStack $
    mkCmmIfGoto stack_full gc_lbl >>= emit

  if alloc == 0
    then
      -- Nothing to allocate, so only the yield check can be needed
      when yield_enabled $
        mkCmmIfGoto must_yield gc_lbl >>= emit
    else do
      emitAssign hpReg hp_after
      mkCmmIfThen heap_full (record_request <*> mkBranch gc_lbl) >>= emit

  -- do_gc is expected to jump back somewhere
  emitOutOfLine gc_lbl do_gc

590
591
592
593
594
595
                -- Test for stack pointer exhaustion, then
                -- bump heap pointer, and test for heap exhaustion
                -- Note that we don't move the heap pointer unless the
                -- stack check succeeds.  Otherwise we might end up
                -- with slop at the end of the current block, which can
                -- confuse the LDV profiler.
596
597
598
599
600
601
602
603
604
605

{-

{- Unboxed tuple alternatives and let-no-escapes (the two most annoying
constructs to generate code for!)  For unboxed tuple returns, there
are an arbitrary number of possibly unboxed return values, some of
which will be in registers, and the others will be on the stack.  We
always organise the stack-resident fields into pointers &
non-pointers, and pass the number of each to the heap check code. -}

606
607
608
609
610
611
612
unbxTupleHeapCheck
        :: [(Id, GlobalReg)]    -- Live registers
        -> WordOff      -- no. of stack slots containing ptrs
        -> WordOff      -- no. of stack slots containing nonptrs
        -> CmmAGraph    -- code to insert in the failure path
        -> FCode ()
        -> FCode ()
613
614

unbxTupleHeapCheck regs ptrs nptrs fail_code code
615
  -- We can't manage more than 255 pointers/non-pointers
616
617
  -- in a generic heap check.
  | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
618
  | otherwise
619
  = initHeapUsage $ \ hpHw -> do
620
621
622
623
624
        { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
                                    full_fail_code rts_label
                        ; tickyAllocHeap hpHw }
        ; setRealHp hpHw
        ; code }
625
626
  where
    full_fail_code  = fail_code `plusStmts` oneStmt assign_liveness
627
628
629
630
    assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9))      -- Ho ho ho!
                                (CmmLit (mkWordCLit liveness))
    liveness        = mkRegLiveness regs ptrs nptrs
    rts_label       = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut")))
631
632


633
{- Old Gransim com -- I have no idea whether it still makes sense (SLPJ Sep07)
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
For GrAnSim the code for doing a heap check and doing a context switch
has been separated. Especially, the HEAP_CHK macro only performs a
heap check. THREAD_CONTEXT_SWITCH should be used for doing a context
switch. GRAN_FETCH_AND_RESCHEDULE must be put at the beginning of
every slow entry code in order to simulate the fetching of
closures. If fetching is necessary (i.e. current closure is not local)
then an automatic context switch is done. -}


When failing a check, we save a return address on the stack and
jump to a pre-compiled code fragment that saves the live registers
and returns to the scheduler.

The return address in most cases will be the beginning of the basic
block in which the check resides, since we need to perform the check
again on re-entry because someone else might have stolen the resource
in the meantime.

%************************************************************************
653
%*                                                                      *
654
     Generic Heap/Stack Checks - used in the RTS
655
%*                                                                      *
656
657
658
659
660
661
662
663
%************************************************************************

\begin{code}
hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
hpChkGen bytes liveness reentry
  = do_checks' bytes True assigns stg_gc_gen
  where
    assigns = mkStmts [
664
665
666
                CmmAssign (CmmGlobal (VanillaReg 9))  liveness,
                CmmAssign (CmmGlobal (VanillaReg 10)) reentry
                ]
667
668
669
670
671
672
673
674
675
676
677
678

-- a heap check where R1 points to the closure to enter on return, and
-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP).
hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> FCode ()
hpChkNodePointsAssignSp0 bytes sp0
  = do_checks' bytes True assign stg_gc_enter1
  where assign = oneStmt (CmmStore (CmmReg spReg) sp0)

stg_gc_gen    = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_gen")))
\end{code}

-}