StgCmmHeap.hs 20.6 KB
Newer Older
1 2 3 4 5 6 7 8 9
-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmHeap (
10 11
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset, hpRel,
12

13
        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
14 15
        heapStackCheckGen,
        entryHeapCheck',
16

17 18
        mkVirtHeapOffsets, mkVirtConstrOffsets,
        mkStaticClosureFields, mkStaticClosure,
19

20
        allocDynClosure, allocDynClosureCmm,
21
        emitSetDynHdr
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
    ) where

#include "HsVersions.h"

import StgSyn
import CLabel
import StgCmmLayout
import StgCmmUtils
import StgCmmMonad
import StgCmmProf
import StgCmmTicky
import StgCmmGran
import StgCmmClosure
import StgCmmEnv

37
import MkGraph
38

39
import Hoopl
40
import SMRep
41
import Cmm
42 43
import CmmUtils
import CostCentre
44
import IdInfo( CafInfo(..), mayHaveCafRefs )
45
import Module
46
import DynFlags
47
import FastString( mkFastString, fsLit )
48

49
import Control.Monad (when)
50
import Data.Maybe (isJust)
51

52
-----------------------------------------------------------
53
--              Initialise dynamic heap objects
54 55 56
-----------------------------------------------------------

-- | Allocate a dynamic closure in the heap from STG-level arguments and
-- advance the virtual heap pointer to account for it.
--
-- Each argument is turned into a Cmm expression with 'getArgAmode'; the
-- actual allocation is then done by 'allocDynClosureCmm'.
--
-- The result is an Hp-relative 'CmmExpr' addressing the new object, and
-- hence is only valid until Hp is changed.  The caller should assign the
-- result to a LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time.  This can be quite
-- significant - see test T4801.
allocDynClosure
        :: CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode CmmExpr -- returns Hp+n
allocDynClosure info_tbl lf_info use_cc _blame_cc args_w_offsets = do
    let (stg_args, field_offs) = unzip args_w_offsets
    field_amodes <- mapM getArgAmode stg_args    -- No void args
    allocDynClosureCmm info_tbl lf_info use_cc _blame_cc
                       (zip field_amodes field_offs)

-- | As 'allocDynClosure', but the fields are already Cmm expressions.
--
-- Emits the stores for the header and the payload, bumps the virtual
-- heap pointer by the size of the closure, and returns the Hp-relative
-- address of the object's first (info pointer) word.  The "blame" cost
-- centre argument is accepted but not used here.
allocDynClosureCmm
        :: CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, VirtualHpOffset)]
        -> FCode CmmExpr -- returns Hp+n
allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
    virt_hp <- getVirtHp

    -- SAY WHAT WE ARE ABOUT TO DO
    let rep      = cit_rep info_tbl
        info_lbl = cit_lbl info_tbl
    tickyDynAlloc (toRednCountsLbl info_lbl) rep lf_info
    profDynAlloc rep use_cc

    -- FIND THE OFFSET OF THE INFO-PTR WORD
    -- virtHp points to the last allocated word, ie one word *before*
    -- the info-ptr word of the new object, so the object starts at
    -- virt_hp + 1.
    let info_offset = virt_hp + 1
        info_ptr    = CmmLit (CmmLabel info_lbl)

    -- ALLOCATE THE OBJECT
    base <- getHpRelOffset info_offset
    emitComment (mkFastString "allocDynClosure")
    emitSetDynHdr base info_ptr use_cc
    uncurry (hpStore base) (unzip amodes_w_offsets)

    -- BUMP THE VIRTUAL HEAP POINTER
    dflags <- getDynFlags
    setVirtHp (virt_hp + heapClosureSize dflags rep)

    -- The virtual Hp has moved, so recompute the object's address
    -- rather than reusing 'base'.
    getHpRelOffset info_offset
128 129

-- | Write the header of a dynamically allocated object starting at
-- @base@: the info pointer goes at word offset 0, followed at
-- consecutive word offsets by the words that 'dynProfHdr' yields for
-- @ccs@.
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs = do
    dflags <- getDynFlags
    let hdr_words :: [CmmExpr]
        hdr_words = info_ptr : dynProfHdr dflags ccs
        -- ToDo: Gransim stuff
        -- ToDo: Parallel stuff
        -- No ticky header
    hpStore base hdr_words [0..]
139 140 141 142

-- | Emit one store per (value, offset) pair, writing each value to the
-- word at @base[offset]@.  The two lists are paired positionally; any
-- surplus elements of the longer list are ignored.
hpStore :: CmmExpr -> [CmmExpr] -> [VirtualHpOffset] -> FCode ()
hpStore base vals offs = do
    dflags <- getDynFlags
    emit $ catAGraphs
        [ mkStore (cmmOffsetW dflags base off) val
        | (val, off) <- zip vals offs
        ]
146 147 148


-----------------------------------------------------------
149
--              Layout of static closures
150 151 152 153 154
-----------------------------------------------------------

-- | Lay out a static closure: work out any extra padding needed for
-- CAFs and whether a static link field is required, then hand the
-- pieces to 'mkStaticClosure'.
mkStaticClosureFields
        :: DynFlags
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields dflags info_tbl ccs caf_refs payload
  = mkStaticClosure dflags (cit_lbl info_tbl) ccs payload
        caf_padding link_words saved_info_words
  where
    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:
    thunk_like   = isThunkRep (cit_rep info_tbl)
    has_caf_refs = mayHaveCafRefs caf_refs
    zero_word    = mkIntCLit dflags 0

    -- One zero word when a thunk-like closure has an empty payload.
    caf_padding = [ zero_word | thunk_like, null payload ]

    link_words =
        [ link_value
        | thunk_like || staticClosureNeedsLink has_caf_refs info_tbl
        ]

    saved_info_words = [ zero_word | thunk_like ]

    -- For a static constructor which has NoCafRefs, we set the
    -- static link field to a non-zero value so the garbage
    -- collector will ignore it.
    link_value
        | has_caf_refs = zero_word
        | otherwise    = mkIntCLit dflags 1  -- No CAF refs
202 203


204
-- | Concatenate the words of a static closure in layout order: info
-- label, variable header words (gran, par and profiling headers), the
-- payload with each literal padded by 'padLitToWord', then the
-- caller-supplied padding, static link field and saved info field.
mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
  = concat
      [ [CmmLabel info_lbl]
      , staticGranHdr
      , staticParHdr
      , staticProfHdr dflags ccs
      , concatMap (padLitToWord dflags) payload
      , padding
      , static_link_field
      , saved_info_field
      ]
218

219 220
-- JD: Simon had elided this padding, but without it the C back end asserts
-- failure. Maybe it's a bad assertion, and this padding is indeed unnecessary?

-- | Follow a literal with zero-valued integer literals whose widths add
-- up to the difference between the word size and the literal's own
-- width.  Nothing is added when that difference is zero or negative.
padLitToWord :: DynFlags -> CmmLit -> [CmmLit]
padLitToWord dflags lit = lit : zeroFill shortfall
  where
    lit_bytes = widthInBytes (typeWidth (cmmLitType dflags lit))
    shortfall = wORD_SIZE dflags - lit_bytes :: Int

    -- Emit the narrowest filler that brings the remaining byte count
    -- to the next power-of-two multiple, then continue with the rest.
    zeroFill n
      | n <= 0         = []
      | n `rem` 2 /= 0 = filler W8  1
      | n `rem` 4 /= 0 = filler W16 2
      | n `rem` 8 /= 0 = filler W32 4
      | otherwise      = filler W64 8
      where
        filler w bytes = CmmInt 0 w : zeroFill (n - bytes)

232
-----------------------------------------------------------
233
--              Heap overflow checking
234 235 236 237 238 239 240 241 242 243 244 245
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
246
        r = gc_1p( r )
247 248
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.
249

250
  * A series of canned entry points like
251
        gcfun_2p( f, x, y )
252 253 254 255 256 257 258 259 260
    where f is a function closure of arity 2
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
261 262 263
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
264 265 266
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
     Note the tail call to the garbage collector;
267
     it should do no register shuffling
268 269 270

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
271 272 273
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
274
          L: HpAlloc = 8
275 276
             call gc()  -- Needs an info table
             goto T }
277 278

* altHeapCheck: Immediately following an eval
279 280
  Started as
        case f x y of r { (p,q) -> rhs }
281 282 283
  (a) With a canned sequence for the results of f
       (which is the very common case since
       all boxed cases return just one pointer
284 285 286 287 288 289
           ...
           r = f( x, y )
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...
290

291 292
        L: r = gc_1p( r )
           goto K }
293

294 295 296 297
        Here, the info table needed by the call
        to gc_1p should be the *same* as the
        one for the call to f; the C-- optimiser
        spots this sharing opportunity)
298 299 300

   (b) No canned sequence for results of f
       Note second info table
301 302 303 304 305 306
           ...
           (r1,r2,r3) = call f( x, y )
        K:
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...
307

308 309
        L: call gc()    -- Extra info table here
           goto K
310 311 312

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
313
       case branch *not* following eval,
314 315 316
       or let-no-escape
  Exactly the same as the previous case:

317 318 319 320
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...
321

322 323
        L: call gc()
           goto K
324 325 326 327 328
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

329 330 331 332 333 334
-- | Heap/stack check at a function or thunk entry point.  Derives the
-- closure-pointer expression and the "known argument pattern" flag from
-- the closure info, then defers to 'entryHeapCheck''.
entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()
entryHeapCheck cl_info nodeSet arity args code
  = entryHeapCheck' known_pattern closure_ptr arity args code
  where
    -- Use the supplied register if there is one, otherwise refer to
    -- the closure through its static label.
    closure_ptr    = maybe static_closure (CmmReg . CmmLocal) nodeSet
    static_closure = CmmLit (CmmLabel (staticClosureLabel cl_info))

    -- Everything except a function with a generic (ArgGen) argument
    -- descriptor counts as a known pattern.
    known_pattern = case closureFunInfo cl_info of
        Just (_, ArgGen _) -> False
        _                  -> True

-- | Lower-level version of 'entryHeapCheck' for CmmParse: the caller
-- says directly whether this is a known function pattern and supplies
-- the expression for the closure pointer.
--
-- A label is emitted before the check; on failure the GC is entered by
-- a jump, followed by a branch back to that label.
entryHeapCheck' :: Bool           -- is a known function pattern
                -> CmmExpr        -- expression for the closure pointer
                -> Int            -- Arity -- not same as len args b/c of voids
                -> [LocalReg]     -- Non-void args (empty for thunk)
                -> FCode ()
                -> FCode ()
entryHeapCheck' is_fastf node arity args code = do
    dflags   <- getDynFlags
    updfr_sz <- getUpdFrameOff

    loop_id <- newLabelC
    emitLabel loop_id

    let arg_exprs = map (CmmReg . CmmLocal) args
        fun_args  = node : arg_exprs
        gcEntry g = CmmReg (CmmGlobal g)

        {- Thunks:          jump stg_gc_enter_1

           Function (fast): call (NativeNode) stg_gc_fun(fun, args)

           Function (slow): call (slow) stg_gc_fun(fun, args)
        -}
        gc_jump
          | arity == 0    -- a thunk
          = mkJump dflags NativeNodeCall (gcEntry GCEnter1) [node] updfr_sz
          | is_fastf
          = mkJump dflags NativeNodeCall (gcEntry GCFun) fun_args updfr_sz
          | otherwise
          = mkJump dflags Slow (gcEntry GCFun) fun_args updfr_sz

    heapCheck True True (gc_jump <*> mkBranch loop_id) code
383

384 385
-- ------------------------------------------------------------
-- A heap/stack check in a case alternative
386

387 388 389 390 391 392 393 394 395 396 397 398 399 400 401

-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (if there are multiple alts it is
-- always a canned GC point).

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--           then we do mkJumpReturnsTo
--           else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--           then generate the continuation and do mkCallReturnsTo
--           else we do a normal call to stg_gc_noregs

402
-- | Heap check for a case alternative, with the yield (preemption)
-- check switched off.
altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck = altOrNoEscapeHeapCheck False

-- | Shared implementation of 'altHeapCheck' and 'noEscapeHeapCheck';
-- the flag says whether to also check for a yield.
--
-- If the live registers match a canned GC entry point, a fresh return
-- point is generated out of line (copy the results in, then branch to
-- the continuation label emitted here) and the canned routine is used.
-- Otherwise the generic GC path is taken.
altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
    dflags <- getDynFlags
    maybe (genericGC checkYield code)
          (viaCanned dflags)
          (cannedGCEntryPoint dflags regs)
  where
    viaCanned dflags gc = do
        lret <- newLabelC
        let (off, _, copyin) =
                copyInOflow dflags NativeReturn (Young lret) regs []
        lcont <- newLabelC
        emitOutOfLine lret (copyin <*> mkBranch lcont)
        emitLabel lcont
        cannedGCReturnsTo checkYield False gc regs lret off code
417 418 419

-- | Heap check for a case alternative when the caller already has a
-- return point (@lret@ with offset @off@).  Uses a canned GC entry
-- point, jumping to it with the continuation treated as already on the
-- stack, when the registers match one; otherwise falls back to the
-- generic GC path.  No yield check is made.
altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code = do
    dflags <- getDynFlags
    maybe (genericGC False code)
          (\gc -> cannedGCReturnsTo False True gc regs lret off code)
          (cannedGCEntryPoint dflags regs)

-- | Implemented in the same way as 'altHeapCheck' (which is more
-- efficient), except that the yield check is enabled: this check
-- cannot be optimized away in the non-allocating case because it may
-- occur in a loop.
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck = altOrNoEscapeHeapCheck True
430

431
-- | Heap check whose failure path transfers to a canned GC routine
-- that returns to @lret@.  When @cont_on_stack@ is set the routine is
-- entered with 'mkJumpReturnsTo', otherwise with 'mkCallReturnsTo'.
cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code = do
    dflags   <- getDynFlags
    updfr_sz <- getUpdFrameOff
    let -- The registers are passed explicitly: see Note [stg_gc arguments]
        live_exprs = map (CmmReg . CmmLocal) regs

        -- NB. we use the NativeReturn convention for passing arguments
        -- to the canned heap-check routines, because we are in a case
        -- alternative and hence the [LocalReg] was passed to us in the
        -- NativeReturn convention.
        gc_transfer =
          if cont_on_stack
            then mkJumpReturnsTo dflags gc NativeReturn live_exprs
                                 lret off updfr_sz
            else mkCallReturnsTo dflags gc NativeReturn live_exprs
                                 lret off updfr_sz []
    heapCheck False checkYield gc_transfer code
451

452 453
-- | Heap check using the generic GC procedure: emit a retry label,
-- and on failure call 'generic_gc' and branch back to that label.
-- The flag says whether to also check for a yield; the stack is not
-- checked.
genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code = do
    updfr_sz  <- getUpdFrameOff
    retry_lbl <- newLabelC
    emitLabel retry_lbl
    gc_call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
    heapCheck False checkYield (gc_call <*> mkBranch retry_lbl) code
459

460 461
-- | Pick the canned GC entry point, if there is one, that matches the
-- types of the given registers:
--
--   * no registers;
--   * a single register holding a GC pointer, a 32- or 64-bit float,
--     a word-width value or a 64-bit value;
--   * two, three or four registers that all hold GC pointers.
--
-- Any other combination yields 'Nothing'.
cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs
  = fmap mkGcLabel (entryName (map localRegType regs))
  where
    entryName []   = Just "stg_gc_noregs"
    entryName [ty] = singleReg ty
    entryName tys
      | all isGcPtrType tys = lookup (length tys) all_ptr_entries
      | otherwise           = Nothing

    -- Entry points for registers that are all GC pointers, by count.
    all_ptr_entries =
      [ (2, "stg_gc_pp")
      , (3, "stg_gc_ppp")
      , (4, "stg_gc_pppp")
      ]

    singleReg ty
      | isGcPtrType ty = Just "stg_gc_unpt_r1"
      | isFloatType ty = case width of
                           W32 -> Just "stg_gc_f1"
                           W64 -> Just "stg_gc_d1"
                           _   -> Nothing
      | width == wordWidth dflags = Just "stg_gc_unbx_r1"
      | width == W64              = Just "stg_gc_l1"
      | otherwise                 = Nothing
      where
        width = typeWidth ty
489

490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506
-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound to a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.

507

508 509
-- | Expression for the generic GC procedure, which takes no
-- parameters and returns no results.
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"
511 512

-- | Build the expression used to call a garbage collector entry point:
-- a literal code label with the given name in the RTS package.
mkGcLabel :: String -> CmmExpr
mkGcLabel = CmmLit . CmmLabel . mkCmmCodeLabel rtsPackageId . fsLit
515 516

-------------------------------
517 518
-- | Run @code@ behind a heap check sized from its heap usage.
--
-- The first flag asks for a stack check as well, the second for a
-- yield check; @do_gc@ is the graph to run when a check fails.  After
-- the checks have been emitted the allocation is recorded and the real
-- heap pointer is set before @code@ itself runs.
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code
  = getHeapUsage $ \ hpHw -> do
        -- Emit heap checks, but be sure to do it lazily so
        -- that the conditionals on hpHw don't cause a black hole
        dflags <- getDynFlags
        let -- hpHw is multiplied by the word size to give bytes
            alloc_bytes =
              if hpHw > 0
                then Just (mkIntExpr dflags (hpHw * wORD_SIZE dflags))
                else Nothing
            stack_hwm =
              if checkStack
                then Just (CmmLit CmmHighStackMark)
                else Nothing
        codeOnly (do_checks stack_hwm checkYield alloc_bytes do_gc)
        tickyAllocHeap True hpHw
        doGranAllocate hpHw
        setRealHp hpHw
        code
533

534 535 536 537 538 539 540 541 542
-- | Emit stack and/or heap checks with explicitly supplied limits,
-- using the generic GC procedure on failure: a retry label is emitted
-- first, and the failure path calls 'generic_gc' and branches back to
-- it.  No yield check is made.
heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
heapStackCheckGen stk_hwm mb_bytes = do
    updfr_sz  <- getUpdFrameOff
    retry_lbl <- newLabelC
    emitLabel retry_lbl
    gc_call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
    do_checks stk_hwm False mb_bytes (gc_call <*> mkBranch retry_lbl)

-- | Emit the requested checks, all of which branch to a single
-- out-of-line block containing @do_gc@ on failure.
--
-- Test for stack pointer exhaustion, then bump heap pointer, and test
-- for heap exhaustion.  Note that we don't move the heap pointer unless
-- the stack check succeeds.  Otherwise we might end up with slop at the
-- end of the current block, which can confuse the LDV profiler.
--
-- The yield check is only emitted when there is no heap check, yields
-- have been asked for, and Opt_OmitYields is off.
do_checks :: Maybe CmmExpr    -- Should we check the stack?
          -> Bool       -- Should we check for preemption?
          -> Maybe CmmExpr    -- Heap headroom (bytes)
          -> CmmAGraph  -- What to do on failure
          -> FCode ()
do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
  dflags <- getDynFlags
  gc_id <- newLabelC

  let
    -- Lazy pattern: only demanded on the heap-check path below, which
    -- is guarded by 'isJust mb_alloc_lit'.
    Just alloc_bytes = mb_alloc_lit

    hp_lim = CmmReg (CmmGlobal HpLim)

    bumped_hp    = cmmOffsetExprB dflags (CmmReg hpReg) alloc_bytes
    set_hp_alloc = mkAssign (CmmGlobal HpAlloc) alloc_bytes

    -- Sp overflow if (Sp - CmmHighStack < SpLim)
    sp_overflow sp_hwm =
         CmmMachOp (mo_wordULt dflags)
                   [ CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
                               [CmmReg spReg, sp_hwm]
                   , CmmReg spLimReg ]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    hp_overflow = CmmMachOp (mo_wordUGt dflags) [CmmReg hpReg, hp_lim]

    -- Yielding if HpLim == 0
    yielding = CmmMachOp (mo_wordEq dflags)
                         [hp_lim, CmmLit (zeroCLit dflags)]

    stack_check hwm = do
      tickyStackCheck
      emit =<< mkCmmIfGoto (sp_overflow hwm) gc_id

  maybe (return ()) stack_check mb_stk_hwm

  if isJust mb_alloc_lit
    then do
      tickyHeapCheck
      emitAssign hpReg bumped_hp
      emit =<< mkCmmIfThen hp_overflow (set_hp_alloc <*> mkBranch gc_id)
    else
      when (checkYield && not (gopt Opt_OmitYields dflags)) $
        emit =<< mkCmmIfGoto yielding gc_id

  -- do_gc is expected to jump back somewhere
  emitOutOfLine gc_id do_gc