StgCmmHeap.hs 24.5 KB
Newer Older
1 2 3 4 5 6 7 8 9
-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmHeap (
10 11
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset, hpRel,
12

13
        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
14 15
        heapStackCheckGen,
        entryHeapCheck',
16

17
        mkStaticClosureFields, mkStaticClosure,
18

19
        allocDynClosure, allocDynClosureCmm,
20
        emitSetDynHdr
21 22 23 24 25 26 27 28 29
    ) where

#include "HsVersions.h"

import StgSyn
import CLabel
import StgCmmLayout
import StgCmmUtils
import StgCmmMonad
30
import StgCmmProf (profDynAlloc, dynProfHdr, staticProfHdr)
31 32 33 34
import StgCmmTicky
import StgCmmClosure
import StgCmmEnv

35
import MkGraph
36

37
import Hoopl
38
import SMRep
39
import Cmm
40 41
import CmmUtils
import CostCentre
42
import IdInfo( CafInfo(..), mayHaveCafRefs )
43
import Id ( Id )
44
import Module
45
import DynFlags
46
import FastString( mkFastString, fsLit )
47

48
import Control.Monad (when)
49
import Data.Maybe (isJust)
50

51
-----------------------------------------------------------
52
--              Initialise dynamic heap objects
53 54 55
-----------------------------------------------------------

-- | Allocate a dynamic closure whose payload is given as STG arguments.
--
-- The arguments are converted to Cmm expressions with 'getArgAmode' and
-- everything else is delegated to 'allocDynClosureCmm'.
--
-- The result is an expression relative to Hp, and hence is only valid
-- until Hp is changed.  The caller should assign the result to a
-- LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time.  This can be quite
-- significant - see test T4801.
allocDynClosure
        :: Maybe Id
        -> CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode CmmExpr -- returns Hp+n
allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
    -- No void args, so every argument has an amode
    amodes <- mapM (getArgAmode . fst) args_w_offsets
    allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc
                       (zip amodes (map snd args_w_offsets))

-- | Allocate a dynamic closure whose payload is already given as Cmm
-- expressions paired with their offsets from the start of the object.
--
-- Emits the stores for the header and the payload, advances the
-- virtual heap pointer by the size of the closure, and returns an
-- Hp-relative expression for the first word of the new object.
allocDynClosureCmm
        :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, ByteOff)]
        -> FCode CmmExpr -- returns Hp+n
allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
    old_virt_hp <- getVirtHp

    -- Say what we are about to do
    tickyDynAlloc mb_id closure_rep lf_info
    profDynAlloc closure_rep use_cc

    -- virtHp points to the last allocated word, ie 1 *before* the
    -- info-ptr word of the new object, so the object starts one past it.
    let obj_offset = old_virt_hp + 1

    -- Allocate the object: header first, then the payload
    obj_base <- getHpRelOffset obj_offset
    emitComment $ mkFastString "allocDynClosure"
    emitSetDynHdr obj_base (CmmLit (CmmLabel (cit_lbl info_tbl))) use_cc
    hpStore obj_base (map fst amodes_w_offsets) (map snd amodes_w_offsets)

    -- Bump the virtual heap pointer
    dflags <- getDynFlags
    setVirtHp (old_virt_hp + heapClosureSize dflags closure_rep)

    getHpRelOffset obj_offset
  where
    closure_rep = cit_rep info_tbl
128 129

-- | Store the header of a dynamically allocated object at @base@: the
-- info pointer first, followed by whatever 'dynProfHdr' supplies for
-- the cost-centre stack @ccs@.  The header words are written at
-- offsets 0, wORD_SIZE, 2*wORD_SIZE, ... from @base@.
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs = do
    dflags <- getDynFlags
    -- ToDo: Parallel stuff
    -- No ticky header
    let hdr_words = info_ptr : dynProfHdr dflags ccs
        word_offs = [0, wORD_SIZE dflags ..]
    hpStore base hdr_words word_offs
138

139
-- | Emit one store per (value, offset) pair, writing each value to
-- @base@ plus the given byte offset.  Values and offsets are paired
-- positionally; surplus elements of the longer list are ignored.
hpStore :: CmmExpr -> [CmmExpr] -> [ByteOff] -> FCode ()
hpStore base vals offs = do
    dflags <- getDynFlags
    emit $ catAGraphs
        [ mkStore (cmmOffsetB dflags base off) val
        | (val, off) <- zip vals offs
        ]
145 146 147


-----------------------------------------------------------
148
--              Layout of static closures
149 150 151 152 153
-----------------------------------------------------------

-- | Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.
mkStaticClosureFields
        :: DynFlags
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields dflags info_tbl ccs caf_refs payload
  = mkStaticClosure dflags (cit_lbl info_tbl) ccs payload
                    caf_padding link_field saved_info
  where
    zero_word    = mkIntCLit dflags 0
    has_caf_refs = mayHaveCafRefs caf_refs

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:
    is_caf = isThunkRep (cit_rep info_tbl)

    -- A CAF with an empty payload still needs a word for the indirectee
    caf_padding = [ zero_word | is_caf && null payload ]

    link_field
        | is_caf || staticClosureNeedsLink has_caf_refs info_tbl
        = [link_value]
        | otherwise
        = []

    saved_info = [ zero_word | is_caf ]

    -- For a static constructor which has NoCafRefs, we set the
    -- static link field to a non-zero value so the garbage
    -- collector will ignore it.
    link_value
        | has_caf_refs = zero_word
        | otherwise    = mkIntCLit dflags 1  -- No CAF refs
201 202


203
-- | Lay out the literals of a static closure, in order: the info
-- label, the profiling header from 'staticProfHdr', the payload (each
-- literal padded by 'padLitToWord'), then the caller-supplied padding,
-- static link field and saved info field.
mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
  = concat
      [ [CmmLabel info_lbl]
      , staticProfHdr dflags ccs
      , concatMap (padLitToWord dflags) payload
      , padding
      , static_link_field
      , saved_info_field
      ]

213 214
-- | Follow a literal with zero-valued integer literals so that the
-- literal plus its padding occupies a whole word ('wORD_SIZE' bytes).
-- A literal that already fills a word (or more) gets no padding.
--
-- JD: Simon had elided this padding, but without it the C back end asserts
-- failure. Maybe it's a bad assertion, and this padding is indeed unnecessary?
padLitToWord :: DynFlags -> CmmLit -> [CmmLit]
padLitToWord dflags lit = lit : zeros (wORD_SIZE dflags - lit_bytes)
  where
    lit_bytes = widthInBytes (typeWidth (cmmLitType dflags lit))

    -- Emit the smallest zero literal that leaves the remaining byte
    -- count a multiple of the next larger width, and repeat.
    zeros :: Int -> [CmmLit]
    zeros n
      | n <= 0         = []
      | odd n          = CmmInt 0 W8  : zeros (n - 1)
      | n `rem` 4 /= 0 = CmmInt 0 W16 : zeros (n - 2)
      | n `rem` 8 /= 0 = CmmInt 0 W32 : zeros (n - 4)
      | otherwise      = CmmInt 0 W64 : zeros (n - 8)

226
-----------------------------------------------------------
227
--              Heap overflow checking
228 229 230 231 232 233 234 235 236 237 238 239
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
240
        r = gc_1p( r )
241 242
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.
243

244
  * A series of canned entry points like
245
        gcfun_2p( f, x, y )
246 247 248 249 250 251 252 253 254
    where f is a function closure of arity 2
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
255 256 257
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
258 259 260
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
     Note the tail call to the garbage collector;
261
     it should do no register shuffling
262 263 264

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
265 266 267
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
268
          L: HpAlloc = 8
269 270
             call gc()  -- Needs an info table
             goto T }
271 272

* altHeapCheck: Immediately following an eval
273 274
  Started as
        case f x y of r { (p,q) -> rhs }
275 276 277
  (a) With a canned sequence for the results of f
       (which is the very common case since
       all boxed cases return just one pointer
278 279 280 281 282 283
           ...
           r = f( x, y )
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...
284

285 286
        L: r = gc_1p( r )
           goto K }
287

288 289 290 291
        Here, the info table needed by the call
        to gc_1p should be the *same* as the
        one for the call to f; the C-- optimiser
        spots this sharing opportunity)
292 293 294

   (b) No canned sequence for results of f
       Note second info table
295 296 297 298 299 300
           ...
           (r1,r2,r3) = call f( x, y )
        K:
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...
301

302 303
        L: call gc()    -- Extra info table here
           goto K
304 305 306

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
307
       case branch *not* following eval,
308 309 310
       or let-no-escape
  Exactly the same as the previous case:

311 312 313 314
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...
315

316 317
        L: call gc()
           goto K
318 319 320 321 322
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

-- | Wrapper around 'entryHeapCheck'' that derives its first two
-- arguments from the closure info: the closure pointer is the given
-- local register if there is one, otherwise the static closure label;
-- the function counts as a known pattern unless its argument
-- descriptor is 'ArgGen'.
entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()
entryHeapCheck cl_info nodeSet arity args code
  = entryHeapCheck' known_fun closure_ptr arity args code
  where
    closure_ptr = maybe (CmmLit (CmmLabel (staticClosureLabel cl_info)))
                        (CmmReg . CmmLocal)
                        nodeSet

    known_fun
      | Just (_, ArgGen _) <- closureFunInfo cl_info = False
      | otherwise                                    = True

-- | lower-level version for CmmParse
--
-- Emits a fresh label followed by the heap/stack check.  On failure
-- the code jumps to a GC entry point and then branches back to that
-- label:
--
--   Thunks (arity 0):  jump stg_gc_enter_1
--   Function (fast):   call (NativeNode) stg_gc_fun(fun, args)
--   Function (slow):   call (slow) stg_gc_fun(fun, args)
entryHeapCheck' :: Bool           -- is a known function pattern
                -> CmmExpr        -- expression for the closure pointer
                -> Int            -- Arity -- not same as len args b/c of voids
                -> [LocalReg]     -- Non-void args (empty for thunk)
                -> FCode ()
                -> FCode ()
entryHeapCheck' is_fastf node arity args code = do
    dflags   <- getDynFlags
    updfr_sz <- getUpdFrameOff

    loop_id <- newLabelC
    emitLabel loop_id

    let fun_and_args = node : map (CmmReg . CmmLocal) args
        gc_fun       = CmmReg (CmmGlobal GCFun)
        gc_enter1    = CmmReg (CmmGlobal GCEnter1)

        gc_jump
          | arity == 0 = mkJump dflags NativeNodeCall gc_enter1 [node] updfr_sz
          | is_fastf   = mkJump dflags NativeNodeCall gc_fun fun_and_args updfr_sz
          | otherwise  = mkJump dflags Slow gc_fun fun_and_args updfr_sz

    heapCheck True True (gc_jump <*> mkBranch loop_id) code
377

378 379
-- ------------------------------------------------------------
-- A heap/stack check in a case alternative
380

381 382 383 384 385 386 387 388 389 390 391 392 393 394 395

-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (if there are multiple alts it is
-- always a canned GC point).

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--           then we do mkJumpReturnsTo
--           else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--           then generate the continuation and do mkCallReturnsTo
--           else we do a normal call to stg_gc_noregs

396
-- | Heap check for a case alternative: 'altOrNoEscapeHeapCheck' with
-- the yield check switched off.
altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck = altOrNoEscapeHeapCheck False

-- | Shared implementation of 'altHeapCheck' and 'noEscapeHeapCheck'.
--
-- If the live registers match a canned GC entry point, a return
-- continuation is generated out of line (copy the results back in,
-- then branch to a label placed just before the check) and the canned
-- routine is called to return there.  Otherwise the generic GC path
-- is used.
altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
    dflags <- getDynFlags
    maybe (genericGC checkYield code)
          (viaCannedGC dflags)
          (cannedGCEntryPoint dflags regs)
  where
    viaCannedGC dflags gc = do
        lret <- newLabelC
        let (off, _, copyin) =
                copyInOflow dflags NativeReturn (Young lret) regs []
        lcont <- newLabelC
        emitOutOfLine lret (copyin <*> mkBranch lcont)
        emitLabel lcont
        cannedGCReturnsTo checkYield False gc regs lret off code
411 412 413

-- | Heap check for a case alternative when the caller supplies the
-- return label and offset.  Uses a canned GC entry point, with the
-- continuation treated as already on the stack, when one matches the
-- registers; otherwise falls back to the generic GC path.  No yield
-- check is requested in either case.
altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code = do
    dflags <- getDynFlags
    maybe (genericGC False code)
          (\gc -> cannedGCReturnsTo False True gc regs lret off code)
          (cannedGCEntryPoint dflags regs)

-- | Heap check for a let-no-escape body.
--
-- noEscapeHeapCheck is implemented identically to altHeapCheck (which
-- is more efficient), but cannot be optimized away in the non-allocating
-- case because it may occur in a loop.  Hence the yield check is
-- switched on here.
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck = altOrNoEscapeHeapCheck True
424

425
-- | Heap check (no stack check) whose failure path transfers control
-- to the canned GC routine @gc@, arranging for it to return to @lret@.
-- When @cont_on_stack@ is set the transfer is a jump
-- ('mkJumpReturnsTo'); otherwise it is a call ('mkCallReturnsTo').
cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code = do
    dflags   <- getDynFlags
    updfr_sz <- getUpdFrameOff
    let -- See Note [stg_gc arguments]
        live_exprs = map (CmmReg . CmmLocal) regs

        -- NB. we use the NativeReturn convention for passing arguments
        -- to the canned heap-check routines, because we are in a case
        -- alternative and hence the [LocalReg] was passed to us in the
        -- NativeReturn convention.
        gc_transfer
          | cont_on_stack
          = mkJumpReturnsTo dflags gc NativeReturn live_exprs lret off updfr_sz
          | otherwise
          = mkCallReturnsTo dflags gc NativeReturn live_exprs lret off updfr_sz []
    heapCheck False checkYield gc_transfer code
445

446 447
-- | Heap check (no stack check) that falls back on the generic GC
-- entry point: on failure, call 'generic_gc' and then branch back to
-- a retry label emitted just before the check.
genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code = do
    updfr_sz  <- getUpdFrameOff
    retry_lbl <- newLabelC
    emitLabel retry_lbl
    gc_call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
    heapCheck False checkYield (gc_call <*> mkBranch retry_lbl) code
453

454 455
-- | Pick the canned GC entry point matching the types of the given
-- registers, if there is one:
--
--   * no registers                        -> stg_gc_noregs
--   * one GC pointer                      -> stg_gc_unpt_r1
--   * one float (32 / 64 bits)            -> stg_gc_f1 / stg_gc_d1
--   * one word-sized non-pointer          -> stg_gc_unbx_r1
--   * one 64-bit non-pointer (otherwise)  -> stg_gc_l1
--   * two, three or four GC pointers      -> stg_gc_pp / ppp / pppp
--
-- Any other combination yields 'Nothing'.
cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs
  = fmap mkGcLabel (entryName (map localRegType regs))
  where
    entryName []   = Just "stg_gc_noregs"
    entryName [ty] = singleReg ty
    entryName tys
      | all isGcPtrType tys = lookup (length tys) allPtrEntries
      | otherwise           = Nothing

    allPtrEntries =
      [ (2, "stg_gc_pp")
      , (3, "stg_gc_ppp")
      , (4, "stg_gc_pppp")
      ]

    singleReg ty
      | isGcPtrType ty            = Just "stg_gc_unpt_r1"
      | isFloatType ty            = floatEntry
      | width == wordWidth dflags = Just "stg_gc_unbx_r1"
      | width == W64              = Just "stg_gc_l1"
      | otherwise                 = Nothing
      where
        width = typeWidth ty

        floatEntry = case width of
          W32 -> Just "stg_gc_f1"
          W64 -> Just "stg_gc_d1"
          _   -> Nothing
483

484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound to a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.

501

502 503
-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"

-- | Build the expression used to call a garbage collector entry
-- point: a literal code label with the given name in the RTS package.
mkGcLabel :: String -> CmmExpr
mkGcLabel = CmmLit . CmmLabel . mkCmmCodeLabel rtsPackageId . fsLit
509 510

-------------------------------
511 512
-- | Wrap @code@ in a heap check (and optionally a stack check).
--
-- @hpHw@ is the value 'getHeapUsage' passes to the continuation; it
-- is scaled by 'wORD_SIZE' to obtain the number of bytes to check
-- for, and no heap check is requested when it is not positive.
-- @do_gc@ is the graph to run when a check fails.
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code
  = getHeapUsage $ \ hpHw -> do
      -- Emit heap checks, but be sure to do it lazily so
      -- that the conditionals on hpHw don't cause a black hole
      dflags <- getDynFlags
      let heap_bytes
            | hpHw > 0  = Just (mkIntExpr dflags (hpHw * wORD_SIZE dflags))
            | otherwise = Nothing
          stack_mark
            | checkStack = Just (CmmLit CmmHighStackMark)
            | otherwise  = Nothing
      codeOnly (do_checks stack_mark checkYield heap_bytes do_gc)
      tickyAllocHeap True hpHw
      setRealHp hpHw
      code
526

527 528 529 530 531 532
-- | Emit stack and/or heap checks with explicitly supplied limits
-- (either may be absent).  On failure the generic GC entry point is
-- called and control branches back to a retry label emitted just
-- before the checks.  No yield check is requested.
heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
heapStackCheckGen stk_hwm mb_bytes = do
    updfr_sz  <- getUpdFrameOff
    retry_lbl <- newLabelC
    emitLabel retry_lbl
    gc_call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
    do_checks stk_hwm False mb_bytes (gc_call <*> mkBranch retry_lbl)
534

535 536
-- Note [Single stack check]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
537 538 539
-- When compiling a function we can determine how much stack space it
-- will use. We therefore need to perform only a single stack check at
-- the beginning of a function to see if we have enough stack space.
540
--
541 542 543 544 545
-- The check boils down to comparing Sp-N with SpLim, where N is the
-- amount of stack space needed (see Note [Stack usage] below).  *BUT*
-- at this stage of the pipeline we are not supposed to refer to Sp
-- itself, because the stack is not yet manifest, so we don't quite
-- know where Sp is pointing.
546 547 548 549 550

-- So instead of referring directly to Sp - as we used to do in the
-- past - the code generator uses (old + 0) in the stack check. That
-- is the address of the first word of the old area, so if we add N
-- we'll get the address of highest used word.
551
--
552 553 554 555 556 557 558 559 560 561 562 563
-- This makes the check robust.  For example, while we need to perform
-- only one stack check for each function, we could in theory place
-- more stack checks later in the function. They would be redundant,
-- but not incorrect (in a sense that they should not change program
-- behaviour). We need to make sure however that a stack check
-- inserted after incrementing the stack pointer checks for a
-- respectively smaller stack space. This would not be the case if the
-- code generator produced direct references to Sp. By referencing
-- (old + 0) we make sure that we always check for a correct amount of
-- stack: when converting (old + 0) to Sp the stack layout phase takes
-- into account changes already made to stack pointer. The idea for
-- this change came from observations made while debugging #8275.
564

565 566 567 568 569 570
-- Note [Stack usage]
-- ~~~~~~~~~~~~~~~~~~
-- At the moment we convert from STG to Cmm we don't know N, the
-- number of bytes of stack that the function will use, so we use a
-- special late-bound CmmLit, namely
--       CmmHighStackMark
571
-- to stand for the number of bytes needed. When the stack is made
572 573 574
-- manifest, the number of bytes needed is calculated, and used to
-- replace occurrences of CmmHighStackMark
--
575
-- The (Maybe CmmExpr) passed to do_checks is usually
576 577 578 579 580
--     Just (CmmLit CmmHighStackMark)
-- but can also (in certain hand-written RTS functions)
--     Just (CmmLit 8)  or some other fixed value
-- If it is Nothing, we don't generate a stack check at all.

581
-- | Emit the stack check, the heap check and/or the yield check, all
-- sharing a single out-of-line failure block that runs @do_gc@.
--
-- Test for stack pointer exhaustion, then bump heap pointer, and test
-- for heap exhaustion.  Note that we don't move the heap pointer
-- unless the stack check succeeds.  Otherwise we might end up with
-- slop at the end of the current block, which can confuse the LDV
-- profiler.
--
-- When no heap headroom is requested there is no heap check; instead,
-- if @checkYield@ is set and Opt_OmitYields is off, a test for
-- HpLim == 0 is emitted that also jumps to the failure block.
do_checks :: Maybe CmmExpr    -- Should we check the stack?
                              -- See Note [Stack usage]
          -> Bool             -- Should we check for preemption?
          -> Maybe CmmExpr    -- Heap headroom (bytes)
          -> CmmAGraph        -- What to do on failure
          -> FCode ()
do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
  dflags <- getDynFlags
  gc_id <- newLabelC

  let
    -- Sp overflow if ((old + 0) - CmmHighStack < SpLim)
    -- At the beginning of a function old + 0 = Sp
    -- See Note [Single stack check]
    sp_oflo sp_hwm =
         CmmMachOp (mo_wordULt dflags)
                  [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
                             [CmmStackSlot Old 0, sp_hwm],
                   CmmReg spLimReg]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp (mo_wordUGt dflags)
                  [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]

  case mb_stk_hwm of
    Nothing -> return ()
    Just stk_hwm -> tickyStackCheck >> (emit =<< mkCmmIfGoto (sp_oflo stk_hwm) gc_id)

  -- Emit new label that might potentially be a header
  -- of a self-recursive tail call.
  -- See Note [Self-recursive loop header].
  self_loop_info <- getSelfLoop
  case self_loop_info of
    Just (_, loop_header_id, _)
        | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
    _otherwise -> return ()

  -- Match on the headroom directly rather than binding it with a
  -- partial pattern guarded by a separate isJust test.
  case mb_alloc_lit of
    Just alloc_lit -> do
      let bump_hp = cmmOffsetExprB dflags (CmmReg hpReg) alloc_lit
          alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit
      tickyHeapCheck
      emitAssign hpReg bump_hp
      emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)

    Nothing ->
      when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
         -- Yielding if HpLim == 0
         let yielding = CmmMachOp (mo_wordEq dflags)
                                  [CmmReg (CmmGlobal HpLim),
                                   CmmLit (zeroCLit dflags)]
         emit =<< mkCmmIfGoto yielding gc_id

  emitOutOfLine gc_id $
     do_gc -- this is expected to jump back somewhere
648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671

-- Note [Self-recursive loop header]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive loop header is required by loopification optimization (See
-- Note [Self-recursive tail calls] in StgCmmExpr). We emit it if:
--
--  1. There is information about self-loop in the FCode environment. We don't
--     check the binder (first component of the self_loop_info) because we are
--     certain that if the self-loop info is present then we are compiling the
--     binder body. Reason: the only possible way to get here with the
--     self_loop_info present is from closureCodeBody.
--
--  2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
--     to preempt the heap check (see #367 for motivation behind this check). It
--     is True for heap checks placed at the entry to a function and
--     let-no-escape heap checks but false for other heap checks (eg. in case
--     alternatives or created from hand-written high-level Cmm). The second
--     check (isJust mb_stk_hwm) is true for heap checks at the entry to a
--     function and some heap checks created in hand-written Cmm. Otherwise it
--     is Nothing. In other words the only situation when both conditions are
--     true is when compiling stack and heap checks at the entry to a
--     function. This is the only situation when we want to emit a self-loop
--     label.