StgCmmHeap.hs 25.6 KB
Newer Older
1 2
{-# LANGUAGE CPP #-}

3 4 5 6 7 8 9 10 11
-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmHeap (
12
        getVirtHp, setVirtHp, setRealHp,
13
        getHpRelOffset,
14

15
        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
16 17
        heapStackCheckGen,
        entryHeapCheck',
18

19
        mkStaticClosureFields, mkStaticClosure,
20

21
        allocDynClosure, allocDynClosureCmm, allocHeapClosure,
22
        emitSetDynHdr
23 24 25 26
    ) where

#include "HsVersions.h"

27 28
import GhcPrelude hiding ((<*>))

29 30 31 32 33
import StgSyn
import CLabel
import StgCmmLayout
import StgCmmUtils
import StgCmmMonad
34
import StgCmmProf (profDynAlloc, dynProfHdr, staticProfHdr)
35 36 37 38
import StgCmmTicky
import StgCmmClosure
import StgCmmEnv

39
import MkGraph
40

41
import Hoopl.Label
42
import SMRep
43
import BlockId
44
import Cmm
45 46
import CmmUtils
import CostCentre
47
import IdInfo( CafInfo(..), mayHaveCafRefs )
48
import Id ( Id )
49
import Module
50
import DynFlags
51
import FastString( mkFastString, fsLit )
52
import Panic( sorry )
53

54
import Control.Monad (when)
55
import Data.Maybe (isJust)
56

57
-----------------------------------------------------------
58
--              Initialise dynamic heap objects
59 60 61
-----------------------------------------------------------

allocDynClosure
62 63
        :: Maybe Id
        -> CmmInfoTable
Simon Marlow's avatar
Simon Marlow committed
64
        -> LambdaFormInfo
65 66 67 68 69 70 71
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
72
        -> FCode CmmExpr -- returns Hp+n
73

74
allocDynClosureCmm
75
        :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
76
        -> [(CmmExpr, ByteOff)]
77 78
        -> FCode CmmExpr -- returns Hp+n

79
-- allocDynClosure allocates the thing in the heap,
80
-- and modifies the virtual Hp to account for this.
81 82 83
-- The second return value is the graph that sets the value of the
-- returned LocalReg, which should point to the closure after executing
-- the graph.
84

85 86 87 88 89 90 91 92 93 94
-- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
-- only valid until Hp is changed.  The caller should assign the
-- result to a LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time.  This can be quite
-- significant - see test T4801.
95 96


97 98 99 100 101
-- Translate each (non-void) STG argument into its Cmm amode, keeping the
-- offset it was paired with, then hand over to 'allocDynClosureCmm'.
allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
  amodes_w_offsets <- mapM arg_to_amode args_w_offsets     -- No void args
  allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets
  where
    -- Look up the amode of one argument; its offset is passed through.
    arg_to_amode (arg, off) = do
      amode <- getArgAmode arg
      return (amode, off)
102 103


104 105 106 107 108 109
-- Say what we are about to do (ticky-ticky accounting), then allocate the
-- object with 'allocHeapClosure'.  '_blame_cc' is not used by this function.
allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets
  = tickyDynAlloc mb_id closure_rep lf_info
      >> allocHeapClosure closure_rep info_ptr_expr use_cc amodes_w_offsets
  where
    closure_rep   = cit_rep info_tbl
    -- The info pointer is the label of the closure's info table
    info_ptr_expr = CmmLit (CmmLabel (cit_lbl info_tbl))
110 111


112 113 114 115 116
-- | Low-level heap object allocation.
--
-- Emits the stores that write the header and payload of a new object at
-- the next free virtual heap offset, advances the virtual heap pointer
-- past the object, and returns an expression for the object's address.
allocHeapClosure
  :: SMRep                            -- ^ representation of the object
  -> CmmExpr                          -- ^ info pointer
  -> CmmExpr                          -- ^ cost centre
  -> [(CmmExpr,ByteOff)]              -- ^ payload
  -> FCode CmmExpr                    -- ^ returns the address of the object
allocHeapClosure rep info_ptr use_cc payload = do
  profDynAlloc rep use_cc

  old_virt_hp <- getVirtHp

  -- virtHp points to the last allocated word, ie 1 *before* the
  -- info-ptr word of the new object; hence the +1 for the
  -- VirtualHpOffset of the object's first word.
  obj_addr <- getHpRelOffset (old_virt_hp + 1)
  emitComment (mkFastString "allocHeapClosure")

  -- Header first, then fill in the fields
  emitSetDynHdr obj_addr info_ptr use_cc
  hpStore obj_addr payload

  -- Bump the virtual heap pointer by the size of the object in words
  dflags <- getDynFlags
  setVirtHp (old_virt_hp + heapClosureSizeW dflags rep)

  return obj_addr
143

144 145

-- | Emit the stores that write a dynamic closure header at @base@: the
-- info pointer at byte offset 0, followed by the entries of 'dynProfHdr'
-- for @ccs@, each one 'wORD_SIZE' bytes after the previous one.
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs = do
  dflags <- getDynFlags
  let header_words = info_ptr : dynProfHdr dflags ccs
        -- ToDo: Parallel stuff
        -- No ticky header
      word_offsets = [0, wORD_SIZE dflags ..]
  hpStore base (zip header_words word_offsets)
154 155

-- Store each item (expr,off) in base[off], in list order
hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
hpStore base vals = do
  dflags <- getDynFlags
  mapM_ (\(val, off) -> emitStore (cmmOffsetB dflags base off) val) vals
161 162

-----------------------------------------------------------
163
--              Layout of static closures
164 165 166 167 168
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.
mkStaticClosureFields
        :: DynFlags
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields dflags info_tbl ccs caf_refs payload
  = mkStaticClosure dflags (cit_lbl info_tbl) ccs payload
                    caf_padding link_words saved_info_words
  where
    zero_word    = mkIntCLit dflags 0
    has_caf_refs = mayHaveCafRefs caf_refs

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:
    is_caf = isThunkRep (cit_rep info_tbl)

    -- A CAF with no payload still gets one zero word (the indirectee slot
    -- in the layout above)
    caf_padding = [ zero_word | is_caf && null payload ]

    link_words
        = [ link_value
          | is_caf || staticClosureNeedsLink has_caf_refs info_tbl ]

    saved_info_words = [ zero_word | is_caf ]

        -- For a static constructor which has NoCafRefs, we set the
        -- static link field to a non-zero value so the garbage
        -- collector will ignore it.
    link_value
        | has_caf_refs = zero_word
        | otherwise    = mkIntCLit dflags 3  -- No CAF refs
                                      -- See Note [STATIC_LINK fields]
                                      -- in rts/sm/Storage.h
218

219
-- Assemble the literals of a static closure in layout order: info label,
-- profiling header, the payload (each literal padded by 'padLitToWord'),
-- then the padding, static-link and saved-info fields given by the caller.
mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
  = concat
      [ [CmmLabel info_lbl]
      , staticProfHdr dflags ccs
      , concatMap (padLitToWord dflags) payload
      , padding
      , static_link_field
      , saved_info_field
      ]

229
-- JD: Simon had elided this padding, but without it the C back end asserts
-- failure. Maybe it's a bad assertion, and this padding is indeed unnecessary?
--
-- Follow the literal with zero literals so that together they occupy
-- 'wORD_SIZE' bytes.  Nothing is added when the literal is already at
-- least a word wide.
padLitToWord :: DynFlags -> CmmLit -> [CmmLit]
padLitToWord dflags lit = lit : zeroes slack
  where
    lit_bytes = widthInBytes (typeWidth (cmmLitType dflags lit))
    slack     = wORD_SIZE dflags - lit_bytes :: Int

    -- Emit the narrowest zero that brings the remaining byte count to a
    -- multiple of the next power of two, e.g. 7 bytes -> W8, W16, W32.
    zeroes n
      | n <= 0         = []
      | odd n          = zero_of W8  1
      | n `rem` 4 /= 0 = zero_of W16 2
      | n `rem` 8 /= 0 = zero_of W32 4
      | otherwise      = zero_of W64 8
      where
        zero_of width bytes = CmmInt 0 width : zeroes (n - bytes)

242
-----------------------------------------------------------
243
--              Heap overflow checking
244 245 246 247 248 249 250 251 252 253 254 255
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
256
        r = gc_1p( r )
257 258
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.
259

260
  * A series of canned entry points like
261
        gcfun_2p( f, x, y )
262 263 264 265 266 267 268 269 270
    where f is a function closure of arity 2
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
271 272 273
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
274 275 276
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
     Note the tail call to the garbage collector;
277
     it should do no register shuffling
278 279 280

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
281 282 283
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
284
          L: HpAlloc = 8
285 286
             call gc()  -- Needs an info table
             goto T }
287 288

* altHeapCheck: Immediately following an eval
289 290
  Started as
        case f x y of r { (p,q) -> rhs }
291 292 293
  (a) With a canned sequence for the results of f
       (which is the very common case since
       all boxed cases return just one pointer
294 295 296 297 298 299
           ...
           r = f( x, y )
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...
300

301 302
        L: r = gc_1p( r )
           goto K }
303

304 305 306 307
        Here, the info table needed by the call
        to gc_1p should be the *same* as the
        one for the call to f; the C-- optimiser
        spots this sharing opportunity)
308 309 310

   (b) No canned sequence for results of f
       Note second info table
311 312 313 314 315 316
           ...
           (r1,r2,r3) = call f( x, y )
        K:
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...
317

318 319
        L: call gc()    -- Extra info table here
           goto K
320 321 322

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
323
       case branch *not* following eval,
324 325 326
       or let-no-escape
  Exactly the same as the previous case:

327 328 329 330
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...
331

332 333
        L: call gc()
           goto K
334 335 336 337 338
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

339 340 341 342 343 344
-- | Heap/stack check at a function or thunk entry point.  Works out the
-- closure-pointer expression and whether the closure follows a known
-- argument pattern, then defers to 'entryHeapCheck''.
entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()

entryHeapCheck cl_info nodeSet arity args code
  = entryHeapCheck' known_pattern closure_ptr arity args code
  where
    -- Use the node register when there is one, otherwise refer to the
    -- static closure by its label
    closure_ptr    = maybe static_closure (CmmReg . CmmLocal) nodeSet
    static_closure = CmmLit (CmmLabel (staticClosureLabel cl_info))

    -- Only a function whose argument descriptor is ArgGen is treated as
    -- not matching a known pattern
    known_pattern
      | Just (_, ArgGen _) <- closureFunInfo cl_info = False
      | otherwise                                    = True

-- | lower-level version for CmmParse
--
-- Emits a loop-header label followed by a heap check (with stack check
-- and yield check enabled) around @code@.  On failure the GC is entered
-- as described below, after which control branches back to the label:
--
--   Thunks (arity 0): jump stg_gc_enter_1
--   Function (fast):  call (NativeNode) stg_gc_fun(fun, args)
--   Function (slow):  call (slow) stg_gc_fun(fun, args)
entryHeapCheck' :: Bool           -- is a known function pattern
                -> CmmExpr        -- expression for the closure pointer
                -> Int            -- Arity -- not same as len args b/c of voids
                -> [LocalReg]     -- Non-void args (empty for thunk)
                -> FCode ()
                -> FCode ()
entryHeapCheck' is_fastf node arity args code = do
  dflags   <- getDynFlags
  updfr_sz <- getUpdFrameOff

  loop_id <- newBlockId
  emitLabel loop_id

  let gc_fun_args = node : map (CmmReg . CmmLocal) args
      gc_fun      = CmmReg (CmmGlobal GCFun)
      gc_enter1   = CmmReg (CmmGlobal GCEnter1)

      gc_call
        | arity == 0 = mkJump dflags NativeNodeCall gc_enter1 [node] updfr_sz
        | is_fastf   = mkJump dflags NativeNodeCall gc_fun gc_fun_args updfr_sz
        | otherwise  = mkJump dflags Slow gc_fun gc_fun_args updfr_sz

  heapCheck True True (gc_call <*> mkBranch loop_id) code
393

394 395
-- ------------------------------------------------------------
-- A heap/stack check in a case alternative
396

397 398 399 400 401 402 403 404 405 406 407 408 409 410 411

-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (if there are multiple alts it is
-- always a canned GC point).

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--           then we do mkJumpReturnsTo
--           else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--           then generate the continuation and do mkCallReturnsTo
--           else we do a normal call to stg_gc_noregs

412
-- | Heap check for a case alternative: 'altOrNoEscapeHeapCheck' with the
-- yield check switched off.
altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck = altOrNoEscapeHeapCheck False

-- | Shared worker for 'altHeapCheck' and 'noEscapeHeapCheck'.  When the
-- live registers match a canned GC entry point, an out-of-line return
-- block is generated for it; otherwise falls back to 'genericGC'.
altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
    dflags <- getDynFlags
    maybe (genericGC checkYield code)
          (via_canned_gc dflags)
          (cannedGCEntryPoint dflags regs)
  where
    via_canned_gc dflags gc_entry = do
      ret_lbl  <- newBlockId
      cont_lbl <- newBlockId
      let (off, _, copyin) =
            copyInOflow dflags NativeReturn (Young ret_lbl) regs []
      tscope <- getTickScope
      -- The return block copies the results back in and then continues
      -- at cont_lbl, which is emitted just before the heap check
      emitOutOfLine ret_lbl (copyin <*> mkBranch cont_lbl, tscope)
      emitLabel cont_lbl
      cannedGCReturnsTo checkYield False gc_entry regs ret_lbl off code
428 429 430

-- | Like 'altHeapCheck', but for the case where the caller already has a
-- return label and offset; the canned GC call is told that the
-- continuation is on the stack.  No yield check is requested.
altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code = do
  dflags <- getDynFlags
  maybe (genericGC False code)
        (\gc -> cannedGCReturnsTo False True gc regs lret off code)
        (cannedGCEntryPoint dflags regs)

-- noEscapeHeapCheck shares its implementation with altHeapCheck (which
-- is more efficient), but cannot be optimized away in the non-allocating
-- case because it may occur in a loop; hence the yield check is enabled.
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck = altOrNoEscapeHeapCheck True
441

442
-- | Heap check (no stack check) whose failure path enters the canned GC
-- routine @gc@, passing the live registers and returning to @lret@.
cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code = do
  dflags   <- getDynFlags
  updfr_sz <- getUpdFrameOff
  let
      -- NB. we use the NativeReturn convention for passing arguments
      -- to the canned heap-check routines, because we are in a case
      -- alternative and hence the [LocalReg] was passed to us in the
      -- NativeReturn convention.
      gc_call
        | cont_on_stack
        = mkJumpReturnsTo dflags gc NativeReturn reg_exprs lret off updfr_sz
        | otherwise
        = mkCallReturnsTo dflags gc NativeReturn reg_exprs lret off updfr_sz []
  heapCheck False checkYield gc_call code
  where
    -- Note [stg_gc arguments]
    reg_exprs = map (CmmReg . CmmLocal) regs
462

463 464
-- | Heap check (no stack check) whose failure path calls 'generic_gc'
-- and then branches back to a retry label emitted before the check.
genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code = do
  updfr_sz  <- getUpdFrameOff
  retry_lbl <- newBlockId
  emitLabel retry_lbl
  gc_call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
  let on_failure = gc_call <*> mkBranch retry_lbl
  heapCheck False checkYield on_failure code
470

471 472
-- | Pick the canned GC entry point, if there is one, for the given live
-- registers.  Covered: no registers; a single register that is a GC
-- pointer, a 32/64-bit float, a word-width value or a 64-bit value; and
-- two to four registers that are all GC pointers.
cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs
  = fmap mkGcLabel (entry_name (map localRegType regs))
  where
    entry_name []   = Just "stg_gc_noregs"
    entry_name [ty] = single_reg ty
    entry_name tys
      | all isGcPtrType tys = lookup (length tys) all_ptr_entries
      | otherwise           = Nothing

    -- Entry points for N registers that all hold GC pointers
    all_ptr_entries :: [(Int, String)]
    all_ptr_entries =
      [ (2, "stg_gc_pp")
      , (3, "stg_gc_ppp")
      , (4, "stg_gc_pppp")
      ]

    single_reg ty
      | isGcPtrType ty            = Just "stg_gc_unpt_r1"
      | isFloatType ty            = float_entry
      | width == wordWidth dflags = Just "stg_gc_unbx_r1"
      | width == W64              = Just "stg_gc_l1"
      | otherwise                 = Nothing
      where
        width = typeWidth ty
        float_entry = case width of
          W32 -> Just "stg_gc_f1"
          W64 -> Just "stg_gc_d1"
          _   -> Nothing
500

501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517
-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound to a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.

518

519 520
-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"

-- | Build the expression used to call a garbage collector entry point:
-- a Cmm code label with the given name in the RTS unit.
mkGcLabel :: String -> CmmExpr
mkGcLabel = CmmLit . CmmLabel . mkCmmCodeLabel rtsUnitId . fsLit
526 527

-------------------------------
528 529
-- | Wrap @code@ in a heap check sized by the heap usage of @code@ itself.
--
--   * 1st argument: also emit a stack check (against 'CmmHighStackMark')
--   * 2nd argument: passed to 'do_checks' as its yield/preemption flag
--   * 3rd argument: graph to run when a check fails
--
-- The heap high-water mark @hpHw@ supplied by 'getHeapUsage' is measured
-- in words; it is converted to bytes for the allocation literal and for
-- the error message below.
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code
  = getHeapUsage $ \ hpHw ->
    -- Emit heap checks, but be sure to do it lazily so
    -- that the conditionals on hpHw don't cause a black hole
    do  { dflags <- getDynFlags
        ; let mb_alloc_bytes
                 | hpHw > mBLOCK_SIZE_W = sorry $ unlines
                    [" Trying to allocate more than "++show mBLOCK_SIZE_B++" bytes.",
                     "",
                     "This is currently not possible due to a limitation of GHC's code generator.",
                     "See https://gitlab.haskell.org/ghc/ghc/issues/4505 for details.",
                     "Suggestion: read data from a file instead of having large static data",
                     "structures in code."]
                 | hpHw > 0  = Just (mkIntExpr dflags (hpHw * wORD_SIZE dflags))
                 | otherwise = Nothing
                 where
                   -- Limit in words (same unit as hpHw) ...
                   mBLOCK_SIZE_W = bLOCKS_PER_MBLOCK dflags * bLOCK_SIZE_W dflags
                   -- ... and in bytes, which is what the message claims to show
                   mBLOCK_SIZE_B = mBLOCK_SIZE_W * wORD_SIZE dflags
              stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
                      | otherwise  = Nothing
        ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
        ; tickyAllocHeap True hpHw
        ; setRealHp hpHw
        ; code }
551

552 553 554
-- | Emit explicit stack and/or heap checks (no yield check) for the given
-- stack high-water mark and byte count.  On failure 'generic_gc' is
-- called and control branches back to a retry label emitted before the
-- checks.
heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
heapStackCheckGen stk_hwm mb_bytes = do
  updfr_sz  <- getUpdFrameOff
  retry_lbl <- newBlockId
  emitLabel retry_lbl
  gc_call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
  let on_failure = gc_call <*> mkBranch retry_lbl
  do_checks stk_hwm False mb_bytes on_failure
559

560 561
-- Note [Single stack check]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
562 563 564
-- When compiling a function we can determine how much stack space it
-- will use. We therefore need to perform only a single stack check at
-- the beginning of a function to see if we have enough stack space.
565
--
566 567 568 569 570
-- The check boils down to comparing Sp-N with SpLim, where N is the
-- amount of stack space needed (see Note [Stack usage] below).  *BUT*
-- at this stage of the pipeline we are not supposed to refer to Sp
-- itself, because the stack is not yet manifest, so we don't quite
-- know where Sp is pointing.
571 572 573 574 575

-- So instead of referring directly to Sp - as we used to do in the
-- past - the code generator uses (old + 0) in the stack check. That
-- is the address of the first word of the old area, so if we add N
-- we'll get the address of highest used word.
576
--
577 578 579 580 581 582 583 584 585 586 587 588
-- This makes the check robust.  For example, while we need to perform
-- only one stack check for each function, we could in theory place
-- more stack checks later in the function. They would be redundant,
-- but not incorrect (in a sense that they should not change program
-- behaviour). We need to make sure however that a stack check
-- inserted after incrementing the stack pointer checks for a
-- respectively smaller stack space. This would not be the case if the
-- code generator produced direct references to Sp. By referencing
-- (old + 0) we make sure that we always check for a correct amount of
-- stack: when converting (old + 0) to Sp the stack layout phase takes
-- into account changes already made to stack pointer. The idea for
-- this change came from observations made while debugging #8275.
589

590 591 592 593 594 595
-- Note [Stack usage]
-- ~~~~~~~~~~~~~~~~~~
-- At the moment we convert from STG to Cmm we don't know N, the
-- number of bytes of stack that the function will use, so we use a
-- special late-bound CmmLit, namely
--       CmmHighStackMark
596
-- to stand for the number of bytes needed. When the stack is made
597 598 599
-- manifest, the number of bytes needed is calculated, and used to
-- replace occurrences of CmmHighStackMark
--
600
-- The (Maybe CmmExpr) passed to do_checks is usually
601 602 603 604 605
--     Just (CmmLit CmmHighStackMark)
-- but can also (in certain hand-written RTS functions)
--     Just (CmmLit 8)  or some other fixed value
-- If it is Nothing, we don't generate a stack check at all.

606
-- | Emit the stack check, the heap check and/or the yield check, all of
-- which branch to one out-of-line block that runs @do_gc@.
--
-- Order of emitted code: stack check (if requested), then the
-- self-recursive loop header (see Note [Self-recursive loop header]),
-- then either the heap bump-and-check (when a heap amount is given) or,
-- failing that, the yield check.
do_checks :: Maybe CmmExpr    -- Should we check the stack?
                              -- See Note [Stack usage]
          -> Bool             -- Should we check for preemption?
          -> Maybe CmmExpr    -- Heap headroom (bytes)
          -> CmmAGraph        -- What to do on failure
          -> FCode ()
do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
  dflags <- getDynFlags
  gc_id <- newBlockId

  let
    -- Sp overflow if ((old + 0) - CmmHighStack < SpLim)
    -- At the beginning of a function old + 0 = Sp
    -- See Note [Single stack check]
    sp_oflo sp_hwm =
         CmmMachOp (mo_wordULt dflags)
                  [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
                             [CmmStackSlot Old 0, sp_hwm],
                   CmmReg spLimReg]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp (mo_wordUGt dflags)
                  [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]

  case mb_stk_hwm of
    Nothing -> return ()
    Just stk_hwm -> tickyStackCheck
      >> (emit =<< mkCmmIfGoto' (sp_oflo stk_hwm) gc_id (Just False) )

  -- Emit new label that might potentially be a header
  -- of a self-recursive tail call.
  -- See Note [Self-recursive loop header].
  self_loop_info <- getSelfLoop
  case self_loop_info of
    Just (_, loop_header_id, _)
        | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
    _otherwise -> return ()

  -- Matching on mb_alloc_lit here (rather than binding it with a lazy
  -- 'Just' pattern and testing 'isJust') means the heap-check code can
  -- only be built when the allocation literal actually exists.
  case mb_alloc_lit of
    Just alloc_lit -> do
      let bump_hp = cmmOffsetExprB dflags (CmmReg hpReg) alloc_lit
          alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit
      tickyHeapCheck
      emitAssign hpReg bump_hp
      emit =<< mkCmmIfThen' hp_oflo (alloc_n <*> mkBranch gc_id) (Just False)
    Nothing ->
      when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
         -- Yielding if HpLim == 0
         let yielding = CmmMachOp (mo_wordEq dflags)
                                  [CmmReg (CmmGlobal HpLim),
                                   CmmLit (zeroCLit dflags)]
         emit =<< mkCmmIfGoto' yielding gc_id (Just False)

  tscope <- getTickScope
  emitOutOfLine gc_id
   (do_gc, tscope) -- this is expected to jump back somewhere
668

669 670 671 672 673 674
                -- Test for stack pointer exhaustion, then
                -- bump heap pointer, and test for heap exhaustion
                -- Note that we don't move the heap pointer unless the
                -- stack check succeeds.  Otherwise we might end up
                -- with slop at the end of the current block, which can
                -- confuse the LDV profiler.
675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698

-- Note [Self-recursive loop header]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive loop header is required by loopification optimization (See
-- Note [Self-recursive tail calls] in StgCmmExpr). We emit it if:
--
--  1. There is information about self-loop in the FCode environment. We don't
--     check the binder (first component of the self_loop_info) because we are
--     certain that if the self-loop info is present then we are compiling the
--     binder body. Reason: the only possible way to get here with the
--     self_loop_info present is from closureCodeBody.
--
--  2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
--     to preempt the heap check (see #367 for motivation behind this check). It
--     is True for heap checks placed at the entry to a function and
--     let-no-escape heap checks but false for other heap checks (eg. in case
--     alternatives or created from hand-written high-level Cmm). The second
--     check (isJust mb_stk_hwm) is true for heap checks at the entry to a
--     function and some heap checks created in hand-written Cmm. Otherwise it
--     is Nothing. In other words the only situation when both conditions are
--     true is when compiling stack and heap checks at the entry to a
--     function. This is the only situation when we want to emit a self-loop
--     label.