StgCmmHeap.hs 24 KB
Newer Older
1 2 3 4 5 6 7 8 9
-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmHeap (
10 11
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset, hpRel,
12

13
        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
14 15
        heapStackCheckGen,
        entryHeapCheck',
16

17 18
        mkVirtHeapOffsets, mkVirtConstrOffsets,
        mkStaticClosureFields, mkStaticClosure,
19

20
        allocDynClosure, allocDynClosureCmm,
21
        emitSetDynHdr
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
    ) where

#include "HsVersions.h"

import StgSyn
import CLabel
import StgCmmLayout
import StgCmmUtils
import StgCmmMonad
import StgCmmProf
import StgCmmTicky
import StgCmmGran
import StgCmmClosure
import StgCmmEnv

37
import MkGraph
38

39
import Hoopl
40
import SMRep
41
import Cmm
42 43 44
import CmmUtils
import CostCentre
import Outputable
45
import IdInfo( CafInfo(..), mayHaveCafRefs )
46
import Module
47
import DynFlags
48
import FastString( mkFastString, fsLit )
49
import Util
50

51
import Control.Monad (when)
52
import Data.Maybe (isJust)
53

54
-----------------------------------------------------------
55
--              Initialise dynamic heap objects
56 57 58
-----------------------------------------------------------

allocDynClosure
Simon Marlow's avatar
Simon Marlow committed
59 60
        :: CmmInfoTable
        -> LambdaFormInfo
61 62 63 64 65 66 67
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
68
        -> FCode CmmExpr -- returns Hp+n
69

70 71 72 73 74
allocDynClosureCmm
        :: CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, VirtualHpOffset)]
        -> FCode CmmExpr -- returns Hp+n

75
-- allocDynClosure allocates the thing in the heap,
76
-- and modifies the virtual Hp to account for this.
77 78 79
-- The result is an expression for the address of the newly allocated
-- closure (Hp+n); it is deliberately not assigned to a LocalReg here,
-- for the reasons given below.
80

81 82 83 84 85 86 87 88 89 90
-- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
-- only valid until Hp is changed.  The caller should assign the
-- result to a LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time.  This can be quite
-- significant - see test T4801.
91 92


Simon Marlow's avatar
Simon Marlow committed
93
-- Turn every (non-void) StgArg into its Cmm addressing mode, keeping the
-- offset it was paired with, and hand the result to allocDynClosureCmm.
allocDynClosure info_tbl lf_info use_cc _blame_cc args_w_offsets = do
    amodes_w_offsets <- mapM arg_amode args_w_offsets
    allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets
  where
    -- No void args in here
    arg_amode (arg, off) = do
        amode <- getArgAmode arg
        return (amode, off)

Simon Marlow's avatar
Simon Marlow committed
100
-- Allocate a dynamic closure whose field values are already Cmm
-- expressions: emit the stores that fill in the header and the payload,
-- advance the virtual heap pointer by the size of the object, and return
-- an expression for the address of the object's first word.
allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
    old_virt_hp <- getVirtHp

    -- SAY WHAT WE ARE ABOUT TO DO
    let rep = cit_rep info_tbl
    tickyDynAlloc rep lf_info
    profDynAlloc rep use_cc

    -- FIND THE OFFSET OF THE INFO-PTR WORD
    -- obj_offset is the VirtualHpOffset of the first word of the new
    -- object.  Remember, virtHp points to the last allocated word,
    -- ie 1 *before* the info-ptr word of the new object.
    let obj_offset = old_virt_hp + 1
        info_ptr   = CmmLit (CmmLabel (cit_lbl info_tbl))

    -- ALLOCATE THE OBJECT
    obj_addr <- getHpRelOffset obj_offset
    emitComment (mkFastString "allocDynClosure")
    emitSetDynHdr obj_addr info_ptr use_cc
    uncurry (hpStore obj_addr) (unzip amodes_w_offsets)

    -- BUMP THE VIRTUAL HEAP POINTER
    dflags <- getDynFlags
    setVirtHp (old_virt_hp + heapClosureSize dflags rep)

    -- The result: ask once more for the address of the object's first word
    getHpRelOffset obj_offset
130 131

-- Store the header of a dynamic closure at consecutive word offsets
-- starting at 0 from 'base': the info pointer first, followed by
-- whatever words dynProfHdr supplies for the given cost-centre stack.
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs = do
    dflags <- getDynFlags
    let hdr_words = info_ptr : dynProfHdr dflags ccs
        -- ToDo: Gransim stuff
        -- ToDo: Parallel stuff
        -- No ticky header
    hpStore base hdr_words [0..]
141 142 143 144

hpStore :: CmmExpr -> [CmmExpr] -> [VirtualHpOffset] -> FCode ()
-- For each pair (val, off), taken positionally from the two lists,
-- emit a store of val at word offset off from base.
hpStore base vals offs = do
    dflags <- getDynFlags
    emit $ catAGraphs
        [ mkStore (cmmOffsetW dflags base off) val
        | (val, off) <- zip vals offs ]
148 149 150


-----------------------------------------------------------
151
--              Layout of static closures
152 153 154 155 156
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.

157
mkStaticClosureFields
        :: DynFlags
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields dflags info_tbl ccs caf_refs payload
  = mkStaticClosure dflags (cit_lbl info_tbl) ccs payload
        padding static_link_field saved_info_field
  where
    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:
    is_caf       = isThunkRep (cit_rep info_tbl)
    has_caf_refs = mayHaveCafRefs caf_refs
    zero_word    = mkIntCLit dflags 0

    -- A CAF gets one zero word of padding (and must have no payload)
    padding
        | is_caf    = ASSERT(null payload) [zero_word]
        | otherwise = []

    static_link_field
        | is_caf || staticClosureNeedsLink has_caf_refs info_tbl
        = [static_link_value]
        | otherwise
        = []

    -- Present (as a zero word) only for CAFs
    saved_info_field = [ zero_word | is_caf ]

        -- For a static constructor which has NoCafRefs, we set the
        -- static link field to a non-zero value so the garbage
        -- collector will ignore it.
    static_link_value = mkIntCLit dflags (if has_caf_refs then 0 else 1)
204 205


206
-- Lay out the words of a static closure, in order: the info label, the
-- variable header words, the payload (each literal padded by
-- padLitToWord), then the supplied padding, static-link and saved-info
-- fields.
mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
  = concat
      [ [CmmLabel info_lbl]
      , variable_header_words
      , concatMap (padLitToWord dflags) payload
      , padding
      , static_link_field
      , saved_info_field
      ]
  where
    variable_header_words
        = concat
            [ staticGranHdr
            , staticParHdr
            , staticProfHdr dflags ccs
            , staticTickyHdr
            ]

222 223
-- JD: Simon had elided this padding, but without it the C back end asserts
-- failure. Maybe it's a bad assertion, and this padding is indeed unnecessary?
224 225 226
-- Follow a literal with zero-valued literals so that, together, they
-- occupy wORD_SIZE bytes.  A literal that is already at least a word
-- wide gets no padding.
padLitToWord :: DynFlags -> CmmLit -> [CmmLit]
padLitToWord dflags lit = lit : zeros bytes_short
  where
    lit_width   = typeWidth (cmmLitType dflags lit)

    bytes_short :: Int
    bytes_short = wORD_SIZE dflags - widthInBytes lit_width

    -- Emit the narrowest zero that makes the remaining byte count a
    -- multiple of the next power of two, and repeat until nothing is left.
    zeros n
      | n <= 0         = []
      | odd n          = CmmInt 0 W8  : zeros (n - 1)
      | n `rem` 4 /= 0 = CmmInt 0 W16 : zeros (n - 2)
      | n `rem` 8 /= 0 = CmmInt 0 W32 : zeros (n - 4)
      | otherwise      = CmmInt 0 W64 : zeros (n - 8)

235
-----------------------------------------------------------
236
--              Heap overflow checking
237 238 239 240 241 242 243 244 245 246 247 248
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
249
        r = gc_1p( r )
250 251
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.
252

253
  * A series of canned entry points like
254
        gcfun_2p( f, x, y )
255 256 257 258 259 260 261 262 263
    where f is a function closure of arity 2
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
264 265 266
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
267 268 269
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
     Note the tail call to the garbage collector;
270
     it should do no register shuffling
271 272 273

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
274 275 276
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
277
          L: HpAlloc = 8
278 279
             call gc()  -- Needs an info table
             goto T }
280 281

* altHeapCheck: Immediately following an eval
282 283
  Started as
        case f x y of r { (p,q) -> rhs }
284 285 286
  (a) With a canned sequence for the results of f
       (which is the very common case since
       all boxed cases return just one pointer
287 288 289 290 291 292
           ...
           r = f( x, y )
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...
293

294 295
        L: r = gc_1p( r )
           goto K }
296

297 298 299 300
        Here, the info table needed by the call
        to gc_1p should be the *same* as the
        one for the call to f; the C-- optimiser
        spots this sharing opportunity)
301 302 303

   (b) No canned sequence for results of f
       Note second info table
304 305 306 307 308 309
           ...
           (r1,r2,r3) = call f( x, y )
        K:
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...
310

311 312
        L: call gc()    -- Extra info table here
           goto K
313 314 315

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
316
       case branch *not* following eval,
317 318 319
       or let-no-escape
  Exactly the same as the previous case:

320 321 322 323
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...
324

325 326
        L: call gc()
           goto K
327 328 329 330 331
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

332 333 334 335 336 337
entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()
-- Work out the closure-pointer expression and whether the closure has a
-- known argument pattern, then defer to entryHeapCheck'.
entryHeapCheck cl_info nodeSet arity args code
  = entryHeapCheck' is_fastf node arity args code
  where
    -- Use the supplied register if there is one, otherwise refer to
    -- the closure through its static label.
    node = maybe static_node (CmmReg . CmmLocal) nodeSet
    static_node = CmmLit (CmmLabel (staticClosureLabel cl_info))

    -- Only an ArgGen argument descriptor counts as "not fast"
    is_fastf
      | Just (_, ArgGen _) <- closureFunInfo cl_info = False
      | otherwise                                    = True

-- | lower-level version for CmmParse
entryHeapCheck' :: Bool           -- is a known function pattern
                -> CmmExpr        -- expression for the closure pointer
                -> Int            -- Arity -- not same as len args b/c of voids
                -> [LocalReg]     -- Non-void args (empty for thunk)
                -> FCode ()
                -> FCode ()
entryHeapCheck' is_fastf node arity args code = do
    dflags    <- getDynFlags
    updfr_sz  <- getUpdFrameOff

    -- The failure path branches back to this label after the GC jump.
    retry_lbl <- newLabelC
    emitLabel retry_lbl

    let fun_and_args = node : [ CmmReg (CmmLocal r) | r <- args ]
        gc_fun       = CmmReg (CmmGlobal GCFun)
        gc_enter     = CmmReg (CmmGlobal GCEnter1)

        {- Thunks (arity 0): jump stg_gc_enter_1

           Function (fast):  call (NativeNode) stg_gc_fun(fun, args)

           Function (slow):  call (slow) stg_gc_fun(fun, args)
        -}
        enter_gc
          | arity == 0 = mkJump dflags NativeNodeCall gc_enter [node] updfr_sz
          | is_fastf   = mkJump dflags NativeNodeCall gc_fun fun_and_args updfr_sz
          | otherwise  = mkJump dflags Slow gc_fun fun_and_args updfr_sz

    heapCheck True True (enter_gc <*> mkBranch retry_lbl) code
386

387 388
-- ------------------------------------------------------------
-- A heap/stack check in a case alternative
389

390 391 392 393 394 395 396 397 398 399 400 401 402 403 404

-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (if there are multiple alts it is
-- always a canned GC point).

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--           then we do mkJumpReturnsTo
--           else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--           then generate the continuation and do mkCallReturnsTo
--           else we do a normal call to stg_gc_noregs

405
-- Heap check for a case alternative: the shared implementation with the
-- yield check switched off.
altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck = altOrNoEscapeHeapCheck False

-- Shared worker for altHeapCheck and noEscapeHeapCheck.  If the live
-- registers match a canned GC entry point, generate a return
-- continuation and use it; otherwise fall back to the generic GC call.
altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
    dflags <- getDynFlags
    maybe (genericGC checkYield code)
          (via_canned dflags)
          (cannedGCEntryPoint dflags regs)
  where
    via_canned dflags gc = do
        ret_lbl <- newLabelC
        let (ret_off, _, copy_in) =
                copyInOflow dflags NativeReturn (Young ret_lbl) regs []
        resume_lbl <- newLabelC
        -- Out of line: copy the returned values in, then rejoin the
        -- main code at resume_lbl.
        emitOutOfLine ret_lbl (copy_in <*> mkBranch resume_lbl)
        emitLabel resume_lbl
        cannedGCReturnsTo checkYield False gc regs ret_lbl ret_off code
420 421 422

-- Like altHeapCheck, but the caller supplies the return label and offset
-- of an existing continuation, so none is generated here.  No yield
-- check is requested.
altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code = do
    dflags <- getDynFlags
    maybe (genericGC False code)
          (\gc -> cannedGCReturnsTo False True gc regs lret off code)
          (cannedGCEntryPoint dflags regs)

-- noEscapeHeapCheck shares its implementation with altHeapCheck (which
-- is more efficient); the difference is that it asks for the yield
-- check, because it cannot be optimized away in the non-allocating
-- case: it may occur in a loop.
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck = altOrNoEscapeHeapCheck True
433

434
-- Heap check whose failure path enters a canned GC routine that returns
-- to the continuation (lret, off).  When cont_on_stack is set the
-- routine is entered with mkJumpReturnsTo, otherwise with
-- mkCallReturnsTo.
cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code = do
    dflags   <- getDynFlags
    updfr_sz <- getUpdFrameOff
    let -- Note [stg_gc arguments]
        live_args = [ CmmReg (CmmLocal r) | r <- regs ]

        -- NB. we use the NativeReturn convention for passing arguments
        -- to the canned heap-check routines, because we are in a case
        -- alternative and hence the [LocalReg] was passed to us in the
        -- NativeReturn convention.
        enter_gc
          | cont_on_stack
          = mkJumpReturnsTo dflags gc NativeReturn live_args lret off updfr_sz
          | otherwise
          = mkCallReturnsTo dflags gc NativeReturn live_args lret off updfr_sz []
    heapCheck False checkYield enter_gc code
454

455 456
-- Heap check whose failure path calls generic_gc and then branches back
-- to a label emitted just before the check.  No stack check is requested.
genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code = do
    updfr_sz  <- getUpdFrameOff
    retry_lbl <- newLabelC
    emitLabel retry_lbl
    gc_call   <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
    heapCheck False checkYield (gc_call <*> mkBranch retry_lbl) code
462

463 464
-- Pick the canned GC entry point, if any, that matches the types of the
-- given registers: none, exactly one register of a supported kind, or
-- two to four registers that are all GC pointers.
cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs
  = fmap mkGcLabel (entry_name (map localRegType regs))
  where
    entry_name []   = Just "stg_gc_noregs"
    entry_name [ty] = single ty
    entry_name tys
      | all isGcPtrType tys = case length tys of
                                2 -> Just "stg_gc_pp"
                                3 -> Just "stg_gc_ppp"
                                4 -> Just "stg_gc_pppp"
                                _ -> Nothing
      | otherwise           = Nothing

    -- One register: choose by pointer-hood, float-ness and width
    single ty
      | isGcPtrType ty       = Just "stg_gc_unpt_r1"
      | isFloatType ty       = case w of
                                 W32 -> Just "stg_gc_f1"
                                 W64 -> Just "stg_gc_d1"
                                 _   -> Nothing
      | w == wordWidth dflags = Just "stg_gc_unbx_r1"
      | w == W64              = Just "stg_gc_l1"
      | otherwise             = Nothing
      where
        w = typeWidth ty
492

493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509
-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound to a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.

510

511 512
-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"

-- | Build the expression that refers to a garbage collector entry
-- point: a Cmm code label with the given name in the RTS package.
mkGcLabel :: String -> CmmExpr
mkGcLabel = CmmLit . CmmLabel . mkCmmCodeLabel rtsPackageId . fsLit
518 519

-------------------------------
520 521
-- Wrap 'code' in a heap check (plus, optionally, a stack check and a
-- yield check).  The heap high-water mark comes from getHeapUsage.
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code = getHeapUsage with_high_water
  where
    -- Emit heap checks, but be sure to do it lazily so
    -- that the conditionals on hpHw don't cause a black hole
    with_high_water hpHw = do
        dflags <- getDynFlags
        let heap_bytes =
                if hpHw > 0
                    then Just (mkIntExpr dflags (hpHw * wORD_SIZE dflags))
                    else Nothing
            stack_mark =
                if checkStack
                    then Just (CmmLit CmmHighStackMark)
                    else Nothing
        codeOnly (do_checks stack_mark checkYield heap_bytes do_gc)
        tickyAllocHeap hpHw
        doGranAllocate hpHw
        setRealHp hpHw
        code
536

537 538 539 540 541 542 543 544 545
-- Emit stack/heap checks directly from an optional stack high-water mark
-- and an optional byte count.  On failure generic_gc is called and
-- control branches back to a label emitted before the checks.  No yield
-- check is requested.
heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
heapStackCheckGen stk_hwm mb_bytes = do
    updfr_sz  <- getUpdFrameOff
    retry_lbl <- newLabelC
    emitLabel retry_lbl
    gc_call   <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
    do_checks stk_hwm False mb_bytes (gc_call <*> mkBranch retry_lbl)

-- Emit the inline checks and the out-of-line failure block.
--
-- Test for stack pointer exhaustion, then bump heap pointer, and test
-- for heap exhaustion.  Note that we don't move the heap pointer unless
-- the stack check succeeds.  Otherwise we might end up with slop at the
-- end of the current block, which can confuse the LDV profiler.
--
-- The yield test is only emitted when no heap check is requested.
do_checks :: Maybe CmmExpr    -- Should we check the stack?
          -> Bool             -- Should we check for preemption?
          -> Maybe CmmExpr    -- Heap headroom (bytes)
          -> CmmAGraph        -- What to do on failure
          -> FCode ()
do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
  dflags   <- getDynFlags
  gc_label <- newLabelC

  let
    -- Only demanded in the heap-check branch below, which is guarded
    -- by isJust, so this lazy pattern is never forced on Nothing.
    Just headroom = mb_alloc_lit

    hp     = CmmReg hpReg
    hp_lim = CmmReg (CmmGlobal HpLim)

    -- Sp overflow if (Sp - CmmHighStack < SpLim)
    stack_overflows hwm =
        CmmMachOp (mo_wordULt dflags) [sp_after, CmmReg spLimReg]
      where
        sp_after = CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
                             [CmmReg spReg, hwm]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    heap_overflows = CmmMachOp (mo_wordUGt dflags) [hp, hp_lim]

    -- Yielding if HpLim == 0
    yield_requested = CmmMachOp (mo_wordEq dflags)
                                [hp_lim, CmmLit (zeroCLit dflags)]

    yield_wanted = checkYield && not (gopt Opt_OmitYields dflags)

  -- Stack check first (if asked for)
  maybe (return ())
        (\hwm -> emit =<< mkCmmIfGoto (stack_overflows hwm) gc_label)
        mb_stk_hwm

  -- Then either the heap check or, failing that, the yield check
  if isJust mb_alloc_lit
    then do
      emitAssign hpReg (cmmOffsetExprB dflags hp headroom)
      emit =<< mkCmmIfThen heap_overflows
                 (mkAssign (CmmGlobal HpAlloc) headroom <*> mkBranch gc_label)
    else
      when yield_wanted $
        emit =<< mkCmmIfGoto yield_requested gc_label

  -- do_gc is expected to jump back somewhere
  emitOutOfLine gc_label do_gc
599 600 601 602 603 604 605 606 607 608

{-

{- Unboxed tuple alternatives and let-no-escapes (the two most annoying
constructs to generate code for!)  For unboxed tuple returns, there
are an arbitrary number of possibly unboxed return values, some of
which will be in registers, and the others will be on the stack.  We
always organise the stack-resident fields into pointers &
non-pointers, and pass the number of each to the heap check code. -}

609 610 611 612 613 614 615
unbxTupleHeapCheck
        :: [(Id, GlobalReg)]    -- Live registers
        -> WordOff      -- no. of stack slots containing ptrs
        -> WordOff      -- no. of stack slots containing nonptrs
        -> CmmAGraph    -- code to insert in the failure path
        -> FCode ()
        -> FCode ()
616 617

unbxTupleHeapCheck regs ptrs nptrs fail_code code
618
  -- We can't manage more than 255 pointers/non-pointers
619 620
  -- in a generic heap check.
  | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
621
  | otherwise
622
  = initHeapUsage $ \ hpHw -> do
623 624 625 626 627
        { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
                                    full_fail_code rts_label
                        ; tickyAllocHeap hpHw }
        ; setRealHp hpHw
        ; code }
628 629
  where
    full_fail_code  = fail_code `plusStmts` oneStmt assign_liveness
630 631 632 633
    assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9))      -- Ho ho ho!
                                (CmmLit (mkWordCLit liveness))
    liveness        = mkRegLiveness regs ptrs nptrs
    rts_label       = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut")))
634 635


636
{- Old Gransim com -- I have no idea whether it still makes sense (SLPJ Sep07)
637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655
For GrAnSim the code for doing a heap check and doing a context switch
has been separated. Especially, the HEAP_CHK macro only performs a
heap check. THREAD_CONTEXT_SWITCH should be used for doing a context
switch. GRAN_FETCH_AND_RESCHEDULE must be put at the beginning of
every slow entry code in order to simulate the fetching of
closures. If fetching is necessary (i.e. current closure is not local)
then an automatic context switch is done. -}


When failing a check, we save a return address on the stack and
jump to a pre-compiled code fragment that saves the live registers
and returns to the scheduler.

The return address in most cases will be the beginning of the basic
block in which the check resides, since we need to perform the check
again on re-entry because someone else might have stolen the resource
in the meantime.

%************************************************************************
656
%*                                                                      *
657
     Generic Heap/Stack Checks - used in the RTS
658
%*                                                                      *
659 660 661 662 663 664 665 666
%************************************************************************

\begin{code}
hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
hpChkGen bytes liveness reentry
  = do_checks' bytes True assigns stg_gc_gen
  where
    assigns = mkStmts [
667 668 669
                CmmAssign (CmmGlobal (VanillaReg 9))  liveness,
                CmmAssign (CmmGlobal (VanillaReg 10)) reentry
                ]
670 671 672 673 674 675 676 677 678 679 680 681

-- a heap check where R1 points to the closure to enter on return, and
-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP).
hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> FCode ()
hpChkNodePointsAssignSp0 bytes sp0
  = do_checks' bytes True assign stg_gc_enter1
  where assign = oneStmt (CmmStore (CmmReg spReg) sp0)

stg_gc_gen    = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_gen")))
\end{code}

-}