CmmLayoutStack.hs 36.9 KB
Newer Older
Simon Marlow's avatar
Simon Marlow committed
1 2
{-# LANGUAGE RecordWildCards, GADTs #-}
module CmmLayoutStack (
3
       cmmLayoutStack, setInfoTableStackMap
Simon Marlow's avatar
Simon Marlow committed
4 5
  ) where

6 7 8
import StgCmmUtils      ( callerSaveVolatileRegs ) -- XXX
import StgCmmForeign    ( saveThreadState, loadThreadState ) -- XXX

Simon Marlow's avatar
Simon Marlow committed
9 10
import Cmm
import BlockId
11
import CLabel
Simon Marlow's avatar
Simon Marlow committed
12
import CmmUtils
13 14 15
import MkGraph
import Module
import ForeignCall
Simon Marlow's avatar
Simon Marlow committed
16 17 18
import CmmLive
import CmmProcPoint
import SMRep
19
import Hoopl
Simon Marlow's avatar
Simon Marlow committed
20 21 22 23 24 25
import Constants
import UniqSupply
import Maybes
import UniqFM
import Util

26
import DynFlags
Simon Marlow's avatar
Simon Marlow committed
27 28 29 30 31
import FastString
import Outputable
import qualified Data.Set as Set
import Control.Monad.Fix
import Data.Array as Array
32
import Data.Bits
33
import Data.List (nub)
Simon Marlow's avatar
Simon Marlow committed
34
import Control.Monad (liftM)
Simon Marlow's avatar
Simon Marlow committed
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53

#include "HsVersions.h"


-- | One word-sized slot in the abstract stack picture built by
-- 'allocate': either already in use, or free for spilling a variable.
data StackSlot = Occupied | Empty
     -- Occupied: a return address or part of an update frame

-- Debug rendering of a stack picture: "XXX" = occupied, "---" = empty.
instance Outputable StackSlot where
  ppr Occupied = ptext (sLit "XXX")
  ppr Empty    = ptext (sLit "---")

-- All stack locations are expressed as positive byte offsets from the
-- "base", which is defined to be the address above the return address
-- on the stack on entry to this CmmProc.
--
-- Lower addresses have higher StackLocs.
--
type StackLoc = ByteOff

54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
{-
 A StackMap describes the stack at any given point.  At a continuation
 it has a particular layout, like this:

         |             | <- base
         |-------------|
         |     ret0    | <- base + 8
         |-------------|
         .  upd frame  . <- base + sm_ret_off
         |-------------|
         |             |
         .    vars     .
         . (live/dead) .
         |             | <- base + sm_sp - sm_args
         |-------------|
         |    ret1     |
         .  ret vals   . <- base + sm_sp    (<--- Sp points here)
         |-------------|

Why do we include the final return address (ret0) in our stack map?  I
have absolutely no idea, but it seems to be done that way consistently
in the rest of the code generator, so I played along here. --SDM

Note that we will be constructing an info table for the continuation
(ret1), which needs to describe the stack down to, but not including,
the update frame (or ret0, if there is no update frame).
-}

Simon Marlow's avatar
Simon Marlow committed
82 83 84 85 86 87 88 89
-- | A 'StackMap' records the stack layout at a particular program
-- point: where Sp is relative to the base, how large the incoming
-- argument area is, how much stack is reserved for the update frame,
-- and which live variables are already saved in stack slots.
-- (See the layout picture in the block comment above.)
data StackMap = StackMap
 {  sm_sp   :: StackLoc
       -- ^ the offset of Sp relative to the base on entry
       -- to this block.
 ,  sm_args :: ByteOff
       -- ^ the number of bytes of arguments in the area for this block
       -- Defn: the offset of young(L) relative to the base is given by
       -- (sm_sp - sm_args) of the StackMap for block L.
90 91 92
 ,  sm_ret_off :: ByteOff
       -- ^ Number of words of stack that we do not describe with an info
       -- table, because it contains an update frame.
Simon Marlow's avatar
Simon Marlow committed
93 94 95 96 97 98 99 100 101 102 103 104
 ,  sm_regs :: UniqFM (LocalReg,StackLoc)
       -- ^ regs on the stack
 }

-- | Debug rendering of a StackMap, one field per line.
instance Outputable StackMap where
  ppr sm =
        labelled "Sp = "         (int (sm_sp sm))
     $$ labelled "sm_args = "    (int (sm_args sm))
     $$ labelled "sm_ret_off = " (int (sm_ret_off sm))
     $$ labelled "sm_regs = "    (ppr (eltsUFM (sm_regs sm)))
    where
      -- render one "label = value" line
      labelled lbl doc = text lbl <> doc


105
-- NOTE(review): non-Haskell VCS blame artifacts (author names, runs of
-- line numbers) are interleaved with this definition in this copy of
-- the file; they must be stripped before the file can compile.
--
-- | Entry point of the pass: lay out the stack for one CmmGraph.
-- Note the mfix knot: 'layout' lazily consumes its own final results
-- (the stack maps and the Sp high-water mark), so those must not be
-- forced while the pass is still running.
cmmLayoutStack :: DynFlags -> ProcPointSet -> ByteOff -> CmmGraph
106
               -> UniqSM (CmmGraph, BlockEnv StackMap)
107
cmmLayoutStack dflags procpoints entry_args
108
               graph0@(CmmGraph { g_entry = entry })
Simon Marlow's avatar
Simon Marlow committed
109
  = do
110
    -- pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()
111 112 113 114 115 116 117

    -- We need liveness info.  We could do removeDeadAssignments at
    -- the same time, but it buys nothing over doing cmmSink later,
    -- and costs a lot more than just cmmLiveness.
    -- (graph, liveness) <- removeDeadAssignments graph0
    let (graph, liveness) = (graph0, cmmLiveness graph0)

118
    -- pprTrace "liveness" (ppr liveness) $ return ()
Simon Marlow's avatar
Simon Marlow committed
119 120
    let blocks = postorderDfs graph

121
    (final_stackmaps, _final_high_sp, new_blocks) <-
Simon Marlow's avatar
Simon Marlow committed
122
          mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
123
            layout dflags procpoints liveness entry entry_args
Simon Marlow's avatar
Simon Marlow committed
124 125
                   rec_stackmaps rec_high_sp blocks

126
    new_blocks' <- mapM (lowerSafeForeignCall dflags) new_blocks
127

128 129
    -- pprTrace ("Sp HWM") (ppr _final_high_sp) $ return ()
    return (ofBlockList entry new_blocks', final_stackmaps)
Simon Marlow's avatar
Simon Marlow committed
130 131 132



133 134
-- | The main worker: walk the blocks (in postorderDfs order),
-- threading an accumulating BlockEnv StackMap through.  For each block
-- we (a) account for middle-node effects on the stack map, (b) insert
-- reloads at proc points, (c) lay out the frame for the last node, and
-- (d) manifest Sp.  The final_stackmaps/final_hwm arguments are the
-- *final* results of this same pass, tied back recursively via mfix in
-- cmmLayoutStack — they must only be consumed lazily.
layout :: DynFlags
       -> BlockSet                      -- proc points
Simon Marlow's avatar
Simon Marlow committed
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
       -> BlockEnv CmmLive              -- liveness
       -> BlockId                       -- entry
       -> ByteOff                       -- stack args on entry

       -> BlockEnv StackMap             -- [final] stack maps
       -> ByteOff                       -- [final] Sp high water mark

       -> [CmmBlock]                    -- [in] blocks

       -> UniqSM
          ( BlockEnv StackMap           -- [out] stack maps
          , ByteOff                     -- [out] Sp high water mark
          , [CmmBlock]                  -- [out] new blocks
          )

150
layout dflags procpoints liveness entry entry_args final_stackmaps final_hwm blocks
Simon Marlow's avatar
Simon Marlow committed
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
  = go blocks init_stackmap entry_args []
  where
    (updfr, cont_info)  = collectContInfo blocks

    init_stackmap = mapSingleton entry StackMap{ sm_sp   = entry_args
                                               , sm_args = entry_args
                                               , sm_ret_off = updfr
                                               , sm_regs = emptyUFM
                                               }

    go [] acc_stackmaps acc_hwm acc_blocks
      = return (acc_stackmaps, acc_hwm, acc_blocks)

    go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
      = do
       let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0
    
       let stack0@StackMap { sm_sp = sp0 }
               = mapFindWithDefault
                     (pprPanic "no stack map for" (ppr entry_lbl))
                     entry_lbl acc_stackmaps
    
173
       -- pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()
174
    
Simon Marlow's avatar
Simon Marlow committed
175 176
       -- (a) Update the stack map to include the effects of
       --     assignments in this block
Simon Marlow's avatar
Simon Marlow committed
177 178
       let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0
    
Simon Marlow's avatar
Simon Marlow committed
179 180
       -- (b) Insert assignments to reload all the live variables if this
       --     block is a proc point
Simon Marlow's avatar
Simon Marlow committed
181 182 183 184
       let middle1 = if entry_lbl `setMember` procpoints
                        then foldr blockCons middle0 (insertReloads stack0)
                        else middle0
    
Simon Marlow's avatar
Simon Marlow committed
185 186 187 188 189
       -- (c) Look at the last node and if we are making a call or
       --     jumping to a proc point, we must save the live
       --     variables, adjust Sp, and construct the StackMaps for
       --     each of the successor blocks.  See handleLastNode for
       --     details.
190
       (middle2, sp_off, last1, fixup_blocks, out)
191
           <- handleLastNode dflags procpoints liveness cont_info
Simon Marlow's avatar
Simon Marlow committed
192
                             acc_stackmaps stack1 middle0 last0
Simon Marlow's avatar
Simon Marlow committed
193
    
194
       -- pprTrace "layout(out)" (ppr out) $ return ()
195

Simon Marlow's avatar
Simon Marlow committed
196 197 198
       -- (d) Manifest Sp: run over the nodes in the block and replace
       --     CmmStackSlot with CmmLoad from Sp with a concrete offset.
       --
199 200 201 202 203 204
       -- our block:
       --    middle1          -- the original middle nodes
       --    middle2          -- live variable saves from handleLastNode
       --    Sp = Sp + sp_off -- Sp adjustment goes here
       --    last1            -- the last node
       --
Simon Marlow's avatar
Simon Marlow committed
205
       let middle_pre = blockToList $ foldl blockSnoc middle1 middle2
206

Simon Marlow's avatar
Simon Marlow committed
207 208 209 210 211 212
           sp_high = final_hwm - entry_args
              -- The stack check value is adjusted by the Sp offset on
              -- entry to the proc, which is entry_args.  We are
              -- assuming that we only do a stack check at the
              -- beginning of a proc, and we don't modify Sp before the
              -- check.
213

214
           final_blocks = manifestSp dflags final_stackmaps stack0 sp0 sp_high entry0
215
                              middle_pre sp_off last1 fixup_blocks
216

217
           acc_stackmaps' = mapUnion acc_stackmaps out
218

219 220 221 222 223 224 225 226 227 228 229
           -- If this block jumps to the GC, then we do not take its
           -- stack usage into account for the high-water mark.
           -- Otherwise, if the only stack usage is in the stack-check
           -- failure block itself, we will do a redundant stack
           -- check.  The stack has a buffer designed to accommodate
           -- the largest amount of stack needed for calling the GC.
           --
           this_sp_hwm | isGcJump last0 = 0
                       | otherwise      = sp0 - sp_off

           hwm' = maximum (acc_hwm : this_sp_hwm : map sm_sp (mapElems out))
230

231
       go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks)
232 233


234 235 236 237 238 239 240 241
-- -----------------------------------------------------------------------------

-- Heuristic: is this last node a tail call into the GC?  Not
-- foolproof, but GCFun is the culprit we most want to catch.
isGcJump :: CmmNode O C -> Bool
isGcJump node =
  case node of
    CmmCall { cml_target = CmmReg (CmmGlobal r) } -> r `elem` [GCFun, GCEnter1]
    _other                                        -> False

Simon Marlow's avatar
Simon Marlow committed
242
-- -----------------------------------------------------------------------------
243

Simon Marlow's avatar
Simon Marlow committed
244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
-- This doesn't seem right somehow.  We need to find out whether this
-- proc will push some update frame material at some point, so that we
-- can avoid using that area of the stack for spilling.  The
-- updfr_space field of the CmmProc *should* tell us, but it doesn't
-- (I think maybe it gets filled in later when we do proc-point
-- splitting).
--
-- So we'll just take the max of all the cml_ret_offs.  This could be
-- unnecessarily pessimistic, but probably not in the code we
-- generate.

collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff)
collectContInfo blocks = (maximum ret_offs, mapFromList (catMaybes cont_infos))
 where
   (cont_infos, ret_offs) = mapAndUnzip scan blocks

   -- For each block: the continuation label with its return-value
   -- bytes (when the last node is a call), paired with the block's
   -- ret_off contribution.
   scan blk =
      case lastNode blk of
         CmmCall { cml_cont = Just l, cml_ret_args = args, cml_ret_off = off }
            -> (Just (l, args), off)
         CmmForeignCall { succ = l, updfr = off }
            -> (Just (l, 0), off) -- ??
         _other -> (Nothing, 0)


Simon Marlow's avatar
Simon Marlow committed
270 271
-- -----------------------------------------------------------------------------
-- Updating the StackMap from middle nodes
Simon Marlow's avatar
Simon Marlow committed
272

Simon Marlow's avatar
Simon Marlow committed
273
-- Look for loads from stack slots, and update the StackMap.  This is
274
-- purely for optimisation reasons, so that we can avoid saving a
Simon Marlow's avatar
Simon Marlow committed
275 276 277 278 279 280 281
-- variable back to a different stack slot if it is already on the
-- stack.
--
-- This happens a lot: for example when function arguments are passed
-- on the stack and need to be immediately saved across a call, we
-- want to just leave them where they are on the stack.
--
Simon Marlow's avatar
Simon Marlow committed
282 283 284
-- | Update a StackMap with the effect of one node: a register loaded
-- from a stack slot is now known to live in that slot, so a later save
-- of it can be dropped; any other assignment to a register invalidates
-- whatever slot it was associated with.
procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
procMiddle stackmaps node sm
  = case node of
285
     CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)
Simon Marlow's avatar
Simon Marlow committed
286 287 288 289 290 291 292 293 294 295 296 297 298 299
       -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
        where loc = getStackLoc area off stackmaps
     CmmAssign (CmmLocal r) _other
       -> sm { sm_regs = delFromUFM (sm_regs sm) r }
     _other
       -> sm

-- | Resolve an (Area, offset) pair to a concrete byte offset from the
-- base.  An offset into the Old area is already base-relative; an
-- offset into Young l is relative to young(l), which sits at
-- sm_sp - sm_args of l's StackMap (panics if l has no StackMap yet).
getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc
getStackLoc Old       n _         = n
getStackLoc (Young l) n stackmaps
  | Just sm <- mapLookup l stackmaps = sm_sp sm - sm_args sm + n
  | otherwise                        = pprPanic "getStackLoc" (ppr l)

Simon Marlow's avatar
Simon Marlow committed
300

Simon Marlow's avatar
Simon Marlow committed
301 302 303
-- -----------------------------------------------------------------------------
-- Handling stack allocation for a last node

304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
-- We take a single last node and turn it into:
--
--    C1 (some statements)
--    Sp = Sp + N
--    C2 (some more statements)
--    call f()          -- the actual last node
--
-- plus possibly some more blocks (we may have to add some fixup code
-- between the last node and the continuation).
--
-- C1: is the code for saving the variables across this last node onto
-- the stack, if the continuation is a call or jumps to a proc point.
--
-- C2: if the last node is a safe foreign call, we have to inject some
-- extra code that goes *after* the Sp adjustment.

Simon Marlow's avatar
Simon Marlow committed
320
-- | Process the last node of a block.  For calls we compute the Sp
-- adjustment and the continuation's StackMap; for branches we
-- reconcile the current stack with each successor, inserting fixup
-- blocks where the layouts differ.  Returns: nodes to place *before*
-- the Sp adjustment, the Sp delta, the (possibly retargeted) last
-- node, any new fixup blocks, and StackMaps for the continuations.
handleLastNode
321
   :: DynFlags -> ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
Simon Marlow's avatar
Simon Marlow committed
322
   -> BlockEnv StackMap -> StackMap
Simon Marlow's avatar
Simon Marlow committed
323
   -> Block CmmNode O O
Simon Marlow's avatar
Simon Marlow committed
324 325
   -> CmmNode O C
   -> UniqSM
326 327
      ( [CmmNode O O]      -- nodes to go *before* the Sp adjustment
      , ByteOff            -- amount to adjust Sp
Simon Marlow's avatar
Simon Marlow committed
328 329
      , CmmNode O C        -- new last node
      , [CmmBlock]         -- new blocks
330
      , BlockEnv StackMap  -- stackmaps for the continuations
Simon Marlow's avatar
Simon Marlow committed
331 332
      )

333
handleLastNode dflags procpoints liveness cont_info stackmaps
Simon Marlow's avatar
Simon Marlow committed
334
               stack0@StackMap { sm_sp = sp0 } middle last
Simon Marlow's avatar
Simon Marlow committed
335 336 337 338 339 340
 = case last of
    --  At each return / tail call,
    --  adjust Sp to point to the last argument pushed, which
    --  is cml_args, after popping any other junk from the stack.
    CmmCall{ cml_cont = Nothing, .. } -> do
      let sp_off = sp0 - cml_args
341
      return ([], sp_off, last, [], mapEmpty)
Simon Marlow's avatar
Simon Marlow committed
342 343

    --  At each CmmCall with a continuation:
344
    CmmCall{ cml_cont = Just cont_lbl, .. } ->
Simon Marlow's avatar
Simon Marlow committed
345
       return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off
346

347
    CmmForeignCall{ succ = cont_lbl, .. } -> do
Simon Marlow's avatar
Simon Marlow committed
348
       return $ lastCall cont_lbl wORD_SIZE wORD_SIZE (sm_ret_off stack0)
349
            -- one word each for args and results: the return address
350

351 352 353
    CmmBranch{..}     ->  handleBranches
    CmmCondBranch{..} ->  handleBranches
    CmmSwitch{..}     ->  handleBranches
354 355

  where
Simon Marlow's avatar
Simon Marlow committed
356 357 358 359 360 361 362 363 364 365 366 367 368
     -- Calls and ForeignCalls are handled the same way:
     lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
              -> ( [CmmNode O O]
                 , ByteOff
                 , CmmNode O C
                 , [CmmBlock]
                 , BlockEnv StackMap
                 )
     lastCall lbl cml_args cml_ret_args cml_ret_off
      =  ( assignments
         , spOffsetForCall sp0 cont_stack cml_args
         , last
         , [] -- no new blocks
369
         , mapSingleton lbl cont_stack )
Simon Marlow's avatar
Simon Marlow committed
370
      where
371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
         (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off


     prepareStack lbl cml_ret_args cml_ret_off
       | Just cont_stack <- mapLookup lbl stackmaps
             -- If we have already seen this continuation before, then
             -- we just have to make the stack look the same:
       = (fixupStack stack0 cont_stack, cont_stack)
             -- Otherwise, we have to allocate the stack frame
       | otherwise
       = (save_assignments, new_cont_stack)
       where
        (new_cont_stack, save_assignments)
           = setupStackFrame lbl liveness cml_ret_off cml_ret_args stack0


387
     -- For other last nodes (branches), if any of the targets is a
Simon Marlow's avatar
Simon Marlow committed
388 389 390
     -- proc point, we have to set up the stack to match what the proc
     -- point is expecting.
     --
391
     handleBranches :: UniqSM ( [CmmNode O O]
Simon Marlow's avatar
Simon Marlow committed
392 393
                                , ByteOff
                                , CmmNode O C
394 395
                                , [CmmBlock]
                                , BlockEnv StackMap )
Simon Marlow's avatar
Simon Marlow committed
396

397
     handleBranches
398 399
         -- Note [diamond proc point]
       | Just l <- futureContinuation middle
Simon Marlow's avatar
Simon Marlow committed
400
       , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
401 402 403 404 405 406 407 408 409 410
       = do
         let cont_args = mapFindWithDefault 0 l cont_info
             (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0)
             out = mapFromList [ (l', cont_stack)
                               | l' <- successors last ]
         return ( assigs
                , spOffsetForCall sp0 cont_stack wORD_SIZE
                , last
                , []
                , out)
Simon Marlow's avatar
Simon Marlow committed
411

Simon Marlow's avatar
Simon Marlow committed
412
        | otherwise = do
413
          pps <- mapM handleBranch (successors last)
Simon Marlow's avatar
Simon Marlow committed
414 415
          let lbl_map :: LabelMap Label
              lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
416
              fix_lbl l = mapFindWithDefault l l lbl_map
Simon Marlow's avatar
Simon Marlow committed
417 418 419
          return ( []
                 , 0
                 , mapSuccessors fix_lbl last
420 421
                 , concat [ blk | (_,_,_,blk) <- pps ]
                 , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )
Simon Marlow's avatar
Simon Marlow committed
422

423 424 425 426 427 428 429 430 431
     -- For each successor of this block
     handleBranch :: BlockId -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
     handleBranch l
        --   (a) if the successor already has a stackmap, we need to
        --       shuffle the current stack to make it look the same.
        --       We have to insert a new block to make this happen.
        | Just stack2 <- mapLookup l stackmaps
        = do
             let assigs = fixupStack stack0 stack2
432
             (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 assigs
433 434 435 436 437 438 439 440
             return (l, tmp_lbl, stack2, block)

        --   (b) if the successor is a proc point, save everything
        --       on the stack.
        | l `setMember` procpoints
        = do
             let cont_args = mapFindWithDefault 0 l cont_info
                 (stack2, assigs) =
441 442
                      --pprTrace "first visit to proc point"
                      --             (ppr l <+> ppr stack1) $
443
                      setupStackFrame l liveness (sm_ret_off stack0)
Simon Marlow's avatar
Simon Marlow committed
444
                                                       cont_args stack0
445
             --
446
             (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 assigs
447 448 449 450 451 452 453 454 455 456 457 458 459
             return (l, tmp_lbl, stack2, block)

        --   (c) otherwise, the current StackMap is the StackMap for
        --       the continuation.  But we must remember to remove any
        --       variables from the StackMap that are *not* live at
        --       the destination, because this StackMap might be used
        --       by fixupStack if this is a join point.
        | otherwise = return (l, l, stack1, [])
        where live = mapFindWithDefault (panic "handleBranch") l liveness
              stack1 = stack0 { sm_regs = filterUFM is_live (sm_regs stack0) }
              is_live (r,_) = r `elemRegSet` live


460 461 462
-- | Build a fresh block that performs the given assignments and Sp
-- adjustment before jumping on to l.  When no fixup is needed (no
-- assignments and Sp already correct) the original label is returned
-- with no new blocks.
makeFixupBlock :: DynFlags -> ByteOff -> Label -> StackMap -> [CmmNode O O]
               -> UniqSM (Label, [CmmBlock])
makeFixupBlock dflags sp0 l stack assigs
463 464 465 466 467
  | null assigs && sp0 == sm_sp stack = return (l, [])
  | otherwise = do
    tmp_lbl <- liftM mkBlockId $ getUniqueM
    let sp_off = sp0 - sm_sp stack
        block = blockJoin (CmmEntry tmp_lbl)
468
                          (maybeAddSpAdj dflags sp_off (blockFromList assigs))
469 470
                          (CmmBranch l)
    return (tmp_lbl, [block])
Simon Marlow's avatar
Simon Marlow committed
471 472 473 474 475 476 477 478 479 480


-- | The Sp adjustment needed at a call: Sp currently points at
-- current_sp, and for the call it must point at
-- (sm_sp cont_stack - sm_args cont_stack + args), so we return the
-- difference between the two.
spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff
spOffsetForCall current_sp cont_stack args = current_sp - target_sp
  where
    target_sp = sm_sp cont_stack - sm_args cont_stack + args
Simon Marlow's avatar
Simon Marlow committed
481 482 483 484 485 486 487


-- | Create a sequence of assignments to establish the new StackMap,
-- given the old StackMap.  A register already sitting in the right
-- slot needs no store; everything else required by the new map is
-- saved to its slot in the Old area.
fixupStack :: StackMap -> StackMap -> [CmmNode O O]
fixupStack old_stack new_stack = concatMap move new_locs
 where
Simon Marlow's avatar
Simon Marlow committed
488
     old_map  = sm_regs old_stack
Simon Marlow's avatar
Simon Marlow committed
489 490 491
     new_locs = stackSlotRegs new_stack

     move (r,n)
Simon Marlow's avatar
Simon Marlow committed
492
       | Just (_,m) <- lookupUFM old_map r, n == m = []
Simon Marlow's avatar
Simon Marlow committed
493 494 495
       | otherwise = [CmmStore (CmmStackSlot Old n)
                               (CmmReg (CmmLocal r))]

Simon Marlow's avatar
Simon Marlow committed
496 497 498 499 500 501 502 503 504 505 506


-- | Allocate the stack frame for a continuation: save the live
-- variables (via 'allocate') above the update-frame region, then
-- account for the return address and return-value area in the
-- continuation's StackMap.
setupStackFrame
             :: BlockId                 -- label of continuation
             -> BlockEnv CmmLive        -- liveness
             -> ByteOff      -- updfr
             -> ByteOff      -- bytes of return values on stack
             -> StackMap     -- current StackMap
             -> (StackMap, [CmmNode O O])

setupStackFrame lbl liveness updfr_off ret_args stack0
Simon Marlow's avatar
Simon Marlow committed
507
  = (cont_stack, assignments)
Simon Marlow's avatar
Simon Marlow committed
508 509 510 511 512 513 514 515 516 517 518 519 520 521
  where
      -- get the set of LocalRegs live in the continuation
      live = mapFindWithDefault Set.empty lbl liveness

      -- the stack from the base to updfr_off is off-limits.
      -- our new stack frame contains:
      --   * saved live variables
      --   * the return address [young(C) + 8]
      --   * the args for the call,
      --     which are replaced by the return values at the return
      --     point.

      -- everything up to updfr_off is off-limits
      -- stack1 contains updfr_off, plus everything we need to save
Simon Marlow's avatar
Simon Marlow committed
522
      (stack1, assignments) = allocate updfr_off live stack0
Simon Marlow's avatar
Simon Marlow committed
523 524 525 526 527 528 529 530 531

      -- And the Sp at the continuation is:
      --   sm_sp stack1 + ret_args
      cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args
                         , sm_args = ret_args
                         , sm_ret_off = updfr_off
                         }


532 533 534 535 536 537
-- -----------------------------------------------------------------------------
-- Note [diamond proc point]
--
-- This special case looks for the pattern we get from a typical
-- tagged case expression:
--
Simon Marlow's avatar
Simon Marlow committed
538 539
--    Sp[young(L1)] = L1
--    if (R1 & 7) != 0 goto L1 else goto L2
540
--  L2:
Simon Marlow's avatar
Simon Marlow committed
541
--    call [R1] returns to L1
542
--  L1: live: {y}
Simon Marlow's avatar
Simon Marlow committed
543
--    x = R1
544 545 546
--
-- If we let the generic case handle this, we get
--
Simon Marlow's avatar
Simon Marlow committed
547 548
--    Sp[-16] = L1
--    if (R1 & 7) != 0 goto L1a else goto L2
549
--  L2:
Simon Marlow's avatar
Simon Marlow committed
550 551 552
--    Sp[-8] = y
--    Sp = Sp - 16
--    call [R1] returns to L1
553
--  L1a:
Simon Marlow's avatar
Simon Marlow committed
554 555 556
--    Sp[-8] = y
--    Sp = Sp - 16
--    goto L1
557
--  L1:
Simon Marlow's avatar
Simon Marlow committed
558
--    x = R1
559 560
--
-- The code for saving the live vars is duplicated in each branch, and
Simon Marlow's avatar
Simon Marlow committed
561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577
-- furthermore there is an extra jump in the fast path (assuming L1 is
-- a proc point, which it probably is if there is a heap check).
--
-- So to fix this we want to set up the stack frame before the
-- conditional jump.  How do we know when to do this, and when it is
-- safe?  The basic idea is, when we see the assignment
-- 
--   Sp[young(L)] = L
-- 
-- we know that
--   * we are definitely heading for L
--   * there can be no more reads from another stack area, because young(L)
--     overlaps with it.
--
-- We don't necessarily know that everything live at L is live now
-- (some might be assigned between here and the jump to L).  So we
-- simplify and only do the optimisation when we see
578 579 580 581 582
--
--   (1) a block containing an assignment of a return address L
--   (2) ending in a branch where one (and only) continuation goes to L,
--       and no other continuations go to proc points.
--
Simon Marlow's avatar
Simon Marlow committed
583 584
-- then we allocate the stack frame for L at the end of the block,
-- before the branch.
585 586 587 588 589 590
--
-- We could generalise (2), but that would make it a bit more
-- complicated to handle, and this currently catches the common case.

-- | Look through the middle nodes for a store of a return address
-- into a Young area (Sp[young(L)] = L) and return that label if
-- found.  Used by the Note [diamond proc point] case above.
futureContinuation :: Block CmmNode O O -> Maybe BlockId
futureContinuation middle = foldBlockNodesB f middle Nothing
591 592
   where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
         f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
593 594 595 596 597 598 599 600 601 602 603 604 605 606
               = Just l
         f _ r = r

-- -----------------------------------------------------------------------------
-- Saving live registers

-- | Given a set of live registers and a StackMap, save all the registers
-- on the stack and return the new StackMap and the assignments to do
-- the saving.
--
-- Strategy, as implemented below: (1) build a picture of the current
-- stack as Occupied/Empty word slots; (2) fill reusable Empty slots
-- with regs to be saved, oldest slots first (the foldr); (3) push any
-- remaining regs on top; (4) trim unused Empty slots off the top.
allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O])
allocate ret_off live stackmap@StackMap{ sm_sp = sp0
                                       , sm_regs = regs0 }
 =
607
  -- pprTrace "allocate" (ppr live $$ ppr stackmap) $
608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693

   -- we only have to save regs that are not already in a slot
   let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
       regs1   = filterUFM (\(r,_) -> elemRegSet r live) regs0
   in

   -- make a map of the stack
   let stack = reverse $ Array.elems $
               accumArray (\_ x -> x) Empty (1, toWords (max sp0 ret_off)) $
                 ret_words ++ live_words
            where ret_words =
                   [ (x, Occupied)
                   | x <- [ 1 .. toWords ret_off] ]
                  live_words =
                   [ (toWords x, Occupied)
                   | (r,off) <- eltsUFM regs1,
                     let w = localRegBytes r,
                     x <- [ off, off-wORD_SIZE .. off - w + 1] ]
   in

   -- Pass over the stack: find slots to save all the new live variables,
   -- choosing the oldest slots first (hence a foldr).
   let
       save slot ([], stack, n, assigs, regs) -- no more regs to save
          = ([], slot:stack, n `plusW` 1, assigs, regs)
       save slot (to_save, stack, n, assigs, regs)
          = case slot of
               Occupied ->  (to_save, Occupied:stack, n `plusW` 1, assigs, regs)
               Empty
                 | Just (stack', r, to_save') <-
                       select_save to_save (slot:stack)
                 -> let assig = CmmStore (CmmStackSlot Old n')
                                         (CmmReg (CmmLocal r))
                        n' = n `plusW` 1
                   in
                        (to_save', stack', n', assig : assigs, (r,(r,n')):regs)

                 | otherwise
                 -> (to_save, slot:stack, n `plusW` 1, assigs, regs)

       -- we should do better here: right now we'll fit the smallest first,
       -- but it would make more sense to fit the biggest first.
       select_save :: [LocalReg] -> [StackSlot]
                   -> Maybe ([StackSlot], LocalReg, [LocalReg])
       select_save regs stack = go regs []
         where go []     _no_fit = Nothing
               go (r:rs) no_fit
                 | Just rest <- dropEmpty words stack
                 = Just (replicate words Occupied ++ rest, r, rs++no_fit)
                 | otherwise
                 = go rs (r:no_fit)
                 where words = localRegWords r

       -- fill in empty slots as much as possible
       (still_to_save, save_stack, n, save_assigs, save_regs)
          = foldr save (to_save, [], 0, [], []) stack

       -- push any remaining live vars on the stack
       (push_sp, push_assigs, push_regs)
          = foldr push (n, [], []) still_to_save
          where
              push r (n, assigs, regs)
                = (n', assig : assigs, (r,(r,n')) : regs)
                where
                  n' = n + localRegBytes r
                  assig = CmmStore (CmmStackSlot Old n')
                                   (CmmReg (CmmLocal r))

       trim_sp
          | not (null push_regs) = push_sp
          | otherwise
          = n `plusW` (- length (takeWhile isEmpty save_stack))

       final_regs = regs1 `addListToUFM` push_regs
                          `addListToUFM` save_regs

   in
  -- XXX should be an assert
   if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else

   if (trim_sp .&. (wORD_SIZE - 1)) /= 0  then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else

   ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
   , push_assigs ++ save_assigs )


-- -----------------------------------------------------------------------------
-- Manifesting Sp

-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp.  The
-- block looks like this:
--
--    middle_pre       -- the middle nodes
--    Sp = Sp + sp_off -- Sp adjustment goes here
--    last             -- the last node
--
-- And we have some extra blocks too (that don't contain Sp adjustments)
--
-- The adjustment for middle_pre will be different from that for
-- middle_post, because the Sp adjustment intervenes.
--
manifestSp
   :: DynFlags
   -> BlockEnv StackMap  -- StackMaps for other blocks
   -> StackMap           -- StackMap for this block
   -> ByteOff            -- Sp on entry to the block
   -> ByteOff            -- SpHigh
   -> CmmNode C O        -- first node
   -> [CmmNode O O]      -- middle
   -> ByteOff            -- sp_off
   -> CmmNode O C        -- last node
   -> [CmmBlock]         -- new blocks
   -> [CmmBlock]         -- final blocks with Sp manifest

manifestSp dflags stackmaps stack0 sp0 sp_high
           first middle_pre sp_off last fixup_blocks
  = final_block : fixup_blocks'
  where
    area_off = getAreaOff stackmaps

    -- The middle nodes see Sp *before* the Sp adjustment; the last node
    -- and the fixup blocks see Sp *after* it, hence two translations.
    adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
    adj_pre_sp  = mapExpDeep (areaToSp dflags sp0            sp_high area_off)
    adj_post_sp = mapExpDeep (areaToSp dflags (sp0 - sp_off) sp_high area_off)

    -- Drop redundant stack stores, rewrite stack slots to Sp offsets,
    -- then append the Sp adjustment (if any) at the end of the middle.
    final_middle = maybeAddSpAdj dflags sp_off $
                   blockFromList $
                   map adj_pre_sp $
                   elimStackStores stack0 stackmaps area_off $
                   middle_pre

    final_last    = optStackCheck (adj_post_sp last)

    final_block   = blockJoin first final_middle final_last

    -- Fixup blocks contain no Sp adjustment, so only the post-Sp
    -- translation applies to their middle nodes.
    fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks


-- | Map a stack 'Area' to its byte offset from the stack base.  The Old
-- area is at offset 0; a Young area for label @l@ is computed from the
-- 'StackMap' recorded for @l@ (@sm_sp - sm_args@).
getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc)
getAreaOff _ Old = 0
getAreaOff stackmaps (Young l)
  | Just sm <- mapLookup l stackmaps = sm_sp sm - sm_args sm
  | otherwise                        = pprPanic "getAreaOff" (ppr l)


-- | Append an explicit @Sp = Sp + sp_off@ assignment to the end of a
-- block.  A zero adjustment leaves the block unchanged.
maybeAddSpAdj :: DynFlags -> ByteOff -> Block CmmNode O O -> Block CmmNode O O
maybeAddSpAdj _      0      block = block
maybeAddSpAdj dflags sp_off block
   = block `blockSnoc` CmmAssign spReg (cmmOffset dflags (CmmReg spReg) sp_off)


{-
Sp(L) is the Sp offset on entry to block L relative to the base of the
OLD area.

SpArgs(L) is the size of the young area for L, i.e. the number of
arguments.

 - in block L, each reference to [old + N] turns into
   [Sp + Sp(L) - N]

 - in block L, each reference to [young(L') + N] turns into
   [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]

 - be careful with the last node of each block: Sp has already been adjusted
   to be Sp + Sp(L) - Sp(L')
-}

-- | Rewrite abstract stack addressing into concrete Sp-relative form:
--
--   * a 'CmmStackSlot' becomes an offset from Sp, using the current Sp
--     position and the area's base offset;
--   * the 'CmmHighStackMark' literal becomes the stack high-water mark;
--   * a stack check of the shape @Sp - 0 < SpLim@ is replaced by a
--     constant zero (see Note [null stack check]);
--   * every other expression is left untouched.
areaToSp :: DynFlags -> ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
areaToSp dflags sp_old _sp_hwm area_off (CmmStackSlot area n) =
  cmmOffset dflags (CmmReg spReg) (sp_old - area_off area - n)
areaToSp dflags _ sp_hwm _ (CmmLit CmmHighStackMark) = mkIntExpr dflags sp_hwm
areaToSp dflags _ _ _ (CmmMachOp (MO_U_Lt _)  -- Note [null stack check]
                          [CmmMachOp (MO_Sub _)
                                  [ CmmReg (CmmGlobal Sp)
                                  , CmmLit (CmmInt 0 _)],
                           CmmReg (CmmGlobal SpLim)]) = zeroExpr dflags
areaToSp _ _ _ _ other = other
-- -----------------------------------------------------------------------------
-- Note [null stack check]
--
-- If the high-water Sp is zero, then we end up with
--
--   if (Sp - 0 < SpLim) then .. else ..
--
-- and possibly some dead code for the failure case.  Optimising this
-- away depends on knowing that SpLim <= Sp, so it is really the job
-- of the stack layout algorithm, hence we do it now.  This is also
-- convenient because control-flow optimisation later will drop the
-- dead code.

-- | Turn a conditional branch whose condition is the constant 0 (the
-- degenerate stack check, see Note [null stack check]) into an
-- unconditional branch to the false successor.
optStackCheck :: CmmNode O C -> CmmNode O C
optStackCheck (CmmCondBranch (CmmLit (CmmInt 0 _)) _true false)
  = CmmBranch false
optStackCheck other
  = other


-- -----------------------------------------------------------------------------

-- | Eliminate stores of the form
--
--    Sp[area+n] = r
--
-- when we know that r is already in the same slot as Sp[area+n].  We
-- could do this in a later optimisation pass, but that would involve
-- a separate analysis and we already have the information to hand
-- here.  It helps clean up some extra stack stores in common cases.
--
-- Note that we may have to modify the StackMap as we walk through the
-- code using procMiddle, since an assignment to a variable in the
-- StackMap will invalidate its mapping there.
--
elimStackStores :: StackMap
                -> BlockEnv StackMap
                -> (Area -> ByteOff)
                -> [CmmNode O O]
                -> [CmmNode O O]
elimStackStores stackmap stackmaps area_off nodes
  = go stackmap nodes
  where
    go _stackmap [] = []
    go stackmap (n:ns)
     = case n of
         -- Drop the store when the register is already mapped to this
         -- exact stack slot in the current StackMap.
         CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
            | Just (_,off) <- lookupUFM (sm_regs stackmap) r
            , area_off area + m == off
            -> -- pprTrace "eliminated a node!" (ppr r) $
               go stackmap ns
         -- Otherwise keep the node, updating the StackMap since an
         -- assignment may invalidate a register's slot mapping.
         _otherwise
            -> n : go (procMiddle stackmaps n stackmap) ns


-- -----------------------------------------------------------------------------
-- Update info tables to include stack liveness


-- | Patch the info tables of a 'CmmProc' with the stack liveness
-- computed during layout: every 'StackRep' info table gets the
-- liveness derived from the corresponding block's 'StackMap'.
-- Declarations that are not procs are returned unchanged.
setInfoTableStackMap :: BlockEnv StackMap -> CmmDecl -> CmmDecl
setInfoTableStackMap stackmaps (CmmProc top_info@TopInfo{..} l g)
  = CmmProc top_info{ info_tbls = mapMapWithKey fix_info info_tbls } l g
  where
    -- Only StackRep info tables are rewritten; others pass through.
    fix_info lbl info_tbl@CmmInfoTable{ cit_rep = StackRep _ } =
       info_tbl { cit_rep = StackRep (get_liveness lbl) }
    fix_info _ other = other

    get_liveness :: BlockId -> Liveness
    get_liveness lbl
      = case mapLookup lbl stackmaps of
          Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl <+> ppr info_tbls)
          Just sm -> stackMapToLiveness sm

setInfoTableStackMap _ d = d


-- | Derive an info-table 'Liveness' from a 'StackMap': one entry per
-- stack word between the frame's return offset and the start of its
-- argument area.  Words occupied by a GC-pointer register are recorded
-- as False; every other word defaults to True.  The per-word list is
-- reversed before being returned.
stackMapToLiveness :: StackMap -> Liveness
stackMapToLiveness StackMap{..} =
   reverse $ Array.elems $
        accumArray (\_ x -> x) True (toWords sm_ret_off + 1,
                                     toWords (sm_sp - sm_args)) live_words
   where
     live_words =  [ (toWords off, False)
                   | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ]


-- -----------------------------------------------------------------------------
-- Lowering safe foreign calls

{-
Note [lower safe foreign calls]

We start with

   Sp[young(L1)] = L1
 ,-----------------------
 | r1 = foo(x,y,z) returns to L1
 '-----------------------
 L1:
   R1 = r1 -- copyIn, inserted by mkSafeCall
   ...

the stack layout algorithm will arrange to save and reload everything
live across the call.  Our job now is to expand the call so we get

   Sp[young(L1)] = L1
 ,-----------------------
 | SAVE_THREAD_STATE()
 | token = suspendThread(BaseReg, interruptible)
 | r = foo(x,y,z)
 | BaseReg = resumeThread(token)
 | LOAD_THREAD_STATE()
 | R1 = r  -- copyOut
 | jump Sp[0]
 '-----------------------
 L1:
   r = R1 -- copyIn, inserted by mkSafeCall
   ...

Note the copyOut, which saves the results in the places that L1 is
expecting them (see Note [safe foreign call convention]).
-}

-- | Expand a block ending in a safe 'CmmForeignCall' into the
-- suspend/call/resume sequence described in
-- Note [lower safe foreign calls].  Blocks not ending in a safe
-- foreign call are returned unchanged.
lowerSafeForeignCall :: DynFlags -> CmmBlock -> UniqSM CmmBlock
lowerSafeForeignCall dflags block
  | (entry, middle, CmmForeignCall { .. }) <- blockSplit block
  = do
    -- Both 'id' and 'new_base' are KindNonPtr because they're
    -- RTS-only objects and are not subject to garbage collection
    id <- newTemp (bWord dflags)
    new_base <- newTemp (cmmRegType dflags (CmmGlobal BaseReg))
    let (caller_save, caller_load) = callerSaveVolatileRegs dflags
    load_tso <- newTemp (gcWord dflags)
    load_stack <- newTemp (gcWord dflags)
    let suspend = saveThreadState dflags <*>
                  caller_save <*>
                  mkMiddle (callSuspendThread dflags id intrbl)
        midCall = mkUnsafeCall tgt res args
        resume  = mkMiddle (callResumeThread new_base id) <*>
                  -- Assign the result to BaseReg: we
                  -- might now have a different Capability!
                  mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
                  caller_load <*>
                  loadThreadState dflags load_tso load_stack

        (ret_args, regs, copyout) = copyOutOflow dflags NativeReturn Jump (Young succ)
                                           (map (CmmReg . CmmLocal) res)
                                           updfr (0, [])

        -- NB. after resumeThread returns, the top-of-stack probably contains
        -- the stack frame for succ, but it might not: if the current thread
        -- received an exception during the call, then the stack might be
        -- different.  Hence we continue by jumping to the top stack frame,
        -- not by jumping to succ.
        jump = CmmCall { cml_target    = CmmLoad (CmmReg spReg) (bWord dflags)
                       , cml_cont      = Just succ
                       , cml_args_regs = regs
                       , cml_args      = widthInBytes (wordWidth dflags)
                       , cml_ret_args  = ret_args
                       , cml_ret_off   = updfr }

    graph' <- lgraphOfAGraph $ suspend <*>
                               midCall <*>
                               resume  <*>
                               copyout <*>
                               mkLast jump

    case toBlockList graph' of
      [one] -> let (_, middle', last) = blockSplit one
               in return (blockJoin entry (middle `blockAppend` middle') last)
      _ -> panic "lowerSafeForeignCall0"

  -- Block doesn't end in a safe foreign call:
  | otherwise = return block


-- | A code label in the RTS package, as a Cmm expression.
foreignLbl :: FastString -> CmmExpr
foreignLbl = CmmLit . CmmLabel . mkCmmCodeLabel rtsPackageId

-- | Allocate a fresh local register of the given type.
newTemp :: CmmType -> UniqSM LocalReg
newTemp rep = do
  uniq <- getUniqueM
  return (LocalReg uniq rep)

-- | An unsafe foreign call to the RTS @suspendThread@ function,
-- binding the returned token to @id@.  @intrbl@ selects whether the
-- suspended call is interruptible.
callSuspendThread :: DynFlags -> LocalReg -> Bool -> CmmNode O O
callSuspendThread dflags id intrbl =
  CmmUnsafeForeignCall
       (ForeignTarget (foreignLbl (fsLit "suspendThread"))
             (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
       [id] [CmmReg (CmmGlobal BaseReg), mkIntExpr dflags (fromEnum intrbl)]

-- | An unsafe foreign call to the RTS @resumeThread@ function,
-- binding the returned BaseReg value to @new_base@.
callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
callResumeThread new_base id =
  CmmUnsafeForeignCall target [new_base] [CmmReg (CmmLocal id)]
  where
    target = ForeignTarget (foreignLbl (fsLit "resumeThread"))
                 (ForeignConvention CCallConv [AddrHint] [AddrHint])

-- -----------------------------------------------------------------------------

-- | Advance a byte offset by a number of machine words.
plusW :: ByteOff -> WordOff -> ByteOff
plusW off nwords = off + nwords * wORD_SIZE

-- | Drop exactly @n@ leading 'Empty' slots from a stack layout, or
-- Nothing if fewer than @n@ of the leading slots are 'Empty'.
dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
dropEmpty 0 ss           = Just ss
dropEmpty n (Empty : ss) = dropEmpty (n-1) ss
dropEmpty _ _            = Nothing
-- | Is this stack slot unoccupied?
isEmpty :: StackSlot -> Bool
isEmpty slot = case slot of
  Empty -> True
  _     -> False
-- | Stack space taken by a local register, rounded up to a whole
-- number of machine words.
localRegBytes :: LocalReg -> ByteOff
localRegBytes r = roundUpToWords (widthInBytes (typeWidth (localRegType r)))

-- | Stack space taken by a local register, in machine words.
localRegWords :: LocalReg -> WordOff
localRegWords r = toWords (localRegBytes r)

-- | Convert a byte offset to a word offset (truncating division).
toWords :: ByteOff -> WordOff
toWords nbytes = nbytes `quot` wORD_SIZE


-- | One assignment per register recorded in the StackMap, reloading it
-- from its slot in the Old area.
insertReloads :: StackMap -> [CmmNode O O]
insertReloads stackmap = map reload (stackSlotRegs stackmap)
  where
    reload (r, sp) =
      CmmAssign (CmmLocal r)
                (CmmLoad (CmmStackSlot Old sp) (localRegType r))


-- | All (register, stack offset) pairs recorded in a StackMap.
stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
stackSlotRegs = eltsUFM . sm_regs