CmmLayoutStack.hs 36.6 KB
Newer Older
Simon Marlow's avatar
Simon Marlow committed
1
2
{-# LANGUAGE RecordWildCards, GADTs #-}
module CmmLayoutStack (
3
       cmmLayoutStack, setInfoTableStackMap
Simon Marlow's avatar
Simon Marlow committed
4
5
  ) where

6
7
8
import StgCmmUtils      ( callerSaveVolatileRegs ) -- XXX
import StgCmmForeign    ( saveThreadState, loadThreadState ) -- XXX

Simon Marlow's avatar
Simon Marlow committed
9
10
import Cmm
import BlockId
11
import CLabel
Simon Marlow's avatar
Simon Marlow committed
12
import CmmUtils
13
14
15
import MkGraph
import Module
import ForeignCall
Simon Marlow's avatar
Simon Marlow committed
16
17
18
import CmmLive
import CmmProcPoint
import SMRep
19
import Hoopl
Simon Marlow's avatar
Simon Marlow committed
20
21
22
23
24
25
import Constants
import UniqSupply
import Maybes
import UniqFM
import Util

26
import DynFlags
Simon Marlow's avatar
Simon Marlow committed
27
28
29
30
31
import FastString
import Outputable
import qualified Data.Set as Set
import Control.Monad.Fix
import Data.Array as Array
32
import Data.Bits
33
import Data.List (nub)
Simon Marlow's avatar
Simon Marlow committed
34
import Control.Monad (liftM)
Simon Marlow's avatar
Simon Marlow committed
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53

#include "HsVersions.h"


-- | One word-sized slot in our abstract picture of the stack:
-- either in use ('Occupied') or free ('Empty').
data StackSlot = Occupied | Empty
     -- Occupied: a return address or part of an update frame

instance Outputable StackSlot where
  ppr Occupied = ptext (sLit "XXX")
  ppr Empty    = ptext (sLit "---")

-- All stack locations are expressed as positive byte offsets from the
-- "base", which is defined to be the address above the return address
-- on the stack on entry to this CmmProc.
--
-- Lower addresses have higher StackLocs.
--
type StackLoc = ByteOff
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
{-
 A StackMap describes the stack at any given point.  At a continuation
 it has a particular layout, like this:

         |             | <- base
         |-------------|
         |     ret0    | <- base + 8
         |-------------|
         .  upd frame  . <- base + sm_ret_off
         |-------------|
         |             |
         .    vars     .
         . (live/dead) .
         |             | <- base + sm_sp - sm_args
         |-------------|
         |    ret1     |
         .  ret vals   . <- base + sm_sp    (<--- Sp points here)
         |-------------|

Why do we include the final return address (ret0) in our stack map?  I
have absolutely no idea, but it seems to be done that way consistently
in the rest of the code generator, so I played along here. --SDM

Note that we will be constructing an info table for the continuation
(ret1), which needs to describe the stack down to, but not including,
the update frame (or ret0, if there is no update frame).
-}

Simon Marlow's avatar
Simon Marlow committed
82
83
84
85
86
87
88
89
-- | A StackMap describes the stack layout at a particular program
-- point: where Sp sits relative to the base, how many bytes of
-- arguments the current frame area holds, how much stack is reserved
-- for an update frame, and which LocalRegs already live in stack
-- slots.  See the layout diagram above.
data StackMap = StackMap
 {  sm_sp   :: StackLoc
       -- ^ the offset of Sp relative to the base on entry
       -- to this block.
 ,  sm_args :: ByteOff
       -- ^ the number of bytes of arguments in the area for this block
       -- Defn: the offset of young(L) relative to the base is given by
       -- (sm_sp - sm_args) of the StackMap for block L.
 ,  sm_ret_off :: ByteOff
       -- ^ Number of words of stack that we do not describe with an info
       -- table, because it contains an update frame.
 ,  sm_regs :: UniqFM (LocalReg,StackLoc)
       -- ^ regs on the stack
 }

instance Outputable StackMap where
  ppr StackMap{..} =
     text "Sp = " <> int sm_sp $$
     text "sm_args = " <> int sm_args $$
     text "sm_ret_off = " <> int sm_ret_off $$
     text "sm_regs = " <> ppr (eltsUFM sm_regs)

105
-- | Lay out the stack for one CmmProc: compute liveness, run the
-- per-block 'layout' pass, and then lower any safe foreign calls in
-- the resulting blocks.  Returns the rewritten graph together with a
-- 'StackMap' for every block.
cmmLayoutStack :: DynFlags -> ProcPointSet -> ByteOff -> CmmGraph
               -> UniqSM (CmmGraph, BlockEnv StackMap)
cmmLayoutStack dflags procpoints entry_args
               graph0@(CmmGraph { g_entry = entry })
  = do
    -- pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()

    -- We need liveness info.  We could do removeDeadAssignments at
    -- the same time, but it buys nothing over doing cmmSink later,
    -- and costs a lot more than just cmmLiveness.
    -- (graph, liveness) <- removeDeadAssignments graph0
    let (graph, liveness) = (graph0, cmmLiveness graph0)

    -- pprTrace "liveness" (ppr liveness) $ return ()
    let blocks = postorderDfs graph

    -- 'mfix' ties the knot: 'layout' is given the *final* stack maps
    -- and Sp high-water mark that it is itself in the process of
    -- producing.  The lazy pattern (~) is essential: forcing the
    -- recursive arguments too early would loop.
    (final_stackmaps, _final_high_sp, new_blocks) <-
          mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
            layout procpoints liveness entry entry_args
                   rec_stackmaps rec_high_sp blocks

    new_blocks' <- mapM (lowerSafeForeignCall dflags) new_blocks

    -- pprTrace ("Sp HWM") (ppr _final_high_sp) $ return ()
    return (ofBlockList entry new_blocks', final_stackmaps)
Simon Marlow's avatar
Simon Marlow committed
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171



-- | The main worker: walk the blocks in postorder-DFS order,
-- threading an accumulating 'StackMap' environment and Sp high-water
-- mark.  Note that the @final_stackmaps@ / @final_hwm@ arguments are
-- the knot-tied *outputs* of this very pass (see 'cmmLayoutStack'),
-- so they must only be consumed lazily.
layout :: BlockSet                      -- proc points
       -> BlockEnv CmmLive              -- liveness
       -> BlockId                       -- entry
       -> ByteOff                       -- stack args on entry

       -> BlockEnv StackMap             -- [final] stack maps
       -> ByteOff                       -- [final] Sp high water mark

       -> [CmmBlock]                    -- [in] blocks

       -> UniqSM
          ( BlockEnv StackMap           -- [out] stack maps
          , ByteOff                     -- [out] Sp high water mark
          , [CmmBlock]                  -- [out] new blocks
          )

layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
  = go blocks init_stackmap entry_args []
  where
    (updfr, cont_info)  = collectContInfo blocks

    -- The entry block starts with entry_args bytes of arguments on
    -- the stack and the update-frame reservation computed above.
    init_stackmap = mapSingleton entry StackMap{ sm_sp   = entry_args
                                               , sm_args = entry_args
                                               , sm_ret_off = updfr
                                               , sm_regs = emptyUFM
                                               }

    go [] acc_stackmaps acc_hwm acc_blocks
      = return (acc_stackmaps, acc_hwm, acc_blocks)

    go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
      = do
       let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0

       -- Blocks are visited in DFS order, so a block's StackMap has
       -- always been established by a predecessor before we get here.
       let stack0@StackMap { sm_sp = sp0 }
               = mapFindWithDefault
                     (pprPanic "no stack map for" (ppr entry_lbl))
                     entry_lbl acc_stackmaps

       -- pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()

       -- (a) Update the stack map to include the effects of
       --     assignments in this block
       let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0

       -- (b) Insert assignments to reload all the live variables if this
       --     block is a proc point
       let middle1 = if entry_lbl `setMember` procpoints
                        then foldr blockCons middle0 (insertReloads stack0)
                        else middle0

       -- (c) Look at the last node and if we are making a call or
       --     jumping to a proc point, we must save the live
       --     variables, adjust Sp, and construct the StackMaps for
       --     each of the successor blocks.  See handleLastNode for
       --     details.
       (middle2, sp_off, last1, fixup_blocks, out)
           <- handleLastNode procpoints liveness cont_info
                             acc_stackmaps stack1 middle0 last0

       -- pprTrace "layout(out)" (ppr out) $ return ()

       -- (d) Manifest Sp: run over the nodes in the block and replace
       --     CmmStackSlot with CmmLoad from Sp with a concrete offset.
       --
       -- our block:
       --    middle1          -- the original middle nodes
       --    middle2          -- live variable saves from handleLastNode
       --    Sp = Sp + sp_off -- Sp adjustment goes here
       --    last1            -- the last node
       --
       let middle_pre = blockToList $ foldl blockSnoc middle1 middle2

           sp_high = final_hwm - entry_args
              -- The stack check value is adjusted by the Sp offset on
              -- entry to the proc, which is entry_args.  We are
              -- assuming that we only do a stack check at the
              -- beginning of a proc, and we don't modify Sp before the
              -- check.

           final_blocks = manifestSp final_stackmaps stack0 sp0 sp_high entry0
                              middle_pre sp_off last1 fixup_blocks

           acc_stackmaps' = mapUnion acc_stackmaps out

           -- If this block jumps to the GC, then we do not take its
           -- stack usage into account for the high-water mark.
           -- Otherwise, if the only stack usage is in the stack-check
           -- failure block itself, we will do a redundant stack
           -- check.  The stack has a buffer designed to accommodate
           -- the largest amount of stack needed for calling the GC.
           --
           this_sp_hwm | isGcJump last0 = 0
                       | otherwise      = sp0 - sp_off

           hwm' = maximum (acc_hwm : this_sp_hwm : map sm_sp (mapElems out))

       go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks)
231
232


233
234
235
236
237
238
239
240
-- -----------------------------------------------------------------------------

-- Not foolproof, but GCFun is the culprit we most want to catch
isGcJump :: CmmNode O C -> Bool
isGcJump (CmmCall { cml_target = CmmReg (CmmGlobal greg) })
  = greg `elem` [GCFun, GCEnter1]
isGcJump _ = False

Simon Marlow's avatar
Simon Marlow committed
241
-- -----------------------------------------------------------------------------
242

Simon Marlow's avatar
Simon Marlow committed
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
-- This doesn't seem right somehow.  We need to find out whether this
-- proc will push some update frame material at some point, so that we
-- can avoid using that area of the stack for spilling.  The
-- updfr_space field of the CmmProc *should* tell us, but it doesn't
-- (I think maybe it gets filled in later when we do proc-point
-- splitting).
--
-- So we'll just take the max of all the cml_ret_offs.  This could be
-- unnecessarily pessimistic, but probably not in the code we
-- generate.

-- | Scan all the blocks' last nodes, collecting
--   (a) the largest cml_ret_off seen (the update-frame reservation
--       for this proc; see the comment above), and
--   (b) a map from each call continuation label to the number of
--       bytes of return values it receives.
-- NOTE(review): 'maximum' is partial — assumes the block list is
-- non-empty (every proc has at least an entry block); confirm callers.
collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff)
collectContInfo blocks
  = (maximum ret_offs, mapFromList (catMaybes mb_argss))
 where
  (mb_argss, ret_offs) = mapAndUnzip get_cont blocks

  get_cont b =
     case lastNode b of
        CmmCall { cml_cont = Just l, .. }
           -> (Just (l, cml_ret_args), cml_ret_off)
        CmmForeignCall { .. }
           -> (Just (succ, 0), updfr) -- ??
        _other -> (Nothing, 0)


Simon Marlow's avatar
Simon Marlow committed
269
270
-- -----------------------------------------------------------------------------
-- Updating the StackMap from middle nodes
Simon Marlow's avatar
Simon Marlow committed
271

Simon Marlow's avatar
Simon Marlow committed
272
-- Look for loads from stack slots, and update the StackMap.  This is
273
-- purely for optimisation reasons, so that we can avoid saving a
Simon Marlow's avatar
Simon Marlow committed
274
275
276
277
278
279
280
-- variable back to a different stack slot if it is already on the
-- stack.
--
-- This happens a lot: for example when function arguments are passed
-- on the stack and need to be immediately saved across a call, we
-- want to just leave them where they are on the stack.
--
Simon Marlow's avatar
Simon Marlow committed
281
282
283
-- | Update the StackMap as we pass over one middle node.  When a
-- LocalReg is assigned directly from a stack slot, record that the
-- register already lives in that slot (so we can avoid re-saving it
-- later); any other assignment to a LocalReg invalidates whatever
-- slot we had recorded for it.  All other nodes leave the map alone.
procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
procMiddle stackmaps node sm
  = case node of
     CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)
       -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
        where loc = getStackLoc area off stackmaps
     CmmAssign (CmmLocal r) _other
       -> sm { sm_regs = delFromUFM (sm_regs sm) r }
     _other
       -> sm

-- | Convert an offset into an abstract stack 'Area' into a concrete
-- 'StackLoc' (a byte offset from the base).  Offsets into the 'Old'
-- area are already base-relative; an offset into a 'Young' area is
-- relative to the young end of that continuation's frame, which we
-- read off the continuation's 'StackMap'.
getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc
getStackLoc Old       n _    = n
getStackLoc (Young l) n maps
  | Just sm <- mapLookup l maps = sm_sp sm - sm_args sm + n
  | otherwise                   = pprPanic "getStackLoc" (ppr l)

Simon Marlow's avatar
Simon Marlow committed
299

Simon Marlow's avatar
Simon Marlow committed
300
301
302
-- -----------------------------------------------------------------------------
-- Handling stack allocation for a last node

303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
-- We take a single last node and turn it into:
--
--    C1 (some statements)
--    Sp = Sp + N
--    C2 (some more statements)
--    call f()          -- the actual last node
--
-- plus possibly some more blocks (we may have to add some fixup code
-- between the last node and the continuation).
--
-- C1: is the code for saving the variables across this last node onto
-- the stack, if the continuation is a call or jumps to a proc point.
--
-- C2: if the last node is a safe foreign call, we have to inject some
-- extra code that goes *after* the Sp adjustment.

Simon Marlow's avatar
Simon Marlow committed
319
320
321
-- | Deal with the last node of a block: decide what must be saved to
-- the stack, how Sp must be adjusted, and what the StackMap of each
-- successor is.  See the block comment above for the shape of the
-- generated code.
handleLastNode
   :: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
   -> BlockEnv StackMap -> StackMap
   -> Block CmmNode O O
   -> CmmNode O C
   -> UniqSM
      ( [CmmNode O O]      -- nodes to go *before* the Sp adjustment
      , ByteOff            -- amount to adjust Sp
      , CmmNode O C        -- new last node
      , [CmmBlock]         -- new blocks
      , BlockEnv StackMap  -- stackmaps for the continuations
      )

handleLastNode procpoints liveness cont_info stackmaps
               stack0@StackMap { sm_sp = sp0 } middle last
 = case last of
    --  At each return / tail call,
    --  adjust Sp to point to the last argument pushed, which
    --  is cml_args, after popping any other junk from the stack.
    CmmCall{ cml_cont = Nothing, .. } -> do
      let sp_off = sp0 - cml_args
      return ([], sp_off, last, [], mapEmpty)

    --  At each CmmCall with a continuation:
    CmmCall{ cml_cont = Just cont_lbl, .. } ->
       return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off

    CmmForeignCall{ succ = cont_lbl, .. } -> do
       return $ lastCall cont_lbl wORD_SIZE wORD_SIZE (sm_ret_off stack0)
            -- one word each for args and results: the return address

    CmmBranch{..}     ->  handleBranches
    CmmCondBranch{..} ->  handleBranches
    CmmSwitch{..}     ->  handleBranches

  where
     -- Calls and ForeignCalls are handled the same way:
     lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
              -> ( [CmmNode O O]
                 , ByteOff
                 , CmmNode O C
                 , [CmmBlock]
                 , BlockEnv StackMap
                 )
     lastCall lbl cml_args cml_ret_args cml_ret_off
      =  ( assignments
         , spOffsetForCall sp0 cont_stack cml_args
         , last
         , [] -- no new blocks
         , mapSingleton lbl cont_stack )
      where
         (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off

     -- Compute the save assignments and the continuation's StackMap
     -- for a call returning to lbl.
     prepareStack lbl cml_ret_args cml_ret_off
       | Just cont_stack <- mapLookup lbl stackmaps
             -- If we have already seen this continuation before, then
             -- we just have to make the stack look the same:
       = (fixupStack stack0 cont_stack, cont_stack)
             -- Otherwise, we have to allocate the stack frame
       | otherwise
       = (save_assignments, new_cont_stack)
       where
        (new_cont_stack, save_assignments)
           = setupStackFrame lbl liveness cml_ret_off cml_ret_args stack0

     -- For other last nodes (branches), if any of the targets is a
     -- proc point, we have to set up the stack to match what the proc
     -- point is expecting.
     --
     handleBranches :: UniqSM ( [CmmNode O O]
                              , ByteOff
                              , CmmNode O C
                              , [CmmBlock]
                              , BlockEnv StackMap )

     handleBranches
         -- Note [diamond proc point]
       | Just l <- futureContinuation middle
       , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
       = do
         let cont_args = mapFindWithDefault 0 l cont_info
             (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0)
             out = mapFromList [ (l', cont_stack)
                               | l' <- successors last ]
         return ( assigs
                , spOffsetForCall sp0 cont_stack wORD_SIZE
                , last
                , []
                , out)

       | otherwise = do
          pps <- mapM handleBranch (successors last)
          let lbl_map :: LabelMap Label
              lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
              fix_lbl l = mapFindWithDefault l l lbl_map
          return ( []
                 , 0
                 , mapSuccessors fix_lbl last
                 , concat [ blk | (_,_,_,blk) <- pps ]
                 , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )

     -- For each successor of this block
     handleBranch :: BlockId -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
     handleBranch l
        --   (a) if the successor already has a stackmap, we need to
        --       shuffle the current stack to make it look the same.
        --       We have to insert a new block to make this happen.
        | Just stack2 <- mapLookup l stackmaps
        = do
             let assigs = fixupStack stack0 stack2
             (tmp_lbl, block) <- makeFixupBlock sp0 l stack2 assigs
             return (l, tmp_lbl, stack2, block)

        --   (b) if the successor is a proc point, save everything
        --       on the stack.
        | l `setMember` procpoints
        = do
             let cont_args = mapFindWithDefault 0 l cont_info
                 (stack2, assigs) =
                      --pprTrace "first visit to proc point"
                      --             (ppr l <+> ppr stack1) $
                      setupStackFrame l liveness (sm_ret_off stack0)
                                                       cont_args stack0
             --
             (tmp_lbl, block) <- makeFixupBlock sp0 l stack2 assigs
             return (l, tmp_lbl, stack2, block)

        --   (c) otherwise, the current StackMap is the StackMap for
        --       the continuation.  But we must remember to remove any
        --       variables from the StackMap that are *not* live at
        --       the destination, because this StackMap might be used
        --       by fixupStack if this is a join point.
        | otherwise = return (l, l, stack1, [])
        where live = mapFindWithDefault (panic "handleBranch") l liveness
              stack1 = stack0 { sm_regs = filterUFM is_live (sm_regs stack0) }
              is_live (r,_) = r `elemRegSet` live


-- | Build a trampoline block that performs the given fixup
-- assignments and Sp adjustment before jumping to @l@.  If no work is
-- needed (no assignments and Sp already at the expected value) we
-- return @l@ itself and no new blocks.
makeFixupBlock :: ByteOff -> Label -> StackMap -> [CmmNode O O] -> UniqSM (Label, [CmmBlock])
makeFixupBlock sp0 l stack assigs
  | nothing_to_do = return (l, [])
  | otherwise     = do
      tmp_lbl <- liftM mkBlockId getUniqueM
      let sp_off  = sp0 - sm_sp stack
          body    = maybeAddSpAdj sp_off (blockFromList assigs)
          new_blk = blockJoin (CmmEntry tmp_lbl) body (CmmBranch l)
      return (tmp_lbl, [new_blk])
  where
    nothing_to_do = null assigs && sp0 == sm_sp stack
Simon Marlow's avatar
Simon Marlow committed
469
470
471
472
473
474
475
476
477
478


-- | The Sp adjustment needed to get from the current stack pointer
-- to the Sp expected at the continuation.  Sp is currently at
-- @current_sp@ and must end up at
-- @sm_sp cont_stack - sm_args cont_stack + args@ (the young end of
-- the continuation's frame plus the bytes of call arguments), so the
-- offset is the difference of the two.
spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff
spOffsetForCall current_sp cont_stack args =
    current_sp - target_sp
  where
    target_sp = sm_sp cont_stack - sm_args cont_stack + args
Simon Marlow's avatar
Simon Marlow committed
479
480
481
482
483
484
485


-- | create a sequence of assignments to establish the new StackMap,
-- given the old StackMap.
-- For each register the new map wants in a stack slot we emit a
-- store into that slot — unless the old map already records the
-- register in the very same slot, in which case no move is needed.
fixupStack :: StackMap -> StackMap -> [CmmNode O O]
fixupStack old_stack new_stack = concatMap move new_locs
 where
     old_map  = sm_regs old_stack
     new_locs = stackSlotRegs new_stack

     move (r,n)
       | Just (_,m) <- lookupUFM old_map r, n == m = []
       | otherwise = [CmmStore (CmmStackSlot Old n)
                               (CmmReg (CmmLocal r))]

Simon Marlow's avatar
Simon Marlow committed
494
495
496
497
498
499
500
501
502
503
504


-- | Allocate the stack frame for a call continuation: save the live
-- variables (via 'allocate') and build the continuation's StackMap,
-- accounting for the return values that will be on the stack at the
-- return point.
setupStackFrame
             :: BlockId                 -- label of continuation
             -> BlockEnv CmmLive        -- liveness
             -> ByteOff      -- updfr
             -> ByteOff      -- bytes of return values on stack
             -> StackMap     -- current StackMap
             -> (StackMap, [CmmNode O O])

setupStackFrame lbl liveness updfr_off ret_args stack0
  = (cont_stack, assignments)
  where
      -- get the set of LocalRegs live in the continuation
      live = mapFindWithDefault Set.empty lbl liveness

      -- the stack from the base to updfr_off is off-limits.
      -- our new stack frame contains:
      --   * saved live variables
      --   * the return address [young(C) + 8]
      --   * the args for the call,
      --     which are replaced by the return values at the return
      --     point.

      -- everything up to updfr_off is off-limits
      -- stack1 contains updfr_off, plus everything we need to save
      (stack1, assignments) = allocate updfr_off live stack0

      -- And the Sp at the continuation is:
      --   sm_sp stack1 + ret_args
      cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args
                         , sm_args = ret_args
                         , sm_ret_off = updfr_off
                         }


530
531
532
533
534
535
-- -----------------------------------------------------------------------------
-- Note [diamond proc point]
--
-- This special case looks for the pattern we get from a typical
-- tagged case expression:
--
Simon Marlow's avatar
Simon Marlow committed
536
537
--    Sp[young(L1)] = L1
--    if (R1 & 7) != 0 goto L1 else goto L2
538
--  L2:
Simon Marlow's avatar
Simon Marlow committed
539
--    call [R1] returns to L1
540
--  L1: live: {y}
Simon Marlow's avatar
Simon Marlow committed
541
--    x = R1
542
543
544
--
-- If we let the generic case handle this, we get
--
Simon Marlow's avatar
Simon Marlow committed
545
546
--    Sp[-16] = L1
--    if (R1 & 7) != 0 goto L1a else goto L2
547
--  L2:
Simon Marlow's avatar
Simon Marlow committed
548
549
550
--    Sp[-8] = y
--    Sp = Sp - 16
--    call [R1] returns to L1
551
--  L1a:
Simon Marlow's avatar
Simon Marlow committed
552
553
554
--    Sp[-8] = y
--    Sp = Sp - 16
--    goto L1
555
--  L1:
Simon Marlow's avatar
Simon Marlow committed
556
--    x = R1
557
558
--
-- The code for saving the live vars is duplicated in each branch, and
Simon Marlow's avatar
Simon Marlow committed
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
-- furthermore there is an extra jump in the fast path (assuming L1 is
-- a proc point, which it probably is if there is a heap check).
--
-- So to fix this we want to set up the stack frame before the
-- conditional jump.  How do we know when to do this, and when it is
-- safe?  The basic idea is, when we see the assignment
-- 
--   Sp[young(L)] = L
-- 
-- we know that
--   * we are definitely heading for L
--   * there can be no more reads from another stack area, because young(L)
--     overlaps with it.
--
-- We don't necessarily know that everything live at L is live now
-- (some might be assigned between here and the jump to L).  So we
-- simplify and only do the optimisation when we see
576
577
578
579
580
--
--   (1) a block containing an assignment of a return address L
--   (2) ending in a branch where one (and only) continuation goes to L,
--       and no other continuations go to proc points.
--
Simon Marlow's avatar
Simon Marlow committed
581
582
-- then we allocate the stack frame for L at the end of the block,
-- before the branch.
583
584
585
586
587
588
--
-- We could generalise (2), but that would make it a bit more
-- complicated to handle, and this currently catches the common case.

-- | Look through the middle nodes of a block for a store of a return
-- address into a Young stack area, i.e. @Sp[young(L)] = L@.  If one
-- is found, L is the continuation this block is definitely heading
-- for; see Note [diamond proc point].
-- NOTE(review): with multiple such stores, which one wins depends on
-- foldBlockNodesB's traversal order (backward fold, presumably the
-- earliest store ends up returned) — confirm if that case can arise.
futureContinuation :: Block CmmNode O O -> Maybe BlockId
futureContinuation middle = foldBlockNodesB f middle Nothing
   where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
         f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
               = Just l
         f _ r = r

-- -----------------------------------------------------------------------------
-- Saving live registers

-- | Given a set of live registers and a StackMap, save all the registers
-- on the stack and return the new StackMap and the assignments to do
-- the saving.
--
-- | Save every live register that is not already in a stack slot:
-- first fill gaps ('Empty' slots) in the existing frame, oldest
-- slots first, then push whatever remains on top.  Returns the
-- updated StackMap (new sm_sp and sm_regs) and the save assignments.
allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O])
allocate ret_off live stackmap@StackMap{ sm_sp = sp0
                                       , sm_regs = regs0 }
 =
  -- pprTrace "allocate" (ppr live $$ ppr stackmap) $

   -- we only have to save regs that are not already in a slot
   let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
       regs1   = filterUFM (\(r,_) -> elemRegSet r live) regs0
   in

   -- make a map of the stack: one StackSlot per word, from the base
   -- (reserved update-frame words are Occupied) up to sp0.
   let stack = reverse $ Array.elems $
               accumArray (\_ x -> x) Empty (1, toWords (max sp0 ret_off)) $
                 ret_words ++ live_words
            where ret_words =
                   [ (x, Occupied)
                   | x <- [ 1 .. toWords ret_off] ]
                  live_words =
                   [ (toWords x, Occupied)
                   | (r,off) <- eltsUFM regs1,
                     let w = localRegBytes r,
                     x <- [ off, off-wORD_SIZE .. off - w + 1] ]
   in

   -- Pass over the stack: find slots to save all the new live variables,
   -- choosing the oldest slots first (hence a foldr).
   let
       save slot ([], stack, n, assigs, regs) -- no more regs to save
          = ([], slot:stack, n `plusW` 1, assigs, regs)
       save slot (to_save, stack, n, assigs, regs)
          = case slot of
               Occupied ->  (to_save, Occupied:stack, n `plusW` 1, assigs, regs)
               Empty
                 | Just (stack', r, to_save') <-
                       select_save to_save (slot:stack)
                 -> let assig = CmmStore (CmmStackSlot Old n')
                                         (CmmReg (CmmLocal r))
                        n' = n `plusW` 1
                   in
                        (to_save', stack', n', assig : assigs, (r,(r,n')):regs)

                 | otherwise
                 -> (to_save, slot:stack, n `plusW` 1, assigs, regs)

       -- we should do better here: right now we'll fit the smallest first,
       -- but it would make more sense to fit the biggest first.
       select_save :: [LocalReg] -> [StackSlot]
                   -> Maybe ([StackSlot], LocalReg, [LocalReg])
       select_save regs stack = go regs []
         where go []     _no_fit = Nothing
               go (r:rs) no_fit
                 | Just rest <- dropEmpty words stack
                 = Just (replicate words Occupied ++ rest, r, rs++no_fit)
                 | otherwise
                 = go rs (r:no_fit)
                 where words = localRegWords r

       -- fill in empty slots as much as possible
       (still_to_save, save_stack, n, save_assigs, save_regs)
          = foldr save (to_save, [], 0, [], []) stack

       -- push any remaining live vars on the stack
       (push_sp, push_assigs, push_regs)
          = foldr push (n, [], []) still_to_save
          where
              push r (n, assigs, regs)
                = (n', assig : assigs, (r,(r,n')) : regs)
                where
                  n' = n + localRegBytes r
                  assig = CmmStore (CmmStackSlot Old n')
                                   (CmmReg (CmmLocal r))

       -- if nothing was pushed, we can trim any trailing empty slots
       -- off the top of the frame
       trim_sp
          | not (null push_regs) = push_sp
          | otherwise
          = n `plusW` (- length (takeWhile isEmpty save_stack))

       final_regs = regs1 `addListToUFM` push_regs
                          `addListToUFM` save_regs

   in
  -- XXX should be an assert
   if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else

   if (trim_sp .&. (wORD_SIZE - 1)) /= 0  then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else

   ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
   , push_assigs ++ save_assigs )


Simon Marlow's avatar
Simon Marlow committed
692
-- -----------------------------------------------------------------------------
Simon Marlow's avatar
Simon Marlow committed
693
-- Manifesting Sp
Simon Marlow's avatar
Simon Marlow committed
694

Simon Marlow's avatar
Simon Marlow committed
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp.  The
-- block looks like this:
--
--    middle_pre       -- the middle nodes
--    Sp = Sp + sp_off -- Sp adjustment goes here
--    last             -- the last node
--
-- And we have some extra blocks too (that don't contain Sp adjustments)
--
-- The adjustment for middle_pre will be different from that for
-- middle_post, because the Sp adjustment intervenes.
--
manifestSp
   :: BlockEnv StackMap  -- StackMaps for other blocks
   -> StackMap           -- StackMap for this block
   -> ByteOff            -- Sp on entry to the block
   -> ByteOff            -- SpHigh
   -> CmmNode C O        -- first node
   -> [CmmNode O O]      -- middle
   -> ByteOff            -- sp_off
   -> CmmNode O C        -- last node
   -> [CmmBlock]         -- new blocks
   -> [CmmBlock]         -- final blocks with Sp manifest

manifestSp stackmaps stack0 sp0 sp_high
           first middle_pre sp_off last fixup_blocks
  = final_block : fixed_fixups
  where
    young_off = getAreaOff stackmaps

    -- Two rewrites are needed: references before the Sp adjustment
    -- are relative to sp0, those after it to (sp0 - sp_off).
    rewrite_pre, rewrite_post :: CmmNode e x -> CmmNode e x
    rewrite_pre  = mapExpDeep (areaToSp sp0            sp_high young_off)
    rewrite_post = mapExpDeep (areaToSp (sp0 - sp_off) sp_high young_off)

    final_middle = maybeAddSpAdj sp_off
                 . blockFromList
                 . map rewrite_pre
                 . elimStackStores stack0 stackmaps young_off
                 $ middle_pre

    final_last   = optStackCheck (rewrite_post last)

    final_block  = blockJoin first final_middle final_last

    -- The fixup blocks contain no Sp adjustment and run after it,
    -- so they only need the post-adjustment rewrite.
    fixed_fixups = map (mapBlock3' (id, rewrite_post, id)) fixup_blocks
Simon Marlow's avatar
Simon Marlow committed
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756


-- | Byte offset of an 'Area' relative to the base of the old area.
-- The old area itself is at offset 0; the young area for a continuation
-- L sits just above L's argument area, computed from L's StackMap.
getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc)
getAreaOff _ Old = 0
getAreaOff stackmaps (Young l)
  | Just sm <- mapLookup l stackmaps = sm_sp sm - sm_args sm
  | otherwise                        = pprPanic "getAreaOff" (ppr l)


-- | Append an Sp adjustment to the end of the block, unless the
-- adjustment is zero, in which case the block is returned untouched.
maybeAddSpAdj :: ByteOff -> Block CmmNode O O -> Block CmmNode O O
maybeAddSpAdj sp_off block
  | sp_off == 0 = block
  | otherwise   = block `blockSnoc` adjust_sp
  where
    adjust_sp = CmmAssign spReg (cmmOffset (CmmReg spReg) sp_off)


{-
Sp(L) is the Sp offset on entry to block L relative to the base of the
OLD area.

SpArgs(L) is the size of the young area for L, i.e. the number of
arguments.

 - in block L, each reference to [old + N] turns into
   [Sp + Sp(L) - N]

 - in block L, each reference to [young(L') + N] turns into
   [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]

 - be careful with the last node of each block: Sp has already been adjusted
   to be Sp + Sp(L) - Sp(L')
-}

773
774
775
-- | Rewrite one expression during Sp manifestation:
--
--   * a 'CmmStackSlot' becomes an offset from the Sp register;
--   * the 'CmmHighStackMark' placeholder becomes the literal
--     high-water mark;
--   * a stack check of the shape (Sp - 0 < SpLim) is folded to the
--     constant 0, i.e. False (see Note [null stack check]);
--   * anything else is left alone.
areaToSp :: ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
areaToSp sp_old sp_hwm area_off expr =
  case expr of
    CmmStackSlot area n
      -> cmmOffset (CmmReg spReg) (sp_old - area_off area - n)
    CmmLit CmmHighStackMark
      -> CmmLit (mkIntCLit sp_hwm)
    CmmMachOp (MO_U_Lt _)                       -- Note [null stack check]
              [ CmmMachOp (MO_Sub _)
                          [ CmmReg (CmmGlobal Sp)
                          , CmmLit (CmmInt 0 _) ]
              , CmmReg (CmmGlobal SpLim) ]
      -> CmmLit (CmmInt 0 wordWidth)
    other
      -> other

Simon Marlow's avatar
Simon Marlow committed
784
-- -----------------------------------------------------------------------------

-- Note [null stack check]
--
-- If the high-water Sp is zero, then we end up with
--
--   if (Sp - 0 < SpLim) then .. else ..
--
-- and possibly some dead code for the failure case.  Optimising this
-- away depends on knowing that SpLim <= Sp, so it is really the job
-- of the stack layout algorithm, hence we do it now.  This is also
-- convenient because control-flow optimisation later will drop the
-- dead code.

-- | Fold away a conditional branch whose condition is the literal
-- zero (a stack check that can never fail), replacing it with an
-- unconditional branch to the "false" successor.
-- See Note [null stack check].
optStackCheck :: CmmNode O C -> CmmNode O C
optStackCheck node
  | CmmCondBranch (CmmLit (CmmInt 0 _)) _dead live <- node
  = CmmBranch live
  | otherwise
  = node

Simon Marlow's avatar
Simon Marlow committed
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832

-- -----------------------------------------------------------------------------

-- | Eliminate stores of the form
--
--    Sp[area+n] = r
--
-- when we know that r is already in the same slot as Sp[area+n].  We
-- could do this in a later optimisation pass, but that would involve
-- a separate analysis and we already have the information to hand
-- here.  It helps clean up some extra stack stores in common cases.
--
-- Note that we may have to modify the StackMap as we walk through the
-- code using procMiddle, since an assignment to a variable in the
-- StackMap will invalidate its mapping there.
--
elimStackStores :: StackMap
                -> BlockEnv StackMap
                -> (Area -> ByteOff)
                -> [CmmNode O O]
                -> [CmmNode O O]
elimStackStores stackmap0 stackmaps area_off nodes
  = walk stackmap0 nodes
  where
    walk _sm [] = []
    walk sm (node : rest)
      | CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r)) <- node
      , Just (_, off) <- lookupUFM (sm_regs sm) r
      , area_off area + m == off
        -- The register already lives in exactly this slot: the store
        -- is redundant, so drop it.
      = walk sm rest
      | otherwise
        -- Keep the node, and update the StackMap to reflect any
        -- assignment it makes (procMiddle invalidates stale entries).
      = node : walk (procMiddle stackmaps node sm) rest


839
840
841
842
843
-- -----------------------------------------------------------------------------
-- Update info tables to include stack liveness


-- | Rewrite every stack-rep info table in a proc so that it carries the
-- liveness derived from the corresponding block's StackMap.  Non-proc
-- declarations and non-stack info tables pass through unchanged.
setInfoTableStackMap :: BlockEnv StackMap -> CmmDecl -> CmmDecl
setInfoTableStackMap stackmaps (CmmProc top_info@TopInfo{..} l g)
  = CmmProc top_info{ info_tbls = mapMapWithKey fix_info info_tbls } l g
  where
    -- Only StackRep tables are rewritten; everything else is kept.
    fix_info lbl info_tbl@CmmInfoTable{ cit_rep = StackRep _ }
      = info_tbl { cit_rep = StackRep (liveness_for lbl) }
    fix_info _ other = other

    liveness_for :: BlockId -> Liveness
    liveness_for lbl =
      case mapLookup lbl stackmaps of
        Just sm -> stackMapToLiveness sm
        Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl <+> ppr info_tbls)

setInfoTableStackMap _ d = d


860
861
862
863
864
865
866
867
868
869
-- | Build the per-word liveness for a stack frame from its StackMap.
-- Every word between the return address and Sp defaults to True;
-- words holding a register of GC-pointer type are set to False.
-- The result is reversed to match the order Liveness expects.
stackMapToLiveness :: StackMap -> Liveness
stackMapToLiveness StackMap{..} = reverse (Array.elems bitmap)
  where
    lo = toWords sm_ret_off + 1
    hi = toWords (sm_sp - sm_args)
    -- Later entries overwrite the default; only GC pointers matter.
    ptr_slots = [ (toWords off, False)
                | (r, off) <- eltsUFM sm_regs
                , isGcPtrType (localRegType r) ]
    bitmap = accumArray (\_ new -> new) True (lo, hi) ptr_slots


870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
-- -----------------------------------------------------------------------------
-- Lowering safe foreign calls

{-
Note [lower safe foreign calls]

We start with

   Sp[young(L1)] = L1
 ,-----------------------
 | r1 = foo(x,y,z) returns to L1
 '-----------------------
 L1:
   R1 = r1 -- copyIn, inserted by mkSafeCall
   ...

the stack layout algorithm will arrange to save and reload everything
live across the call.  Our job now is to expand the call so we get

   Sp[young(L1)] = L1
 ,-----------------------
 | SAVE_THREAD_STATE()
 | token = suspendThread(BaseReg, interruptible)
 | r = foo(x,y,z)
 | BaseReg = resumeThread(token)
 | LOAD_THREAD_STATE()
 | R1 = r  -- copyOut
 | jump Sp[0]
 '-----------------------
 L1:
   r = R1 -- copyIn, inserted by mkSafeCall
   ...

Note the copyOut, which saves the results in the places that L1 is
expecting them (see Note [safe foreign call convention]).
-}

907
908
-- | Expand a block that ends in a safe foreign call into the full
-- suspend / unsafe-call / resume sequence; any other block is returned
-- unchanged.  See Note [lower safe foreign calls].
lowerSafeForeignCall :: DynFlags -> CmmBlock -> UniqSM CmmBlock
lowerSafeForeignCall dflags block
  | (entry, middle, CmmForeignCall { .. }) <- blockSplit block
  = do
    -- Both temporaries hold RTS-only objects that are not subject to
    -- garbage collection, hence the non-pointer types.
    tok      <- newTemp bWord
    new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
    let (caller_save, caller_load) = callerSaveVolatileRegs dflags
    tso_tmp   <- newTemp gcWord
    stack_tmp <- newTemp gcWord
    let
        -- Save the thread state and release the capability before
        -- making the call.
        suspend = saveThreadState dflags <*>
                  caller_save <*>
                  mkMiddle (callSuspendThread tok intrbl)

        -- The call itself, now performed as an unsafe call: the thread
        -- has already been suspended.
        the_call = mkUnsafeCall tgt res args

        resume  = mkMiddle (callResumeThread new_base tok) <*>
                  -- Assign the result to BaseReg: we
                  -- might now have a different Capability!
                  mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
                  caller_load <*>
                  loadThreadState dflags tso_tmp stack_tmp

        (ret_args, regs, copyout)
            = copyOutOflow dflags NativeReturn Jump (Young succ)
                           (map (CmmReg . CmmLocal) res)
                           updfr (0, [])

        -- NB. after resumeThread returns, the top-of-stack probably contains
        -- the stack frame for succ, but it might not: if the current thread
        -- received an exception during the call, then the stack might be
        -- different.  Hence we continue by jumping to the top stack frame,
        -- not by jumping to succ.
        jump = CmmCall { cml_target    = CmmLoad (CmmReg spReg) bWord
                       , cml_cont      = Just succ
                       , cml_args_regs = regs
                       , cml_args      = widthInBytes wordWidth
                       , cml_ret_args  = ret_args
                       , cml_ret_off   = updfr }

    graph' <- lgraphOfAGraph $
                suspend  <*>
                the_call <*>
                resume   <*>
                copyout  <*>
                mkLast jump

    -- The expansion above must stay a single block so we can splice it
    -- back in where the original call node was.
    case toBlockList graph' of
      [one] -> let (_, middle', last) = blockSplit one
               in return (blockJoin entry (middle `blockAppend` middle') last)
      _     -> panic "lowerSafeForeignCall0"

  -- Block doesn't end in a safe foreign call:
  | otherwise = return block
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979


-- | A code label in the RTS package, as a Cmm literal expression.
foreignLbl :: FastString -> CmmExpr
foreignLbl name = CmmLit rts_label
  where rts_label = CmmLabel (mkCmmCodeLabel rtsPackageId name)

-- | Allocate a fresh local register of the given type.
newTemp :: CmmType -> UniqSM LocalReg
newTemp rep = do
  u <- getUniqueM
  return (LocalReg u rep)

-- | An unsafe foreign call to the RTS's suspendThread(BaseReg,
-- interruptible), binding the returned token to the given register.
callSuspendThread :: LocalReg -> Bool -> CmmNode O O
callSuspendThread tok intrbl
  = CmmUnsafeForeignCall target [tok] actuals
  where
    target  = ForeignTarget (foreignLbl (fsLit "suspendThread"))
                  (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint])
    actuals = [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))]

-- | An unsafe foreign call to the RTS's resumeThread(token), binding
-- the returned BaseReg value to the given register.
callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
callResumeThread new_base tok
  = CmmUnsafeForeignCall target [new_base] [CmmReg (CmmLocal tok)]
  where
    target = ForeignTarget (foreignLbl (fsLit "resumeThread"))
                 (ForeignConvention CCallConv [AddrHint] [AddrHint])

Simon Marlow's avatar
Simon Marlow committed
980
981
982
983
984
985
986
987
-- -----------------------------------------------------------------------------

-- | Add a word count to a byte offset, yielding a byte offset.
plusW :: ByteOff -> WordOff -> ByteOff
plusW bytes nwords = bytes + nwords * wORD_SIZE

-- | Drop the first @n@ slots of the stack, provided they are all
-- 'Empty'; 'Nothing' if any of them is occupied or the stack is too
-- short.
dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
dropEmpty n slots
  | n == 0    = Just slots
  | otherwise = case slots of
                  Empty : rest -> dropEmpty (n - 1) rest
                  _            -> Nothing
Simon Marlow's avatar
Simon Marlow committed
989

990
991
992
-- | True only for an unoccupied stack slot.
isEmpty :: StackSlot -> Bool
isEmpty slot = case slot of
                 Empty -> True
                 _     -> False
Simon Marlow's avatar
Simon Marlow committed
993
994

-- | Size of a local register on the stack, in bytes, rounded up to a
-- whole number of words.
localRegBytes :: LocalReg -> ByteOff
localRegBytes = roundUpToWords . widthInBytes . typeWidth . localRegType
Simon Marlow's avatar
Simon Marlow committed
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013

-- | Size of a local register on the stack, in whole words.
localRegWords :: LocalReg -> WordOff
localRegWords r = toWords (localRegBytes r)

-- | Convert a byte offset to a word offset, truncating.
toWords :: ByteOff -> WordOff
toWords = (`quot` wORD_SIZE)


-- | One assignment per saved register, reloading it from its slot in
-- the old area.
insertReloads :: StackMap -> [CmmNode O O]
insertReloads stackmap = map reload (stackSlotRegs stackmap)
  where
    reload (r, sp) =
      CmmAssign (CmmLocal r)
                (CmmLoad (CmmStackSlot Old sp) (localRegType r))


-- | All (register, stack offset) pairs recorded in a StackMap.
stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
stackSlotRegs = eltsUFM . sm_regs
1014