CmmContFlowOpt.hs 15.8 KB
Newer Older
1
{-# LANGUAGE GADTs #-}
2
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
3
module CmmContFlowOpt
4
    ( cmmCfgOpts
Simon Marlow's avatar
Simon Marlow committed
5
    , cmmCfgOptsProc
6
    , removeUnreachableBlocksProc
7
    , replaceLabels
8 9 10
    )
where

11
import Hoopl
12
import BlockId
13
import Cmm
14
import CmmUtils
15
import Maybes
16
import Panic
17

Ian Lynagh's avatar
Ian Lynagh committed
18
import Control.Monad
19
import Prelude hiding (succ, unzip, zip)
20

21 22 23 24 25 26 27 28 29 30

-- Note [What is shortcutting]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Consider this Cmm code:
--
-- L1: ...
--     goto L2;
-- L2: goto L3;
-- L3: ...
31
--
32 33 34
-- Here L2 is an empty block and contains only an unconditional branch
-- to L3. In this situation any block that jumps to L2 can jump
-- directly to L3:
35
--
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
-- L1: ...
--     goto L3;
-- L2: goto L3;
-- L3: ...
--
-- In this situation we say that we shortcut L2 to L3. One of
-- consequences of shortcutting is that some blocks of code may become
-- unreachable (in the example above this is true for L2).


-- Note [Control-flow optimisations]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- This optimisation does four things:
--
--   - If a block finishes in an unconditonal branch to another block
--     and that is the only jump to that block we concatenate the
--     destination block at the end of the current one.
--
--   - If a block finishes in an unconditional branch, we may be able
--     to shortcut the destination block.
--
--   - If a block finishes in a call whose continuation block is a
--     goto, then we can shortcut the destination, making the
--     continuation block the destination of the goto - but see Note
--     [Shortcut call returns].
--
--   - For block finishing in conditional branch we try to invert the
--     condition and shortcut destination of alternatives.
--
--   - For any block that is not a call we try to shortcut the
--     destination(s).
--
-- Blocks are processed using postorder DFS traversal. A side effect
-- of determining traversal order with a graph search is elimination
-- of any blocks that are unreachable.
--
-- Transformations are improved by working from the end of the graph
-- towards the beginning, because we may be able to perform many
-- shortcuts in one go.


-- Note [Shortcut call returns]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- We are going to maintain the "current" graph (BlockEnv CmmBlock) as
-- we go, and also a mapping from BlockId to BlockId, representing
-- continuation labels that we have renamed.  This latter mapping is
-- important because we might shortcut a CmmCall continuation.  For
-- example:
--
--    Sp[0] = L
--    call g returns to L
--    L: goto M
--    M: ...
--
-- So when we shortcut the L block, we need to replace not only
-- the continuation of the call, but also references to L in the
-- code (e.g. the assignment Sp[0] = L):
--
--    Sp[0] = M
--    call g returns to M
--    M: ...
--
-- So we keep track of which labels we have renamed and apply the mapping
-- at the end with replaceLabels.


-- Note [Shortcut call returns and proc-points]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Consider this code that you might get from a recursive
-- let-no-escape:
--
--       goto L1
--      L1:
--       if (Hp > HpLim) then L2 else L3
--      L2:
--       call stg_gc_noregs returns to L4
--      L4:
--       goto L1
--      L3:
--       ...
--       goto L1
--
-- Then the control-flow optimiser shortcuts L4.  But that turns L1
-- into the call-return proc point, and every iteration of the loop
-- has to shuffle variables to and from the stack.  So we must *not*
-- shortcut L4.
--
-- Moreover not shortcutting call returns is probably fine.  If L4 can
-- concat with its branch target then it will still do so.  And we
-- save some compile time because we don't have to traverse all the
-- code in replaceLabels.
--
-- However, we probably do want to do this if we are splitting proc
-- points, because L1 will be a proc-point anyway, so merging it with
-- L4 reduces the number of proc points.  Unfortunately recursive
-- let-no-escapes won't generate very good code with proc-point
-- splitting on - we should probably compile them to explicitly use
-- the native calling convention instead.
137

138 139
cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
cmmCfgOpts split g = fst (blockConcat split g)
140

141
cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
142
cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
143
    where (g', env) = blockConcat split g
144 145
          info' = info{ info_tbls = new_info_tbls }
          new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
Simon Marlow's avatar
Simon Marlow committed
146

147 148 149
          -- If we changed any labels, then we have to update the info tables
          -- too, except for the top-level info table because that might be
          -- referred to by other procs.
150 151
          upd_info (k,info)
             | Just k' <- mapLookup k env
152 153 154
             = (k', if k' == g_entry g'
                       then info
                       else info{ cit_lbl = infoTblLbl k' })
155 156
             | otherwise
             = (k,info)
157
cmmCfgOptsProc _ top = top
158

159

160 161
blockConcat :: Bool -> CmmGraph -> (CmmGraph, BlockEnv BlockId)
blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
162
  = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
163
  where
164 165
     -- We might be able to shortcut the entry BlockId itself.
     -- Remember to update the shortcut_map, since we also have to
166 167
     -- update the info_tbls mapping now.
     (new_entry, shortcut_map')
168 169
       | Just entry_blk <- mapLookup entry_id new_blocks
       , Just dest      <- canShortcut entry_blk
170
       = (dest, mapInsert entry_id dest shortcut_map)
171
       | otherwise
172
       = (entry_id, shortcut_map)
173

174 175 176
     -- blocks is a list of blocks in DFS postorder, while blockmap is
     -- a map of blocks. We process each element from blocks and update
     -- blockmap accordingly
177
     blocks = postorderDfs g
178
     blockmap = foldr addBlock emptyBody blocks
179

180 181 182 183 184 185 186 187 188 189 190 191 192
     -- Accumulator contains three components:
     --  * map of blocks in a graph
     --  * map of shortcut labels. See Note [Shortcut call returns]
     --  * map containing number of predecessors for each block. We discard
     --    it after we process all blocks.
     (new_blocks, shortcut_map, _) =
           foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks

     -- Map of predecessors for initial graph. We increase number of
     -- predecessors for entry block by one to denote that it is
     -- target of a jump, even if no block in the current graph jumps
     -- to it.
     initialBackEdges = incPreds entry_id (predMap blocks)
193 194

     maybe_concat :: CmmBlock
195 196 197 198 199
                  -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int)
                  -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int)
     maybe_concat block (blocks, shortcut_map, backEdges)
        -- If:
        --   (1) current block ends with unconditional branch to b' and
200 201
        --   (2) it has exactly one predecessor (namely, current block)
        --
202 203 204 205 206 207 208 209 210 211
        -- Then:
        --   (1) append b' block at the end of current block
        --   (2) remove b' from the map of blocks
        --   (3) remove information about b' from predecessors map
        --
        -- This guard must be first so that we always eliminate blocks that have
        -- only one predecessor. If we had a target block that is both
        -- shorcutable and has only one predecessor and attempted to shortcut it
        -- first we would make that block unreachable but would not remove it
        -- from the graph.
212 213 214
        --
        -- Note that we always maintain an up-to-date list of predecessors, so
        -- we can ignore the contents of shortcut_map
215
        | CmmBranch b' <- last
216
        , hasOnePredecessor b'
217
        , Just blk' <- mapLookup b' blocks
218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
        = let bid' = entryLabel blk'
          in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
             , shortcut_map
             , mapDelete b' backEdges )

        -- If:
        --   (1) current block ends with unconditional branch to b' and
        --   (2) we can shortcut block b'
        -- Then:
        --   (1) concatenate b' at the end of current block, effectively
        --       changing target of uncondtional jump from b' to dest
        --   (2) increase number of predecessors of dest by 1
        --   (3) decrease number of predecessors of b' by 1
        | CmmBranch b' <- last
        , Just blk' <- mapLookup b' blocks
        , Just dest <- canShortcut blk'
        = ( mapInsert bid (splice head blk') blocks, shortcut_map,
            decPreds b' $ incPreds dest backEdges )

        -- If:
        --   (1) we are splitting proc points (see Note
        --       [Shortcut call returns and proc-points]) and
        --   (2) current block is a CmmCall or CmmForeignCall with
        --       continuation b' and
        --   (3) we can shortcut that continuation to dest
        -- Then:
        --   (1) we change continuation to point to b'
        --   (2) create mapping from b' to dest
        --   (3) increase number of predecessors of dest by 1
        --   (4) decrease number of predecessors of b' by 1
        --
Gabor Greif's avatar
Gabor Greif committed
249
        -- Later we will use replaceLabels to substitute all occurrences of b'
250 251
        -- with dest.
        | splitting_procs
252
        , Just b'   <- callContinuation_maybe last
253
        , Just blk' <- mapLookup b' blocks
Simon Marlow's avatar
Simon Marlow committed
254
        , Just dest <- canShortcut blk'
255 256 257 258 259 260 261 262 263 264 265 266
        = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
          , mapInsert b' dest shortcut_map
          , decPreds b' $ incPreds dest backEdges )

        -- If:
        --   (1) a block does not end with a call
        -- Then:
        --   (1) if it ends with a conditional attempt to invert the
        --       conditional
        --   (2) attempt to shortcut all destination blocks
        --   (3) if new successors of a block are different from the old ones
        --       we update the of predecessors accordingly
267
        | Nothing <- callContinuation_maybe last
268 269 270 271 272 273 274 275 276
        = let oldSuccs = successors last
              newSuccs = successors swapcond_last
          in ( mapInsert bid (blockJoinTail head swapcond_last) blocks
             , shortcut_map
             , if oldSuccs == newSuccs
               then backEdges
               else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )

        -- Otherwise don't do anything
277
        | otherwise
278
        = ( blocks, shortcut_map, backEdges )
279
        where
Simon Marlow's avatar
Simon Marlow committed
280 281
          (head, last) = blockSplitTail block
          bid = entryLabel block
282

283 284 285 286 287 288 289 290
          -- Changes continuation of a call to a specified label
          update_cont dest =
              case last of
                CmmCall{}        -> last { cml_cont = Just dest }
                CmmForeignCall{} -> last { succ = dest }
                _                -> panic "Can't shortcut continuation."

          -- Attempts to shortcut successors of last node
291
          shortcut_last = mapSuccessors shortcut last
292 293 294
            where
              shortcut l =
                 case mapLookup l blocks of
295
                   Just b | Just dest <- canShortcut b -> dest
296 297
                   _otherwise -> l

298 299 300 301 302
          -- For a conditional, we invert the conditional if that would make it
          -- more likely that the branch-not-taken case becomes a fallthrough.
          -- This helps the native codegen a little bit, and probably has no
          -- effect on LLVM.  It's convenient to do it here, where we have the
          -- information about predecessors.
303 304 305
          swapcond_last
            | CmmCondBranch cond t f <- shortcut_last
            , numPreds f > 1
306
            , hasOnePredecessor t
307 308 309 310 311 312
            , Just cond' <- maybeInvertCmmExpr cond
            = CmmCondBranch cond' f t

            | otherwise
            = shortcut_last

313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353
          -- Number of predecessors for a block
          numPreds bid = mapLookup bid backEdges `orElse` 0

          hasOnePredecessor b = numPreds b == 1

-- Functions for incrementing and decrementing number of predecessors. If
-- decrementing would set the predecessor count to 0, we remove entry from the
-- map.
-- Invariant: if a block has no predecessors it should be dropped from the
-- graph because it is unreachable. maybe_concat is constructed to maintain
-- that invariant, but calling replaceLabels may introduce unreachable blocks.
-- We rely on subsequent passes in the Cmm pipeline to remove unreachable
-- blocks.
incPreds, decPreds :: BlockId -> BlockEnv Int -> BlockEnv Int
incPreds bid edges = mapInsertWith (+) bid 1 edges
decPreds bid edges = case mapLookup bid edges of
                       Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
                       Just _                 -> mapDelete bid edges
                       _                      -> edges


-- Checks if a block consists only of "goto dest". If it does than we return
-- "Just dest" label. See Note [What is shortcutting]
canShortcut :: CmmBlock -> Maybe BlockId
canShortcut block
    | (_, middle, CmmBranch dest) <- blockSplit block
    , isEmptyBlock middle
    = Just dest
    | otherwise
    = Nothing


-- Concatenates two blocks. First one is assumed to be open on exit, the second
-- is assumed to be closed on entry (i.e. it has a label attached to it, which
-- the splice function removes by calling snd on result of blockSplitHead).
splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
splice head rest = head `blockAppend` snd (blockSplitHead rest)


-- If node is a call with continuation call return Just label of that
-- continuation. Otherwise return Nothing.
354 355 356 357 358
callContinuation_maybe :: CmmNode O C -> Maybe BlockId
callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
callContinuation_maybe (CmmForeignCall { succ = b })   = Just b
callContinuation_maybe _ = Nothing

359

360 361
-- Map over the CmmGraph, replacing each label with its mapping in the
-- supplied BlockEnv.
362
replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph
363
replaceLabels env g
Simon Marlow's avatar
Simon Marlow committed
364 365
  | mapNull env = g
  | otherwise   = replace_eid $ mapGraphNodes1 txnode g
366
   where
367
     replace_eid g = g {g_entry = lookup (g_entry g)}
368 369 370 371
     lookup id = mapLookup id env `orElse` id

     txnode :: CmmNode e x -> CmmNode e x
     txnode (CmmBranch bid)         = CmmBranch (lookup bid)
372
     txnode (CmmCondBranch p t f)   = mkCmmCondBranch (exp p) (lookup t) (lookup f)
373
     txnode (CmmSwitch e arms)      = CmmSwitch (exp e) (map (liftM lookup) arms)
374
     txnode (CmmCall t k rg a res r) = CmmCall (exp t) (liftM lookup k) rg a res r
375 376 377 378 379
     txnode fc@CmmForeignCall{}     = fc{ args = map exp (args fc)
                                        , succ = lookup (succ fc) }
     txnode other                   = mapExpDeep exp other

     exp :: CmmExpr -> CmmExpr
380
     exp (CmmLit (CmmBlock bid))                = CmmLit (CmmBlock (lookup bid))
Simon Marlow's avatar
Simon Marlow committed
381
     exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
382 383
     exp e                                      = e

Simon Marlow's avatar
Simon Marlow committed
384
mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C
385
mkCmmCondBranch p t f = if t == f then CmmBranch t else CmmCondBranch p t f
dias@eecs.harvard.edu's avatar
dias@eecs.harvard.edu committed
386

387
-- Build a map from a block to its set of predecessors.
Simon Marlow's avatar
Simon Marlow committed
388
predMap :: [CmmBlock] -> BlockEnv Int
389
predMap blocks = foldr add_preds mapEmpty blocks
Simon Marlow's avatar
Simon Marlow committed
390 391 392
  where
    add_preds block env = foldr add env (successors block)
      where add lbl env = mapInsertWith (+) lbl 1 env
393 394

-- Removing unreachable blocks
395
removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417
removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
   | length used_blocks < mapSize (toBlockMap g) 
   = CmmProc info' lbl live g' 
   | otherwise
   = proc
   where
     g'    = ofBlockList (g_entry g) used_blocks
     info' = info { info_tbls = keep_used (info_tbls info) }
             -- Remove any info_tbls for unreachable 

     keep_used :: BlockEnv CmmInfoTable -> BlockEnv CmmInfoTable
     keep_used bs = mapFoldWithKey keep emptyBlockMap bs

     keep :: Label -> CmmInfoTable -> BlockEnv CmmInfoTable -> BlockEnv CmmInfoTable
     keep l i env | l `setMember` used_lbls = mapInsert l i env
                  | otherwise               = env

     used_blocks :: [CmmBlock]
     used_blocks = postorderDfs g

     used_lbls :: LabelSet
     used_lbls = foldr (setInsert . entryLabel) setEmpty used_blocks