CmmContFlowOpt.hs 16.5 KB
Newer Older
1
{-# LANGUAGE GADTs #-}
Ben Gamari's avatar
Ben Gamari committed
2
{-# LANGUAGE BangPatterns #-}
3
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
4
module CmmContFlowOpt
5
    ( cmmCfgOpts
Simon Marlow's avatar
Simon Marlow committed
6
    , cmmCfgOptsProc
7
    , removeUnreachableBlocksProc
8
    , replaceLabels
9 10 11
    )
where

12 13
import GhcPrelude hiding (succ, unzip, zip)

14 15 16 17
import Hoopl.Block
import Hoopl.Collections
import Hoopl.Graph
import Hoopl.Label
18
import BlockId
19
import Cmm
20
import CmmUtils
21
import CmmSwitch (mapSwitchTargets)
22
import Maybes
23
import Panic
24
import Util
25

Ian Lynagh's avatar
Ian Lynagh committed
26
import Control.Monad
27

28 29 30 31 32 33 34 35 36 37

-- Note [What is shortcutting]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Consider this Cmm code:
--
-- L1: ...
--     goto L2;
-- L2: goto L3;
-- L3: ...
38
--
39 40 41
-- Here L2 is an empty block and contains only an unconditional branch
-- to L3. In this situation any block that jumps to L2 can jump
-- directly to L3:
42
--
43 44 45 46 47 48 49 50 51 52 53 54 55
-- L1: ...
--     goto L3;
-- L2: goto L3;
-- L3: ...
--
-- In this situation we say that we shortcut L2 to L3. One of
-- consequences of shortcutting is that some blocks of code may become
-- unreachable (in the example above this is true for L2).


-- Note [Control-flow optimisations]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
56
-- This optimisation does three things:
57
--
Gabor Greif's avatar
Gabor Greif committed
58
--   - If a block finishes in an unconditional branch to another block
59 60 61 62 63 64 65 66 67
--     and that is the only jump to that block we concatenate the
--     destination block at the end of the current one.
--
--   - If a block finishes in a call whose continuation block is a
--     goto, then we can shortcut the destination, making the
--     continuation block the destination of the goto - but see Note
--     [Shortcut call returns].
--
--   - For any block that is not a call we try to shortcut the
68 69
--     destination(s). Additionally, if a block ends with a
--     conditional branch we try to invert the condition.
70 71 72 73 74 75 76 77 78 79 80 81 82
--
-- Blocks are processed using postorder DFS traversal. A side effect
-- of determining traversal order with a graph search is elimination
-- of any blocks that are unreachable.
--
-- Transformations are improved by working from the end of the graph
-- towards the beginning, because we may be able to perform many
-- shortcuts in one go.


-- Note [Shortcut call returns]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
83
-- We are going to maintain the "current" graph (LabelMap CmmBlock) as
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
-- we go, and also a mapping from BlockId to BlockId, representing
-- continuation labels that we have renamed.  This latter mapping is
-- important because we might shortcut a CmmCall continuation.  For
-- example:
--
--    Sp[0] = L
--    call g returns to L
--    L: goto M
--    M: ...
--
-- So when we shortcut the L block, we need to replace not only
-- the continuation of the call, but also references to L in the
-- code (e.g. the assignment Sp[0] = L):
--
--    Sp[0] = M
--    call g returns to M
--    M: ...
--
-- So we keep track of which labels we have renamed and apply the mapping
-- at the end with replaceLabels.


-- Note [Shortcut call returns and proc-points]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Consider this code that you might get from a recursive
-- let-no-escape:
--
--       goto L1
--      L1:
--       if (Hp > HpLim) then L2 else L3
--      L2:
--       call stg_gc_noregs returns to L4
--      L4:
--       goto L1
--      L3:
--       ...
--       goto L1
--
-- Then the control-flow optimiser shortcuts L4.  But that turns L1
-- into the call-return proc point, and every iteration of the loop
-- has to shuffle variables to and from the stack.  So we must *not*
-- shortcut L4.
--
-- Moreover not shortcutting call returns is probably fine.  If L4 can
-- concat with its branch target then it will still do so.  And we
-- save some compile time because we don't have to traverse all the
-- code in replaceLabels.
--
-- However, we probably do want to do this if we are splitting proc
-- points, because L1 will be a proc-point anyway, so merging it with
-- L4 reduces the number of proc points.  Unfortunately recursive
-- let-no-escapes won't generate very good code with proc-point
-- splitting on - we should probably compile them to explicitly use
-- the native calling convention instead.
139

140 141
cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
cmmCfgOpts split g = fst (blockConcat split g)
142

143
cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
144
cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
145
    where (g', env) = blockConcat split g
146 147
          info' = info{ info_tbls = new_info_tbls }
          new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
Simon Marlow's avatar
Simon Marlow committed
148

149 150 151
          -- If we changed any labels, then we have to update the info tables
          -- too, except for the top-level info table because that might be
          -- referred to by other procs.
152 153
          upd_info (k,info)
             | Just k' <- mapLookup k env
154 155 156
             = (k', if k' == g_entry g'
                       then info
                       else info{ cit_lbl = infoTblLbl k' })
157 158
             | otherwise
             = (k,info)
159
cmmCfgOptsProc _ top = top
160

161

162
blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId)
163
blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
164
  = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
165
  where
166 167
     -- We might be able to shortcut the entry BlockId itself.
     -- Remember to update the shortcut_map, since we also have to
168 169
     -- update the info_tbls mapping now.
     (new_entry, shortcut_map')
170 171
       | Just entry_blk <- mapLookup entry_id new_blocks
       , Just dest      <- canShortcut entry_blk
172
       = (dest, mapInsert entry_id dest shortcut_map)
173
       | otherwise
174
       = (entry_id, shortcut_map)
175

176 177 178
     -- blocks is a list of blocks in DFS postorder, while blockmap is
     -- a map of blocks. We process each element from blocks and update
     -- blockmap accordingly
179
     blocks = postorderDfs g
180
     blockmap = foldr addBlock emptyBody blocks
181

182 183 184 185 186 187 188 189 190 191 192 193 194
     -- Accumulator contains three components:
     --  * map of blocks in a graph
     --  * map of shortcut labels. See Note [Shortcut call returns]
     --  * map containing number of predecessors for each block. We discard
     --    it after we process all blocks.
     (new_blocks, shortcut_map, _) =
           foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks

     -- Map of predecessors for initial graph. We increase number of
     -- predecessors for entry block by one to denote that it is
     -- target of a jump, even if no block in the current graph jumps
     -- to it.
     initialBackEdges = incPreds entry_id (predMap blocks)
195 196

     maybe_concat :: CmmBlock
197 198
                  -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
                  -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
Ben Gamari's avatar
Ben Gamari committed
199
     maybe_concat block (!blocks, !shortcut_map, !backEdges)
200 201
        -- If:
        --   (1) current block ends with unconditional branch to b' and
202 203
        --   (2) it has exactly one predecessor (namely, current block)
        --
204 205 206 207 208
        -- Then:
        --   (1) append b' block at the end of current block
        --   (2) remove b' from the map of blocks
        --   (3) remove information about b' from predecessors map
        --
209 210
        -- Since we know that the block has only one predecessor we call
        -- mapDelete directly instead of calling decPreds.
211 212 213
        --
        -- Note that we always maintain an up-to-date list of predecessors, so
        -- we can ignore the contents of shortcut_map
214
        | CmmBranch b' <- last
215
        , hasOnePredecessor b'
216
        , Just blk' <- mapLookup b' blocks
217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
        = let bid' = entryLabel blk'
          in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
             , shortcut_map
             , mapDelete b' backEdges )

        -- If:
        --   (1) we are splitting proc points (see Note
        --       [Shortcut call returns and proc-points]) and
        --   (2) current block is a CmmCall or CmmForeignCall with
        --       continuation b' and
        --   (3) we can shortcut that continuation to dest
        -- Then:
        --   (1) we change continuation to point to b'
        --   (2) create mapping from b' to dest
        --   (3) increase number of predecessors of dest by 1
        --   (4) decrease number of predecessors of b' by 1
        --
Gabor Greif's avatar
Gabor Greif committed
234
        -- Later we will use replaceLabels to substitute all occurrences of b'
235 236
        -- with dest.
        | splitting_procs
237
        , Just b'   <- callContinuation_maybe last
238
        , Just blk' <- mapLookup b' blocks
Simon Marlow's avatar
Simon Marlow committed
239
        , Just dest <- canShortcut blk'
240 241 242 243 244 245 246 247 248 249 250
        = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
          , mapInsert b' dest shortcut_map
          , decPreds b' $ incPreds dest backEdges )

        -- If:
        --   (1) a block does not end with a call
        -- Then:
        --   (1) if it ends with a conditional attempt to invert the
        --       conditional
        --   (2) attempt to shortcut all destination blocks
        --   (3) if new successors of a block are different from the old ones
251 252 253 254
        --       update the of predecessors accordingly
        --
        -- A special case of this is a situation when a block ends with an
        -- unconditional jump to a block that can be shortcut.
255
        | Nothing <- callContinuation_maybe last
256 257 258 259 260 261 262 263 264
        = let oldSuccs = successors last
              newSuccs = successors swapcond_last
          in ( mapInsert bid (blockJoinTail head swapcond_last) blocks
             , shortcut_map
             , if oldSuccs == newSuccs
               then backEdges
               else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )

        -- Otherwise don't do anything
265
        | otherwise
266
        = ( blocks, shortcut_map, backEdges )
267
        where
Simon Marlow's avatar
Simon Marlow committed
268 269
          (head, last) = blockSplitTail block
          bid = entryLabel block
270

271 272 273 274 275 276 277 278
          -- Changes continuation of a call to a specified label
          update_cont dest =
              case last of
                CmmCall{}        -> last { cml_cont = Just dest }
                CmmForeignCall{} -> last { succ = dest }
                _                -> panic "Can't shortcut continuation."

          -- Attempts to shortcut successors of last node
279
          shortcut_last = mapSuccessors shortcut last
280 281 282
            where
              shortcut l =
                 case mapLookup l blocks of
283
                   Just b | Just dest <- canShortcut b -> dest
284 285
                   _otherwise -> l

286
          -- See Note [Invert Cmm conditionals]
287
          swapcond_last
288
            | CmmCondBranch cond t f l <- shortcut_last
289 290
            , hasOnePredecessor t -- inverting will make t a fallthrough
            , likelyTrue l || (numPreds f > 1)
291
            , Just cond' <- maybeInvertCmmExpr cond
292
            = CmmCondBranch cond' f t (invertLikeliness l)
293 294 295 296

            | otherwise
            = shortcut_last

297 298
          likelyTrue (Just True)   = True
          likelyTrue _             = False
299

300 301
          invertLikeliness :: Maybe Bool -> Maybe Bool
          invertLikeliness         = fmap not
302

303 304 305 306 307
          -- Number of predecessors for a block
          numPreds bid = mapLookup bid backEdges `orElse` 0

          hasOnePredecessor b = numPreds b == 1

308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
{-
  Note [Invert Cmm conditionals]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  The native code generator always produces jumps to the true branch.
  Falling through to the false branch is however faster. So we try to
  arrange for that to happen.
  This means we invert the condition if:
  * The likely path will become a fallthrough.
  * We can't guarantee a fallthrough for the false branch but for the
    true branch.

  In some cases it's faster to avoid inverting when the false branch is likely.
  However determining when that is the case is neither easy nor cheap so for
  now we always invert as this produces smaller binaries and code that is
  equally fast on average. (On an i7-6700K)

  TODO:
  There is also the edge case when both branches have multiple predecessors.
  In this case we could assume that we will end up with a jump for BOTH
  branches. In this case it might be best to put the likely path in the true
  branch especially if there are large numbers of predecessors as this saves
  us the jump thats not taken. However I haven't tested this and as of early
  2018 we almost never generate cmm where this would apply.
-}

333 334 335 336 337 338 339 340
-- Functions for incrementing and decrementing number of predecessors. If
-- decrementing would set the predecessor count to 0, we remove entry from the
-- map.
-- Invariant: if a block has no predecessors it should be dropped from the
-- graph because it is unreachable. maybe_concat is constructed to maintain
-- that invariant, but calling replaceLabels may introduce unreachable blocks.
-- We rely on subsequent passes in the Cmm pipeline to remove unreachable
-- blocks.
341
incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
342 343 344 345 346 347 348 349 350 351 352 353
incPreds bid edges = mapInsertWith (+) bid 1 edges
decPreds bid edges = case mapLookup bid edges of
                       Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
                       Just _                 -> mapDelete bid edges
                       _                      -> edges


-- Checks if a block consists only of "goto dest". If it does than we return
-- "Just dest" label. See Note [What is shortcutting]
canShortcut :: CmmBlock -> Maybe BlockId
canShortcut block
    | (_, middle, CmmBranch dest) <- blockSplit block
Peter Wortmann's avatar
Peter Wortmann committed
354
    , all dont_care $ blockToList middle
355 356 357
    = Just dest
    | otherwise
    = Nothing
Peter Wortmann's avatar
Peter Wortmann committed
358 359 360
    where dont_care CmmComment{} = True
          dont_care CmmTick{}    = True
          dont_care _other       = False
361 362 363 364 365

-- Concatenates two blocks. First one is assumed to be open on exit, the second
-- is assumed to be closed on entry (i.e. it has a label attached to it, which
-- the splice function removes by calling snd on result of blockSplitHead).
splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
Peter Wortmann's avatar
Peter Wortmann committed
366 367 368 369
splice head rest = entry `blockJoinHead` code0 `blockAppend` code1
  where (CmmEntry lbl sc0, code0) = blockSplitHead head
        (CmmEntry _   sc1, code1) = blockSplitHead rest
        entry = CmmEntry lbl (combineTickScopes sc0 sc1)
370 371 372

-- If node is a call with continuation call return Just label of that
-- continuation. Otherwise return Nothing.
373 374 375 376 377
callContinuation_maybe :: CmmNode O C -> Maybe BlockId
callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
callContinuation_maybe (CmmForeignCall { succ = b })   = Just b
callContinuation_maybe _ = Nothing

378

379
-- Map over the CmmGraph, replacing each label with its mapping in the
380 381
-- supplied LabelMap.
replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
382
replaceLabels env g
Simon Marlow's avatar
Simon Marlow committed
383 384
  | mapNull env = g
  | otherwise   = replace_eid $ mapGraphNodes1 txnode g
385
   where
386
     replace_eid g = g {g_entry = lookup (g_entry g)}
387 388 389
     lookup id = mapLookup id env `orElse` id

     txnode :: CmmNode e x -> CmmNode e x
390 391 392 393 394 395 396 397 398 399
     txnode (CmmBranch bid) = CmmBranch (lookup bid)
     txnode (CmmCondBranch p t f l) =
       mkCmmCondBranch (exp p) (lookup t) (lookup f) l
     txnode (CmmSwitch e ids) =
       CmmSwitch (exp e) (mapSwitchTargets lookup ids)
     txnode (CmmCall t k rg a res r) =
       CmmCall (exp t) (liftM lookup k) rg a res r
     txnode fc@CmmForeignCall{} =
       fc{ args = map exp (args fc), succ = lookup (succ fc) }
     txnode other = mapExpDeep exp other
400 401

     exp :: CmmExpr -> CmmExpr
402
     exp (CmmLit (CmmBlock bid))                = CmmLit (CmmBlock (lookup bid))
Simon Marlow's avatar
Simon Marlow committed
403
     exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
404 405
     exp e                                      = e

406 407 408
mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
mkCmmCondBranch p t f l =
  if t == f then CmmBranch t else CmmCondBranch p t f l
dias@eecs.harvard.edu's avatar
dias@eecs.harvard.edu committed
409

410
-- Build a map from a block to its set of predecessors.
411
predMap :: [CmmBlock] -> LabelMap Int
412
predMap blocks = foldr add_preds mapEmpty blocks
Simon Marlow's avatar
Simon Marlow committed
413 414 415
  where
    add_preds block env = foldr add env (successors block)
      where add lbl env = mapInsertWith (+) lbl 1 env
416 417

-- Removing unreachable blocks
418
removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
419
removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
420
   | used_blocks `lengthLessThan` mapSize (toBlockMap g)
421
   = CmmProc info' lbl live g'
422 423 424 425 426
   | otherwise
   = proc
   where
     g'    = ofBlockList (g_entry g) used_blocks
     info' = info { info_tbls = keep_used (info_tbls info) }
427
             -- Remove any info_tbls for unreachable
428

429
     keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
430
     keep_used bs = mapFoldlWithKey keep mapEmpty bs
431

432 433
     keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable
     keep env l i | l `setMember` used_lbls = mapInsert l i env
434 435 436 437 438 439
                  | otherwise               = env

     used_blocks :: [CmmBlock]
     used_blocks = postorderDfs g

     used_lbls :: LabelSet
Ben Gamari's avatar
Ben Gamari committed
440
     used_lbls = setFromList $ map entryLabel used_blocks