CmmSwitch.hs 18.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
{-# LANGUAGE GADTs #-}
module CmmSwitch (
     SwitchTargets,
     mkSwitchTargets,
     switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned,
     mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough,
     switchTargetsToList, eqSwitchTargetWith,

     SwitchPlan(..),
     targetSupportsSwitch,
     createSwitchPlan,
  ) where

14 15
import GhcPrelude

16 17
import Outputable
import DynFlags
18
import Hoopl.Label (Label)
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34

import Data.Maybe
import Data.List (groupBy)
import Data.Function (on)
import qualified Data.Map as M

-- Note [Cmm Switches, the general plan]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Compiling a high-level switch statement, as it comes out of a STG case
-- expression, for example, allows for a surprising amount of design decisions.
-- Therefore, we cleanly separated this from the Stg → Cmm transformation, as
-- well as from the actual code generation.
--
-- The overall plan is:
--  * The Stg → Cmm transformation creates a single `SwitchTargets` in
35
--    emitSwitch and emitCmmLitSwitch in GHC.StgToCmm/Utils.hs.
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
--    At this stage, they are unsuitable for code generation.
--  * A dedicated Cmm transformation (CmmImplementSwitchPlans) replaces these
--    switch statements with code that is suitable for code generation, i.e.
--    a nice balanced tree of decisions with dense jump tables in the leafs.
--    The actual planning of this tree is performed in pure code in createSwitchPlan
--    in this module. See Note [createSwitchPlan].
--  * The actual code generation will not do any further processing and
--    implement each CmmSwitch with a jump tables.
--
-- When compiling to LLVM or C, CmmImplementSwitchPlans leaves the switch
-- statements alone, as we can turn a SwitchTargets value into a nice
-- switch-statement in LLVM resp. C, and leave the rest to the compiler.
--
-- See Note [CmmSwitch vs. CmmImplementSwitchPlans] why the two module are
-- separated.

-----------------------------------------------------------------------------
53
-- Note [Magic Constants in CmmSwitch]
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
--
-- There are a lot of heuristics here that depend on magic values where it is
-- hard to determine the "best" value (for whatever that means). These are the
-- magic values:

-- | Number of consecutive default values allowed in a jump table. If there are
-- more of them, the jump tables are split.
--
-- Currently 7, as it costs 7 words of additional code when a jump table is
-- split (at least on x64, determined experimentally).
maxJumpTableHole :: Integer
maxJumpTableHole = 7

-- | Minimum size of a jump table. If the number is smaller, the switch is
-- implemented using conditionals.
-- Currently 5, because an if-then-else tree of 4 values is nice and compact.
minJumpTableSize :: Int
minJumpTableSize = 5

-- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset].
minJumpTableOffset :: Integer
minJumpTableOffset = 2


-----------------------------------------------------------------------------
-- Switch Targets

-- Note [SwitchTargets]:
-- ~~~~~~~~~~~~~~~~~~~~~
--
-- The branches of a switch are stored in a SwitchTargets, which consists of an
-- (optional) default jump target, and a map from values to jump targets.
--
-- If the default jump target is absent, the behaviour of the switch outside the
-- values of the map is undefined.
--
-- We use an Integer for the keys the map so that it can be used in switches on
-- unsigned as well as signed integers.
--
93 94
-- The map may be empty (we prune out-of-range branches here, so it could be us
-- emptying it).
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
--
-- Before code generation, the table needs to be brought into a form where all
-- entries are non-negative, so that it can be compiled into a jump table.
-- See switchTargetsToTable.


-- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch'
-- value, and knows whether the value is signed, the possible range, an
-- optional default value and a map from values to jump labels.
data SwitchTargets =
    SwitchTargets
        Bool                       -- Signed values
        (Integer, Integer)         -- Range
        (Maybe Label)              -- Default value
        (M.Map Integer Label)      -- The branches
    deriving (Show, Eq)

Gabor Greif's avatar
Gabor Greif committed
112
-- | The smart constructor mkSwitchTargets normalises the map a bit:
113 114 115 116 117 118 119 120 121 122 123 124
--  * No entries outside the range
--  * No entries equal to the default
--  * No default if all elements have explicit values
mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets
mkSwitchTargets signed range@(lo,hi) mbdef ids
    = SwitchTargets signed range mbdef' ids'
  where
    ids' = dropDefault $ restrict ids
    mbdef' | defaultNeeded = mbdef
           | otherwise     = Nothing

    -- Drop entries outside the range, if there is a range
125
    restrict = restrictMap (lo,hi)
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156

    -- Drop entries that equal the default, if there is a default
    dropDefault | Just l <- mbdef = M.filter (/= l)
                | otherwise       = id

    -- Check if the default is still needed
    defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1


-- | Changes all labels mentioned in the SwitchTargets value
mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets
mapSwitchTargets f (SwitchTargets signed range mbdef branches)
    = SwitchTargets signed range (fmap f mbdef) (fmap f branches)

-- | Returns the list of non-default branches of the SwitchTargets value
switchTargetsCases :: SwitchTargets -> [(Integer, Label)]
switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches

-- | Return the default label of the SwitchTargets value
switchTargetsDefault :: SwitchTargets -> Maybe Label
switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef

-- | Return the range of the SwitchTargets value
switchTargetsRange :: SwitchTargets -> (Integer, Integer)
switchTargetsRange (SwitchTargets _ range _ _) = range

-- | Return whether this is used for a signed value
switchTargetsSigned :: SwitchTargets -> Bool
switchTargetsSigned (SwitchTargets signed _ _ _) = signed

-- | switchTargetsToTable creates a dense jump table, usable for code generation.
157 158 159 160 161 162 163
--
-- Also returns an offset to add to the value; the list is 0-based on the
-- result of that addition.
--
-- The conversion from Integer to Int is a bit of a wart, as the actual
-- scrutinee might be an unsigned word, but it just works, due to wrap-around
-- arithmetic (as verified by the CmmSwitchTest test case).
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label])
switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches)
    = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ])
  where
    labelFor i = case M.lookup i branches of Just l -> Just l
                                             Nothing -> mbdef
    start | lo >= 0 && lo < minJumpTableOffset  = 0  -- See Note [Jump Table Offset]
          | otherwise                           = lo

-- Note [Jump Table Offset]
-- ~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Usually, the code for a jump table starting at x will first subtract x from
-- the value, to avoid a large amount of empty entries. But if x is very small,
-- the extra entries are no worse than the subtraction in terms of code size, and
-- not having to do the subtraction is quicker.
--
-- I.e. instead of
--     _u20N:
--             leaq -1(%r14),%rax
--             jmp *_n20R(,%rax,8)
--     _n20R:
--             .quad   _c20p
--             .quad   _c20q
-- do
--     _u20N:
--             jmp *_n20Q(,%r14,8)
--
--     _n20Q:
--             .quad   0
--             .quad   _c20p
--             .quad   _c20q
--             .quad   _c20r

Brian Wignall's avatar
Brian Wignall committed
198
-- | The list of all labels occurring in the SwitchTargets value.
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
switchTargetsToList :: SwitchTargets -> [Label]
switchTargetsToList (SwitchTargets _ _ mbdef branches)
    = maybeToList mbdef ++ M.elems branches

-- | Groups cases with equal targets, suitable for pretty-printing to a
-- c-like switch statement with fall-through semantics.
switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label)
switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef)
  where
    groups = map (\xs -> (map fst xs, snd (head xs))) $
             groupBy ((==) `on` snd) $
             M.toList branches

-- | Custom equality helper, needed for "CmmCommonBlockElim"
eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool
eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) =
    signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2)
  where
    goMB Nothing Nothing = True
    goMB (Just l1) (Just l2) = l1 `eq` l2
    goMB _ _ = False
    goList [] [] = True
    goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2
    goList _ _ = False

-----------------------------------------------------------------------------
-- Code generation for Switches


228
-- | A SwitchPlan abstractly describes how a Switch statement ought to be
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
-- implemented. See Note [createSwitchPlan]
data SwitchPlan
    = Unconditionally Label
    | IfEqual Integer Label SwitchPlan
    | IfLT Bool Integer SwitchPlan SwitchPlan
    | JumpTable SwitchTargets
  deriving Show
--
-- Note [createSwitchPlan]
-- ~~~~~~~~~~~~~~~~~~~~~~~
--
-- A SwitchPlan describes how a Switch statement is to be broken down into
-- smaller pieces suitable for code generation.
--
-- createSwitchPlan creates such a switch plan, in these steps:
244 245
--  1. It splits the switch statement at segments of non-default values that
--     are too large. See splitAtHoles and Note [Magic Constants in CmmSwitch]
246 247
--  2. Too small jump tables should be avoided, so we break up smaller pieces
--     in breakTooSmall.
248
--  3. We fill in the segments between those pieces with a jump to the default
249
--     label (if there is one), returning a SeparatedList in mkFlatSwitchPlan
250
--  4. We find and replace two less-than branches by a single equal-to-test in
251 252 253
--     findSingleValues
--  5. The thus collected pieces are assembled to a balanced binary tree.

254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315
{-
  Note [Two alts + default]
  ~~~~~~~~~~~~~~~~~~~~~~~~~

Discussion and a bit more info at #14644

When dealing with a switch of the form:
switch(e) {
  case 1: goto l1;
  case 3000: goto l2;
  default: goto ldef;
}

If we treat it as a sparse jump table we would generate:

if (e > 3000) //Check if value is outside of the jump table.
    goto ldef;
else {
    if (e < 3000) { //Compare to upper value
        if(e != 1) //Compare to remaining value
            goto ldef;
          else
            goto l2;
    }
    else
        goto l1;
}

Instead we special case this to :

if (e==1) goto l1;
else if (e==3000) goto l2;
else goto l3;

This means we have:
* Less comparisons for: 1,<3000
* Unchanged for 3000
* One more for >3000

This improves code in a few ways:
* One comparison less means smaller code which helps with cache.
* It exchanges a taken jump for two jumps no taken in the >range case.
  Jumps not taken are cheaper (See Agner guides) making this about as fast.
* For all other cases the first range check is removed making it faster.

The end result is that the change is not measurably slower for the case
>3000 and faster for the other cases.

This makes running this kind of match in an inner loop cheaper by 10-20%
depending on the data.
In nofib this improves wheel-sieve1 by 4-9% depending on problem
size.

We could also add a second conditional jump after the comparison to
keep the range check like this:
    cmp 3000, rArgument
    jg <default>
    je <branch 2>
While this is fairly cheap it made no big difference for the >3000 case
and slowed down all other cases making it not worthwhile.
-}

316 317 318 319 320 321 322 323 324 325 326

-- | Does the target support switch out of the box? Then leave this to the
-- target!
targetSupportsSwitch :: HscTarget -> Bool
targetSupportsSwitch HscC = True
targetSupportsSwitch HscLlvm = True
targetSupportsSwitch _ = False

-- | This function creates a SwitchPlan from a SwitchTargets value, breaking it
-- down into smaller pieces suitable for code generation.
createSwitchPlan :: SwitchTargets -> SwitchPlan
327
-- Lets do the common case of a singleton map quickly and efficiently (#10677)
328 329 330
createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
    | [(x, l)] <- M.toList m
    = IfEqual x l (Unconditionally defLabel)
331
-- And another common case, matching "booleans"
332
createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m)
333 334 335
    | [(x1, l1), (_x2,l2)] <- M.toAscList m
    --Checking If |range| = 2 is enough if we have two unique literals
    , hi - lo == 1
336
    = IfEqual x1 l1 (Unconditionally l2)
337 338 339 340
-- See Note [Two alts + default]
createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
    | [(x1, l1), (x2,l2)] <- M.toAscList m
    = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel))
341
createSwitchPlan (SwitchTargets signed range mbdef m) =
342 343 344 345
    -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $
    plan
  where
    pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m
346
    flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces
347 348 349 350 351 352 353
    plan = buildTree signed $ flatPlan


---
--- Step 1: Splitting at large holes
---
splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a]
354
splitAtHoles _        m | M.null m = []
355 356 357 358 359 360 361 362 363 364 365 366
splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles
  where
    holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m))
    nonHoles = reassocTuples lo holes hi

    (lo,_) = M.findMin m
    (hi,_) = M.findMax m

---
--- Step 2: Avoid small jump tables
---
-- We do not want jump tables below a certain size. This breaks them up
367
-- (into singleton maps, for now).
368 369 370 371 372 373 374 375 376
breakTooSmall :: M.Map Integer a -> [M.Map Integer a]
breakTooSmall m
  | M.size m > minJumpTableSize = [m]
  | otherwise                   = [M.singleton k v | (k,v) <- M.toList m]

---
---  Step 3: Fill in the blanks
---

377 378 379 380 381 382
-- | A FlatSwitchPlan is a list of SwitchPlans, with an integer inbetween every
-- two entries, dividing the range.
-- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if
-- the expression is < n, and plan2 otherwise.

type FlatSwitchPlan = SeparatedList Integer SwitchPlan
383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424

mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan

-- If we have no default (i.e. undefined where there is no entry), we can
-- branch at the minimum of each map
mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty
mkFlatSwitchPlan signed  Nothing _ (m:ms)
  = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ])

-- If we have a default, we have to interleave segments that jump
-- to the default between the maps
mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps)
  where
    go (lo,hi) []
        | lo > hi = []
        | otherwise = [(lo, Unconditionally l)]
    go (lo,hi) (m:ms)
        | lo < min
        = (lo, Unconditionally l) : go (min,hi) (m:ms)
        | lo == min
        = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms
        | otherwise
        = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min)
      where
        min = fst (M.findMin m)
        max = fst (M.findMax m)


mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan
mkLeafPlan signed mbdef m
    | [(_,l)] <- M.toList m -- singleton map
    = Unconditionally l
    | otherwise
    = JumpTable $ mkSwitchTargets signed (min,max) mbdef m
  where
    min = fst (M.findMin m)
    max = fst (M.findMax m)

---
---  Step 4: Reduce the number of branches using ==
---

425
-- A sequence of three unconditional jumps, with the outer two pointing to the
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
-- same value and the bounds off by exactly one can be improved
findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan
findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs)
  | l == l3 && i + 1 == i'
  = findSingleValues (IfEqual i l2 (Unconditionally l), xs)
findSingleValues (p, (i,p'):xs)
  = (p,i) `consSL` findSingleValues (p', xs)
findSingleValues (p, [])
  = (p, [])

---
---  Step 5: Actually build the tree
---

-- Build a balanced tree from a separated list
buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan
buildTree _ (p,[]) = p
buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2)
  where
    (sl1, m, sl2) = divideSL sl



--
-- Utility data type: Non-empty lists with extra markers in between each
-- element:
--

type SeparatedList b a = (a, [(b,a)])

consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a
consSL (a, b) (a', xs) = (a, (b,a'):xs)

divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a)
divideSL (_,[]) = error "divideSL: Singleton SeparatedList"
divideSL (p,xs) = ((p, xs1), m, (p', xs2))
  where
    (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs

--
-- Other Utilities
--

469
restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b
470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
restrictMap (lo,hi) m = mid
  where (_,   mid_hi) = M.split (lo-1) m
        (mid, _) =      M.split (hi+1) mid_hi

-- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)]
reassocTuples :: a -> [(a,a)] -> a -> [(a,a)]
reassocTuples initial [] last
    = [(initial,last)]
reassocTuples initial ((a,b):tuples) last
    = (initial,a) : reassocTuples b tuples last

-- Note [CmmSwitch vs. CmmImplementSwitchPlans]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- I (Joachim) separated the two somewhat closely related modules
--
--  - CmmSwitch, which provides the CmmSwitchTargets type and contains the strategy
--    for implementing a Cmm switch (createSwitchPlan), and
487
--  - CmmImplementSwitchPlans, which contains the actual Cmm graph modification,
488 489 490 491 492 493 494 495 496 497 498 499 500
--
-- for these reasons:
--
--  * CmmSwitch is very low in the dependency tree, i.e. does not depend on any
--    GHC specific modules at all (with the exception of Output and Hoople
--    (Literal)). CmmImplementSwitchPlans is the Cmm transformation and hence very
--    high in the dependency tree.
--  * CmmSwitch provides the CmmSwitchTargets data type, which is abstract, but
--    used in CmmNodes.
--  * Because CmmSwitch is low in the dependency tree, the separation allows
--    for more parallelism when building GHC.
--  * The interaction between the modules is very explicit and easy to
--    understand, due to the small and simple interface.