Commit 535a88e1 authored by Andreas Klebinger's avatar Andreas Klebinger Committed by Marge Bot

Add loop level analysis to the NCG backend.

For backends maintaining the CFG during codegen
we can now find loops and their nesting level.

This is based on the Cmm CFG and dominator analysis.

As a result we can estimate edge frequencies a lot better
for methods, resulting in far better code layout.

Speedup on nofib: ~1.5%
Increase in compile times: ~1.9%

To make this feasible this commit adds:
* Dominator analysis based on the Lengauer-Tarjan Algorithm.
* An algorithm estimating global edge frequences from branch
probabilities - In CFG.hs

A few static branch prediction heuristics:

* Expect to take the backedge in loops.
* Expect to take the branch NOT exiting a loop.
* Expect integer vs constant comparisons to be false.

We also treat heap/stack checks special for branch prediction
to avoid them being treated as loops.
parent 9c11f817
Pipeline #11446 passed with stages
in 496 minutes and 26 seconds
...@@ -6,8 +6,6 @@ ...@@ -6,8 +6,6 @@
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# OPTIONS_GHC -fprof-auto-top #-}
-- --
-- Copyright (c) 2010, João Dias, Simon Marlow, Simon Peyton Jones, -- Copyright (c) 2010, João Dias, Simon Marlow, Simon Peyton Jones,
-- and Norman Ramsey -- and Norman Ramsey
...@@ -108,6 +106,7 @@ analyzeCmm ...@@ -108,6 +106,7 @@ analyzeCmm
-> FactBase f -> FactBase f
-> FactBase f -> FactBase f
analyzeCmm dir lattice transfer cmmGraph initFact = analyzeCmm dir lattice transfer cmmGraph initFact =
{-# SCC analyzeCmm #-}
let entry = g_entry cmmGraph let entry = g_entry cmmGraph
hooplGraph = g_graph cmmGraph hooplGraph = g_graph cmmGraph
blockMap = blockMap =
...@@ -169,7 +168,7 @@ rewriteCmm ...@@ -169,7 +168,7 @@ rewriteCmm
-> CmmGraph -> CmmGraph
-> FactBase f -> FactBase f
-> UniqSM (CmmGraph, FactBase f) -> UniqSM (CmmGraph, FactBase f)
rewriteCmm dir lattice rwFun cmmGraph initFact = do rewriteCmm dir lattice rwFun cmmGraph initFact = {-# SCC rewriteCmm #-} do
let entry = g_entry cmmGraph let entry = g_entry cmmGraph
hooplGraph = g_graph cmmGraph hooplGraph = g_graph cmmGraph
blockMap1 = blockMap1 =
......
...@@ -593,6 +593,7 @@ Library ...@@ -593,6 +593,7 @@ Library
Instruction Instruction
BlockLayout BlockLayout
CFG CFG
Dominators
Format Format
Reg Reg
RegClass RegClass
......
...@@ -562,7 +562,7 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count ...@@ -562,7 +562,7 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
Opt_D_dump_asm_native "Native code" Opt_D_dump_asm_native "Native code"
(vcat $ map (pprNatCmmDecl ncgImpl) native) (vcat $ map (pprNatCmmDecl ncgImpl) native)
dumpIfSet_dyn dflags when (not $ null nativeCfgWeights) $ dumpIfSet_dyn dflags
Opt_D_dump_cfg_weights "CFG Weights" Opt_D_dump_cfg_weights "CFG Weights"
(pprEdgeWeights nativeCfgWeights) (pprEdgeWeights nativeCfgWeights)
...@@ -691,7 +691,7 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count ...@@ -691,7 +691,7 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
{-# SCC "generateJumpTables" #-} {-# SCC "generateJumpTables" #-}
generateJumpTables ncgImpl alloced generateJumpTables ncgImpl alloced
dumpIfSet_dyn dflags when (not $ null nativeCfgWeights) $ dumpIfSet_dyn dflags
Opt_D_dump_cfg_weights "CFG Update information" Opt_D_dump_cfg_weights "CFG Update information"
( text "stack:" <+> ppr stack_updt_blks $$ ( text "stack:" <+> ppr stack_updt_blks $$
text "linearAlloc:" <+> ppr cfgRegAllocUpdates ) text "linearAlloc:" <+> ppr cfgRegAllocUpdates )
...@@ -705,8 +705,9 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count ...@@ -705,8 +705,9 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
optimizedCFG = optimizedCFG =
optimizeCFG (cfgWeightInfo dflags) cmm <$!> postShortCFG optimizeCFG (cfgWeightInfo dflags) cmm <$!> postShortCFG
maybe (return ()) maybe (return ()) (\cfg->
(dumpIfSet_dyn dflags Opt_D_dump_cfg_weights "CFG Final Weights" . pprEdgeWeights) dumpIfSet_dyn dflags Opt_D_dump_cfg_weights "CFG Final Weights"
( pprEdgeWeights cfg ))
optimizedCFG optimizedCFG
--TODO: Partially check validity of the cfg. --TODO: Partially check validity of the cfg.
......
This diff is collapsed.
This diff is collapsed.
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables, GADTs, BangPatterns #-}
module RegAlloc.Graph.SpillCost ( module RegAlloc.Graph.SpillCost (
SpillCostRecord, SpillCostRecord,
plusSpillCostRecord, plusSpillCostRecord,
...@@ -23,6 +23,7 @@ import Reg ...@@ -23,6 +23,7 @@ import Reg
import GraphBase import GraphBase
import Hoopl.Collections (mapLookup) import Hoopl.Collections (mapLookup)
import Hoopl.Label
import Cmm import Cmm
import UniqFM import UniqFM
import UniqSet import UniqSet
...@@ -49,9 +50,6 @@ type SpillCostRecord ...@@ -49,9 +50,6 @@ type SpillCostRecord
type SpillCostInfo type SpillCostInfo
= UniqFM SpillCostRecord = UniqFM SpillCostRecord
-- | Block membership in a loop
type LoopMember = Bool
type SpillCostState = State (UniqFM SpillCostRecord) () type SpillCostState = State (UniqFM SpillCostRecord) ()
-- | An empty map of spill costs. -- | An empty map of spill costs.
...@@ -88,45 +86,49 @@ slurpSpillCostInfo platform cfg cmm ...@@ -88,45 +86,49 @@ slurpSpillCostInfo platform cfg cmm
where where
countCmm CmmData{} = return () countCmm CmmData{} = return ()
countCmm (CmmProc info _ _ sccs) countCmm (CmmProc info _ _ sccs)
= mapM_ (countBlock info) = mapM_ (countBlock info freqMap)
$ flattenSCCs sccs $ flattenSCCs sccs
where
LiveInfo _ entries _ _ = info
freqMap = (fst . mkGlobalWeights (head entries)) <$> cfg
-- Lookup the regs that are live on entry to this block in -- Lookup the regs that are live on entry to this block in
-- the info table from the CmmProc. -- the info table from the CmmProc.
countBlock info (BasicBlock blockId instrs) countBlock info freqMap (BasicBlock blockId instrs)
| LiveInfo _ _ blockLive _ <- info | LiveInfo _ _ blockLive _ <- info
, Just rsLiveEntry <- mapLookup blockId blockLive , Just rsLiveEntry <- mapLookup blockId blockLive
, rsLiveEntry_virt <- takeVirtuals rsLiveEntry , rsLiveEntry_virt <- takeVirtuals rsLiveEntry
= countLIs (loopMember blockId) rsLiveEntry_virt instrs = countLIs (ceiling $ blockFreq freqMap blockId) rsLiveEntry_virt instrs
| otherwise | otherwise
= error "RegAlloc.SpillCost.slurpSpillCostInfo: bad block" = error "RegAlloc.SpillCost.slurpSpillCostInfo: bad block"
countLIs :: LoopMember -> UniqSet VirtualReg -> [LiveInstr instr] -> SpillCostState
countLIs :: Int -> UniqSet VirtualReg -> [LiveInstr instr] -> SpillCostState
countLIs _ _ [] countLIs _ _ []
= return () = return ()
-- Skip over comment and delta pseudo instrs. -- Skip over comment and delta pseudo instrs.
countLIs inLoop rsLive (LiveInstr instr Nothing : lis) countLIs scale rsLive (LiveInstr instr Nothing : lis)
| isMetaInstr instr | isMetaInstr instr
= countLIs inLoop rsLive lis = countLIs scale rsLive lis
| otherwise | otherwise
= pprPanic "RegSpillCost.slurpSpillCostInfo" = pprPanic "RegSpillCost.slurpSpillCostInfo"
$ text "no liveness information on instruction " <> ppr instr $ text "no liveness information on instruction " <> ppr instr
countLIs inLoop rsLiveEntry (LiveInstr instr (Just live) : lis) countLIs scale rsLiveEntry (LiveInstr instr (Just live) : lis)
= do = do
-- Increment the lifetime counts for regs live on entry to this instr. -- Increment the lifetime counts for regs live on entry to this instr.
mapM_ (incLifetime (loopCount inLoop)) $ nonDetEltsUniqSet rsLiveEntry mapM_ incLifetime $ nonDetEltsUniqSet rsLiveEntry
-- This is non-deterministic but we do not -- This is non-deterministic but we do not
-- currently support deterministic code-generation. -- currently support deterministic code-generation.
-- See Note [Unique Determinism and code generation] -- See Note [Unique Determinism and code generation]
-- Increment counts for what regs were read/written from. -- Increment counts for what regs were read/written from.
let (RU read written) = regUsageOfInstr platform instr let (RU read written) = regUsageOfInstr platform instr
mapM_ (incUses (loopCount inLoop)) $ catMaybes $ map takeVirtualReg $ nub read mapM_ (incUses scale) $ catMaybes $ map takeVirtualReg $ nub read
mapM_ (incDefs (loopCount inLoop)) $ catMaybes $ map takeVirtualReg $ nub written mapM_ (incDefs scale) $ catMaybes $ map takeVirtualReg $ nub written
-- Compute liveness for entry to next instruction. -- Compute liveness for entry to next instruction.
let liveDieRead_virt = takeVirtuals (liveDieRead live) let liveDieRead_virt = takeVirtuals (liveDieRead live)
...@@ -140,21 +142,18 @@ slurpSpillCostInfo platform cfg cmm ...@@ -140,21 +142,18 @@ slurpSpillCostInfo platform cfg cmm
= (rsLiveAcross `unionUniqSets` liveBorn_virt) = (rsLiveAcross `unionUniqSets` liveBorn_virt)
`minusUniqSet` liveDieWrite_virt `minusUniqSet` liveDieWrite_virt
countLIs inLoop rsLiveNext lis countLIs scale rsLiveNext lis
loopCount inLoop
| inLoop = 10
| otherwise = 1
incDefs count reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, count, 0, 0) incDefs count reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, count, 0, 0)
incUses count reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, 0, count, 0) incUses count reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, 0, count, 0)
incLifetime count reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, 0, 0, count) incLifetime reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, 0, 0, 1)
loopBlocks = CFG.loopMembers <$> cfg blockFreq :: Maybe (LabelMap Double) -> Label -> Double
loopMember bid blockFreq freqs bid
| Just isMember <- join (mapLookup bid <$> loopBlocks) | Just freq <- join (mapLookup bid <$> freqs)
= isMember = max 1.0 (10000 * freq)
| otherwise | otherwise
= False = 1.0 -- Only if no cfg given
-- | Take all the virtual registers from this set. -- | Take all the virtual registers from this set.
takeVirtuals :: UniqSet Reg -> UniqSet VirtualReg takeVirtuals :: UniqSet Reg -> UniqSet VirtualReg
...@@ -215,31 +214,39 @@ chooseSpill info graph ...@@ -215,31 +214,39 @@ chooseSpill info graph
-- Without live range splitting, its's better to spill from the outside -- Without live range splitting, its's better to spill from the outside
-- in so set the cost of very long live ranges to zero -- in so set the cost of very long live ranges to zero
-- --
{-
spillCost_chaitin
:: SpillCostInfo
-> Graph Reg RegClass Reg
-> Reg
-> Float
spillCost_chaitin info graph reg -- spillCost_chaitin
-- Spilling a live range that only lives for 1 instruction -- :: SpillCostInfo
-- isn't going to help us at all - and we definitely want to avoid -- -> Graph VirtualReg RegClass RealReg
-- trying to re-spill previously inserted spill code. -- -> VirtualReg
| lifetime <= 1 = 1/0 -- -> Float
-- It's unlikely that we'll find a reg for a live range this long -- spillCost_chaitin info graph reg
-- better to spill it straight up and not risk trying to keep it around -- -- Spilling a live range that only lives for 1 instruction
-- and have to go through the build/color cycle again. -- -- isn't going to help us at all - and we definitely want to avoid
| lifetime > allocatableRegsInClass (regClass reg) * 10 -- -- trying to re-spill previously inserted spill code.
= 0 -- | lifetime <= 1 = 1/0
-- -- It's unlikely that we'll find a reg for a live range this long
-- -- better to spill it straight up and not risk trying to keep it around
-- -- and have to go through the build/color cycle again.
-- -- To facility this we scale down the spill cost of long ranges.
-- -- This makes sure long ranges are still spilled first.
-- -- But this way spill cost remains relevant for long live
-- -- ranges.
-- | lifetime >= 128
-- = (spillCost / conflicts) / 10.0
-- -- Otherwise revert to chaitin's regular cost function.
-- | otherwise = (spillCost / conflicts)
-- where
-- !spillCost = fromIntegral (uses + defs) :: Float
-- conflicts = fromIntegral (nodeDegree classOfVirtualReg graph reg)
-- (_, defs, uses, lifetime)
-- = fromMaybe (reg, 0, 0, 0) $ lookupUFM info reg
-- Otherwise revert to chaitin's regular cost function.
| otherwise = fromIntegral (uses + defs)
/ fromIntegral (nodeDegree graph reg)
where (_, defs, uses, lifetime)
= fromMaybe (reg, 0, 0, 0) $ lookupUFM info reg
-}
-- Just spill the longest live range. -- Just spill the longest live range.
spillCost_length spillCost_length
......
...@@ -3529,7 +3529,7 @@ invertCondBranches (Just cfg) keep bs = ...@@ -3529,7 +3529,7 @@ invertCondBranches (Just cfg) keep bs =
, Just edgeInfo2 <- getEdgeInfo lbl1 target2 cfg , Just edgeInfo2 <- getEdgeInfo lbl1 target2 cfg
-- Both jumps come from the same cmm statement -- Both jumps come from the same cmm statement
, transitionSource edgeInfo1 == transitionSource edgeInfo2 , transitionSource edgeInfo1 == transitionSource edgeInfo2
, (CmmSource cmmCondBranch) <- transitionSource edgeInfo1 , CmmSource {trans_cmmNode = cmmCondBranch} <- transitionSource edgeInfo1
--Int comparisons are invertable --Int comparisons are invertable
, CmmCondBranch (CmmMachOp op _args) _ _ _ <- cmmCondBranch , CmmCondBranch (CmmMachOp op _args) _ _ _ <- cmmCondBranch
......
This diff is collapsed.
...@@ -10,14 +10,18 @@ can be appended in linear time. ...@@ -10,14 +10,18 @@ can be appended in linear time.
-} -}
{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE BangPatterns #-}
module OrdList ( module OrdList (
OrdList, OrdList,
nilOL, isNilOL, unitOL, appOL, consOL, snocOL, concatOL, lastOL, nilOL, isNilOL, unitOL, appOL, consOL, snocOL, concatOL, lastOL,
headOL, headOL,
mapOL, fromOL, toOL, foldrOL, foldlOL, reverseOL, fromOLReverse mapOL, fromOL, toOL, foldrOL, foldlOL, reverseOL, fromOLReverse,
strictlyEqOL, strictlyOrdOL
) where ) where
import GhcPrelude import GhcPrelude
import Data.Foldable
import Outputable import Outputable
...@@ -49,7 +53,11 @@ instance Monoid (OrdList a) where ...@@ -49,7 +53,11 @@ instance Monoid (OrdList a) where
mconcat = concatOL mconcat = concatOL
instance Foldable OrdList where instance Foldable OrdList where
foldr = foldrOL foldr = foldrOL
foldl' = foldlOL
toList = fromOL
null = isNilOL
length = lengthOL
instance Traversable OrdList where instance Traversable OrdList where
traverse f xs = toOL <$> traverse f (fromOL xs) traverse f xs = toOL <$> traverse f (fromOL xs)
...@@ -64,7 +72,7 @@ appOL :: OrdList a -> OrdList a -> OrdList a ...@@ -64,7 +72,7 @@ appOL :: OrdList a -> OrdList a -> OrdList a
concatOL :: [OrdList a] -> OrdList a concatOL :: [OrdList a] -> OrdList a
headOL :: OrdList a -> a headOL :: OrdList a -> a
lastOL :: OrdList a -> a lastOL :: OrdList a -> a
lengthOL :: OrdList a -> Int
nilOL = None nilOL = None
unitOL as = One as unitOL as = One as
...@@ -86,6 +94,13 @@ lastOL (Cons _ as) = lastOL as ...@@ -86,6 +94,13 @@ lastOL (Cons _ as) = lastOL as
lastOL (Snoc _ a) = a lastOL (Snoc _ a) = a
lastOL (Two _ as) = lastOL as lastOL (Two _ as) = lastOL as
lengthOL None = 0
lengthOL (One _) = 1
lengthOL (Many as) = length as
lengthOL (Cons _ as) = 1 + length as
lengthOL (Snoc as _) = 1 + length as
lengthOL (Two as bs) = length as + length bs
isNilOL None = True isNilOL None = True
isNilOL _ = False isNilOL _ = False
...@@ -126,13 +141,14 @@ foldrOL k z (Snoc xs x) = foldrOL k (k x z) xs ...@@ -126,13 +141,14 @@ foldrOL k z (Snoc xs x) = foldrOL k (k x z) xs
foldrOL k z (Two b1 b2) = foldrOL k (foldrOL k z b2) b1 foldrOL k z (Two b1 b2) = foldrOL k (foldrOL k z b2) b1
foldrOL k z (Many xs) = foldr k z xs foldrOL k z (Many xs) = foldr k z xs
-- | Strict left fold.
foldlOL :: (b->a->b) -> b -> OrdList a -> b foldlOL :: (b->a->b) -> b -> OrdList a -> b
foldlOL _ z None = z foldlOL _ z None = z
foldlOL k z (One x) = k z x foldlOL k z (One x) = k z x
foldlOL k z (Cons x xs) = foldlOL k (k z x) xs foldlOL k z (Cons x xs) = let !z' = (k z x) in foldlOL k z' xs
foldlOL k z (Snoc xs x) = k (foldlOL k z xs) x foldlOL k z (Snoc xs x) = let !z' = (foldlOL k z xs) in k z' x
foldlOL k z (Two b1 b2) = foldlOL k (foldlOL k z b1) b2 foldlOL k z (Two b1 b2) = let !z' = (foldlOL k z b1) in foldlOL k z' b2
foldlOL k z (Many xs) = foldl k z xs foldlOL k z (Many xs) = foldl' k z xs
toOL :: [a] -> OrdList a toOL :: [a] -> OrdList a
toOL [] = None toOL [] = None
...@@ -146,3 +162,33 @@ reverseOL (Cons a b) = Snoc (reverseOL b) a ...@@ -146,3 +162,33 @@ reverseOL (Cons a b) = Snoc (reverseOL b) a
reverseOL (Snoc a b) = Cons b (reverseOL a) reverseOL (Snoc a b) = Cons b (reverseOL a)
reverseOL (Two a b) = Two (reverseOL b) (reverseOL a) reverseOL (Two a b) = Two (reverseOL b) (reverseOL a)
reverseOL (Many xs) = Many (reverse xs) reverseOL (Many xs) = Many (reverse xs)
-- | Compare not only the values but also the structure of two lists
strictlyEqOL :: Eq a => OrdList a -> OrdList a -> Bool
strictlyEqOL None None = True
strictlyEqOL (One x) (One y) = x == y
strictlyEqOL (Cons a as) (Cons b bs) = a == b && as `strictlyEqOL` bs
strictlyEqOL (Snoc as a) (Snoc bs b) = a == b && as `strictlyEqOL` bs
strictlyEqOL (Two a1 a2) (Two b1 b2) = a1 `strictlyEqOL` b1 && a2 `strictlyEqOL` b2
strictlyEqOL (Many as) (Many bs) = as == bs
strictlyEqOL _ _ = False
-- | Compare not only the values but also the structure of two lists
strictlyOrdOL :: Ord a => OrdList a -> OrdList a -> Ordering
strictlyOrdOL None None = EQ
strictlyOrdOL None _ = LT
strictlyOrdOL (One x) (One y) = compare x y
strictlyOrdOL (One _) _ = LT
strictlyOrdOL (Cons a as) (Cons b bs) =
compare a b `mappend` strictlyOrdOL as bs
strictlyOrdOL (Cons _ _) _ = LT
strictlyOrdOL (Snoc as a) (Snoc bs b) =
compare a b `mappend` strictlyOrdOL as bs
strictlyOrdOL (Snoc _ _) _ = LT
strictlyOrdOL (Two a1 a2) (Two b1 b2) =
(strictlyOrdOL a1 b1) `mappend` (strictlyOrdOL a2 b2)
strictlyOrdOL (Two _ _) _ = LT
strictlyOrdOL (Many as) (Many bs) = compare as bs
strictlyOrdOL (Many _ ) _ = GT
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment