Add graph coloring register allocator.

Refactored linear allocator into separate liveness annotation and allocation stages.
Added graph coloring allocator, use -fregs-graph to enable.
  New dump flags are
    -ddump-asm-native          -- output of cmm -> native transform.
    -ddump-asm-liveness        -- code annotated with register liveness info
    -ddump-asm-coalesce        -- output of register move coalescing
                                    (this is a separate pass when using the coloring allocator)
                                    (this could change in the future)
    -ddump-asm-regalloc        -- code after register allocation
    -ddump-asm-regalloc-stages -- blocks after each build/spill stage of coloring allocator
    -ddump-asm-conflicts       -- a global register liveness graph in graphviz format 
        
The new register allocator will allocate some registers, but it's not
quite ready for prime-time yet. The spill code generator needs some work...
parent 27802c59
......@@ -33,7 +33,7 @@
--
module PprCmm (
writeCmms, pprCmms, pprCmm, pprStmt, pprExpr
writeCmms, pprCmms, pprCmm, pprStmt, pprExpr, pprSection, pprStatic
) where
#include "HsVersions.h"
......@@ -65,12 +65,16 @@ writeCmms handle cmms = printForC handle (pprCmms cmms)
instance (Outputable info) => Outputable (GenCmm CmmStatic info CmmStmt) where
ppr c = pprCmm c
instance (Outputable info) => Outputable (GenCmmTop CmmStatic info CmmStmt) where
instance (Outputable d, Outputable info, Outputable i)
=> Outputable (GenCmmTop d info i) where
ppr t = pprTop t
instance Outputable CmmBasicBlock where
instance (Outputable instr) => Outputable (GenBasicBlock instr) where
ppr b = pprBBlock b
instance Outputable BlockId where
ppr id = pprBlockId id
instance Outputable CmmStmt where
ppr s = pprStmt s
......@@ -92,6 +96,8 @@ instance Outputable CmmStatic where
instance Outputable CmmInfo where
ppr e = pprInfo e
-----------------------------------------------------------------------------
pprCmm :: (Outputable info) => GenCmm CmmStatic info CmmStmt -> SDoc
......@@ -100,7 +106,9 @@ pprCmm (Cmm tops) = vcat $ intersperse (text "") $ map pprTop tops
-- --------------------------------------------------------------------------
-- Top level `procedure' blocks.
--
pprTop :: (Outputable info) => GenCmmTop CmmStatic info CmmStmt -> SDoc
pprTop :: (Outputable d, Outputable info, Outputable i)
=> GenCmmTop d info i -> SDoc
pprTop (CmmProc info lbl params blocks )
= vcat [ pprCLabel lbl <> parens (commafy $ map ppr params) <+> lbrace
......@@ -114,7 +122,7 @@ pprTop (CmmProc info lbl params blocks )
-- section "data" { ... }
--
pprTop (CmmData section ds) =
(hang (pprSection section <+> lbrace) 4 (vcat (map pprStatic ds)))
(hang (pprSection section <+> lbrace) 4 (vcat (map ppr ds)))
$$ rbrace
-- --------------------------------------------------------------------------
......@@ -186,7 +194,7 @@ pprUpdateFrame (UpdateFrame expr args) =
-- --------------------------------------------------------------------------
-- Basic blocks look like assembly blocks.
-- lbl: stmt ; stmt ; ..
pprBBlock :: CmmBasicBlock -> SDoc
pprBBlock :: Outputable stmt => GenBasicBlock stmt -> SDoc
pprBBlock (BasicBlock ident stmts) =
hang (pprBlockId ident <> colon) 4 (vcat (map ppr stmts))
......
......@@ -96,6 +96,12 @@ data DynFlag
= Opt_D_dump_cmm
| Opt_D_dump_cps_cmm
| Opt_D_dump_asm
| Opt_D_dump_asm_native
| Opt_D_dump_asm_liveness
| Opt_D_dump_asm_coalesce
| Opt_D_dump_asm_regalloc
| Opt_D_dump_asm_regalloc_stages
| Opt_D_dump_asm_conflicts
| Opt_D_dump_cpranal
| Opt_D_dump_deriv
| Opt_D_dump_ds
......@@ -229,6 +235,7 @@ data DynFlag
| Opt_DictsCheap
| Opt_RewriteRules
| Opt_Vectorise
| Opt_RegsGraph
-- misc opts
| Opt_Cpp
......@@ -990,6 +997,13 @@ dynamic_flags = [
, ( "ddump-cmm", setDumpFlag Opt_D_dump_cmm)
, ( "ddump-cps-cmm", setDumpFlag Opt_D_dump_cps_cmm)
, ( "ddump-asm", setDumpFlag Opt_D_dump_asm)
, ( "ddump-asm-native", setDumpFlag Opt_D_dump_asm_native)
, ( "ddump-asm-liveness", setDumpFlag Opt_D_dump_asm_liveness)
, ( "ddump-asm-coalesce", setDumpFlag Opt_D_dump_asm_coalesce)
, ( "ddump-asm-regalloc", setDumpFlag Opt_D_dump_asm_regalloc)
, ( "ddump-asm-conflicts", setDumpFlag Opt_D_dump_asm_conflicts)
, ( "ddump-asm-regalloc-stages",
setDumpFlag Opt_D_dump_asm_regalloc_stages)
, ( "ddump-cpranal", setDumpFlag Opt_D_dump_cpranal)
, ( "ddump-deriv", setDumpFlag Opt_D_dump_deriv)
, ( "ddump-ds", setDumpFlag Opt_D_dump_ds)
......@@ -1137,6 +1151,7 @@ fFlags = [
( "rewrite-rules", Opt_RewriteRules ),
( "break-on-exception", Opt_BreakOnException ),
( "vectorise", Opt_Vectorise ),
( "regs-graph", Opt_RegsGraph),
-- Deprecated in favour of -XTemplateHaskell:
( "th", Opt_TemplateHaskell ),
-- Deprecated in favour of -XForeignFunctionInterface:
......
......@@ -16,14 +16,18 @@ import MachInstrs
import MachRegs
import MachCodeGen
import PprMach
import RegisterAlloc
import RegAllocInfo
import NCGMonad
import PositionIndependentCode
import RegAllocLinear
import RegLiveness
import RegCoalesce
import qualified RegAllocColor as Color
import qualified GraphColor as Color
import Cmm
import CmmOpt ( cmmMiniInline, cmmMachOpFold )
import PprCmm ( pprStmt, pprCmms )
import PprCmm ( pprStmt, pprCmms, pprCmm )
import MachOp
import CLabel
......@@ -42,6 +46,7 @@ import Digraph
import qualified Pretty
import Outputable
import FastString
import UniqSet
-- DEBUGGING ONLY
--import OrdList
......@@ -53,6 +58,7 @@ import List ( intersperse )
import Data.Int
import Data.Word
import Data.Bits
import Data.Maybe
import GHC.Exts
{-
......@@ -108,21 +114,68 @@ The machine-dependent bits break down as follows:
-- NB. We *lazilly* compile each block of code for space reasons.
--------------------
nativeCodeGen :: DynFlags -> [RawCmm] -> UniqSupply -> IO Pretty.Doc
nativeCodeGen dflags cmms us
= let (res, _) = initUs us $
cgCmm (concat (map add_split cmms))
cgCmm :: [RawCmmTop] -> UniqSM (RawCmm, Pretty.Doc, [CLabel])
cgCmm :: [RawCmmTop] -> UniqSM ( [CmmNativeGenDump], Pretty.Doc, [CLabel])
cgCmm tops =
lazyMapUs (cmmNativeGen dflags) tops `thenUs` \ results ->
case unzip3 results of { (cmms,docs,imps) ->
returnUs (Cmm cmms, my_vcat docs, concat imps)
case unzip3 results of { (dump,docs,imps) ->
returnUs (dump, my_vcat docs, concat imps)
}
in
case res of { (ppr_cmms, insn_sdoc, imports) -> do
dumpIfSet_dyn dflags Opt_D_dump_opt_cmm "Optimised Cmm" (pprCmms [ppr_cmms])
case res of { (dump, insn_sdoc, imports) -> do
-- stripe across the outputs for each block so all the information for a
-- certain stage is concurrent in the dumps.
dumpIfSet_dyn dflags
Opt_D_dump_opt_cmm "Optimised Cmm"
(pprCmm $ Cmm $ map cdCmmOpt dump)
dumpIfSet_dyn dflags
Opt_D_dump_asm_native "(asm-native) Native code"
(vcat $ map (docToSDoc . pprNatCmmTop) $ concatMap cdNative dump)
dumpIfSet_dyn dflags
Opt_D_dump_asm_liveness "(asm-liveness) Liveness info added"
(vcat $ map (ppr . cdLiveness) dump)
dumpIfSet_dyn dflags
Opt_D_dump_asm_coalesce "(asm-coalesce) Register moves coalesced."
(vcat $ map (ppr . cdCoalesce) dump)
dumpIfSet_dyn dflags
Opt_D_dump_asm_regalloc "(asm-regalloc) Registers allocated"
(vcat $ map (docToSDoc . pprNatCmmTop) $ concatMap cdAlloced dump)
-- with the graph coloring allocator, show the result of each build/spill stage
-- for each block in turn.
mapM_ (\codeGraphs
-> dumpIfSet_dyn dflags
Opt_D_dump_asm_regalloc_stages
"(asm-regalloc-stages)"
(vcat $ map (\(stage, (code, graph)) ->
( text "-- Stage " <> int stage
$$ ppr code
$$ Color.dotGraph Color.regDotColor trivColorable graph))
(zip [0..] codeGraphs)))
$ map cdCodeGraphs dump
-- Build a global register conflict graph.
-- If you want to see the graph for just one basic block then use asm-regalloc-stages instead.
dumpIfSet_dyn dflags
Opt_D_dump_asm_conflicts "(asm-conflicts) Register conflict graph"
$ Color.dotGraph Color.regDotColor trivColorable
$ foldl Color.union Color.initGraph
$ catMaybes $ map cdColoredGraph dump
return (insn_sdoc Pretty.$$ dyld_stubs imports
#if HAVE_SUBSECTIONS_VIA_SYMBOLS
-- On recent versions of Darwin, the linker supports
-- dead-stripping of code and data on a per-symbol basis.
......@@ -193,45 +246,161 @@ nativeCodeGen dflags cmms us
#endif
-- Complete native code generation phase for a single top-level chunk
-- of Cmm.
-- Carries output of the code generator passes, for dumping.
-- Make sure to only fill the one's we're interested in to avoid
-- creating space leaks.
data CmmNativeGenDump
= CmmNativeGenDump
{ cdCmmOpt :: RawCmmTop
, cdNative :: [NatCmmTop]
, cdLiveness :: [LiveCmmTop]
, cdCoalesce :: [LiveCmmTop]
, cdCodeGraphs :: [([LiveCmmTop], Color.Graph Reg RegClass Reg)]
, cdColoredGraph :: Maybe (Color.Graph Reg RegClass Reg)
, cdAlloced :: [NatCmmTop] }
cmmNativeGen :: DynFlags -> RawCmmTop -> UniqSM (RawCmmTop, Pretty.Doc, [CLabel])
dchoose dflags opt a b
| dopt opt dflags = a
| otherwise = b
-- | Complete native code generation phase for a single top-level chunk of Cmm.
-- Unless they're being dumped, intermediate data structures are squashed after
-- every stage to avoid creating space leaks.
--
cmmNativeGen :: DynFlags -> RawCmmTop -> UniqSM (CmmNativeGenDump, Pretty.Doc, [CLabel])
cmmNativeGen dflags cmm
= {-# SCC "fixAssigns" #-}
fixAssignsTop cmm `thenUs` \ fixed_cmm ->
{-# SCC "genericOpt" #-}
cmmToCmm dflags fixed_cmm `bind` \ (cmm, imports) ->
(if dopt Opt_D_dump_opt_cmm dflags -- space leak avoidance
then cmm
else CmmData Text []) `bind` \ ppr_cmm ->
{-# SCC "genMachCode" #-}
genMachCode dflags cmm `thenUs` \ (pre_regalloc, lastMinuteImports) ->
{-# SCC "regAlloc" #-}
mapUs regAlloc pre_regalloc `thenUs` \ with_regs ->
{-# SCC "shortcutBranches" #-}
shortcutBranches dflags with_regs `bind` \ shorted ->
{-# SCC "sequenceBlocks" #-}
map sequenceTop shorted `bind` \ sequenced ->
{-# SCC "x86fp_kludge" #-}
map x86fp_kludge sequenced `bind` \ final_mach_code ->
{-# SCC "vcat" #-}
Pretty.vcat (map pprNatCmmTop final_mach_code) `bind` \ final_sdoc ->
returnUs (ppr_cmm, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports)
where
x86fp_kludge :: NatCmmTop -> NatCmmTop
x86fp_kludge top@(CmmData _ _) = top
= do
--
fixed_cmm
<- {-# SCC "fixAssigns" #-}
fixAssignsTop cmm
---- cmm to cmm optimisations
(cmm, imports, ppr_cmm)
<- (\fixed_cmm
-> {-# SCC "genericOpt" #-}
do let (cmm, imports) = cmmToCmm dflags fixed_cmm
return ( cmm
, imports
, dchoose dflags Opt_D_dump_cmm cmm (CmmData Text []))
) fixed_cmm
---- generate native code from cmm
(native, lastMinuteImports, ppr_native)
<- (\cmm
-> {-# SCC "genMachCode" #-}
do (machCode, lastMinuteImports)
<- genMachCode dflags cmm
return ( machCode
, lastMinuteImports
, dchoose dflags Opt_D_dump_asm_native machCode [])
) cmm
---- tag instructions with register liveness information
(withLiveness, ppr_withLiveness)
<- (\native
-> {-# SCC "regLiveness" #-}
do
withLiveness <- mapUs regLiveness native
return ( withLiveness
, dchoose dflags Opt_D_dump_asm_liveness withLiveness []))
native
---- allocate registers
(alloced, ppr_alloced, ppr_coalesce, ppr_codeGraphs, ppr_coloredGraph)
<- (\withLiveness
-> {-# SCC "regAlloc" #-}
do
if dopt Opt_RegsGraph dflags
then do
-- the regs usable for allocation
let alloc_regs
= foldr (\r -> plusUFM_C unionUniqSets
$ unitUFM (regClass r) (unitUniqSet r))
emptyUFM
$ map RealReg allocatableRegs
-- aggressively coalesce moves between virtual regs
coalesced <- regCoalesce withLiveness
-- graph coloring register allocation
(alloced, codeGraphs)
<- Color.regAlloc
alloc_regs
(mkUniqSet [0..maxSpillSlots])
coalesced
return ( alloced
, dchoose dflags Opt_D_dump_asm_regalloc alloced []
, dchoose dflags Opt_D_dump_asm_coalesce coalesced []
, dchoose dflags Opt_D_dump_asm_regalloc_stages codeGraphs []
, dchoose dflags Opt_D_dump_asm_conflicts Nothing Nothing)
else do
-- do linear register allocation
alloced <- mapUs regAlloc withLiveness
return ( alloced
, dchoose dflags Opt_D_dump_asm_regalloc alloced []
, []
, []
, Nothing ))
withLiveness
---- shortcut branches
let shorted =
{-# SCC "shortcutBranches" #-}
shortcutBranches dflags alloced
---- sequence blocks
let sequenced =
{-# SCC "sequenceBlocks" #-}
map sequenceTop shorted
---- x86fp_kludge
let final_mach_code =
#if i386_TARGET_ARCH
x86fp_kludge top@(CmmProc info lbl params code) =
CmmProc info lbl params (map bb_i386_insert_ffrees code)
where
bb_i386_insert_ffrees (BasicBlock id instrs) =
BasicBlock id (i386_insert_ffrees instrs)
{-# SCC "x86fp_kludge" #-}
map x86fp_kludge sequenced
#else
x86fp_kludge top = top
sequenced
#endif
---- vcat
let final_sdoc =
{-# SCC "vcat" #-}
Pretty.vcat (map pprNatCmmTop final_mach_code)
let dump =
CmmNativeGenDump
{ cdCmmOpt = ppr_cmm
, cdNative = ppr_native
, cdLiveness = ppr_withLiveness
, cdCoalesce = ppr_coalesce
, cdCodeGraphs = ppr_codeGraphs
, cdColoredGraph = ppr_coloredGraph
, cdAlloced = ppr_alloced }
returnUs (dump, final_sdoc Pretty.$$ Pretty.text "", lastMinuteImports ++ imports)
#if i386_TARGET_ARCH
x86fp_kludge :: NatCmmTop -> NatCmmTop
x86fp_kludge top@(CmmData _ _) = top
x86fp_kludge top@(CmmProc info lbl params code) =
CmmProc info lbl params (map bb_i386_insert_ffrees code)
where
bb_i386_insert_ffrees (BasicBlock id instrs) =
BasicBlock id (i386_insert_ffrees instrs)
#endif
-- -----------------------------------------------------------------------------
-- Sequencing the basic blocks
......
-- | Types for the general graph colorer.
module GraphBase (
Triv,
Graph (..),
initGraph,
graphMapModify,
Node (..), newNode,
)
where
import UniqSet
import UniqFM
-- | A fn to check if a node is trivially colorable
-- For graphs who's color classes are disjoint then a node is 'trivially colorable'
-- when it has less neighbors and exclusions than available colors for that node.
--
-- For graph's who's color classes overlap, ie some colors alias other colors, then
-- this can be a bit more tricky. There is a general way to calculate this, but
-- it's likely be too slow for use in the code. The coloring algorithm takes
-- a canned function which can be optimised by the user to be specific to the
-- specific graph being colored.
--
-- for details, see "A Generalised Algorithm for Graph-Coloring Register Allocation"
-- Smith, Ramsey, Holloway - PLDI 2004.
--
type Triv k cls color
= cls -- ^ the class of the node we're trying to color.
-> UniqSet k -- ^ the node's neighbors.
-> UniqSet color -- ^ the node's exclusions.
-> Bool
-- | The Interference graph.
-- There used to be more fields, but they were turfed out in a previous revision.
-- maybe we'll want more later..
--
data Graph k cls color
= Graph {
-- | All active nodes in the graph.
graphMap :: UniqFM (Node k cls color) }
-- | An empty graph.
initGraph
= Graph
{ graphMap = emptyUFM }
-- | Modify the finite map holding the nodes in the graph.
graphMapModify
:: (UniqFM (Node k cls color) -> UniqFM (Node k cls color))
-> Graph k cls color -> Graph k cls color
graphMapModify f graph
= graph { graphMap = f (graphMap graph) }
-- | Graph nodes.
-- Represents a thing that can conflict with another thing.
-- For the register allocater the nodes represent registers.
--
data Node k cls color
= Node {
-- | A unique identifier for this node.
nodeId :: k
-- | The class of this node,
-- determines the set of colors that can be used.
, nodeClass :: cls
-- | The color of this node, if any.
, nodeColor :: Maybe color
-- | Neighbors which must be colored differently to this node.
, nodeConflicts :: UniqSet k
-- | Colors that cannot be used by this node.
, nodeExclusions :: UniqSet color
-- | Colors that this node would prefer to be, in decending order.
, nodePreference :: [color]
-- | Neighbors that this node would like to be colored the same as.
, nodeCoalesce :: UniqSet k }
-- | An empty node.
newNode :: k -> cls -> Node k cls color
newNode k cls
= Node
{ nodeId = k
, nodeClass = cls
, nodeColor = Nothing
, nodeConflicts = emptyUniqSet
, nodeExclusions = emptyUniqSet
, nodePreference = []
, nodeCoalesce = emptyUniqSet }
-- | Graph Coloring.
-- This is a generic graph coloring library, abstracted over the type of
-- the node keys, nodes and colors.
--
module GraphColor (
module GraphBase,
module GraphOps,
module GraphPpr,
colorGraph
)
where
import GraphBase
import GraphOps
import GraphPpr
import Unique
import UniqFM
import UniqSet
import Outputable
import Data.Maybe
import Data.List
-- | Try to color a graph with this set of colors.
-- Uses Chaitin's algorithm to color the graph.
-- The graph is scanned for nodes which are deamed 'trivially colorable'. These nodes
-- are pushed onto a stack and removed from the graph.
-- Once this process is complete the graph can be colored by removing nodes from
-- the stack (ie in reverse order) and assigning them colors different to their neighbors.
--
colorGraph
:: ( Uniquable k, Uniquable cls, Uniquable color, Eq color
, Outputable k, Outputable cls, Outputable color)
=> UniqFM (UniqSet color) -- ^ map of (node class -> set of colors available for this class).
-> Triv k cls color -- ^ fn to decide whether a node is trivially colorable.
-> (Graph k cls color -> k) -- ^ fn to choose a node to potentially leave uncolored if nothing is trivially colorable.
-> Graph k cls color -- ^ the graph to color.
-> ( Graph k cls color -- ^ the colored graph.
, UniqSet k ) -- ^ the set of nodes that we couldn't find a color for.
colorGraph colors triv spill graph0
= let -- run the scanner to slurp out all the trivially colorable nodes
(ksTriv, ksProblems) = colorScan colors triv spill [] emptyUniqSet graph0
-- color the trivially colorable nodes
(graph1, ksNoTriv) = assignColors colors graph0 ksTriv
-- try and color the problem nodes
(graph2, ksNoColor) = assignColors colors graph1 (uniqSetToList ksProblems)
-- if the trivially colorable nodes didn't color then something is wrong
-- with the provided triv function.
in if not $ null ksNoTriv
then pprPanic "colorGraph: trivially colorable nodes didn't color!" empty
{- ( empty
$$ text "ksTriv = " <> ppr ksTriv
$$ text "ksNoTriv = " <> ppr ksNoTriv
$$ empty
$$ dotGraph (\x -> text "white") triv graph1) -}
else (graph2, mkUniqSet ksNoColor)
colorScan colors triv spill safe prob graph
-- empty graphs are easy to color.
| isNullUFM $ graphMap graph
= (safe, prob)
-- Try and find a trivially colorable node.
| Just node <- find (\node -> triv (nodeClass node)
(nodeConflicts node)
(nodeExclusions node))
$ eltsUFM $ graphMap graph
, k <- nodeId node
= colorScan colors triv spill
(k : safe) prob (delNode k graph)
-- There was no trivially colorable node,
-- Choose one to potentially leave uncolored. We /might/ be able to find
-- a color for this later on, but no guarantees.
| k <- spill graph
= colorScan colors triv spill
safe (addOneToUniqSet prob k) (delNode k graph)
-- | Try to assign a color to all these nodes.
assignColors
:: ( Uniquable k, Uniquable cls, Uniquable color, Eq color )
=> UniqFM (UniqSet color) -- ^ map of (node class -> set of colors available for this class).
-> Graph k cls color -- ^ the graph
-> [k] -- ^ nodes to assign a color to.
-> ( Graph k cls color -- the colored graph
, [k]) -- the nodes that didn't color.
assignColors colors graph ks
= assignColors' colors graph [] ks
where assignColors' colors graph prob []
= (graph, prob)
assignColors' colors graph prob (k:ks)
= case assignColor colors k graph of
-- couldn't color this node
Nothing -> assignColors' colors graph (k : prob) ks
-- this node colored ok, so do the rest
Just graph' -> assignColors' colors graph' prob ks
assignColor colors u graph
| Just c <- selectColor colors graph u
= Just (setColor u c graph)
| otherwise
= Nothing
-- | Select a color for a certain node
-- taking into account preferences, neighbors and exclusions.
-- returns Nothing if no color can be assigned to this node.
--
-- TODO: avoid using the prefs of the neighbors, if at all possible.
--
selectColor
:: ( Uniquable k, Uniquable cls, Uniquable color, Eq color)
=> UniqFM (UniqSet color) -- ^ map of (node class -> set of colors available for this class).
-> Graph k cls color -- ^ the graph