Commit b13ebb67 authored by benl's avatar benl

Comments and formatting to register allocators

No functional changes.
parent f0f63a54
-- | Utils for calculating general worst, bound, squeese and free, functions.
--
-- as per: "A Generalized Algorithm for Graph-Coloring Register Allocation"
-- Michael Smith, Normal Ramsey, Glenn Holloway.
-- PLDI 2004
-- as per: "A Generalized Algorithm for Graph-Coloring Register Allocation"
-- Michael Smith, Normal Ramsey, Glenn Holloway.
-- PLDI 2004
--
-- These general versions are not used in GHC proper because they are too slow.
-- Instead, hand written optimised versions are provided for each architecture
-- in MachRegs*.hs
-- These general versions are not used in GHC proper because they are too slow.
-- Instead, hand written optimised versions are provided for each architecture
-- in MachRegs*.hs
--
-- This code is here because we can test the architecture specific code against it.
-- This code is here because we can test the architecture specific code against
-- it.
--
module RegAlloc.Graph.ArchBase (
RegClass(..),
Reg(..),
......@@ -20,9 +20,7 @@ module RegAlloc.Graph.ArchBase (
worst,
bound,
squeese
)
where
) where
import UniqSet
import Unique
......@@ -64,6 +62,7 @@ instance Uniquable Reg where
getUnique (RegSub _ (RegSub _ _))
= error "RegArchBase.getUnique: can't have a sub-reg of a sub-reg."
-- | A subcomponent of another register
data RegSub
= SubL16 -- lowest 16 bits
......@@ -79,11 +78,10 @@ data RegSub
--
-- (worst neighbors classN classC) is the maximum number of potential
-- colors for N that can be lost by coloring its neighbors.
--
-- This should be hand coded/cached for each particular architecture,
-- because the compute time is very long..
worst
:: (RegClass -> UniqSet Reg)
worst :: (RegClass -> UniqSet Reg)
-> (Reg -> UniqSet Reg)
-> Int -> RegClass -> RegClass -> Int
......@@ -97,10 +95,12 @@ worst regsOfClass regAlias neighbors classN classC
regsC = regsOfClass classC
-- all the possible subsets of c which have size < m
regsS = filter (\s -> sizeUniqSet s >= 1 && sizeUniqSet s <= neighbors)
regsS = filter (\s -> sizeUniqSet s >= 1
&& sizeUniqSet s <= neighbors)
$ powersetLS regsC
-- for each of the subsets of C, the regs which conflict with posiblities for N
-- for each of the subsets of C, the regs which conflict
-- with posiblities for N
regsS_conflict
= map (\s -> intersectUniqSets regsN (regAliasS s)) regsS
......@@ -110,8 +110,7 @@ worst regsOfClass regAlias neighbors classN classC
-- | For a node N of classN and neighbors of classesC
-- (bound classN classesC) is the maximum number of potential
-- colors for N that can be lost by coloring its neighbors.
bound
:: (RegClass -> UniqSet Reg)
bound :: (RegClass -> UniqSet Reg)
-> (Reg -> UniqSet Reg)
-> RegClass -> [RegClass] -> Int
......@@ -131,29 +130,25 @@ bound regsOfClass regAlias classN classesC
-- | The total squeese on a particular node with a list of neighbors.
--
-- A version of this should be constructed for each particular architecture,
-- possibly including uses of bound, so that alised registers don't get counted
-- twice, as per the paper.
squeese
:: (RegClass -> UniqSet Reg)
-- A version of this should be constructed for each particular architecture,
-- possibly including uses of bound, so that alised registers don't get
-- counted twice, as per the paper.
squeese :: (RegClass -> UniqSet Reg)
-> (Reg -> UniqSet Reg)
-> RegClass -> [(Int, RegClass)] -> Int
squeese regsOfClass regAlias classN countCs
= sum (map (\(i, classC) -> worst regsOfClass regAlias i classN classC) countCs)
= sum
$ map (\(i, classC) -> worst regsOfClass regAlias i classN classC)
$ countCs
-- | powerset (for lists)
powersetL :: [a] -> [[a]]
powersetL = map concat . mapM (\x -> [[],[x]])
-- | powersetLS (list of sets)
powersetLS :: Uniquable a => UniqSet a -> [UniqSet a]
powersetLS s = map mkUniqSet $ powersetL $ uniqSetToList s
{-
-- | unions (for sets)
unionsS :: Ord a => Set (Set a) -> Set a
unionsS ss = Set.unions $ Set.toList ss
-}
-- | A description of the register set of the X86.
-- This isn't used directly in GHC proper.
--
-- See RegArchBase.hs for the reference.
-- See MachRegs.hs for the actual trivColorable function used in GHC.
-- This isn't used directly in GHC proper.
--
-- See RegArchBase.hs for the reference.
-- See MachRegs.hs for the actual trivColorable function used in GHC.
--
module RegAlloc.Graph.ArchX86 (
classOfReg,
regsOfClass,
......@@ -13,11 +14,10 @@ module RegAlloc.Graph.ArchX86 (
worst,
squeese,
) where
import RegAlloc.Graph.ArchBase (Reg(..), RegSub(..), RegClass(..))
import UniqSet
-- | Determine the class of a register
classOfReg :: Reg -> RegClass
classOfReg reg
......@@ -34,18 +34,21 @@ regsOfClass :: RegClass -> UniqSet Reg
regsOfClass c
= case c of
ClassG32
-> mkUniqSet [ Reg ClassG32 i | i <- [0..7] ]
-> mkUniqSet [ Reg ClassG32 i
| i <- [0..7] ]
ClassG16
-> mkUniqSet [ RegSub SubL16 (Reg ClassG32 i) | i <- [0..7] ]
-> mkUniqSet [ RegSub SubL16 (Reg ClassG32 i)
| i <- [0..7] ]
ClassG8
-> unionUniqSets
(mkUniqSet [ RegSub SubL8 (Reg ClassG32 i) | i <- [0..3] ])
(mkUniqSet [ RegSub SubL8H (Reg ClassG32 i) | i <- [0..3] ])
(mkUniqSet [ RegSub SubL8 (Reg ClassG32 i) | i <- [0..3] ])
(mkUniqSet [ RegSub SubL8H (Reg ClassG32 i) | i <- [0..3] ])
ClassF64
-> mkUniqSet [ Reg ClassF64 i | i <- [0..5] ]
-> mkUniqSet [ Reg ClassF64 i
| i <- [0..5] ]
-- | Determine the common name of a reg
......@@ -54,21 +57,23 @@ regName :: Reg -> Maybe String
regName reg
= case reg of
Reg ClassG32 i
| i <= 7 -> Just ([ "eax", "ebx", "ecx", "edx", "ebp", "esi", "edi", "esp" ] !! i)
| i <= 7-> Just $ [ "eax", "ebx", "ecx", "edx"
, "ebp", "esi", "edi", "esp" ] !! i
RegSub SubL16 (Reg ClassG32 i)
| i <= 7 -> Just ([ "ax", "bx", "cx", "dx", "bp", "si", "di", "sp"] !! i)
| i <= 7 -> Just $ [ "ax", "bx", "cx", "dx"
, "bp", "si", "di", "sp"] !! i
RegSub SubL8 (Reg ClassG32 i)
| i <= 3 -> Just ([ "al", "bl", "cl", "dl"] !! i)
| i <= 3 -> Just $ [ "al", "bl", "cl", "dl"] !! i
RegSub SubL8H (Reg ClassG32 i)
| i <= 3 -> Just ([ "ah", "bh", "ch", "dh"] !! i)
| i <= 3 -> Just $ [ "ah", "bh", "ch", "dh"] !! i
_ -> Nothing
_ -> Nothing
-- | Which regs alias what other regs
-- | Which regs alias what other regs.
regAlias :: Reg -> UniqSet Reg
regAlias reg
= case reg of
......@@ -78,12 +83,14 @@ regAlias reg
-- for eax, ebx, ecx, eds
| i <= 3
-> mkUniqSet $ [ Reg ClassG32 i, RegSub SubL16 reg, RegSub SubL8 reg, RegSub SubL8H reg ]
-> mkUniqSet
$ [ Reg ClassG32 i, RegSub SubL16 reg
, RegSub SubL8 reg, RegSub SubL8H reg ]
-- for esi, edi, esp, ebp
| 4 <= i && i <= 7
-> mkUniqSet $ [ Reg ClassG32 i, RegSub SubL16 reg ]
-> mkUniqSet
$ [ Reg ClassG32 i, RegSub SubL16 reg ]
-- 16 bit subregs alias the whole reg
RegSub SubL16 r@(Reg ClassG32 _)
......@@ -104,7 +111,6 @@ regAlias reg
-- | Optimised versions of RegColorBase.{worst, squeese} specific to x86
worst :: Int -> RegClass -> RegClass -> Int
worst n classN classC
= case classN of
......@@ -138,7 +144,3 @@ squeese :: RegClass -> [(Int, RegClass)] -> Int
squeese classN countCs
= sum (map (\(i, classC) -> worst i classN classC) countCs)
-- | Register coalescing.
--
module RegAlloc.Graph.Coalesce (
regCoalesce,
slurpJoinMovs
)
where
) where
import RegAlloc.Liveness
import Instruction
import Reg
......@@ -20,10 +16,13 @@ import UniqSupply
import Data.List
-- | Do register coalescing on this top level thing
-- For Reg -> Reg moves, if the first reg dies at the same time the second reg is born
-- then the mov only serves to join live ranges. The two regs can be renamed to be
-- the same and the move instruction safely erased.
--
-- For Reg -> Reg moves, if the first reg dies at the same time the
-- second reg is born then the mov only serves to join live ranges.
-- The two regs can be renamed to be the same and the move instruction
-- safely erased.
regCoalesce
:: Instruction instr
=> [LiveCmmDecl statics instr]
......@@ -42,12 +41,18 @@ regCoalesce code
return patched
-- | Add a v1 = v2 register renaming to the map.
-- The register with the lowest lexical name is set as the
-- canonical version.
buildAlloc :: UniqFM Reg -> (Reg, Reg) -> UniqFM Reg
buildAlloc fm (r1, r2)
= let rmin = min r1 r2
rmax = max r1 r2
in addToUFM fm rmax rmin
-- | Determine the canonical name for a register by following
-- v1 = v2 renamings in this map.
sinkReg :: UniqFM Reg -> Reg -> Reg
sinkReg fm r
= case lookupUFM fm r of
......@@ -56,8 +61,10 @@ sinkReg fm r
-- | Slurp out mov instructions that only serve to join live ranges.
-- During a mov, if the source reg dies and the destiation reg is born
-- then we can rename the two regs to the same thing and eliminate the move.
--
-- During a mov, if the source reg dies and the destiation reg is
-- born then we can rename the two regs to the same thing and
-- eliminate the move.
slurpJoinMovs
:: Instruction instr
=> LiveCmmDecl statics instr
......@@ -66,9 +73,14 @@ slurpJoinMovs
slurpJoinMovs live
= slurpCmm emptyBag live
where
slurpCmm rs CmmData{} = rs
slurpCmm rs (CmmProc _ _ _ sccs) = foldl' slurpBlock rs (flattenSCCs sccs)
slurpBlock rs (BasicBlock _ instrs) = foldl' slurpLI rs instrs
slurpCmm rs CmmData{}
= rs
slurpCmm rs (CmmProc _ _ _ sccs)
= foldl' slurpBlock rs (flattenSCCs sccs)
slurpBlock rs (BasicBlock _ instrs)
= foldl' slurpLI rs instrs
slurpLI rs (LiveInstr _ Nothing) = rs
slurpLI rs (LiveInstr instr (Just live))
......@@ -76,12 +88,12 @@ slurpJoinMovs live
, elementOfUniqSet r1 $ liveDieRead live
, elementOfUniqSet r2 $ liveBorn live
-- only coalesce movs between two virtuals for now, else we end up with
-- allocatable regs in the live regs list..
-- only coalesce movs between two virtuals for now,
-- else we end up with allocatable regs in the live
-- regs list..
, isVirtualReg r1 && isVirtualReg r2
= consBag (r1, r2) rs
| otherwise
= rs
-- | Graph coloring register allocator.
--
-- TODO: The colors in graphviz graphs for x86_64 and ppc could be nicer.
--
module RegAlloc.Graph.Main (
regAlloc
)
where
) where
import qualified GraphColor as Color
import RegAlloc.Liveness
import RegAlloc.Graph.Spill
......@@ -21,7 +14,6 @@ import TargetReg
import RegClass
import Reg
import UniqSupply
import UniqSet
import UniqFM
......@@ -34,10 +26,12 @@ import Data.List
import Data.Maybe
import Control.Monad
-- | The maximum number of build\/spill cycles we'll allow.
-- We should only need 3 or 4 cycles tops.
-- If we run for any longer than this we're probably in an infinite loop,
-- It's probably better just to bail out and report a bug at this stage.
--
-- It should only take 3 or 4 cycles for the allocator to converge.
-- If it takes any longer than this it's probably in an infinite loop,
-- so it's better just to bail out and report a bug.
maxSpinCount :: Int
maxSpinCount = 10
......@@ -46,8 +40,8 @@ maxSpinCount = 10
regAlloc
:: (Outputable statics, Outputable instr, Instruction instr)
=> DynFlags
-> UniqFM (UniqSet RealReg) -- ^ the registers we can use for allocation
-> UniqSet Int -- ^ the set of available spill slots.
-> UniqFM (UniqSet RealReg) -- ^ registers we can use for allocation
-> UniqSet Int -- ^ set of available spill slots.
-> [LiveCmmDecl statics instr] -- ^ code annotated with liveness information.
-> UniqSM ( [NatCmmDecl statics instr], [RegAllocStats statics instr] )
-- ^ code with registers allocated and stats for each stage of
......@@ -55,8 +49,8 @@ regAlloc
regAlloc dflags regsFree slotsFree code
= do
-- TODO: the regClass function is currently hard coded to the default target
-- architecture. Would prefer to determine this from dflags.
-- TODO: the regClass function is currently hard coded to the default
-- target architecture. Would prefer to determine this from dflags.
-- There are other uses of targetRegClass later in this module.
let platform = targetPlatform dflags
triv = trivColorable platform
......@@ -71,77 +65,91 @@ regAlloc dflags regsFree slotsFree code
return ( code_final
, reverse debug_codeGraphs )
regAlloc_spin :: (Instruction instr,
Outputable instr,
Outputable statics)
=> DynFlags
-> Int
-> Color.Triv VirtualReg RegClass RealReg
-> UniqFM (UniqSet RealReg)
-> UniqSet Int
-> [RegAllocStats statics instr]
-> [LiveCmmDecl statics instr]
-> UniqSM ([NatCmmDecl statics instr],
[RegAllocStats statics instr],
Color.Graph VirtualReg RegClass RealReg)
-- | Perform solver iterations for the graph coloring allocator.
--
-- We extract a register confict graph from the provided cmm code,
-- and try to colour it. If that works then we use the solution rewrite
-- the code with real hregs. If coloring doesn't work we add spill code
-- and try to colour it again. After `maxSpinCount` iterations we give up.
--
regAlloc_spin
:: (Instruction instr,
Outputable instr,
Outputable statics)
=> DynFlags
-> Int -- ^ Number of solver iterations we've already performed.
-> Color.Triv VirtualReg RegClass RealReg
-- ^ Function for calculating whether a register is trivially
-- colourable.
-> UniqFM (UniqSet RealReg) -- ^ Free registers that we can allocate.
-> UniqSet Int -- ^ Free stack slots that we can use.
-> [RegAllocStats statics instr] -- ^ Current regalloc stats to add to.
-> [LiveCmmDecl statics instr] -- ^ Liveness annotated code to allocate.
-> UniqSM ( [NatCmmDecl statics instr]
, [RegAllocStats statics instr]
, Color.Graph VirtualReg RegClass RealReg)
regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code
= do
let platform = targetPlatform dflags
-- if any of these dump flags are turned on we want to hang on to
-- intermediate structures in the allocator - otherwise tell the
-- allocator to ditch them early so we don't end up creating space leaks.
-- If any of these dump flags are turned on we want to hang on to
-- intermediate structures in the allocator - otherwise tell the
-- allocator to ditch them early so we don't end up creating space leaks.
let dump = or
[ dopt Opt_D_dump_asm_regalloc_stages dflags
, dopt Opt_D_dump_asm_stats dflags
, dopt Opt_D_dump_asm_conflicts dflags ]
-- check that we're not running off down the garden path.
-- Check that we're not running off down the garden path.
when (spinCount > maxSpinCount)
$ pprPanic "regAlloc_spin: max build/spill cycle count exceeded."
( text "It looks like the register allocator is stuck in an infinite loop."
$$ text "max cycles = " <> int maxSpinCount
$$ text "regsFree = " <> (hcat $ punctuate space $ map ppr
$ uniqSetToList $ unionManyUniqSets $ eltsUFM regsFree)
$$ text "slotsFree = " <> ppr (sizeUniqSet slotsFree))
-- build a conflict graph from the code.
( text "It looks like the register allocator is stuck in an infinite loop."
$$ text "max cycles = " <> int maxSpinCount
$$ text "regsFree = " <> (hcat $ punctuate space $ map ppr
$ uniqSetToList $ unionManyUniqSets
$ eltsUFM regsFree)
$$ text "slotsFree = " <> ppr (sizeUniqSet slotsFree))
-- Build the register conflict graph from the cmm code.
(graph :: Color.Graph VirtualReg RegClass RealReg)
<- {-# SCC "BuildGraph" #-} buildGraph code
-- VERY IMPORTANT:
-- We really do want the graph to be fully evaluated _before_ we start coloring.
-- If we don't do this now then when the call to Color.colorGraph forces bits of it,
-- the heap will be filled with half evaluated pieces of graph and zillions of apply thunks.
--
-- We really do want the graph to be fully evaluated _before_ we
-- start coloring. If we don't do this now then when the call to
-- Color.colorGraph forces bits of it, the heap will be filled with
-- half evaluated pieces of graph and zillions of apply thunks.
seqGraph graph `seq` return ()
-- build a map of the cost of spilling each instruction
-- this will only actually be computed if we have to spill something.
-- Build a map of the cost of spilling each instruction.
-- This is a lazy binding, so the map will only be computed if we
-- actually have to spill to the stack.
let spillCosts = foldl' plusSpillCostInfo zeroSpillCostInfo
$ map (slurpSpillCostInfo platform) code
-- the function to choose regs to leave uncolored
-- The function to choose regs to leave uncolored.
let spill = chooseSpill spillCosts
-- record startup state
let stat1 =
if spinCount == 0
-- Record startup state in our log.
let stat1
= if spinCount == 0
then Just $ RegAllocStatsStart
{ raLiveCmm = code
, raGraph = graph
, raSpillCosts = spillCosts }
else Nothing
-- try and color the graph
-- Try and color the graph.
let (graph_colored, rsSpill, rmCoalesce)
= {-# SCC "ColorGraph" #-}
Color.colorGraph
(gopt Opt_RegsIterative dflags)
spinCount
regsFree triv spill graph
= {-# SCC "ColorGraph" #-}
Color.colorGraph
(gopt Opt_RegsIterative dflags)
spinCount
regsFree triv spill graph
-- rewrite regs in the code that have been coalesced
-- Rewrite registers in the code that have been coalesced.
let patchF reg
| RegVirtual vr <- reg
= case lookupUFM rmCoalesce vr of
......@@ -152,33 +160,43 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code
= reg
let code_coalesced
= map (patchEraseLive patchF) code
= map (patchEraseLive patchF) code
-- see if we've found a coloring
-- Check whether we've found a coloring.
if isEmptyUniqSet rsSpill
-- Coloring was successful because no registers needed to be spilled.
then do
-- if -fasm-lint is turned on then validate the graph
-- if -fasm-lint is turned on then validate the graph.
-- This checks for bugs in the graph allocator itself.
let graph_colored_lint =
if gopt Opt_DoAsmLinting dflags
then Color.validateGraph (text "")
True -- require all nodes to be colored
True -- Require all nodes to be colored.
graph_colored
else graph_colored
-- patch the registers using the info in the graph
let code_patched = map (patchRegsFromGraph platform graph_colored_lint) code_coalesced
-- clean out unneeded SPILL/RELOADs
let code_spillclean = map (cleanSpills platform) code_patched
-- strip off liveness information,
-- and rewrite SPILL/RELOAD pseudos into real instructions along the way
let code_final = map (stripLive dflags) code_spillclean
-- record what happened in this stage for debugging
let stat =
RegAllocStatsColored
-- Rewrite the code to use real hregs, using the colored graph.
let code_patched
= map (patchRegsFromGraph platform graph_colored_lint)
code_coalesced
-- Clean out unneeded SPILL/RELOAD meta instructions.
-- The spill code generator just spills the entire live range
-- of a vreg, but it might not need to be on the stack for
-- its entire lifetime.
let code_spillclean
= map (cleanSpills platform) code_patched
-- Strip off liveness information from the allocated code.
-- Also rewrite SPILL/RELOAD meta instructions into real machine
-- instructions along the way
let code_final
= map (stripLive dflags) code_spillclean
-- Record what happened in this stage for debugging
let stat
= RegAllocStatsColored
{ raCode = code
, raGraph = graph
, raGraphColored = graph_colored_lint
......@@ -187,21 +205,25 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code
, raPatched = code_patched
, raSpillClean = code_spillclean
, raFinal = code_final
, raSRMs = foldl' addSRM (0, 0, 0) $ map countSRMs code_spillclean }
, raSRMs = foldl' addSRM (0, 0, 0)
$ map countSRMs code_spillclean }
-- Bundle up all the register allocator statistics.
-- .. but make sure to drop them on the floor if they're not
-- needed, otherwise we'll get a space leak.
let statList =
if dump then [stat] ++ maybeToList stat1 ++ debug_codeGraphs
else []
-- space leak avoidance
-- Ensure all the statistics are evaluated, to avoid space leaks.
seqList statList `seq` return ()
return ( code_final
, statList
, graph_colored_lint)
-- we couldn't find a coloring, time to spill something
-- Coloring was unsuccessful. We need to spill some register to the
-- stack, make a new graph, and try to color it again.
else do
-- if -fasm-lint is turned on then validate the graph
let graph_colored_lint =
......@@ -211,17 +233,18 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code
graph_colored
else graph_colored
-- spill the uncolored regs
-- Spill uncolored regs to the stack.
(code_spilled, slotsFree', spillStats)
<- regSpill platform code_coalesced slotsFree rsSpill
-- recalculate liveness
-- NOTE: we have to reverse the SCCs here to get them back into the reverse-dependency
-- order required by computeLiveness. If they're not in the correct order
-- that function will panic.
code_relive <- mapM (regLiveness platform . reverseBlocksInTops) code_spilled
-- Recalculate liveness information.
-- NOTE: we have to reverse the SCCs here to get them back into
-- the reverse-dependency order required by computeLiveness.
-- If they're not in the correct order that function will panic.
code_relive <- mapM (regLiveness platform . reverseBlocksInTops)
code_spilled
-- record what happened in this stage for debugging
-- Record what happened in this stage for debugging.