Commit 643eb066 authored by Simon Marlow

Remove the old stack layout algorithms

Also, do removeDeadAssignments instead of cmmLiveness before stack
allocation, because the former also does liveness analysis, and we can
do just one liveness analysis instead of two.  The stack layout
algorithm doesn't introduce any dead assignments, so this doesn't
affect the generated code.
parent 2c4b427c
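
To illustrate the reasoning in the message above, here is a self-contained toy (not GHC code; Stmt, Var and removeDead are made up for this sketch): a backwards dead-assignment-removal pass over straight-line code computes liveness as it goes, so it can hand the liveness facts back to its caller and a separate liveness analysis becomes unnecessary.

import qualified Data.Set as Set

type Var  = String
data Stmt = Assign Var [Var]   -- x := f(uses)
          | Use [Var]          -- an observable use, e.g. a return
          deriving Show

-- Walk the statements backwards, dropping assignments to dead variables and
-- threading the live set; return both the cleaned code and the set of
-- variables live on entry, i.e. the facts a standalone liveness analysis
-- would compute.
removeDead :: [Stmt] -> ([Stmt], Set.Set Var)
removeDead = foldr step ([], Set.empty)
  where
    step s@(Use us) (rest, live) =
        (s : rest, foldr Set.insert live us)
    step s@(Assign x us) (rest, live)
      | x `Set.member` live =
          (s : rest, foldr Set.insert (Set.delete x live) us)
      | otherwise = (rest, live)          -- dead assignment: drop it

main :: IO ()
main = print (removeDead [Assign "a" ["b"], Assign "c" ["d"], Use ["a"]])
-- prints ([Assign "a" ["b"],Use ["a"]],fromList ["b"]): "c" is dropped, "b" is live-in

In the diff below, cmmLayoutStack plays the role of the caller: removeDeadAssignments now returns (CmmGraph, BlockEnv CmmLive), so the earlier cmmLiveness call can be dropped.
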
@@ -37,7 +37,6 @@ import Bitmap
import CLabel
import Cmm
import CmmUtils
import CmmStackLayout
import Module
import FastString
import ForeignCall
@@ -107,10 +107,10 @@ instance Outputable StackMap where
cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph
-> FuelUniqSM (CmmGraph, BlockEnv StackMap)
cmmLayoutStack procpoints entry_args
graph@(CmmGraph { g_entry = entry })
graph0@(CmmGraph { g_entry = entry })
= do
pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()
liveness <- cmmLiveness graph
(graph, liveness) <- removeDeadAssignments graph0
pprTrace "liveness" (ppr liveness) $ return ()
let blocks = postorderDfs graph
@@ -48,7 +48,8 @@ cmmLiveness :: CmmGraph -> FuelUniqSM BlockEntryLiveness
cmmLiveness graph =
liftM check $ dataflowAnalBwd graph [] $ analBwd liveLattice xferLive
where entry = g_entry graph
check facts = noLiveOnEntry entry (expectJust "check" $ mapLookup entry facts) facts
check facts = noLiveOnEntry entry
(expectJust "check" $ mapLookup entry facts) facts
-- | On entry to the procedure, there had better not be any LocalReg's live-in.
noLiveOnEntry :: BlockId -> CmmLive -> a -> a
@@ -63,13 +64,11 @@ gen a live = foldRegsUsed extendRegSet live a
kill :: DefinerOfLocalRegs a => a -> RegSet -> RegSet
kill a live = foldRegsDefd deleteFromRegSet live a
gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a) => a -> CmmLive -> CmmLive
gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a)
=> a -> CmmLive -> CmmLive
gen_kill a = gen a . kill a
-- | The transfer function
-- EZY: Bits of this analysis are duplicated in CmmSpillReload, though
-- it's not really easy to efficiently reuse all of this. Keep in mind
-- if you need to update this analysis.
xferLive :: BwdTransfer CmmNode CmmLive
xferLive = mkBTransfer3 fst mid lst
where fst _ f = f
@@ -82,18 +81,23 @@ xferLive = mkBTransfer3 fst mid lst
-- Removing assignments to dead variables
-----------------------------------------------------------------------------
removeDeadAssignments :: CmmGraph -> FuelUniqSM CmmGraph
removeDeadAssignments :: CmmGraph -> FuelUniqSM (CmmGraph, BlockEnv CmmLive)
removeDeadAssignments g =
liftM fst $ dataflowPassBwd g [] $ analRewBwd liveLattice xferLive rewrites
dataflowPassBwd g [] $ analRewBwd liveLattice xferLive rewrites
where rewrites = mkBRewrite3 nothing middle nothing
-- SDM: no need for deepBwdRw here, we only rewrite to empty
-- Beware: deepBwdRw with one polymorphic function seems more reasonable here,
-- but GHC panics while compiling, see bug #4045.
-- Beware: deepBwdRw with one polymorphic function seems more
-- reasonable here, but GHC panics while compiling, see bug
-- #4045.
middle :: CmmNode O O -> Fact O CmmLive -> CmmReplGraph O O
middle (CmmAssign (CmmLocal reg') _) live | not (reg' `elemRegSet` live) = return $ Just emptyGraph
middle (CmmAssign (CmmLocal reg') _) live
| not (reg' `elemRegSet` live)
= return $ Just emptyGraph
-- XXX maybe this should be somewhere else...
middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs = return $ Just emptyGraph
middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs = return $ Just emptyGraph
middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs
= return $ Just emptyGraph
middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs
= return $ Just emptyGraph
middle _ _ = return Nothing
nothing :: CmmNode e x -> Fact x CmmLive -> CmmReplGraph e x
@@ -16,12 +16,12 @@ import CmmLive
import CmmBuildInfoTables
import CmmCommonBlockElim
import CmmProcPoint
import CmmSpillReload
import CmmRewriteAssignments
import CmmStackLayout
import CmmContFlowOpt
import OptimizationFuel
import CmmLayoutStack
import Hoopl
import CmmUtils
import DynFlags
import ErrUtils
@@ -95,9 +95,6 @@ cpsTop :: HscEnv -> CmmDecl -> IO ([(CLabel, CAFSet)], [(CAFSet, CmmDecl)])
cpsTop _ p@(CmmData {}) = return ([], [(Set.empty, p)])
cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) l g) =
do
-- Why bother doing these early: dualLivenessWithInsertion,
-- insertLateReloads, rewriteAssignments?
----------- Control-flow optimisations ---------------
g <- {-# SCC "cmmCfgOpts(1)" #-} return $ cmmCfgOpts g
dump Opt_D_dump_cmmz_cfg "Post control-flow optimsations" g
@@ -110,63 +107,33 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
----------- Proc points -------------------
let callPPs = {-# SCC "callProcPoints" #-} callProcPoints g
procPoints <- {-# SCC "minimalProcPointSet" #-} run $ minimalProcPointSet (targetPlatform dflags) callPPs g
procPoints <- {-# SCC "minimalProcPointSet" #-} run $
minimalProcPointSet (targetPlatform dflags) callPPs g
----------- Layout the stack and manifest Sp ---------------
-- (also does: removeDeadAssignments, and lowerSafeForeignCalls)
(g, stackmaps) <- {-# SCC "layoutStack" #-}
run $ cmmLayoutStack procPoints entry_off g
dump Opt_D_dump_cmmz_sp "Layout Stack" g
-- g <- {-# SCC "addProcPointProtocols" #-} run $ addProcPointProtocols callPPs procPoints g
-- dump Opt_D_dump_cmmz_proc "Post Proc Points Added" g
--
-- ----------- Spills and reloads -------------------
-- g <- {-# SCC "dualLivenessWithInsertion" #-} run $ dualLivenessWithInsertion procPoints g
-- dump Opt_D_dump_cmmz_spills "Post spills and reloads" g
--
-- ----------- Sink and inline assignments -------------------
-- g <- {-# SCC "rewriteAssignments" #-} runOptimization $ rewriteAssignments platform g
-- g <- {-# SCC "rewriteAssignments" #-} runOptimization $
-- rewriteAssignments platform g
-- dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g
--
----------- Eliminate dead assignments -------------------
g <- {-# SCC "removeDeadAssignments" #-} runOptimization $ removeDeadAssignments g
dump Opt_D_dump_cmmz_dead "Post remove dead assignments" g
-- ----------- Zero dead stack slots (Debug only) ---------------
-- -- Debugging: stubbing slots on death can cause crashes early
-- g <- if opt_StubDeadValues
-- then {-# SCC "stubSlotsOnDeath" #-} run $ stubSlotsOnDeath g
-- else return g
-- dump Opt_D_dump_cmmz_stub "Post stub dead stack slots" g
--
-- --------------- Stack layout ----------------
-- slotEnv <- {-# SCC "liveSlotAnal" #-} run $ liveSlotAnal g
-- let spEntryMap = getSpEntryMap entry_off g
-- mbpprTrace "live slot analysis results: " (ppr slotEnv) $ return ()
-- let areaMap = {-# SCC "layout" #-} layout procPoints spEntryMap slotEnv entry_off g
-- mbpprTrace "areaMap" (ppr areaMap) $ return ()
--
-- ------------ Manifest the stack pointer --------
-- g <- {-# SCC "manifestSP" #-} run $ manifestSP spEntryMap areaMap entry_off g
-- dump Opt_D_dump_cmmz_sp "Post manifestSP" g
-- -- UGH... manifestSP can require updates to the procPointMap.
-- -- We can probably do something quicker here for the update...
------------- Split into separate procedures ------------
procPointMap <- {-# SCC "procPointAnalysis" #-} run $ procPointAnalysis procPoints g
procPointMap <- {-# SCC "procPointAnalysis" #-} run $
procPointAnalysis procPoints g
dumpWith dflags ppr Opt_D_dump_cmmz_procmap "procpoint map" procPointMap
gs <- {-# SCC "splitAtProcPoints" #-} run $ splitAtProcPoints l callPPs procPoints procPointMap
(CmmProc h l g)
gs <- {-# SCC "splitAtProcPoints" #-} run $
splitAtProcPoints l callPPs procPoints procPointMap (CmmProc h l g)
dumps Opt_D_dump_cmmz_split "Post splitting" gs
------------- More CAFs and foreign calls ------------
------------- More CAFs ------------------------------
cafEnv <- {-# SCC "cafAnal" #-} run $ cafAnal platform g
let localCAFs = catMaybes $ map (localCAFInfo platform cafEnv) gs
mbpprTrace "localCAFs" (pprPlatform platform localCAFs) $ return ()
-- gs <- {-# SCC "lowerSafeForeignCalls" #-} run $ mapM (lowerSafeForeignCalls areaMap) gs
-- dumps Opt_D_dump_cmmz_lower "Post lowerSafeForeignCalls" gs
-- NO MORE GRAPH TRANSFORMATION AFTER HERE -- JUST MAKING INFOTABLES
gs <- {-# SCC "setInfoTableStackMap" #-}
return $ map (setInfoTableStackMap stackmaps) gs
@@ -403,11 +403,12 @@ splitAtProcPoints entry_label callPPs procPoints procMap
[] -> graphEnv
[id] -> add graphEnv id bid b
_ -> panic "Each block should be reachable from only one ProcPoint"
Nothing -> pprPanic "block not reached by a proc point?" (ppr bid)
Nothing -> graphEnv
where bid = entryLabel b
add graphEnv procId bid b = mapInsert procId graph' graphEnv
where graph = mapLookup procId graphEnv `orElse` mapEmpty
graph' = mapInsert bid b graph
graphEnv <- return $ foldGraphBlocks addBlock emptyBlockMap g
-- Build a map from proc point BlockId to pairs of:
-- * Labels for their new procedures
......
{-# LANGUAGE GADTs, NoMonoLocalBinds, FlexibleContexts #-}
-- Norman likes local bindings
-- If this module lives on I'd like to get rid of this flag in due course
{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
-- TODO: Get rid of this flag:
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
module CmmSpillReload
( dualLivenessWithInsertion
)
where
import Outputable
dualLivenessWithInsertion = panic "BANG BANG BANG BANG BANG BANG CLICK CLICK"
{-
import BlockId
import Cmm
import CmmUtils
import CmmLive
import OptimizationFuel
import Control.Monad
import Outputable hiding (empty)
import qualified Outputable as PP
import UniqSet
import Hoopl
import Data.Maybe
import Prelude hiding (succ, zip)
{- Note [Overview of spill/reload]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The point of this module is to insert spills and reloads to establish
the invariant that at a call or any proc point with an established
protocol all live variables not expected in registers are sitting on the
stack. We use a backward dual liveness analysis (both traditional
register liveness as well as register slot liveness on the stack) to
insert spills and reloads. It should be followed by a forward
transformation to sink reloads as deeply as possible, so as to reduce
register pressure: this transformation is performed by
CmmRewriteAssignments.
A variable can be expected to be live in a register, live on the
stack, or both. This analysis ensures that spills and reloads are
inserted as needed to make sure that every live variable needed
after a call is available on the stack. Spills are placed immediately
after their reaching definitions, but reloads are placed immediately
after a return from a call (the entry point.)
Note that we offer no guarantees about the consistency of the value
in memory and the value in the register, except that they are
equal across calls/procpoints. If the variable is changed, this
mapping breaks: but as the original value of the register may still
be useful in a different context, the memory location is not updated.
-}
data DualLive = DualLive { on_stack :: RegSet, in_regs :: RegSet }
changeStack, changeRegs :: (RegSet -> RegSet) -> DualLive -> DualLive
changeStack f live = live { on_stack = f (on_stack live) }
changeRegs f live = live { in_regs = f (in_regs live) }
dualLiveLattice :: DataflowLattice DualLive
dualLiveLattice = DataflowLattice "variables live in registers and on stack" empty add
where empty = DualLive emptyRegSet emptyRegSet
add _ (OldFact old) (NewFact new) = (changeIf $ change1 || change2, DualLive stack regs)
where (change1, stack) = add1 (on_stack old) (on_stack new)
(change2, regs) = add1 (in_regs old) (in_regs new)
add1 old new = if sizeRegSet join > sizeRegSet old then (True, join) else (False, old)
where join = plusRegSet old new
dualLivenessWithInsertion :: BlockSet -> CmmGraph -> FuelUniqSM CmmGraph
dualLivenessWithInsertion procPoints g =
liftM fst $ dataflowPassBwd g [] $ analRewBwd dualLiveLattice
(dualLiveTransfers (g_entry g) procPoints)
(insertSpillsAndReloads g procPoints)
-- Note [Live registers on entry to procpoints]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Remember that the transfer function is only ever run on the rewritten
-- version of a graph, and the rewrite function for spills and reloads
-- enforces the invariant that no local registers are live on entry to
-- a procpoint. Accordingly, we check for this invariant here. An old
-- version of this code incorrectly claimed that any live registers were
-- live on the stack before entering the function: this is wrong, but
-- didn't cause bugs because it never actually was invoked.
dualLiveTransfers :: BlockId -> BlockSet -> (BwdTransfer CmmNode DualLive)
dualLiveTransfers entry procPoints = mkBTransfer3 first middle last
where first :: CmmNode C O -> DualLive -> DualLive
first (CmmEntry id) live -- See Note [Live registers on entry to procpoints]
| id == entry || setMember id procPoints = noLiveOnEntry id (in_regs live) live
| otherwise = live
middle :: CmmNode O O -> DualLive -> DualLive
middle m = changeStack updSlots
. changeRegs updRegs
where -- Reuse middle of liveness analysis from CmmLive
updRegs = case getBTransfer3 xferLive of (_, middle, _) -> middle m
updSlots live = foldSlotsUsed reload (foldSlotsDefd spill live m) m
spill live s@(RegSlot r, _, _) = check s $ deleteFromRegSet live r
spill live _ = live
reload live s@(RegSlot r, _, _) = check s $ extendRegSet live r
reload live _ = live
-- Ensure the assignment refers to the entirety of the
-- register slot (and not just a slice).
check (RegSlot (LocalReg _ ty), o, w) x
| o == w && w == widthInBytes (typeWidth ty) = x
check _ _ = panic "dualLiveTransfers: slices unsupported"
-- Register analysis is identical to liveness analysis from CmmLive.
last :: CmmNode O C -> FactBase DualLive -> DualLive
last l fb = changeRegs (gen_kill l) $ case l of
CmmCall {cml_cont=Nothing} -> empty
CmmCall {cml_cont=Just k} -> keep_stack_only k
CmmForeignCall {succ=k} -> keep_stack_only k
_ -> joinOutFacts dualLiveLattice l fb
where empty = fact_bot dualLiveLattice
lkp k = fromMaybe empty (lookupFact k fb)
keep_stack_only k = DualLive (on_stack (lkp k)) emptyRegSet
insertSpillsAndReloads :: CmmGraph -> BlockSet -> CmmBwdRewrite DualLive
insertSpillsAndReloads graph procPoints = mkBRewrite3 first middle nothing
-- Beware: deepBwdRw with one polymorphic function seems more reasonable here,
-- but GHC miscompiles it, see bug #4044.
where first :: CmmNode C O -> Fact O DualLive -> CmmReplGraph C O
first e@(CmmEntry id) live =
if id /= (g_entry graph) && setMember id procPoints then
case map reload (regSetToList (in_regs live)) of
[] -> return Nothing
is -> return $ Just $ mkFirst e <*> mkMiddles is
else return Nothing
-- EZY: There was some dead code for handling the case where
-- we were not splitting procedures. Check Git history if
-- you're interested (circa e26ea0f41).
middle :: CmmNode O O -> Fact O DualLive -> CmmReplGraph O O
-- Don't add spills next to reloads.
middle (CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg') _) _)) _ | reg == reg' = return Nothing
-- Spill if register is live on stack.
middle m@(CmmAssign (CmmLocal reg) _) live
| reg `elemRegSet` on_stack live = return (Just (mkMiddles [m, spill reg]))
middle _ _ = return Nothing
nothing _ _ = return Nothing
spill, reload :: LocalReg -> CmmNode O O
spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r)
reload r = CmmAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r)
---------------------
-- prettyprinting
ppr_regs :: String -> RegSet -> SDoc
ppr_regs s regs = text s <+> commafy (map ppr $ regSetToList regs)
where commafy xs = hsep $ punctuate comma xs
instance Outputable DualLive where
ppr (DualLive {in_regs = regs, on_stack = stack}) =
if nullRegSet regs && nullRegSet stack then
text "<nothing-live>"
else
nest 2 $ fsep [if nullRegSet regs then PP.empty
else (ppr_regs "live in regs =" regs),
if nullRegSet stack then PP.empty
else (ppr_regs "live on stack =" stack)]
-}
module CmmStackLayout () where
#if 0
{-# OPTIONS_GHC -XGADTs -XNoMonoLocalBinds #-}
-- Norman likes local bindings
-- If this module lives on I'd like to get rid of the -XNoMonoLocalBinds
-- flag in due course
{-# OPTIONS -fno-warn-tabs #-}
-- The above warning supression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and
-- detab the module (please do the detabbing in a separate patch). See
-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
-- for details
-- Todo: remove -fno-warn-warnings-deprecations
{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
#if __GLASGOW_HASKELL__ >= 703
-- GHC 7.0.1 improved incomplete pattern warnings with GADTs
{-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
#endif
module CmmStackLayout
( SlotEnv, liveSlotAnal, liveSlotTransfers, removeLiveSlotDefs
, getSpEntryMap, layout, manifestSP, igraph, areaBuilder
, stubSlotsOnDeath ) -- to help crash early during debugging
where
import Constants
import Prelude hiding (succ, zip, unzip, last)
import BlockId
import Cmm
import CmmUtils
import CmmProcPoint
import Maybes
import MkGraph (stackStubExpr)
import Control.Monad
import OptimizationFuel
import Outputable
import SMRep (ByteOff)
import Hoopl
import Data.Map (Map)
import qualified Data.Map as Map
import qualified FiniteMap as Map
------------------------------------------------------------------------
-- Stack Layout --
------------------------------------------------------------------------
-- | Before we lay out the stack, we need to know something about the
-- liveness of the stack slots. In particular, to decide whether we can
-- reuse a stack location to hold multiple stack slots, we need to know
-- when each of the stack slots is used.
-- Although tempted to use something simpler, we really need a full interference
-- graph. Consider the following case:
-- case <...> of
-- 1 -> <spill x>; // y is dead out
-- 2 -> <spill y>; // x is dead out
-- 3 -> <spill x and y>
-- If we consider the arms in order and we use just the deadness information given by a
-- dataflow analysis, we might decide to allocate the stack slots for x and y
-- to the same stack location, which will lead to incorrect code in the third arm.
-- We won't make this mistake with an interference graph.
-- First, the liveness analysis.
-- We represent a slot with an area, an offset into the area, and a width.
-- Tracking the live slots is a bit tricky because there may be loads and stores
-- into only a part of a stack slot (e.g. loading the low word of a 2-word long),
-- e.g. Slot A 0 8 overlaps with Slot A 4 4.
--
-- The definition of a slot set is intended to reduce the number of overlap
-- checks we have to make. There's no reason to check for overlap between
-- slots in different areas, so we segregate the map by Area's.
-- We expect few slots in each Area, so we collect them in an unordered list.
-- To keep these lists short, any contiguous live slots are coalesced into
-- a single slot, on insertion.
slotLattice :: DataflowLattice SubAreaSet
slotLattice = DataflowLattice "live slots" Map.empty add
where add _ (OldFact old) (NewFact new) = case Map.foldRightWithKey addArea (False, old) new of
(change, x) -> (changeIf change, x)
addArea a newSlots z = foldr (addSlot a) z newSlots
addSlot a slot (changed, map) =
let (c, live) = liveGen slot $ Map.findWithDefault [] a map
in (c || changed, Map.insert a live map)
slotLatticeJoin :: [SubAreaSet] -> SubAreaSet
slotLatticeJoin facts = foldr extend (fact_bot slotLattice) facts
where extend fact res = snd $ fact_join slotLattice undefined (OldFact fact) (NewFact res)
type SlotEnv = BlockEnv SubAreaSet
-- The sub-areas live on entry to the block
liveSlotAnal :: CmmGraph -> FuelUniqSM SlotEnv
liveSlotAnal g = dataflowAnalBwd g [] $ analBwd slotLattice liveSlotTransfers
-- Add the subarea s to the subareas in the list-set (possibly coalescing it with
-- adjacent subareas), and also return whether s was a new addition.
liveGen :: SubArea -> [SubArea] -> (Bool, [SubArea])
liveGen s set = liveGen' s set []
where liveGen' s [] z = (True, s : z)
liveGen' s@(a, hi, w) (s'@(a', hi', w') : rst) z =
if a /= a' || hi < lo' || lo > hi' then -- no overlap
liveGen' s rst (s' : z)
else if s' `contains` s then -- old contains new
(False, set)
else -- overlap: coalesce the slots
let new_hi = max hi hi'
new_lo = min lo lo'
in liveGen' (a, new_hi, new_hi - new_lo) rst z
where lo = hi - w -- remember: areas grow down
lo' = hi' - w'
contains (a, hi, w) (a', hi', w') =
a == a' && hi >= hi' && hi - w <= hi' - w'
liveKill :: SubArea -> [SubArea] -> [SubArea]
liveKill (a, hi, w) set = -- pprTrace "killing slots in area" (ppr a) $
liveKill' set []
where liveKill' [] z = z
liveKill' (s'@(a', hi', w') : rst) z =
if a /= a' || hi < lo' || lo > hi' then -- no overlap
liveKill' rst (s' : z)
else -- overlap: split the old slot
let z' = if hi' > hi then (a, hi', hi' - hi) : z else z
z'' = if lo > lo' then (a, lo, lo - lo') : z' else z'
in liveKill' rst z''
where lo = hi - w -- remember: areas grow down
lo' = hi' - w'
-- Note: the stack slots that hold variables returned on the stack are not
-- considered live in to the block -- we treat the first node as a definition site.
-- BEWARE?: Am I being a little careless here in failing to check for the
-- entry Id (which would use the CallArea Old).
liveSlotTransfers :: BwdTransfer CmmNode SubAreaSet
liveSlotTransfers = mkBTransfer3 frt mid lst
where frt :: CmmNode C O -> SubAreaSet -> SubAreaSet
frt (CmmEntry l) f = Map.delete (CallArea (Young l)) f
mid :: CmmNode O O -> SubAreaSet -> SubAreaSet
mid n f = foldSlotsUsed addSlot (removeLiveSlotDefs f n) n
lst :: CmmNode O C -> FactBase SubAreaSet -> SubAreaSet
lst n f = liveInSlots n $ case n of
CmmCall {cml_cont=Nothing, cml_args=args} -> add_area (CallArea Old) args out
CmmCall {cml_cont=Just k, cml_args=args} -> add_area (CallArea Old) args (add_area (CallArea (Young k)) args out)
CmmForeignCall {succ=k, updfr=oldend} -> add_area (CallArea Old) oldend (add_area (CallArea (Young k)) wORD_SIZE out)
_ -> out
where out = joinOutFacts slotLattice n f
add_area _ n live | n == 0 = live
add_area a n live = Map.insert a (snd $ liveGen (a, n, n) $ Map.findWithDefault [] a live) live
-- Slot sets: adding slots, removing slots, and checking for membership.
liftToArea :: Area -> ([SubArea] -> [SubArea]) -> SubAreaSet -> SubAreaSet
addSlot, removeSlot :: SubAreaSet -> SubArea -> SubAreaSet
elemSlot :: SubAreaSet -> SubArea -> Bool
liftToArea a f map = Map.insert a (f (Map.findWithDefault [] a map)) map
addSlot live (a, i, w) = liftToArea a (snd . liveGen (a, i, w)) live
removeSlot live (a, i, w) = liftToArea a (liveKill (a, i, w)) live
elemSlot live (a, i, w) =
not $ fst $ liveGen (a, i, w) (Map.findWithDefault [] a live)
removeLiveSlotDefs :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet
removeLiveSlotDefs = foldSlotsDefd removeSlot
liveInSlots :: (DefinerOfSlots s, UserOfSlots s) => s -> SubAreaSet -> SubAreaSet
liveInSlots x live = foldSlotsUsed addSlot (removeLiveSlotDefs live x) x
liveLastIn :: CmmNode O C -> (BlockId -> SubAreaSet) -> SubAreaSet
liveLastIn l env = liveInSlots l (liveLastOut env l)
-- Don't forget to keep the outgoing parameters in the CallArea live,
-- as well as the update frame.
-- Note: We have to keep the update frame live at a call because of the
-- case where the function doesn't return -- in that case, there won't
-- be a return to keep the update frame live. We'd still better keep the
-- info pointer in the update frame live at any call site;
-- otherwise we could screw up the garbage collector.
liveLastOut :: (BlockId -> SubAreaSet) -> CmmNode O C -> SubAreaSet
liveLastOut env l =
case l of
CmmCall _ Nothing n _ _ ->
add_area (CallArea Old) n out -- add outgoing args (includes upd frame)
CmmCall _ (Just k) n _ _ ->
add_area (CallArea Old) n (add_area (CallArea (Young k)) n out)
CmmForeignCall { succ = k, updfr = oldend } ->
add_area (CallArea Old) oldend (add_area (CallArea (Young k)) wORD_SIZE out)
_ -> out
where out = slotLatticeJoin $ map env $ successors l
add_area _ n live | n == 0 = live
add_area a n live =
Map.insert a (snd $ liveGen (a, n, n) $ Map.findWithDefault [] a live) live
-- The liveness analysis must be precise: otherwise, we won't know if a definition
-- should really kill a live-out stack slot.
-- But the interference graph does not have to be precise -- it might decide that
-- any live areas interfere. To maintain both a precise analysis and an imprecise
-- interference graph, we need to convert the live-out stack slots to graph nodes
-- at each and every instruction; rather than reconstruct a new list of nodes
-- every time, I provide a function to fold over the nodes, which should be a
-- reasonably efficient approach for the implementations we envision.
-- Of course, it will probably be much easier to program if we just return a list...
type Set x = Map x ()
data IGraphBuilder n =
Builder { foldNodes :: forall z. SubArea -> (n -> z -> z) -> z -> z
, _wordsOccupied :: AreaSizeMap -> AreaMap -> n -> [Int]
}
areaBuilder :: IGraphBuilder Area
areaBuilder = Builder fold words
where fold (a, _, _) f z = f a z
words areaSize areaMap a =
case Map.lookup a areaMap of
Just addr -> [addr .. addr + (Map.lookup a areaSize `orElse`
pprPanic "wordsOccupied: unknown area" (ppr areaSize <+> ppr a))]
Nothing -> []
--slotBuilder :: IGraphBuilder (Area, Int)
--slotBuilder = undefined
-- Now, we can build the interference graph.
-- The usual story: a definition interferes with all live outs and all other
-- definitions.
type IGraph x = Map x (Set x)
type IGPair x = (IGraph x, IGraphBuilder x)
igraph :: (Ord x) => IGraphBuilder x -> SlotEnv -> CmmGraph -> IGraph x
igraph builder env g = foldr interfere Map.empty (postorderDfs g)
where foldN = foldNodes builder
interfere block igraph = foldBlockNodesB3 (first, middle, last) block igraph
where first _ (igraph, _) = igraph
middle node (igraph, liveOut) =
(addEdges igraph node liveOut, liveInSlots node liveOut)
last node igraph =
(addEdges igraph node $ liveLastOut env' node, liveLastIn node env')
-- add edges between a def and the other defs and liveouts
addEdges igraph i out = fst $ foldSlotsDefd addDef (igraph, out) i
addDef (igraph, out) def@(a, _, _) =
(foldN def (addDefN out) igraph,
Map.insert a (snd $ liveGen def (Map.findWithDefault [] a out)) out)
addDefN out n igraph =
let addEdgeNO o igraph = foldN o addEdgeNN igraph
addEdgeNN n' igraph = addEdgeNN' n n' $ addEdgeNN' n' n igraph
addEdgeNN' n n' igraph = Map.insert n (Map.insert n' () set) igraph
where set = Map.findWithDefault Map.empty n igraph
in Map.foldRightWithKey (\ _ os igraph -> foldr addEdgeNO igraph os) igraph out
env' bid = mapLookup bid env `orElse` panic "unknown blockId in igraph"
-- Before allocating stack slots, we need to collect one more piece of information:
-- what's the highest offset (in bytes) used in each Area?
-- We'll need to allocate that much space for each Area.
-- Mapping of areas to area sizes (not offsets!)
type AreaSizeMap = AreaMap
-- JD: WHY CAN'T THIS COME FROM THE slot-liveness info?
getAreaSize :: ByteOff -> CmmGraph -> AreaSizeMap
-- The domain of the returned mapping consists only of Areas
-- used for (a) variable spill slots, and (b) parameter passing areas for calls
getAreaSize entry_off g =
foldGraphBlocks (foldBlockNodesF3 (first, add_regslots, last))
(Map.singleton (CallArea Old) entry_off) g
where first _ z = z
last :: CmmNode O C -> Map Area Int -> Map Area Int
last l@(CmmCall _ Nothing args res _) z = add_regslots l (add (add z area args) area res)
where area = CallArea Old
last l@(CmmCall _ (Just k) args res _) z = add_regslots l (add (add z area args) area res)
where area = CallArea (Young k)
last l@(CmmForeignCall {succ = k}) z = add_regslots l (add z area wORD_SIZE)
where area = CallArea (Young k)
last l z = add_regslots l z
add_regslots i z = foldSlotsUsed addSlot (foldSlotsDefd addSlot z i) i
addSlot z (a@(RegSlot (LocalReg _ ty)), _, _) =
add z a $ widthInBytes $ typeWidth ty
addSlot z _ = z
add z a off = Map.insert a (max off (Map.findWithDefault 0 a z)) z
-- The 'max' is important. Two calls, to f and g, might share a common
-- continuation (and hence a common CallArea), but their number of overflow
-- parameters might differ.
-- EZY: Ought to use insert with combining function...
-- Find the Stack slots occupied by the subarea's conflicts
conflictSlots :: Ord x => IGPair x -> AreaSizeMap -> AreaMap -> SubArea -> Set Int
conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea =
foldNodes subarea foldNode Map.empty
where foldNode n set = Map.foldRightWithKey conflict set $ Map.findWithDefault Map.empty n ig
conflict n' () set = liveInSlots areaMap n' set
-- Add stack slots occupied by igraph node n
liveInSlots areaMap n set = foldr setAdd set (wordsOccupied areaSize areaMap n)
setAdd w s = Map.insert w () s
-- Find any open space for 'area' on the stack, starting from the
-- 'offset'. If the area is a CallArea or a spill slot for a pointer,
-- then it must be word-aligned.
freeSlotFrom :: Ord x => IGPair x -> AreaSizeMap -> Int -> AreaMap -> Area -> Int
freeSlotFrom ig areaSize offset areaMap area =
let size = Map.lookup area areaSize `orElse` 0
conflicts = conflictSlots ig areaSize areaMap (area, size, size)
-- CallAreas and Ptrs need to be word-aligned (round up!)
align = case area of CallArea _ -> align'
RegSlot r | isGcPtrType (localRegType r) -> align'
RegSlot _ -> id
align' n = (n + (wORD_SIZE - 1)) `div` wORD_SIZE * wORD_SIZE
-- Find a space big enough to hold the area
findSpace curr 0 = curr
findSpace curr cnt = -- part of target slot, # of bytes left to check
if Map.member curr conflicts then
findSpace (align (curr + size)) size -- try the next (possibly) open space
else findSpace (curr - 1) (cnt - 1)
in findSpace (align (offset + size)) size
-- Find an open space on the stack, and assign it to the area.
allocSlotFrom :: Ord x => IGPair x -> AreaSizeMap -> Int -> AreaMap -> Area -> AreaMap
allocSlotFrom ig areaSize from areaMap area =
if Map.member area areaMap then areaMap
else Map.insert area (freeSlotFrom ig areaSize from areaMap area) areaMap
-- Figure out all of the offsets from the slot location; this will be
-- non-zero for procpoints.
type SpEntryMap = BlockEnv Int
getSpEntryMap :: Int -> CmmGraph -> SpEntryMap
getSpEntryMap entry_off g@(CmmGraph {g_entry = entry})
= foldGraphBlocks add_sp_off (mapInsert entry entry_off emptyBlockMap) g
where add_sp_off :: CmmBlock -> BlockEnv Int -> BlockEnv Int
add_sp_off b env =
case lastNode b of
CmmCall {cml_cont=Just succ, cml_ret_args=off} -> mapInsert succ off env