NCG: Split linear allocator into separate modules.

parent d7d09c18
......@@ -28,7 +28,9 @@ import NCGMonad
import PositionIndependentCode
import RegLiveness
import RegCoalesce
import qualified RegAllocLinear as Linear
import qualified RegAlloc.Linear.Main as Linear
import qualified RegAllocColor as Color
import qualified RegAllocStats as Color
import qualified GraphColor as Color
......
-- | Put common type definitions here to break recursive module dependencies.
module RegAlloc.Linear.Base (
BlockAssignment,
Loc(..),
-- for stats
SpillReason(..),
RegAllocStats(..),
-- the allocator monad
RA_State(..),
RegM(..)
)
where
import RegAlloc.Linear.FreeRegs
import RegAlloc.Linear.StackMap
import RegLiveness
import MachRegs
import Outputable
import Unique
import UniqFM
import UniqSupply
-- | Used to store the register assignment on entry to a basic block.
-- We use this to handle join points, where multiple branch instructions
-- target a particular label. We have to insert fixup code to make
-- the register assignments from the different sources match up.
--
type BlockAssignment
= BlockMap (FreeRegs, RegMap Loc)
-- | Where a vreg is currently stored
-- A temporary can be marked as living in both a register and memory
-- (InBoth), for example if it was recently loaded from a spill location.
-- This makes it cheap to spill (no save instruction required), but we
-- have to be careful to turn this into InReg if the value in the
-- register is changed.
-- This is also useful when a temporary is about to be clobbered. We
-- save it in a spill location, but mark it as InBoth because the current
-- instruction might still want to read it.
--
data Loc
-- | vreg is in a register
= InReg {-# UNPACK #-} !RegNo
-- | vreg is held in a stack slot
| InMem {-# UNPACK #-} !StackSlot
-- | vreg is held in both a register and a stack slot
| InBoth {-# UNPACK #-} !RegNo
{-# UNPACK #-} !StackSlot
deriving (Eq, Show, Ord)
instance Outputable Loc where
ppr l = text (show l)
-- | Reasons why instructions might be inserted by the spiller.
-- Used when generating stats for -ddrop-asm-stats.
--
data SpillReason
-- | vreg was spilled to a slot so we could use its
-- current hreg for another vreg
= SpillAlloc !Unique
-- | vreg was moved because its hreg was clobbered
| SpillClobber !Unique
-- | vreg was loaded from a spill slot
| SpillLoad !Unique
-- | reg-reg move inserted during join to targets
| SpillJoinRR !Unique
-- | reg-mem move inserted during join to targets
| SpillJoinRM !Unique
-- | Used to carry interesting stats out of the register allocator.
data RegAllocStats
= RegAllocStats
{ ra_spillInstrs :: UniqFM [Int] }
-- | The register alloctor state
data RA_State
= RA_State
-- | the current mapping from basic blocks to
-- the register assignments at the beginning of that block.
{ ra_blockassig :: BlockAssignment
-- | free machine registers
, ra_freeregs :: {-#UNPACK#-}!FreeRegs
-- | assignment of temps to locations
, ra_assig :: RegMap Loc
-- | current stack delta
, ra_delta :: Int
-- | free stack slots for spilling
, ra_stack :: StackMap
-- | unique supply for generating names for join point fixup blocks.
, ra_us :: UniqSupply
-- | Record why things were spilled, for -ddrop-asm-stats.
-- Just keep a list here instead of a map of regs -> reasons.
-- We don't want to slow down the allocator if we're not going to emit the stats.
, ra_spills :: [SpillReason] }
-- | The register allocator monad type.
newtype RegM a
= RegM { unReg :: RA_State -> (# RA_State, a #) }
module RegAlloc.Linear.FreeRegs (
FreeRegs(),
noFreeRegs,
releaseReg,
initFreeRegs,
getFreeRegs,
allocateReg
)
#include "HsVersions.h"
where
-- -----------------------------------------------------------------------------
-- The free register set
-- This needs to be *efficient*
-- Here's an inefficient 'executable specification' of the FreeRegs data type:
--
-- type FreeRegs = [RegNo]
-- noFreeRegs = 0
-- releaseReg n f = if n `elem` f then f else (n : f)
-- initFreeRegs = allocatableRegs
-- getFreeRegs cls f = filter ( (==cls) . regClass . RealReg ) f
-- allocateReg f r = filter (/= r) f
#if defined(powerpc_TARGET_ARCH)
import RegAlloc.Linear.PPC.FreeRegs
#elif defined(sparc_TARGET_ARCH)
import RegAlloc.Linear.SPARC.FreeRegs
#elif defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
import RegAlloc.Linear.X86.FreeRegs
#else
#error "RegAlloc.Linear.FreeRegs not defined for this architecture."
#endif
......@@ -81,13 +81,21 @@ The algorithm is roughly:
-}
module RegAllocLinear (
module RegAlloc.Linear.Main (
regAlloc,
RegAllocStats, pprStats
module RegAlloc.Linear.Base,
module RegAlloc.Linear.Stats
) where
#include "HsVersions.h"
import RegAlloc.Linear.State
import RegAlloc.Linear.Base
import RegAlloc.Linear.StackMap
import RegAlloc.Linear.FreeRegs
import RegAlloc.Linear.Stats
import BlockId
import MachRegs
import MachInstrs
......@@ -101,296 +109,14 @@ import UniqSet
import UniqFM
import UniqSupply
import Outputable
import State
import FastString
import Data.Maybe
import Data.List
import Control.Monad
import Data.Word
import Data.Bits
#include "../includes/MachRegs.h"
-- -----------------------------------------------------------------------------
-- The free register set
-- This needs to be *efficient*
{- Here's an inefficient 'executable specification' of the FreeRegs data type:
type FreeRegs = [RegNo]
noFreeRegs = 0
releaseReg n f = if n `elem` f then f else (n : f)
initFreeRegs = allocatableRegs
getFreeRegs cls f = filter ( (==cls) . regClass . RealReg ) f
allocateReg f r = filter (/= r) f
-}
#if defined(powerpc_TARGET_ARCH)
-- The PowerPC has 32 integer and 32 floating point registers.
-- This is 32bit PowerPC, so Word64 is inefficient - two Word32s are much
-- better.
-- Note that when getFreeRegs scans for free registers, it starts at register
-- 31 and counts down. This is a hack for the PowerPC - the higher-numbered
-- registers are callee-saves, while the lower regs are caller-saves, so it
-- makes sense to start at the high end.
-- Apart from that, the code does nothing PowerPC-specific, so feel free to
-- add your favourite platform to the #if (if you have 64 registers but only
-- 32-bit words).
data FreeRegs = FreeRegs !Word32 !Word32
deriving( Show ) -- The Show is used in an ASSERT
noFreeRegs :: FreeRegs
noFreeRegs = FreeRegs 0 0
releaseReg :: RegNo -> FreeRegs -> FreeRegs
releaseReg r (FreeRegs g f)
| r > 31 = FreeRegs g (f .|. (1 `shiftL` (fromIntegral r - 32)))
| otherwise = FreeRegs (g .|. (1 `shiftL` fromIntegral r)) f
initFreeRegs :: FreeRegs
initFreeRegs = foldr releaseReg noFreeRegs allocatableRegs
getFreeRegs :: RegClass -> FreeRegs -> [RegNo] -- lazilly
getFreeRegs cls (FreeRegs g f)
| RcDouble <- cls = go f (0x80000000) 63
| RcInteger <- cls = go g (0x80000000) 31
| otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class" (ppr cls)
where
go _ 0 _ = []
go x m i | x .&. m /= 0 = i : (go x (m `shiftR` 1) $! i-1)
| otherwise = go x (m `shiftR` 1) $! i-1
allocateReg :: RegNo -> FreeRegs -> FreeRegs
allocateReg r (FreeRegs g f)
| r > 31 = FreeRegs g (f .&. complement (1 `shiftL` (fromIntegral r - 32)))
| otherwise = FreeRegs (g .&. complement (1 `shiftL` fromIntegral r)) f
#elif defined(sparc_TARGET_ARCH)
--------------------------------------------------------------------------------
-- SPARC is like PPC, except for twinning of floating point regs.
-- When we allocate a double reg we must take an even numbered
-- float reg, as well as the one after it.
-- Holds bitmaps showing what registers are currently allocated.
-- The float and double reg bitmaps overlap, but we only alloc
-- float regs into the float map, and double regs into the double map.
--
-- Free regs have a bit set in the corresponding bitmap.
--
data FreeRegs
= FreeRegs
!Word32 -- int reg bitmap regs 0..31
!Word32 -- float reg bitmap regs 32..63
!Word32 -- double reg bitmap regs 32..63
deriving( Show )
-- | A reg map where no regs are free to be allocated.
noFreeRegs :: FreeRegs
noFreeRegs = FreeRegs 0 0 0
-- | The initial set of free regs.
-- Don't treat the top half of reg pairs we're using as doubles as being free.
initFreeRegs :: FreeRegs
initFreeRegs
= regs
where
-- freeDouble = getFreeRegs RcDouble regs
regs = foldr releaseReg noFreeRegs allocable
allocable = allocatableRegs \\ doublePairs
doublePairs = [43, 45, 47, 49, 51, 53]
-- | Get all the free registers of this class.
getFreeRegs :: RegClass -> FreeRegs -> [RegNo] -- lazilly
getFreeRegs cls (FreeRegs g f d)
| RcInteger <- cls = go g 1 0
| RcFloat <- cls = go f 1 32
| RcDouble <- cls = go d 1 32
| otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class " (ppr cls)
where
go _ 0 _ = []
go x m i | x .&. m /= 0 = i : (go x (m `shiftL` 1) $! i+1)
| otherwise = go x (m `shiftL` 1) $! i+1
{-
showFreeRegs :: FreeRegs -> String
showFreeRegs regs
= "FreeRegs\n"
++ " integer: " ++ (show $ getFreeRegs RcInteger regs) ++ "\n"
++ " float: " ++ (show $ getFreeRegs RcFloat regs) ++ "\n"
++ " double: " ++ (show $ getFreeRegs RcDouble regs) ++ "\n"
-}
{-
-- | Check whether a reg is free
regIsFree :: RegNo -> FreeRegs -> Bool
regIsFree r (FreeRegs g f d)
-- a general purpose reg
| r <= 31
, mask <- 1 `shiftL` fromIntegral r
= g .&. mask /= 0
-- use the first 22 float regs as double precision
| r >= 32
, r <= 53
, mask <- 1 `shiftL` (fromIntegral r - 32)
= d .&. mask /= 0
-- use the last 10 float regs as single precision
| otherwise
, mask <- 1 `shiftL` (fromIntegral r - 32)
= f .&. mask /= 0
-}
-- | Grab a register.
grabReg :: RegNo -> FreeRegs -> FreeRegs
grabReg r (FreeRegs g f d)
-- a general purpose reg
| r <= 31
, mask <- complement (1 `shiftL` fromIntegral r)
= FreeRegs (g .&. mask) f d
-- use the first 22 float regs as double precision
| r >= 32
, r <= 53
, mask <- complement (1 `shiftL` (fromIntegral r - 32))
= FreeRegs g f (d .&. mask)
-- use the last 10 float regs as single precision
| otherwise
, mask <- complement (1 `shiftL` (fromIntegral r - 32))
= FreeRegs g (f .&. mask) d
-- | Release a register from allocation.
-- The register liveness information says that most regs die after a C call,
-- but we still don't want to allocate to some of them.
--
releaseReg :: RegNo -> FreeRegs -> FreeRegs
releaseReg r regs@(FreeRegs g f d)
-- used by STG machine, or otherwise unavailable
| r >= 0 && r <= 15 = regs
| r >= 17 && r <= 21 = regs
| r >= 24 && r <= 31 = regs
| r >= 32 && r <= 41 = regs
| r >= 54 && r <= 59 = regs
-- never release the high part of double regs.
| r == 43 = regs
| r == 45 = regs
| r == 47 = regs
| r == 49 = regs
| r == 51 = regs
| r == 53 = regs
-- a general purpose reg
| r <= 31
, mask <- 1 `shiftL` fromIntegral r
= FreeRegs (g .|. mask) f d
-- use the first 22 float regs as double precision
| r >= 32
, r <= 53
, mask <- 1 `shiftL` (fromIntegral r - 32)
= FreeRegs g f (d .|. mask)
-- use the last 10 float regs as single precision
| otherwise
, mask <- 1 `shiftL` (fromIntegral r - 32)
= FreeRegs g (f .|. mask) d
-- | Allocate a register in the map.
allocateReg :: RegNo -> FreeRegs -> FreeRegs
allocateReg r regs -- (FreeRegs g f d)
-- if the reg isn't actually free then we're in trouble
{- | not $ regIsFree r regs
= pprPanic
"RegAllocLinear.allocateReg"
(text "reg " <> ppr r <> text " is not free")
-}
| otherwise
= grabReg r regs
--------------------------------------------------------------------------------
-- If we have less than 32 registers, or if we have efficient 64-bit words,
-- we will just use a single bitfield.
#else
# if defined(alpha_TARGET_ARCH)
type FreeRegs = Word64
# else
type FreeRegs = Word32
# endif
noFreeRegs :: FreeRegs
noFreeRegs = 0
releaseReg :: RegNo -> FreeRegs -> FreeRegs
releaseReg n f = f .|. (1 `shiftL` n)
initFreeRegs :: FreeRegs
initFreeRegs = foldr releaseReg noFreeRegs allocatableRegs
getFreeRegs :: RegClass -> FreeRegs -> [RegNo] -- lazilly
getFreeRegs cls f = go f 0
where go 0 _ = []
go n m
| n .&. 1 /= 0 && regClass (RealReg m) == cls
= m : (go (n `shiftR` 1) $! (m+1))
| otherwise
= go (n `shiftR` 1) $! (m+1)
-- ToDo: there's no point looking through all the integer registers
-- in order to find a floating-point one.
allocateReg :: RegNo -> FreeRegs -> FreeRegs
allocateReg r f = f .&. complement (1 `shiftL` fromIntegral r)
#endif
-- -----------------------------------------------------------------------------
-- The assignment of virtual registers to stack slots
-- We have lots of stack slots. Memory-to-memory moves are a pain on most
-- architectures. Therefore, we avoid having to generate memory-to-memory moves
-- by simply giving every virtual register its own stack slot.
-- The StackMap stack map keeps track of virtual register - stack slot
-- associations and of which stack slots are still free. Once it has been
-- associated, a stack slot is never "freed" or removed from the StackMap again,
-- it remains associated until we are done with the current CmmProc.
type StackSlot = Int
data StackMap = StackMap [StackSlot] (UniqFM StackSlot)
emptyStackMap :: StackMap
emptyStackMap = StackMap [0..maxSpillSlots] emptyUFM
getStackSlotFor :: StackMap -> Unique -> (StackMap,Int)
getStackSlotFor (StackMap [] _) _
= panic "RegAllocLinear.getStackSlotFor: out of stack slots, try -fregs-graph"
-- This happens with darcs' SHA1.hs, see #1993
getStackSlotFor fs@(StackMap (freeSlot:stack') reserved) reg =
case lookupUFM reserved reg of
Just slot -> (fs,slot)
Nothing -> (StackMap stack' (addToUFM reserved reg freeSlot), freeSlot)
-- -----------------------------------------------------------------------------
-- Top level of the register allocator
......@@ -436,27 +162,6 @@ regAlloc (CmmProc _ _ _ _)
-- -----------------------------------------------------------------------------
-- Linear sweep to allocate registers
data Loc = InReg {-# UNPACK #-} !RegNo
| InMem {-# UNPACK #-} !Int -- stack slot
| InBoth {-# UNPACK #-} !RegNo
{-# UNPACK #-} !Int -- stack slot
deriving (Eq, Show, Ord)
{-
A temporary can be marked as living in both a register and memory
(InBoth), for example if it was recently loaded from a spill location.
This makes it cheap to spill (no save instruction required), but we
have to be careful to turn this into InReg if the value in the
register is changed.
This is also useful when a temporary is about to be clobbered. We
save it in a spill location, but mark it as InBoth because the current
instruction might still want to read it.
-}
instance Outputable Loc where
ppr l = text (show l)
-- | Do register allocation on some basic blocks.
-- But be careful to allocate a block in an SCC only if it has
......@@ -548,8 +253,6 @@ linearRA block_live instr_acc fixups (instr:instrs)
-- -----------------------------------------------------------------------------
-- Register allocation for a single instruction
type BlockAssignment = BlockMap (FreeRegs, RegMap Loc)
raInsn :: BlockMap RegSet -- Live temporaries at each basic block
-> [Instr] -- new instructions (accum.)
-> LiveInstr -- the instruction (with "deaths")
......@@ -1101,189 +804,6 @@ handleComponent _ _ (CyclicSCC _)
-- -----------------------------------------------------------------------------
-- The register allocator's monad.
-- Here we keep all the state that the register allocator keeps track
-- of as it walks the instructions in a basic block.
data RA_State
= RA_State {
ra_blockassig :: BlockAssignment,
-- The current mapping from basic blocks to
-- the register assignments at the beginning of that block.
ra_freeregs :: {-#UNPACK#-}!FreeRegs, -- free machine registers
ra_assig :: RegMap Loc, -- assignment of temps to locations
ra_delta :: Int, -- current stack delta
ra_stack :: StackMap, -- free stack slots for spilling
ra_us :: UniqSupply, -- unique supply for generating names
-- for fixup blocks.
-- Record why things were spilled, for -ddrop-asm-stats.
-- Just keep a list here instead of a map of regs -> reasons.
-- We don't want to slow down the allocator if we're not going to emit the stats.
ra_spills :: [SpillReason]
}
newtype RegM a = RegM { unReg :: RA_State -> (# RA_State, a #) }
instance Monad RegM where
m >>= k = RegM $ \s -> case unReg m s of { (# s, a #) -> unReg (k a) s }
return a = RegM $ \s -> (# s, a #)
runR :: BlockAssignment -> FreeRegs -> RegMap Loc -> StackMap -> UniqSupply
-> RegM a -> (BlockAssignment, StackMap, RegAllocStats, a)
runR block_assig freeregs assig stack us thing =
case unReg thing (RA_State{ ra_blockassig=block_assig, ra_freeregs=freeregs,
ra_assig=assig, ra_delta=0{-???-}, ra_stack=stack,
ra_us = us, ra_spills = [] }) of
(# state'@RA_State{ ra_blockassig=block_assig, ra_stack=stack' }, returned_thing #)
-> (block_assig, stack', makeRAStats state', returned_thing)
spillR :: Reg -> Unique -> RegM (Instr, Int)
spillR reg temp = RegM $ \ s@RA_State{ra_delta=delta, ra_stack=stack} ->
let (stack',slot) = getStackSlotFor stack temp
instr = mkSpillInstr reg delta slot
in
(# s{ra_stack=stack'}, (instr,slot) #)
loadR :: Reg -> Int -> RegM Instr
loadR reg slot = RegM $ \ s@RA_State{ra_delta=delta} ->
(# s, mkLoadInstr reg delta slot #)
getFreeRegsR :: RegM FreeRegs
getFreeRegsR = RegM $ \ s@RA_State{ra_freeregs = freeregs} ->
(# s, freeregs #)
setFreeRegsR :: FreeRegs -> RegM ()
setFreeRegsR regs = RegM $ \ s ->
(# s{ra_freeregs = regs}, () #)
getAssigR :: RegM (RegMap Loc)
getAssigR = RegM $ \ s@RA_State{ra_assig = assig} ->
(# s, assig #)
setAssigR :: RegMap Loc -> RegM ()
setAssigR assig = RegM $ \ s ->
(# s{ra_assig=assig}, () #)
getBlockAssigR :: RegM BlockAssignment
getBlockAssigR = RegM $ \ s@RA_State{ra_blockassig = assig} ->
(# s, assig #)
setBlockAssigR :: BlockAssignment -> RegM ()
setBlockAssigR assig = RegM $ \ s ->
(# s{ra_blockassig = assig}, () #)
setDeltaR :: Int -> RegM ()
setDeltaR n = RegM $ \ s ->
(# s{ra_delta = n}, () #)
getDeltaR :: RegM Int
getDeltaR = RegM $ \s -> (# s, ra_delta s #)
getUniqueR :: RegM Unique
getUniqueR = RegM $ \s ->
case splitUniqSupply (ra_us s) of
(us1, us2) -> (# s{ra_us = us2}, uniqFromSupply us1 #)
-- | Record that a spill instruction was inserted, for profiling.
recordSpill :: SpillReason -> RegM ()