Commit 889c084e authored by Simon Marlow's avatar Simon Marlow

Merge in new code generator branch.

This changes the new code generator to make use of the Hoopl package
for dataflow analysis.  Hoopl is a new boot package, and is maintained
in a separate upstream git repository (as usual, GHC has its own
lagging darcs mirror in

During this merge I squashed recent history into one patch.  I tried
to rebase, but the history had some internal conflicts of its own
which made rebase extremely confusing, so I gave up. The history I
squashed was:

  - Update new codegen to work with latest Hoopl
  - Add some notes on new code gen to cmm-notes
  - Enable Hoopl lag package.
  - Add SPJ note to cmm-notes
  - Improve GC calls on new code generator.

Work in this branch was done by:
   - Milan Straka <>
   - John Dias <>
   - David Terei <>

Edward Z. Yang <> merged in further changes from GHC HEAD
and fixed a few bugs.
parent f1a90f54
{- BlockId module should probably go away completely, being superseded by Label -}
module BlockId
( BlockId(..), mkBlockId -- ToDo: BlockId should be abstract, but it isn't yet
, BlockEnv, emptyBlockEnv, elemBlockEnv, lookupBlockEnv, extendBlockEnv
, mkBlockEnv, mapBlockEnv
, eltsBlockEnv, plusBlockEnv, delFromBlockEnv, blockEnvToList, lookupWithDefaultBEnv
, isNullBEnv, sizeBEnv, foldBlockEnv, foldBlockEnv', addToBEnv_Acc
, BlockSet, emptyBlockSet, unitBlockSet, isEmptyBlockSet
, elemBlockSet, extendBlockSet, sizeBlockSet, unionBlockSets
, removeBlockSet, mkBlockSet, blockSetToList, foldBlockSet
( BlockId, mkBlockId -- ToDo: BlockId should be abstract, but it isn't yet
, BlockSet, BlockEnv
, IsSet(..), setInsertList, setDeleteList, setUnions
, IsMap(..), mapInsertList, mapDeleteList, mapUnions
, emptyBlockSet, emptyBlockMap
, blockLbl, infoTblLbl, retPtLbl
) where
import CLabel
import IdInfo
import Maybes
import Name
import Outputable
import UniqFM
import Unique
import UniqSet
import Compiler.Hoopl hiding (Unique)
import Compiler.Hoopl.GHC (uniqueToInt, uniqueToLbl, lblToUnique)
--- Block Ids, their environments, and their sets
......@@ -31,129 +29,40 @@ most assembly languages allow, a label is visible throughout the entire
compilation unit in which it appears.
data BlockId = BlockId Unique
deriving (Eq,Ord)
type BlockId = Label
instance Uniquable BlockId where
getUnique (BlockId id) = id
getUnique label = getUnique (uniqueToInt $ lblToUnique label)
mkBlockId :: Unique -> BlockId
mkBlockId uniq = BlockId uniq
instance Show BlockId where
show (BlockId u) = show u
mkBlockId unique = uniqueToLbl $ intToUnique $ getKey unique
instance Outputable BlockId where
ppr (BlockId id) = ppr id
ppr label = ppr (getUnique label)
retPtLbl :: BlockId -> CLabel
retPtLbl (BlockId id) = mkReturnPtLabel id
retPtLbl label = mkReturnPtLabel $ getUnique label
blockLbl :: BlockId -> CLabel
blockLbl (BlockId id) = mkEntryLabel (mkFCallName id "block") NoCafRefs
blockLbl label = mkEntryLabel (mkFCallName (getUnique label) "block") NoCafRefs
infoTblLbl :: BlockId -> CLabel
infoTblLbl (BlockId id) = mkInfoTableLabel (mkFCallName id "block") NoCafRefs
infoTblLbl label = mkInfoTableLabel (mkFCallName (getUnique label) "block") NoCafRefs
-- Block environments: Id blocks
newtype BlockEnv a = BlockEnv (UniqFM {- id -} a)
type BlockEnv a = LabelMap a
instance Outputable a => Outputable (BlockEnv a) where
ppr (BlockEnv env) = ppr env
-- This is pretty horrid. There must be common patterns here that can be
-- abstracted into wrappers.
emptyBlockEnv :: BlockEnv a
emptyBlockEnv = BlockEnv emptyUFM
isNullBEnv :: BlockEnv a -> Bool
isNullBEnv (BlockEnv env) = isNullUFM env
sizeBEnv :: BlockEnv a -> Int
sizeBEnv (BlockEnv env) = sizeUFM env
mkBlockEnv :: [(BlockId,a)] -> BlockEnv a
mkBlockEnv = foldl (uncurry . extendBlockEnv) emptyBlockEnv
eltsBlockEnv :: BlockEnv elt -> [elt]
eltsBlockEnv (BlockEnv env) = eltsUFM env
delFromBlockEnv :: BlockEnv elt -> BlockId -> BlockEnv elt
delFromBlockEnv (BlockEnv env) (BlockId id) = BlockEnv (delFromUFM env id)
lookupBlockEnv :: BlockEnv a -> BlockId -> Maybe a
lookupBlockEnv (BlockEnv env) (BlockId id) = lookupUFM env id
elemBlockEnv :: BlockEnv a -> BlockId -> Bool
elemBlockEnv (BlockEnv env) (BlockId id) = isJust $ lookupUFM env id
lookupWithDefaultBEnv :: BlockEnv a -> a -> BlockId -> a
lookupWithDefaultBEnv env x id = lookupBlockEnv env id `orElse` x
extendBlockEnv :: BlockEnv a -> BlockId -> a -> BlockEnv a
extendBlockEnv (BlockEnv env) (BlockId id) x = BlockEnv (addToUFM env id x)
mapBlockEnv :: (a -> b) -> BlockEnv a -> BlockEnv b
mapBlockEnv f (BlockEnv env) = BlockEnv (mapUFM f env)
foldBlockEnv :: (BlockId -> a -> b -> b) -> b -> BlockEnv a -> b
foldBlockEnv f b (BlockEnv env) =
foldUFM_Directly (\u x y -> f (mkBlockId u) x y) b env
ppr = ppr . mapToList
foldBlockEnv' :: (a -> b -> b) -> b -> BlockEnv a -> b
foldBlockEnv' f b (BlockEnv env) = foldUFM f b env
emptyBlockMap :: BlockEnv a
emptyBlockMap = mapEmpty
plusBlockEnv :: BlockEnv elt -> BlockEnv elt -> BlockEnv elt
plusBlockEnv (BlockEnv x) (BlockEnv y) = BlockEnv (plusUFM x y)
-- Block sets
type BlockSet = LabelSet
blockEnvToList :: BlockEnv elt -> [(BlockId, elt)]
blockEnvToList (BlockEnv env) =
map (\ (id, elt) -> (BlockId id, elt)) $ ufmToList env
addToBEnv_Acc :: (elt -> elts -> elts) -- Add to existing
-> (elt -> elts) -- New element
-> BlockEnv elts -- old
-> BlockId -> elt -- new
-> BlockEnv elts -- result
addToBEnv_Acc add new (BlockEnv old) (BlockId k) v =
BlockEnv (addToUFM_Acc add new old k v)
-- I believe this is only used by obsolete code.
newtype BlockSet = BlockSet (UniqSet Unique)
instance Outputable BlockSet where
ppr (BlockSet set) = ppr set
ppr = ppr . setElems
emptyBlockSet :: BlockSet
emptyBlockSet = BlockSet emptyUniqSet
isEmptyBlockSet :: BlockSet -> Bool
isEmptyBlockSet (BlockSet s) = isEmptyUniqSet s
unitBlockSet :: BlockId -> BlockSet
unitBlockSet = extendBlockSet emptyBlockSet
elemBlockSet :: BlockId -> BlockSet -> Bool
elemBlockSet (BlockId id) (BlockSet set) = elementOfUniqSet id set
extendBlockSet :: BlockSet -> BlockId -> BlockSet
extendBlockSet (BlockSet set) (BlockId id) = BlockSet (addOneToUniqSet set id)
removeBlockSet :: BlockSet -> BlockId -> BlockSet
removeBlockSet (BlockSet set) (BlockId id) = BlockSet (delOneFromUniqSet set id)
mkBlockSet :: [BlockId] -> BlockSet
mkBlockSet = foldl extendBlockSet emptyBlockSet
unionBlockSets :: BlockSet -> BlockSet -> BlockSet
unionBlockSets (BlockSet s) (BlockSet s') = BlockSet (unionUniqSets s s')
sizeBlockSet :: BlockSet -> Int
sizeBlockSet (BlockSet set) = sizeUniqSet set
blockSetToList :: BlockSet -> [BlockId]
blockSetToList (BlockSet set) = map BlockId $ uniqSetToList set
foldBlockSet :: (BlockId -> b -> b) -> b -> BlockSet -> b
foldBlockSet f z (BlockSet set) = foldUniqSet (f . BlockId) z set
emptyBlockSet = setEmpty
This diff is collapsed.
module CmmBrokenBlock (
) where
#include "HsVersions.h"
import BlockId
import Cmm
import CmmUtils
import CLabel
import CgUtils (callerSaveVolatileRegs)
import ClosureInfo
import Maybes
import Data.List
import Panic
import Unique
-- This module takes a 'CmmBasicBlock' which might have 'CmmCall'
-- statements in it with 'CmmSafe' set and breaks it up at each such call.
-- It also collects information about the block for later use
-- by the CPS algorithm.
-- Data structures
-- |Similar to a 'CmmBlock' with a little extra information
-- to help the CPS analysis.
data BrokenBlock
= BrokenBlock {
brokenBlockId :: BlockId, -- ^ The block's label like a 'CmmBasicBlock'
brokenBlockEntry :: BlockEntryInfo,
-- ^ Ways this block can be entered
brokenBlockStmts :: [CmmStmt],
-- ^ Body like a CmmBasicBlock
-- (but without the last statement)
brokenBlockTargets :: [BlockId],
-- ^ Blocks that this block could
-- branch to either by conditional
-- branches or via the last statement
brokenBlockExit :: FinalStmt
-- ^ The final statement of the block
-- | How a block could be entered
-- See Note [An example of CPS conversion]
data BlockEntryInfo
= FunctionEntry CmmInfo CLabel CmmFormals
-- ^ Block is the beginning of a function, parameters are:
-- 1. Function header info
-- 2. The function name
-- 3. Aguments to function
-- Only the formal parameters are live
| ContinuationEntry CmmFormals C_SRT Bool
-- ^ Return point of a function call, parameters are:
-- 1. return values (argument to continuation)
-- 2. SRT for the continuation's info table
-- 3. True <=> GC block so ignore stack size
-- Live variables, other than
-- the return values, are on the stack
| ControlEntry
-- ^ Any other kind of block. Only entered due to control flow.
-- TODO: Consider adding ProcPointEntry
-- no return values, but some live might end up as
-- params or possibly in the frame
{- Note [An example of CPS conversion]
This is NR's and SLPJ's guess about how things might work;
it may not be consistent with the actual code (particularly
in the matter of what's in parameters and what's on the stack).
f(x,y) {
if x>2 then goto L
x = x+1
L: if x>1 then y = g(y)
else x = x+1 ;
return( x+y )
f(x,y) { // FunctionEntry
if x>2 then goto L
x = x+1
L: // ControlEntry
if x>1 then push x; push f1; jump g(y)
else x=x+1; jump f2(x, y)
f1(y) { // ContinuationEntry
pop x; jump f2(x, y);
f2(x, y) { // ProcPointEntry
return (z+y);
data ContFormat = ContFormat HintedCmmFormals C_SRT Bool
-- ^ Arguments
-- 1. return values (argument to continuation)
-- 2. SRT for the continuation's info table
-- 3. True <=> GC block so ignore stack size
deriving (Eq)
-- | Final statement in a 'BlokenBlock'.
-- Constructors and arguments match those in 'Cmm',
-- but are restricted to branches, returns, jumps, calls and switches
data FinalStmt
= FinalBranch BlockId
-- ^ Same as 'CmmBranch'. Target must be a ControlEntry
| FinalReturn HintedCmmActuals
-- ^ Same as 'CmmReturn'. Parameter is the return values.
| FinalJump CmmExpr HintedCmmActuals
-- ^ Same as 'CmmJump'. Parameters:
-- 1. The function to call,
-- 2. Arguments of the call
| FinalCall BlockId CmmCallTarget HintedCmmFormals HintedCmmActuals
C_SRT CmmReturnInfo Bool
-- ^ Same as 'CmmCallee' followed by 'CmmGoto'. Parameters:
-- 1. Target of the 'CmmGoto' (must be a 'ContinuationEntry')
-- 2. The function to call
-- 3. Results from call (redundant with ContinuationEntry)
-- 4. Arguments to call
-- 5. SRT for the continuation's info table
-- 6. Does the function return?
-- 7. True <=> GC block so ignore stack size
| FinalSwitch CmmExpr [Maybe BlockId]
-- ^ Same as a 'CmmSwitch'. Paremeters:
-- 1. Scrutinee (zero based)
-- 2. Targets
-- Operations for broken blocks
-- Naively breaking at *every* CmmCall leads to sub-optimal code.
-- In particular, a CmmCall followed by a CmmBranch would result
-- in a continuation that has the single CmmBranch statement in it.
-- It would be better have the CmmCall directly return to the block
-- that the branch jumps to.
-- This requires the target of the branch to look like the parameter
-- format that the CmmCall is expecting. If other CmmCall/CmmBranch
-- sequences go to the same place they might not be expecting the
-- same format. So this transformation uses the following solution.
-- First the blocks are broken up but none of the blocks are marked
-- as continuations yet. This is the 'breakBlock' function.
-- Second, the blocks "vote" on what other blocks need to be continuations
-- and how they should be layed out. Plurality wins, but other selection
-- methods could be selected at a later time.
-- This is the 'selectContinuations' function.
-- Finally, the blocks are upgraded to 'ContEntry' continuations
-- based on the results with the 'makeContinuationEntries' function,
-- and the blocks that didn't get the format they wanted for their
-- targets get a small adaptor block created for them by
-- the 'adaptBlockToFormat' function.
-- could be
UNUSED: 2008-12-29
breakProc ::
[BlockId] -- ^ Any GC blocks that should be special
-> [[Unique]] -- ^ An infinite list of uniques
-- to create names of the new blocks with
-> CmmInfo -- ^ Info table for the procedure
-> CLabel -- ^ Name of the procedure
-> CmmFormals -- ^ Parameters of the procedure
-> [CmmBasicBlock] -- ^ Blocks of the procecure
-- (First block is the entry block)
-> [BrokenBlock]
breakProc gc_block_idents uniques info ident params blocks =
(adaptor_uniques : block_uniques) = uniques
broken_blocks :: ([(BlockId, ContFormat)], [BrokenBlock])
broken_blocks =
let new_blocks =
zipWith3 (breakBlock gc_block_idents)
(FunctionEntry info ident params :
repeat ControlEntry)
in (concatMap fst new_blocks, concatMap snd new_blocks)
selected = selectContinuations (fst broken_blocks)
in map (makeContinuationEntries selected) $
concat $
zipWith (adaptBlockToFormat selected)
(snd broken_blocks)
-- | Takes a 'CmmBasicBlock' and breaks it up into a list of 'BrokenBlock'
-- by splitting on each 'CmmCall' in the 'CmmBasicBlock'.
breakBlock ::
[BlockId] -- ^ Any GC blocks that should be special
-> [Unique] -- ^ An infinite list of uniques
-- to create names of the new blocks with
-> CmmBasicBlock -- ^ Input block to break apart
-> BlockEntryInfo -- ^ Info for the first created 'BrokenBlock'
-> ([(BlockId, ContFormat)], [BrokenBlock])
breakBlock gc_block_idents uniques (BasicBlock ident stmts) entry =
breakBlock' uniques ident entry [] [] stmts
breakBlock' uniques current_id entry exits accum_stmts stmts =
case stmts of
[] -> panic "block doesn't end in jump, goto, return or switch"
-- Last statement. Make the 'BrokenBlock'
[CmmJump target arguments] ->
[BrokenBlock current_id entry accum_stmts
(FinalJump target arguments)])
[CmmReturn arguments] ->
[BrokenBlock current_id entry accum_stmts
(FinalReturn arguments)])
[CmmBranch target] ->
[BrokenBlock current_id entry accum_stmts
(FinalBranch target)])
[CmmSwitch expr targets] ->
[BrokenBlock current_id entry accum_stmts
(mapMaybe id targets ++ exits)
(FinalSwitch expr targets)])
-- These shouldn't happen in the middle of a block.
-- They would cause dead code.
(CmmJump _ _:_) -> panic "jump in middle of block"
(CmmReturn _:_) -> panic "return in middle of block"
(CmmBranch _:_) -> panic "branch in middle of block"
(CmmSwitch _ _:_) -> panic "switch in middle of block"
-- Detect this special case to remain an inverse of
-- 'cmmBlockFromBrokenBlock'
[CmmCall target results arguments (CmmSafe srt) ret,
CmmBranch next_id] ->
([cont_info], [block])
cont_info = (next_id,
ContFormat results srt
(ident `elem` gc_block_idents))
block = do_call current_id entry accum_stmts exits next_id
target results arguments srt ret
-- Break the block on safe calls (the main job of this function)
(CmmCall target results arguments (CmmSafe srt) ret : stmts) ->
(cont_info : cont_infos, block : blocks)
next_id = BlockId $ head uniques
block = do_call current_id entry accum_stmts exits next_id
target results arguments srt ret
cont_info = (next_id, -- Entry convention for the
-- continuation of the call
ContFormat results srt
(ident `elem` gc_block_idents))
-- Break up the part after the call
(cont_infos, blocks) = breakBlock' (tail uniques) next_id
ControlEntry [] [] stmts
-- Unsafe calls don't need a continuation
-- but they do need to be expanded
(CmmCall target results arguments CmmUnsafe ret : stmts) ->
breakBlock' remaining_uniques current_id entry exits
(accum_stmts ++
arg_stmts ++
caller_save ++
[CmmCall target results new_args CmmUnsafe ret] ++
(remaining_uniques, arg_stmts, new_args) =
loadArgsIntoTemps uniques arguments
(caller_save, caller_load) = callerSaveVolatileRegs (Just [])
-- Default case. Just keep accumulating statements
-- and branch targets.
(s : stmts) ->
breakBlock' uniques current_id entry
(cond_branch_target s++exits)
do_call current_id entry accum_stmts exits next_id
target results arguments srt ret =
BrokenBlock current_id entry accum_stmts (next_id:exits)
(FinalCall next_id target results arguments srt ret
(current_id `elem` gc_block_idents))
cond_branch_target (CmmCondBranch _ target) = [target]
cond_branch_target _ = []
selectContinuations :: [(BlockId, ContFormat)] -> [(BlockId, ContFormat)]
selectContinuations needed_continuations = formats
formats = map select_format format_groups
format_groups = groupBy by_target needed_continuations
by_target x y = fst x == fst y
select_format formats = winner
winner = head $ head $ sortBy more_votes format_votes
format_votes = groupBy by_format formats
by_format x y = snd x == snd y
more_votes x y = compare (length y) (length x)
-- sort so the most votes goes *first*
-- (thus the order of x and y is reversed)
makeContinuationEntries :: [(BlockId, ContFormat)]
-> BrokenBlock -> BrokenBlock
makeContinuationEntries formats
block@(BrokenBlock ident _entry stmts targets exit) =
case lookup ident formats of
Nothing -> block
Just (ContFormat formals srt is_gc) ->
BrokenBlock ident (ContinuationEntry (map hintlessCmm formals) srt is_gc)
stmts targets exit
adaptBlockToFormat :: [(BlockId, ContFormat)]
-> Unique
-> BrokenBlock
-> [BrokenBlock]
adaptBlockToFormat formats unique
block@(BrokenBlock ident entry stmts targets
(FinalCall next target formals
actuals srt ret is_gc)) =
if format_formals == formals &&
format_srt == srt &&
format_is_gc == is_gc
then [block] -- Woohoo! This block got the continuation format it wanted
else [adaptor_block, revised_block]
-- This block didn't get the format it wanted for the
-- continuation, so we have to build an adaptor.
(ContFormat format_formals format_srt format_is_gc) =
maybe unknown_block id $ lookup next formats
unknown_block = panic "unknown block in adaptBlockToFormat"
revised_block = BrokenBlock ident entry stmts revised_targets revised_exit
revised_targets = adaptor_ident : delete next targets