Commit 6b57d391 authored by Simon Marlow's avatar Simon Marlow

Eliminate some redundant stack assignments and empty stack checks

parent d4befa38
...@@ -24,6 +24,7 @@ import qualified Data.Map as Map ...@@ -24,6 +24,7 @@ import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import Control.Monad.Fix import Control.Monad.Fix
import Data.Array as Array import Data.Array as Array
import Data.Bits
#include "HsVersions.h" #include "HsVersions.h"
...@@ -183,20 +184,59 @@ layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks ...@@ -183,20 +184,59 @@ layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
let hwm' = maximum (acc_hwm : map sm_sp (mapElems out)) let hwm' = maximum (acc_hwm : map sm_sp (mapElems out))
middle2 = maybeAddSpAdj sp_off $ foldl blockSnoc middle1 saves middle2 = maybeAddSpAdj sp_off $ foldl blockSnoc middle1 saves
area_off = getAreaOff final_stackmaps
-- manifest Sp: turn all CmmStackSlots into actual loads -- manifest Sp: turn all CmmStackSlots into actual loads
fiddle_middle = mapExpDeep (areaToSp sp0 sp_high final_stackmaps) adj_middle = mapExpDeep (areaToSp sp0 sp_high area_off)
fiddle_last = mapExpDeep (areaToSp (sp0 - sp_off) sp_high adj_last = optStackCheck .
final_stackmaps) mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off)
middle3 = blockFromList $
map adj_middle $
elimStackStores stack0 final_stackmaps area_off $
blockToList middle2
newblock = blockJoin entry0 middle3 (adj_last last1)
fixup_blocks' = map (blockMapNodes3 (id, adj_middle, id)) fixup_blocks
stackmaps' = mapUnion acc_stackmaps out stackmaps' = mapUnion acc_stackmaps out
newblock = blockJoin entry0 middle2 last1
newblock' = blockMapNodes3 (id, fiddle_middle, fiddle_last) newblock
fixup_blocks' = map (blockMapNodes3 (id, fiddle_middle, id))
fixup_blocks
pprTrace "layout(out)" (ppr out) $ return () pprTrace "layout(out)" (ppr out) $ return ()
go bs stackmaps' hwm' (newblock' : fixup_blocks' ++ acc_blocks) go bs stackmaps' hwm' (newblock : fixup_blocks' ++ acc_blocks)
-- | Eliminate stores of the form
--
-- Sp[area+n] = r
--
-- when we know that r is already in the same slot as Sp[area+n]. We
-- could do this in a later optimisation pass, but that would involve
-- a separate analysis and we already have the information to hand
-- here. It helps clean up some extra stack stores in common cases.
--
-- Note that we may have to modify the StackMap as we walk through the
-- code using procMiddle, since an assignment to a variable in the
-- StackMap will invalidate its mapping there.
--
elimStackStores :: StackMap
-> BlockEnv StackMap
-> (Area -> ByteOff)
-> [CmmNode O O]
-> [CmmNode O O]
elimStackStores stackmap stackmaps area_off nodes
= go stackmap nodes
where
go _stackmap [] = []
go stackmap (n:ns)
= case n of
CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
| Just (_,off) <- lookupUFM (sm_regs stackmap) r
, area_off area + m == off
-> pprTrace "eliminated a node!" (ppr r) $ go stackmap ns
_otherwise
-> n : go (procMiddle stackmaps n stackmap) ns
-- This doesn't seem right somehow. We need to find out whether this -- This doesn't seem right somehow. We need to find out whether this
...@@ -234,7 +274,7 @@ maybeAddSpAdj sp_off block ...@@ -234,7 +274,7 @@ maybeAddSpAdj sp_off block
procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
procMiddle stackmaps node sm procMiddle stackmaps node sm
= case node of = case node of
CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) t) CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)
-> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) } -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
where loc = getStackLoc area off stackmaps where loc = getStackLoc area off stackmaps
CmmAssign (CmmLocal r) _other CmmAssign (CmmLocal r) _other
...@@ -275,11 +315,31 @@ handleLastNode procpoints liveness cont_info stackmaps ...@@ -275,11 +315,31 @@ handleLastNode procpoints liveness cont_info stackmaps
return ([], mapEmpty, sp_off, last, []) return ([], mapEmpty, sp_off, last, [])
-- At each CmmCall with a continuation: -- At each CmmCall with a continuation:
CmmCall{ cml_cont = Just cont_lbl, .. } CmmCall{ cml_cont = Just cont_lbl, .. } ->
lastCall cont_lbl cml_args cml_ret_args cml_ret_off
CmmForeignCall{ succ = cont_lbl, .. } ->
lastCall cont_lbl 0{-no args-} 0{-no results-} (sm_ret_off stack0)
CmmBranch{..} -> handleProcPoints
CmmCondBranch{..} -> handleProcPoints
CmmSwitch{..} -> handleProcPoints
where
lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
-> UniqSM
( [CmmNode O O]
, BlockEnv StackMap
, ByteOff
, CmmNode O C
, [CmmBlock]
)
lastCall cont_lbl cml_args cml_ret_args cml_ret_off
-- If we have already seen this continuation before, then -- If we have already seen this continuation before, then
-- we just have to make the stack look the same: -- we just have to make the stack look the same:
| Just cont_stack <- mapLookup cont_lbl stackmaps | Just cont_stack <- mapLookup cont_lbl stackmaps
-> =
return ( fixupStack stack0 cont_stack return ( fixupStack stack0 cont_stack
, stackmaps , stackmaps
, sp0 - sm_sp cont_stack , sp0 - sm_sp cont_stack
...@@ -288,7 +348,7 @@ handleLastNode procpoints liveness cont_info stackmaps ...@@ -288,7 +348,7 @@ handleLastNode procpoints liveness cont_info stackmaps
-- a continuation we haven't seen before: -- a continuation we haven't seen before:
-- allocate the stack frame for it. -- allocate the stack frame for it.
| otherwise -> do | otherwise = do
-- get the set of LocalRegs live in the continuation -- get the set of LocalRegs live in the continuation
let target_live = mapFindWithDefault Set.empty cont_lbl let target_live = mapFindWithDefault Set.empty cont_lbl
...@@ -328,11 +388,7 @@ handleLastNode procpoints liveness cont_info stackmaps ...@@ -328,11 +388,7 @@ handleLastNode procpoints liveness cont_info stackmaps
, [] -- no new blocks , [] -- no new blocks
) )
CmmBranch{..} -> handleProcPoints
CmmCondBranch{..} -> handleProcPoints
CmmSwitch{..} -> handleProcPoints
where
handleProcPoints :: UniqSM ( [CmmNode O O] handleProcPoints :: UniqSM ( [CmmNode O O]
, BlockEnv StackMap , BlockEnv StackMap
, ByteOff , ByteOff
...@@ -350,7 +406,7 @@ handleLastNode procpoints liveness cont_info stackmaps ...@@ -350,7 +406,7 @@ handleLastNode procpoints liveness cont_info stackmaps
, mapSuccessors fix_lbl last , mapSuccessors fix_lbl last
, concat [ blk | (_,_,_,blk) <- pps ] ) , concat [ blk | (_,_,_,blk) <- pps ] )
-- For each proc point that is a successor of this block, we need to -- For each proc point that is a successor of this block
-- (a) if the proc point already has a stackmap, we need to -- (a) if the proc point already has a stackmap, we need to
-- shuffle the current stack to make it look the same. -- shuffle the current stack to make it look the same.
-- We have to insert a new block to make this happen. -- We have to insert a new block to make this happen.
...@@ -384,10 +440,6 @@ handleLastNode procpoints liveness cont_info stackmaps ...@@ -384,10 +440,6 @@ handleLastNode procpoints liveness cont_info stackmaps
return (l, tmp_lbl, stack3, [block]) return (l, tmp_lbl, stack3, [block])
passthrough :: BlockEnv StackMap
passthrough = mapFromList (zip (successors last) (repeat stack0))
-- | create a sequence of assignments to establish the new StackMap, -- | create a sequence of assignments to establish the new StackMap,
-- given the old StackMap. -- given the old StackMap.
fixupStack :: StackMap -> StackMap -> [CmmNode O O] fixupStack :: StackMap -> StackMap -> [CmmNode O O]
...@@ -414,29 +466,51 @@ OLD area. ...@@ -414,29 +466,51 @@ OLD area.
SpArgs(L) is the size of the young area for L, i.e. the number of SpArgs(L) is the size of the young area for L, i.e. the number of
arguments. arguments.
- in block L, each reference to (OldArea[N]) turns into - in block L, each reference to [old + N] turns into
[Sp + Sp(L) - N] [Sp + Sp(L) - N]
- in block L, each reference to (Young(L')[N]) turns into - in block L, each reference to [young(L') + N] turns into
[Sp + Sp(L) - Sp(L') + SpArgs(L') - N] [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
- be careful with the last node of each block: Sp has already been adjusted - be careful with the last node of each block: Sp has already been adjusted
to be Sp + Sp(L) - Sp(L') to be Sp + Sp(L) - Sp(L')
-} -}
areaToSp :: ByteOff -> ByteOff -> BlockEnv StackMap -> CmmExpr -> CmmExpr areaToSp :: ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
areaToSp sp_old _sp_hwm stackmaps (CmmStackSlot area n) = areaToSp sp_old _sp_hwm area_off (CmmStackSlot area n) =
cmmOffset (CmmReg spReg) (sp_old - area_off - n) cmmOffset (CmmReg spReg) (sp_old - area_off area - n)
where
area_off = case area of
Old -> 0
Young l ->
case mapLookup l stackmaps of
Just sm -> sm_sp sm - sm_args sm
Nothing -> pprPanic "areaToSp(2)" (ppr l)
areaToSp _ sp_hwm _ (CmmLit CmmHighStackMark) = CmmLit (mkIntCLit sp_hwm) areaToSp _ sp_hwm _ (CmmLit CmmHighStackMark) = CmmLit (mkIntCLit sp_hwm)
areaToSp _ _ _ (CmmMachOp (MO_U_Lt _) -- Note [null stack check]
[CmmMachOp (MO_Sub _)
[ CmmReg (CmmGlobal Sp)
, CmmLit (CmmInt 0 _)],
CmmReg (CmmGlobal SpLim)]) = CmmLit (CmmInt 0 wordWidth)
areaToSp _ _ _ other = other areaToSp _ _ _ other = other
-- Note [null stack check]
--
-- If the high-water Sp is zero, then we end up with
--
-- if (Sp - 0 < SpLim) then .. else ..
--
-- and possibly some dead code for the failure case. Optimising this
-- away depends on knowing that SpLim <= Sp, so it is really the job
-- of the stack layout algorithm, hence we do it now. This is also
-- convenient because control-flow optimisation later will drop the
-- dead code.
optStackCheck :: CmmNode O C -> CmmNode O C
optStackCheck n = -- Note [null stack check]
case n of
CmmCondBranch (CmmLit (CmmInt 0 _)) _true false -> CmmBranch false
other -> other
getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc)
getAreaOff _ Old = 0
getAreaOff stackmaps (Young l) =
case mapLookup l stackmaps of
Just sm -> sm_sp sm - sm_args sm
Nothing -> pprPanic "getAreaOff" (ppr l)
-- ----------------------------------------------------------------------------- -- -----------------------------------------------------------------------------
-- Saving live registers -- Saving live registers
...@@ -495,7 +569,7 @@ allocate ret_off live stackmap@StackMap{ sm_sp = sp0 ...@@ -495,7 +569,7 @@ allocate ret_off live stackmap@StackMap{ sm_sp = sp0
select_save :: [LocalReg] -> [StackSlot] select_save :: [LocalReg] -> [StackSlot]
-> Maybe ([StackSlot], LocalReg, [LocalReg]) -> Maybe ([StackSlot], LocalReg, [LocalReg])
select_save regs stack = go regs [] select_save regs stack = go regs []
where go [] no_fit = Nothing where go [] _no_fit = Nothing
go (r:rs) no_fit go (r:rs) no_fit
| Just rest <- dropEmpty words stack | Just rest <- dropEmpty words stack
= Just (replicate words Occupied ++ rest, r, rs++no_fit) = Just (replicate words Occupied ++ rest, r, rs++no_fit)
...@@ -514,16 +588,14 @@ allocate ret_off live stackmap@StackMap{ sm_sp = sp0 ...@@ -514,16 +588,14 @@ allocate ret_off live stackmap@StackMap{ sm_sp = sp0
push r (n, assigs, regs) push r (n, assigs, regs)
= (n', assig : assigs, (r,(r,n')) : regs) = (n', assig : assigs, (r,(r,n')) : regs)
where where
w = typeWidth (localRegType r) n' = n + localRegBytes r
n' = n + widthInBytes w
assig = CmmStore (CmmStackSlot Old n') assig = CmmStore (CmmStackSlot Old n')
(CmmReg (CmmLocal r)) (CmmReg (CmmLocal r))
trim_sp trim_sp
| not (null push_regs) = push_sp | not (null push_regs) = push_sp
| otherwise | otherwise
= case break notEmpty save_stack of = n `plusW` (- length (takeWhile isEmpty save_stack))
(empties, rest) -> n `plusW` (- length empties)
final_regs = regs1 `addListToUFM` push_regs final_regs = regs1 `addListToUFM` push_regs
`addListToUFM` save_regs `addListToUFM` save_regs
...@@ -532,10 +604,11 @@ allocate ret_off live stackmap@StackMap{ sm_sp = sp0 ...@@ -532,10 +604,11 @@ allocate ret_off live stackmap@StackMap{ sm_sp = sp0
-- XXX should be an assert -- XXX should be an assert
if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else
if (trim_sp .&. (wORD_SIZE - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else
( stackmap { sm_regs = final_regs , sm_sp = trim_sp } ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
, push_assigs ++ save_assigs ) , push_assigs ++ save_assigs )
-- ----------------------------------------------------------------------------- -- -----------------------------------------------------------------------------
-- Update info tables to include stack liveness -- Update info tables to include stack liveness
...@@ -555,6 +628,9 @@ setInfoTableStackMap stackmaps ...@@ -555,6 +628,9 @@ setInfoTableStackMap stackmaps
Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl) Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl)
Just sm -> stackMapToLiveness sm Just sm -> stackMapToLiveness sm
setInfoTableStackMap _ d = d
stackMapToLiveness :: StackMap -> Liveness stackMapToLiveness :: StackMap -> Liveness
stackMapToLiveness StackMap{..} = stackMapToLiveness StackMap{..} =
reverse $ Array.elems $ reverse $ Array.elems $
...@@ -573,17 +649,14 @@ plusW b w = b + w * wORD_SIZE ...@@ -573,17 +649,14 @@ plusW b w = b + w * wORD_SIZE
dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot] dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
dropEmpty 0 ss = Just ss dropEmpty 0 ss = Just ss
dropEmpty n (Empty : ss) = dropEmpty (n-1) ss dropEmpty n (Empty : ss) = dropEmpty (n-1) ss
dropEmpty n _ = Nothing dropEmpty _ _ = Nothing
pushEmpty :: ByteOff -> [StackSlot] -> [StackSlot]
pushEmpty n stack = replicate (toWords n) Empty ++ stack
notEmpty :: StackSlot -> Bool isEmpty :: StackSlot -> Bool
notEmpty Empty = False isEmpty Empty = True
notEmpty _ = True isEmpty _ = False
localRegBytes :: LocalReg -> ByteOff localRegBytes :: LocalReg -> ByteOff
localRegBytes r = widthInBytes (typeWidth (localRegType r)) localRegBytes r = roundUpToWords (widthInBytes (typeWidth (localRegType r)))
localRegWords :: LocalReg -> WordOff localRegWords :: LocalReg -> WordOff
localRegWords = toWords . localRegBytes localRegWords = toWords . localRegBytes
......
...@@ -21,6 +21,7 @@ module SMRep ( ...@@ -21,6 +21,7 @@ module SMRep (
StgWord, StgHalfWord, StgWord, StgHalfWord,
hALF_WORD_SIZE, hALF_WORD_SIZE_IN_BITS, hALF_WORD_SIZE, hALF_WORD_SIZE_IN_BITS,
WordOff, ByteOff, WordOff, ByteOff,
roundUpToWords,
-- * Closure repesentation -- * Closure repesentation
SMRep(..), -- CmmInfo sees the rep; no one else does SMRep(..), -- CmmInfo sees the rep; no one else does
...@@ -57,6 +58,7 @@ import FastString ...@@ -57,6 +58,7 @@ import FastString
import Data.Char( ord ) import Data.Char( ord )
import Data.Word import Data.Word
import Data.Bits
\end{code} \end{code}
...@@ -69,6 +71,9 @@ import Data.Word ...@@ -69,6 +71,9 @@ import Data.Word
\begin{code} \begin{code}
type WordOff = Int -- Word offset, or word count type WordOff = Int -- Word offset, or word count
type ByteOff = Int -- Byte offset, or byte count type ByteOff = Int -- Byte offset, or byte count
roundUpToWords :: ByteOff -> ByteOff
roundUpToWords n = (n + (wORD_SIZE - 1)) .&. (complement (wORD_SIZE - 1))
\end{code} \end{code}
StgWord is a type representing an StgWord on the target platform. StgWord is a type representing an StgWord on the target platform.
...@@ -93,6 +98,7 @@ hALF_WORD_SIZE_IN_BITS = 32 ...@@ -93,6 +98,7 @@ hALF_WORD_SIZE_IN_BITS = 32
#endif #endif
\end{code} \end{code}
%************************************************************************ %************************************************************************
%* * %* *
\subsubsection[SMRep-datatype]{@SMRep@---storage manager representation} \subsubsection[SMRep-datatype]{@SMRep@---storage manager representation}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment