From 7930221e2bb11b46ae32f0dda8f945b2893637c6 Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Wed, 1 Aug 2012 10:36:08 +0100 Subject: [PATCH] Don't shortcut call-returns when not splitting proc points See Note [shortcut call returns] --- compiler/cmm/CmmContFlowOpt.hs | 54 ++++++++++++++++++++++++++++------ compiler/cmm/CmmPipeline.hs | 9 ++++-- 2 files changed, 51 insertions(+), 12 deletions(-) diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index b2dbef49da..964f9f56da 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -24,12 +24,12 @@ import Prelude hiding (succ, unzip, zip) -- ----------------------------------------------------------------------------- -cmmCfgOpts :: CmmGraph -> CmmGraph -cmmCfgOpts g = fst (blockConcat g) +cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph +cmmCfgOpts split g = fst (blockConcat split g) -cmmCfgOptsProc :: CmmDecl -> CmmDecl -cmmCfgOptsProc (CmmProc info lbl g) = CmmProc info' lbl g' - where (g', env) = blockConcat g +cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl +cmmCfgOptsProc split (CmmProc info lbl g) = CmmProc info' lbl g' + where (g', env) = blockConcat split g info' = info{ info_tbls = new_info_tbls } new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info))) @@ -44,7 +44,7 @@ cmmCfgOptsProc (CmmProc info lbl g) = CmmProc info' lbl g' | otherwise = (k,info) -cmmCfgOptsProc top = top +cmmCfgOptsProc _ top = top ----------------------------------------------------------------------------- @@ -54,6 +54,7 @@ cmmCfgOptsProc top = top ----------------------------------------------------------------------------- -- This optimisation does three things: +-- -- - If a block finishes with an unconditional branch, then we may -- be able to concatenate the block it points to and remove the -- branch. We do this either if the destination block is small @@ -63,6 +64,7 @@ cmmCfgOptsProc top = top -- - If a block finishes in a call whose continuation block is a -- goto, then we can shortcut the destination, making the -- continuation block the destination of the goto. +-- (but see Note [shortcut call returns]) -- -- - removes any unreachable blocks from the graph. This is a side -- effect of starting with a postorder DFS traversal of the graph @@ -93,8 +95,8 @@ cmmCfgOptsProc top = top -- which labels we have renamed and apply the mapping at the end -- with replaceLabels. -blockConcat :: CmmGraph -> (CmmGraph, BlockEnv BlockId) -blockConcat g@CmmGraph { g_entry = entry_id } +blockConcat :: Bool -> CmmGraph -> (CmmGraph, BlockEnv BlockId) +blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map) where -- we might be able to shortcut the entry BlockId itself @@ -125,7 +127,8 @@ blockConcat g@CmmGraph { g_entry = entry_id } -- calls: if we can shortcut the continuation label, then -- we must *also* remember to substitute for the label in the -- code, because we will push it somewhere. - | Just b' <- callContinuation_maybe last + | splitting_procs -- Note [shortcut call returns] + , Just b' <- callContinuation_maybe last , Just blk' <- mapLookup b' blocks , Just dest <- canShortcut blk' = (blocks, mapInsert b' dest shortcut_map) @@ -184,6 +187,39 @@ okToDuplicate block -- has a CmmExpr inside it. _otherwise -> False + +{- Note [shortcut call returns] + +Consider this code that you might get from a recursive let-no-escape: + + goto L1 + L1: + if (Hp > HpLim) then L2 else L3 + L2: + call stg_gc_noregs returns to L4 + L4: + goto L1 + L3: + ... + goto L1 + +Then the control-flow optimiser shortcuts L4. But that turns L1 +into the call-return proc point, and every iteration of the loop +has to shuffle variables to and from the stack. So we must *not* +shortcut L4. + +Moreover not shortcutting call returns is probably fine. If L4 can +concat with its branch target then it will still do so. And we +save some compile time because we don't have to traverse all the +code in replaceLabels. + +However, we probably do want to do this if we are splitting proc +points, because L1 will be a proc-point anyway, so merging it with L4 +reduces the number of proc points. Unfortunately recursive +let-no-escapes won't generate very good code with proc-point +splitting on - we should probably +-} + ------------------------------------------------------------------------ -- Map over the CmmGraph, replacing each label with its mapping in the -- supplied BlockEnv. diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs index f96e77bda7..03d11f258b 100644 --- a/compiler/cmm/CmmPipeline.hs +++ b/compiler/cmm/CmmPipeline.hs @@ -56,7 +56,8 @@ cpsTop _ p@(CmmData {}) = return (mapEmpty, [p]) cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) l g) = do ----------- Control-flow optimisations --------------- - g <- {-# SCC "cmmCfgOpts(1)" #-} return $ cmmCfgOpts g + g <- {-# SCC "cmmCfgOpts(1)" #-} + return $ cmmCfgOpts splitting_proc_points g dump Opt_D_dump_cmmz_cfg "Post control-flow optimsations" g ----------- Eliminate common blocks ------------------- @@ -114,7 +115,8 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) dumps Opt_D_dump_cmmz_info "after setInfoTableStackMap" gs ----------- Control-flow optimisations --------------- - gs <- {-# SCC "cmmCfgOpts(2)" #-} return $ map cmmCfgOptsProc gs + gs <- {-# SCC "cmmCfgOpts(2)" #-} + return $ map (cmmCfgOptsProc splitting_proc_points) gs dumps Opt_D_dump_cmmz_cfg "Post control-flow optimsations" gs return (cafEnv, gs) @@ -129,7 +131,8 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) dump' Opt_D_dump_cmmz_info "after setInfoTableStackMap" g ----------- Control-flow optimisations --------------- - g <- {-# SCC "cmmCfgOpts(2)" #-} return $ cmmCfgOptsProc g + g <- {-# SCC "cmmCfgOpts(2)" #-} + return $ cmmCfgOptsProc splitting_proc_points g dump' Opt_D_dump_cmmz_cfg "Post control-flow optimsations" g return (cafEnv, [g]) -- GitLab