diff --git a/compiler/GHC/Cmm/Pipeline.hs b/compiler/GHC/Cmm/Pipeline.hs index 481f2bb545b0fb12367131c75f37de643a667378..7de0ce0cb83659301a3193a1a845875b0c7914b5 100644 --- a/compiler/GHC/Cmm/Pipeline.hs +++ b/compiler/GHC/Cmm/Pipeline.hs @@ -142,7 +142,7 @@ cpsTop logger platform dflags proc = ----------- Control-flow optimisations ----------------------------- g <- {-# SCC "cmmCfgOpts(2)" #-} - return $ if optLevel dflags >= 1 + return $ if gopt Opt_CmmControlFlow dflags then map (cmmCfgOptsProc splitting_proc_points) g else g g <- return (map removeUnreachableBlocksProc g) diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs index ee79e28b60db7fa0aa4fcfd5658ddd84a5732209..9c4ad7d3a416eefb3853b073c41a5b868abb4953 100644 --- a/compiler/GHC/Core/Opt/Pipeline.hs +++ b/compiler/GHC/Core/Opt/Pipeline.hs @@ -221,10 +221,8 @@ getCoreToDo logger dflags core_todo = if opt_level == 0 then - [ static_ptrs_float_outwards, - CoreDoSimplify max_iter - (base_mode { sm_phase = FinalPhase - , sm_names = ["Non-opt simplification"] }) + [ static_ptrs_float_outwards + , simplify "Non-opt simplification" , add_caller_ccs ] diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs index 02b42b1dcd655a8eaa21bb505399a2f7c87819e2..e7205a8620ea7d2751e16da4cb25cbc4b43c215d 100644 --- a/compiler/GHC/Driver/Flags.hs +++ b/compiler/GHC/Driver/Flags.hs @@ -212,6 +212,7 @@ data GeneralFlag | Opt_CmmSink | Opt_CmmStaticPred | Opt_CmmElimCommonBlocks + | Opt_CmmControlFlow | Opt_AsmShortcutting | Opt_OmitYields | Opt_FunToThunk -- allow GHC.Core.Opt.WorkWrap.Utils.mkWorkerArgs to remove all value lambdas @@ -227,6 +228,7 @@ data GeneralFlag | Opt_AlignmentSanitisation | Opt_CatchBottoms | Opt_NumConstantFolding + | Opt_FastPAPCalls -- #6084 -- PreInlining is on by default. The option is there just to see how -- bad things get if you turn it off! diff --git a/compiler/GHC/Driver/Pipeline/Execute.hs b/compiler/GHC/Driver/Pipeline/Execute.hs index 08a63c01252ffef62ef1d0d1ecb2b5e84b615e82..133f3005b2ae886ced627fb687b5fc8758e5c6c5 100644 --- a/compiler/GHC/Driver/Pipeline/Execute.hs +++ b/compiler/GHC/Driver/Pipeline/Execute.hs @@ -216,7 +216,7 @@ runLlvmLlcPhase pipe_env hsc_env input_fn = do -- let dflags = hsc_dflags hsc_env logger = hsc_logger hsc_env - llvmOpts = case optLevel dflags of + llvmOpts = case llvmOptLevel dflags of 0 -> "-O1" -- required to get the non-naive reg allocator. Passing -regalloc=greedy is not sufficient. 1 -> "-O1" _ -> "-O2" @@ -250,7 +250,7 @@ runLlvmOptPhase pipe_env hsc_env input_fn = do logger = hsc_logger hsc_env let -- we always (unless -optlo specified) run Opt since we rely on it to -- fix up some pretty big deficiencies in the code we generate - optIdx = max 0 $ min 2 $ optLevel dflags -- ensure we're in [0,2] + optIdx = max 0 $ min 2 $ llvmOptLevel dflags -- ensure we're in [0,2] llvmOpts = case lookup optIdx $ llvmPasses $ llvmConfig dflags of Just passes -> passes Nothing -> panic ("runPhase LlvmOpt: llvm-passes file " @@ -410,8 +410,8 @@ runCcPhase cc_phase pipe_env hsc_env input_fn = do | otherwise = [] - let cc_opt | optLevel dflags >= 2 = [ "-O2" ] - | optLevel dflags >= 1 = [ "-O" ] + let cc_opt | llvmOptLevel dflags >= 2 = [ "-O2" ] + | llvmOptLevel dflags >= 1 = [ "-O" ] | otherwise = [] -- Decide next phase diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs index 92149c96f4e075588509784aa8bfe548e2116067..2a4d5aae3cb9a1e6310b122ed3fbd7cca3226a17 100644 --- a/compiler/GHC/Driver/Session.hs +++ b/compiler/GHC/Driver/Session.hs @@ -447,6 +447,7 @@ data DynFlags = DynFlags { llvmConfig :: LlvmConfig, -- ^ N.B. It's important that this field is lazy since we load the LLVM -- configuration lazily. See Note [LLVM Configuration] in "GHC.SysTools". + llvmOptLevel :: Int, -- ^ LLVM optimisation level verbosity :: Int, -- ^ Verbosity level: see Note [Verbosity levels] optLevel :: Int, -- ^ Optimisation level debugLevel :: Int, -- ^ How much debug information to produce @@ -1191,6 +1192,7 @@ defaultDynFlags mySettings llvmConfig = -- See Note [LLVM configuration]. llvmConfig = llvmConfig, + llvmOptLevel = 0, -- ghc -M values depMakefile = "Makefile", @@ -1779,7 +1781,7 @@ setInteractivePrint f d = d { interactivePrint = Just f} updOptLevel :: Int -> DynFlags -> DynFlags -- ^ Sets the 'DynFlags' to be appropriate to the optimisation level updOptLevel n dfs - = dfs2{ optLevel = final_n } + = dfs2{ optLevel = final_n, llvmOptLevel = final_n } where final_n = max 0 (min 2 n) -- Clamp to 0 <= n <= 2 dfs1 = foldr (flip gopt_unset) dfs remove_gopts @@ -3375,6 +3377,8 @@ fFlagsDeps = [ flagSpec "catch-bottoms" Opt_CatchBottoms, flagSpec "alignment-sanitisation" Opt_AlignmentSanitisation, flagSpec "num-constant-folding" Opt_NumConstantFolding, + flagSpec "fast-pap-calls" Opt_FastPAPCalls, + flagSpec "cmm-control-flow" Opt_CmmControlFlow, flagSpec "show-warning-groups" Opt_ShowWarnGroups, flagSpec "hide-source-paths" Opt_HideSourcePaths, flagSpec "show-loaded-modules" Opt_ShowLoadedModules, @@ -3853,6 +3857,7 @@ optLevelFlags -- see Note [Documenting optimisation flags] , ([1,2], Opt_CSE) , ([1,2], Opt_StgCSE) , ([2], Opt_StgLiftLams) + , ([1,2], Opt_CmmControlFlow) , ([1,2], Opt_EnableRewriteRules) -- Off for -O0. Otherwise we desugar list literals @@ -3878,6 +3883,7 @@ optLevelFlags -- see Note [Documenting optimisation flags] , ([2], Opt_LiberateCase) , ([2], Opt_SpecConstr) + , ([2], Opt_FastPAPCalls) -- , ([2], Opt_RegsGraph) -- RegsGraph suffers performance regression. See #7679 -- , ([2], Opt_StaticArgumentTransformation) diff --git a/compiler/GHC/StgToCmm/Layout.hs b/compiler/GHC/StgToCmm/Layout.hs index 0a74291ebdda83d8c40b39c9c90c5553434d82d4..6924e304831caf3937d3903a765374c672027d6b 100644 --- a/compiler/GHC/StgToCmm/Layout.hs +++ b/compiler/GHC/StgToCmm/Layout.hs @@ -212,7 +212,7 @@ slowCall fun stg_args -- Note [avoid intermediate PAPs] let n_args = length stg_args - if n_args > arity && optLevel dflags >= 2 + if n_args > arity && gopt Opt_FastPAPCalls dflags then do ptr_opts <- getPtrOpts funv <- (CmmReg . CmmLocal) `fmap` assignTemp fun diff --git a/docs/users_guide/expected-undocumented-flags.txt b/docs/users_guide/expected-undocumented-flags.txt index b155c82505c46e5fc44855c8e8c0e52d97f2394a..7af6d42080bc7cfd295f8a6793e1e0918ae40ab1 100644 --- a/docs/users_guide/expected-undocumented-flags.txt +++ b/docs/users_guide/expected-undocumented-flags.txt @@ -41,6 +41,7 @@ -fdiagnostics-color=never -fembed-manifest -fextended-default-rules +-ffast-pap-calls -fffi -ffi -ffloat-all-lams diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst index 6d33c5b5bc5f12818d26621c859ccd133ff22d7a..5fa9a81a5e185ca2edfeeaaa680b58b2f92ae32d 100644 --- a/docs/users_guide/using-optimisation.rst +++ b/docs/users_guide/using-optimisation.rst @@ -227,6 +227,17 @@ by saying ``-fno-wombat``. loops and hot code paths. This information is then used by the register allocation and code layout passes. +.. ghc-flag:: -fcmm-control-flow + :shortdesc: Enable control flow optimisation in the Cmm backend. Implied by :ghc-flag:`-O`. + :type: dynamic + :reverse: -fno-cmm-control-flow + :category: + + :default: on + + Enables some control flow optimisations in the Cmm code + generator, merging basic blocks and avoiding jumps right after jumps. + .. ghc-flag:: -fasm-shortcutting :shortdesc: Enable shortcutting on assembly. Implied by :ghc-flag:`-O2`. :type: dynamic @@ -1554,4 +1565,3 @@ by saying ``-fno-wombat``. This flag sets the size (in bytes) threshold above which the second approach is used. You can disable the second approach entirely by setting the threshold to 0. -