Commit a66541af authored by Simon Marlow's avatar Simon Marlow
Browse files

Float out partial applications

This fixes at least one case of performance regression in 7.0, and
is nice win on nofib:

        Program           Size    Allocs   Runtime   Elapsed
            Min          +0.3%    -63.0%    -38.5%    -38.7%
            Max          +1.2%     +0.2%     +0.9%     +0.9%
 Geometric Mean          +0.6%     -3.0%     -6.4%     -6.6%
parent 5e86045a
......@@ -59,6 +59,9 @@ module CoreSyn (
-- * Annotated expression data types
AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt,
-- ** Operations on annotated expressions
collectAnnArgs,
-- ** Operations on annotations
deAnnotate, deAnnotate', deAnnAlt, collectAnnBndrs,
......@@ -1141,6 +1144,17 @@ data AnnBind bndr annot
| AnnRec [(bndr, AnnExpr bndr annot)]
\end{code}
\begin{code}
-- | Takes a nested application expression and returns the the function
-- being applied and the arguments to which it is applied
collectAnnArgs :: AnnExpr b a -> (AnnExpr b a, [AnnExpr b a])
collectAnnArgs expr
= go expr []
where
go (_, AnnApp f a) as = go f (a:as)
go e as = (e, as)
\end{code}
\begin{code}
deAnnotate :: AnnExpr bndr annot -> Expr bndr
deAnnotate (_, e) = deAnnotate' e
......
......@@ -304,8 +304,10 @@ data SimplifierSwitch
\begin{code}
data FloatOutSwitches = FloatOutSwitches {
floatOutLambdas :: Bool, -- ^ True <=> float lambdas to top level
floatOutConstants :: Bool -- ^ True <=> float constants to top level,
floatOutConstants :: Bool, -- ^ True <=> float constants to top level,
-- even if they do not escape a lambda
floatOutPartialApplications :: Bool -- ^ True <=> float out partial applications
-- based on arity information.
}
instance Outputable FloatOutSwitches where
ppr = pprFloatOutSwitches
......@@ -320,10 +322,6 @@ pprFloatOutSwitches sw = pp_not (floatOutLambdas sw) <+> text "lambdas" <> comma
-- | Switches that specify the minimum amount of floating out
-- gentleFloatOutSwitches :: FloatOutSwitches
-- gentleFloatOutSwitches = FloatOutSwitches False False
-- | Switches that do not specify floating out of lambdas, just of constants
constantsOnlyFloatOutSwitches :: FloatOutSwitches
constantsOnlyFloatOutSwitches = FloatOutSwitches False True
\end{code}
......@@ -420,14 +418,28 @@ getCoreToDo dflags
-- so that overloaded functions have all their dictionary lambdas manifest
runWhen do_specialise CoreDoSpecialising,
runWhen full_laziness (CoreDoFloatOutwards constantsOnlyFloatOutSwitches),
runWhen full_laziness $
CoreDoFloatOutwards FloatOutSwitches {
floatOutLambdas = False,
floatOutConstants = True,
floatOutPartialApplications = False },
-- Was: gentleFloatOutSwitches
-- I have no idea why, but not floating constants to top level is
-- very bad in some cases.
--
-- I have no idea why, but not floating constants to
-- top level is very bad in some cases.
--
-- Notably: p_ident in spectral/rewrite
-- Changing from "gentle" to "constantsOnly" improved
-- rewrite's allocation by 19%, and made 0.0% difference
-- to any other nofib benchmark
-- Changing from "gentle" to "constantsOnly"
-- improved rewrite's allocation by 19%, and
-- made 0.0% difference to any other nofib
-- benchmark
--
-- Not doing floatOutPartialApplications yet, we'll do
-- that later on when we've had a chance to get more
-- accurate arity information. In fact it makes no
-- difference at all to performance if we do it here,
-- but maybe we save some unnecessary to-and-fro in
-- the simplifier.
runWhen do_float_in CoreDoFloatInwards,
......@@ -452,8 +464,11 @@ getCoreToDo dflags
simpl_phase 0 ["post-worker-wrapper"] max_iter
]),
runWhen full_laziness
(CoreDoFloatOutwards constantsOnlyFloatOutSwitches),
runWhen full_laziness $
CoreDoFloatOutwards FloatOutSwitches {
floatOutLambdas = False,
floatOutConstants = True,
floatOutPartialApplications = True },
-- nofib/spectral/hartel/wang doubles in speed if you
-- do full laziness late in the day. It only happens
-- after fusion and other stuff, so the early pass doesn't
......
......@@ -60,11 +60,7 @@ import CoreArity ( exprBotStrictness_maybe )
import CoreFVs -- all of it
import CoreSubst ( Subst, emptySubst, extendInScope, extendInScopeList,
extendIdSubst, cloneIdBndr, cloneRecIdBndrs )
import Id ( idType, mkLocalIdWithInfo, mkSysLocal, isOneShotLambda,
zapDemandIdInfo, transferPolyIdInfo,
idSpecialisation, idUnfolding, setIdInfo,
setIdStrictness, setIdArity
)
import Id
import IdInfo
import Var
import VarSet
......@@ -250,10 +246,42 @@ lvlExpr _ _ ( _, AnnType ty) = return (Type ty)
lvlExpr _ env (_, AnnVar v) = return (lookupVar env v)
lvlExpr _ _ (_, AnnLit lit) = return (Lit lit)
lvlExpr ctxt_lvl env (_, AnnApp fun arg) = do
fun' <- lvlExpr ctxt_lvl env fun -- We don't do MFE on partial applications
arg' <- lvlMFE False ctxt_lvl env arg
return (App fun' arg')
lvlExpr ctxt_lvl env expr@(_, AnnApp _ _) = do
let
(fun, args) = collectAnnArgs expr
--
case fun of
-- float out partial applications. This is very beneficial
-- in some cases (-7% runtime -4% alloc over nofib -O2).
-- In order to float a PAP, there must be a function at the
-- head of the application, and the application must be
-- over-saturated with respect to the function's arity.
(_, AnnVar f) | floatPAPs env &&
arity > 0 && arity < n_val_args ->
do
let (lapp, rargs) = left (n_val_args - arity) expr []
rargs' <- mapM (lvlMFE False ctxt_lvl env) rargs
lapp' <- lvlMFE False ctxt_lvl env lapp
return (foldl App lapp' rargs')
where
n_val_args = count (isValArg . deAnnotate) args
arity = idArity f
-- separate out the PAP that we are floating from the extra
-- arguments, by traversing the spine until we have collected
-- (n_val_args - arity) value arguments.
left 0 e rargs = (e, rargs)
left n (_, AnnApp f a) rargs
| isValArg (deAnnotate a) = left (n-1) f (a:rargs)
| otherwise = left n f (a:rargs)
left _ _ _ = panic "SetLevels.lvlExpr.left"
-- No PAPs that we can float: just carry on with the
-- arguments and the function.
_otherwise -> do
args' <- mapM (lvlMFE False ctxt_lvl env) args
fun' <- lvlExpr ctxt_lvl env fun
return (foldl App fun' args')
lvlExpr ctxt_lvl env (_, AnnNote note expr) = do
expr' <- lvlExpr ctxt_lvl env expr
......@@ -741,6 +769,9 @@ floatLams (fos, _, _, _) = floatOutLambdas fos
floatConsts :: LevelEnv -> Bool
floatConsts (fos, _, _, _) = floatOutConstants fos
floatPAPs :: LevelEnv -> Bool
floatPAPs (fos, _, _, _) = floatOutPartialApplications fos
extendLvlEnv :: LevelEnv -> [TaggedBndr Level] -> LevelEnv
-- Used when *not* cloning
extendLvlEnv (float_lams, lvl_env, subst, id_env) prs
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment