Commit a66541af authored by Simon Marlow's avatar Simon Marlow

Float out partial applications

This fixes at least one case of performance regression in 7.0, and
is nice win on nofib:

        Program           Size    Allocs   Runtime   Elapsed
            Min          +0.3%    -63.0%    -38.5%    -38.7%
            Max          +1.2%     +0.2%     +0.9%     +0.9%
 Geometric Mean          +0.6%     -3.0%     -6.4%     -6.6%
parent 5e86045a
......@@ -59,6 +59,9 @@ module CoreSyn (
-- * Annotated expression data types
AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt,
-- ** Operations on annotated expressions
collectAnnArgs,
-- ** Operations on annotations
deAnnotate, deAnnotate', deAnnAlt, collectAnnBndrs,
......@@ -1141,6 +1144,17 @@ data AnnBind bndr annot
| AnnRec [(bndr, AnnExpr bndr annot)]
\end{code}
\begin{code}
-- | Takes a nested application expression and returns the the function
-- being applied and the arguments to which it is applied
collectAnnArgs :: AnnExpr b a -> (AnnExpr b a, [AnnExpr b a])
collectAnnArgs expr
= go expr []
where
go (_, AnnApp f a) as = go f (a:as)
go e as = (e, as)
\end{code}
\begin{code}
deAnnotate :: AnnExpr bndr annot -> Expr bndr
deAnnotate (_, e) = deAnnotate' e
......
......@@ -304,8 +304,10 @@ data SimplifierSwitch
\begin{code}
data FloatOutSwitches = FloatOutSwitches {
floatOutLambdas :: Bool, -- ^ True <=> float lambdas to top level
floatOutConstants :: Bool -- ^ True <=> float constants to top level,
floatOutConstants :: Bool, -- ^ True <=> float constants to top level,
-- even if they do not escape a lambda
floatOutPartialApplications :: Bool -- ^ True <=> float out partial applications
-- based on arity information.
}
instance Outputable FloatOutSwitches where
ppr = pprFloatOutSwitches
......@@ -320,10 +322,6 @@ pprFloatOutSwitches sw = pp_not (floatOutLambdas sw) <+> text "lambdas" <> comma
-- | Switches that specify the minimum amount of floating out
-- gentleFloatOutSwitches :: FloatOutSwitches
-- gentleFloatOutSwitches = FloatOutSwitches False False
-- | Switches that do not specify floating out of lambdas, just of constants
constantsOnlyFloatOutSwitches :: FloatOutSwitches
constantsOnlyFloatOutSwitches = FloatOutSwitches False True
\end{code}
......@@ -420,14 +418,28 @@ getCoreToDo dflags
-- so that overloaded functions have all their dictionary lambdas manifest
runWhen do_specialise CoreDoSpecialising,
runWhen full_laziness (CoreDoFloatOutwards constantsOnlyFloatOutSwitches),
runWhen full_laziness $
CoreDoFloatOutwards FloatOutSwitches {
floatOutLambdas = False,
floatOutConstants = True,
floatOutPartialApplications = False },
-- Was: gentleFloatOutSwitches
-- I have no idea why, but not floating constants to top level is
-- very bad in some cases.
--
-- I have no idea why, but not floating constants to
-- top level is very bad in some cases.
--
-- Notably: p_ident in spectral/rewrite
-- Changing from "gentle" to "constantsOnly" improved
-- rewrite's allocation by 19%, and made 0.0% difference
-- to any other nofib benchmark
-- Changing from "gentle" to "constantsOnly"
-- improved rewrite's allocation by 19%, and
-- made 0.0% difference to any other nofib
-- benchmark
--
-- Not doing floatOutPartialApplications yet, we'll do
-- that later on when we've had a chance to get more
-- accurate arity information. In fact it makes no
-- difference at all to performance if we do it here,
-- but maybe we save some unnecessary to-and-fro in
-- the simplifier.
runWhen do_float_in CoreDoFloatInwards,
......@@ -452,8 +464,11 @@ getCoreToDo dflags
simpl_phase 0 ["post-worker-wrapper"] max_iter
]),
runWhen full_laziness
(CoreDoFloatOutwards constantsOnlyFloatOutSwitches),
runWhen full_laziness $
CoreDoFloatOutwards FloatOutSwitches {
floatOutLambdas = False,
floatOutConstants = True,
floatOutPartialApplications = True },
-- nofib/spectral/hartel/wang doubles in speed if you
-- do full laziness late in the day. It only happens
-- after fusion and other stuff, so the early pass doesn't
......
......@@ -60,11 +60,7 @@ import CoreArity ( exprBotStrictness_maybe )
import CoreFVs -- all of it
import CoreSubst ( Subst, emptySubst, extendInScope, extendInScopeList,
extendIdSubst, cloneIdBndr, cloneRecIdBndrs )
import Id ( idType, mkLocalIdWithInfo, mkSysLocal, isOneShotLambda,
zapDemandIdInfo, transferPolyIdInfo,
idSpecialisation, idUnfolding, setIdInfo,
setIdStrictness, setIdArity
)
import Id
import IdInfo
import Var
import VarSet
......@@ -250,10 +246,42 @@ lvlExpr _ _ ( _, AnnType ty) = return (Type ty)
lvlExpr _ env (_, AnnVar v) = return (lookupVar env v)
lvlExpr _ _ (_, AnnLit lit) = return (Lit lit)
lvlExpr ctxt_lvl env (_, AnnApp fun arg) = do
fun' <- lvlExpr ctxt_lvl env fun -- We don't do MFE on partial applications
arg' <- lvlMFE False ctxt_lvl env arg
return (App fun' arg')
lvlExpr ctxt_lvl env expr@(_, AnnApp _ _) = do
let
(fun, args) = collectAnnArgs expr
--
case fun of
-- float out partial applications. This is very beneficial
-- in some cases (-7% runtime -4% alloc over nofib -O2).
-- In order to float a PAP, there must be a function at the
-- head of the application, and the application must be
-- over-saturated with respect to the function's arity.
(_, AnnVar f) | floatPAPs env &&
arity > 0 && arity < n_val_args ->
do
let (lapp, rargs) = left (n_val_args - arity) expr []
rargs' <- mapM (lvlMFE False ctxt_lvl env) rargs
lapp' <- lvlMFE False ctxt_lvl env lapp
return (foldl App lapp' rargs')
where
n_val_args = count (isValArg . deAnnotate) args
arity = idArity f
-- separate out the PAP that we are floating from the extra
-- arguments, by traversing the spine until we have collected
-- (n_val_args - arity) value arguments.
left 0 e rargs = (e, rargs)
left n (_, AnnApp f a) rargs
| isValArg (deAnnotate a) = left (n-1) f (a:rargs)
| otherwise = left n f (a:rargs)
left _ _ _ = panic "SetLevels.lvlExpr.left"
-- No PAPs that we can float: just carry on with the
-- arguments and the function.
_otherwise -> do
args' <- mapM (lvlMFE False ctxt_lvl env) args
fun' <- lvlExpr ctxt_lvl env fun
return (foldl App fun' args')
lvlExpr ctxt_lvl env (_, AnnNote note expr) = do
expr' <- lvlExpr ctxt_lvl env expr
......@@ -741,6 +769,9 @@ floatLams (fos, _, _, _) = floatOutLambdas fos
floatConsts :: LevelEnv -> Bool
floatConsts (fos, _, _, _) = floatOutConstants fos
floatPAPs :: LevelEnv -> Bool
floatPAPs (fos, _, _, _) = floatOutPartialApplications fos
extendLvlEnv :: LevelEnv -> [TaggedBndr Level] -> LevelEnv
-- Used when *not* cloning
extendLvlEnv (float_lams, lvl_env, subst, id_env) prs
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment