Commit cac2aca1 authored by simonpj@microsoft.com's avatar simonpj@microsoft.com

Major improvement to SpecConstr

This patch improves the SpecConstr pass, by 
  a) making it work with join points
  b) making it generate specialisations transitively

As part of it, SpecConstr now carries a substitution with it, which
runs over the whole program as it goes.  This turned out to be 
a big win; simplified the implementation quite a bit.

I have *disabled* the specialisation on lambdas; it's pretty fragile,
and sometimes generates more and more specialisations. Something to
come back to, perhaps.

I rejigged the flag-handling a bit.  Now the specification of passes
in DynFlags is a bit nicer; see
	- optLevelFlags top-level data structure
	- runWhen function
	- CoreDoPasses constructor

There are now command-line flags
	-fspec-constr
	-fliberate-case
	-fspec-threshold=N
which do the obvious thing.  -O2 switches on both spec-constr and liberate-case.
You can use -fno-liberate-case, -fno-spec-constr after -O2 to switch them off again.

The spec-threshold applies to both these transformations; default value 200 for now.



parent e9f23b4c
......@@ -16,7 +16,8 @@ module CoreSubst (
emptySubst, mkEmptySubst, mkSubst, substInScope, isEmptySubst,
extendIdSubst, extendIdSubstList, extendTvSubst, extendTvSubstList,
extendInScope, extendInScopeIds,
extendSubstList, zapSubstEnv,
extendInScope, extendInScopeList, extendInScopeIds,
isInScope,
-- Binders
......@@ -56,6 +57,7 @@ import FastTypes
\begin{code}
data Subst
= Subst InScopeSet -- Variables in in scope (both Ids and TyVars)
-- *after* applying the substitution
IdSubstEnv -- Substitution for Ids
TvSubstEnv -- Substitution for TyVars
......@@ -144,8 +146,8 @@ mkSubst in_scope tvs ids = Subst in_scope ids tvs
substInScope :: Subst -> InScopeSet
substInScope (Subst in_scope _ _) = in_scope
-- zapSubstEnv :: Subst -> Subst
-- zapSubstEnv (Subst in_scope _ _) = Subst in_scope emptyVarEnv emptyVarEnv
zapSubstEnv :: Subst -> Subst
zapSubstEnv (Subst in_scope _ _) = Subst in_scope emptyVarEnv emptyVarEnv
-- ToDo: add an ASSERT that fvs(subst-result) is already in the in-scope set
extendIdSubst :: Subst -> Id -> CoreExpr -> Subst
......@@ -160,6 +162,14 @@ extendTvSubst (Subst in_scope ids tvs) v r = Subst in_scope ids (extendVarEnv tv
extendTvSubstList :: Subst -> [(TyVar,Type)] -> Subst
extendTvSubstList (Subst in_scope ids tvs) prs = Subst in_scope ids (extendVarEnvList tvs prs)
extendSubstList :: Subst -> [(Var,CoreArg)] -> Subst
extendSubstList subst []
= subst
extendSubstList (Subst in_scope ids tvs) ((tv,Type ty):prs)
= ASSERT( isTyVar tv ) extendSubstList (Subst in_scope ids (extendVarEnv tvs tv ty)) prs
extendSubstList (Subst in_scope ids tvs) ((id,expr):prs)
= ASSERT( isId id ) extendSubstList (Subst in_scope (extendVarEnv ids id expr) tvs) prs
lookupIdSubst :: Subst -> Id -> CoreExpr
lookupIdSubst (Subst in_scope ids tvs) v
| not (isLocalId v) = Var v
......@@ -181,6 +191,11 @@ extendInScope (Subst in_scope ids tvs) v
= Subst (in_scope `extendInScopeSet` v)
(ids `delVarEnv` v) (tvs `delVarEnv` v)
extendInScopeList :: Subst -> [Var] -> Subst
extendInScopeList (Subst in_scope ids tvs) vs
= Subst (in_scope `extendInScopeSetList` vs)
(ids `delVarEnvList` vs) (tvs `delVarEnvList` vs)
extendInScopeIds :: Subst -> [Id] -> Subst
extendInScopeIds (Subst in_scope ids tvs) vs
= Subst (in_scope `extendInScopeSetList` vs)
......
......@@ -268,7 +268,6 @@ sizeExpr bOMB_OUT_SIZE top_args expr
-- The 1+ is a little discount for reduced allocation in the caller
alts_size tot_size _ = tot_size
-- gaw 2004
size_up (Case e _ _ alts) = nukeScrutDiscount (size_up e) `addSize`
foldr (addSize . size_up_alt) sizeZero alts
-- We don't charge for the case itself
......
......@@ -13,7 +13,7 @@ module CoreUtils (
mkIfThenElse, mkAltExpr, mkPiType, mkPiTypes,
-- Taking expressions apart
findDefault, findAlt, isDefaultAlt, mergeAlts,
findDefault, findAlt, isDefaultAlt, mergeAlts, trimConArgs,
-- Properties of expressions
exprType, coreAltType,
......@@ -314,6 +314,18 @@ mergeAlts (a1:as1) (a2:as2)
LT -> a1 : mergeAlts as1 (a2:as2)
EQ -> a1 : mergeAlts as1 as2 -- Discard a2
GT -> a2 : mergeAlts (a1:as1) as2
---------------------------------
trimConArgs :: AltCon -> [CoreArg] -> [CoreArg]
-- Given case (C a b x y) of
-- C b x y -> ...
-- we want to drop the leading type argument of the scrutinee
-- leaving the arguments to match agains the pattern
trimConArgs DEFAULT args = ASSERT( null args ) []
trimConArgs (LitAlt lit) args = ASSERT( null args ) []
trimConArgs (DataAlt dc) args = dropList (dataConUnivTyVars dc) args
\end{code}
......
......@@ -182,6 +182,8 @@ data DynFlag
| Opt_Strictness
| Opt_FullLaziness
| Opt_CSE
| Opt_LiberateCase
| Opt_SpecConstr
| Opt_IgnoreInterfacePragmas
| Opt_OmitInterfacePragmas
| Opt_DoLambdaEtaExpansion
......@@ -232,7 +234,8 @@ data DynFlags = DynFlags {
optLevel :: Int, -- optimisation level
maxSimplIterations :: Int, -- max simplifier iterations
ruleCheck :: Maybe String,
libCaseThreshold :: Int, -- Threshold for liberate-case
specThreshold :: Int, -- Threshold for function specialisation
stolen_x86_regs :: Int,
cmdlineHcIncludes :: [String], -- -#includes
......@@ -388,7 +391,7 @@ defaultDynFlags =
optLevel = 0,
maxSimplIterations = 4,
ruleCheck = Nothing,
libCaseThreshold = 20,
specThreshold = 200,
stolen_x86_regs = 4,
cmdlineHcIncludes = [],
importPaths = ["."],
......@@ -442,27 +445,14 @@ defaultDynFlags =
Opt_ImplicitPrelude,
Opt_MonomorphismRestriction,
Opt_Strictness,
-- strictness is on by default, but this only
-- applies to -O.
Opt_CSE, -- similarly for CSE.
Opt_FullLaziness, -- ...and for full laziness
Opt_DoLambdaEtaExpansion,
-- This one is important for a tiresome reason:
-- we want to make sure that the bindings for data
-- constructors are eta-expanded. This is probably
-- a good thing anyway, but it seems fragile.
Opt_DoAsmMangling,
-- and the default no-optimisation options:
Opt_IgnoreInterfacePragmas,
Opt_OmitInterfacePragmas,
-- on by default:
Opt_PrintBindResult
] ++ standardWarnings,
Opt_PrintBindResult ]
++ [f | (ns,f) <- optLevelFlags, 0 `elem` ns]
-- The default -O0 options
++ standardWarnings,
log_action = \severity srcSpan style msg ->
case severity of
......@@ -564,25 +554,29 @@ updOptLevel n dfs
dfs1 = foldr (flip dopt_unset) dfs remove_dopts
dfs2 = foldr (flip dopt_set) dfs1 extra_dopts
extra_dopts
| n == 0 = opt_0_dopts
| otherwise = opt_1_dopts
remove_dopts
| n == 0 = opt_1_dopts
| otherwise = opt_0_dopts
extra_dopts = [ f | (ns,f) <- optLevelFlags, n `elem` ns ]
remove_dopts = [ f | (ns,f) <- optLevelFlags, n `notElem` ns ]
opt_0_dopts = [
Opt_IgnoreInterfacePragmas,
Opt_OmitInterfacePragmas
optLevelFlags :: [([Int], DynFlag)]
optLevelFlags
= [ ([0], Opt_IgnoreInterfacePragmas)
, ([0], Opt_OmitInterfacePragmas)
, ([1,2], Opt_IgnoreAsserts)
, ([1,2], Opt_DoEtaReduction)
, ([1,2], Opt_CaseMerge)
, ([1,2], Opt_Strictness)
, ([1,2], Opt_CSE)
, ([1,2], Opt_FullLaziness)
, ([2], Opt_LiberateCase)
, ([2], Opt_SpecConstr)
, ([0,1,2], Opt_DoLambdaEtaExpansion)
-- This one is important for a tiresome reason:
-- we want to make sure that the bindings for data
-- constructors are eta-expanded. This is probably
-- a good thing anyway, but it seems fragile.
]
opt_1_dopts = [
Opt_IgnoreAsserts,
Opt_DoEtaReduction,
Opt_CaseMerge
]
-- -----------------------------------------------------------------------------
-- Standard sets of warning options
......@@ -638,8 +632,8 @@ data CoreToDo -- These are diff core-to-core passes,
| CoreCSE
| CoreDoRuleCheck Int{-CompilerPhase-} String -- Check for non-application of rules
-- matching this string
| CoreDoNothing -- useful when building up lists of these things
| CoreDoNothing -- Useful when building up
| CoreDoPasses [CoreToDo] -- lists of these things
data SimplifierMode -- See comments in SimplMonad
= SimplGently
......@@ -656,6 +650,9 @@ data FloatOutSwitches
-- The core-to-core pass ordering is derived from the DynFlags:
runWhen :: Bool -> CoreToDo -> CoreToDo
runWhen True do_this = do_this
runWhen False do_this = CoreDoNothing
getCoreToDo :: DynFlags -> [CoreToDo]
getCoreToDo dflags
......@@ -667,6 +664,8 @@ getCoreToDo dflags
strictness = dopt Opt_Strictness dflags
full_laziness = dopt Opt_FullLaziness dflags
cse = dopt Opt_CSE dflags
spec_constr = dopt Opt_SpecConstr dflags
liberate_case = dopt Opt_LiberateCase dflags
rule_check = ruleCheck dflags
core_todo =
......@@ -699,8 +698,7 @@ getCoreToDo dflags
-- so that overloaded functions have all their dictionary lambdas manifest
CoreDoSpecialising,
if full_laziness then CoreDoFloatOutwards (FloatOutSw False False)
else CoreDoNothing,
runWhen full_laziness (CoreDoFloatOutwards (FloatOutSw False False)),
CoreDoFloatInwards,
......@@ -739,20 +737,19 @@ getCoreToDo dflags
case rule_check of { Just pat -> CoreDoRuleCheck 0 pat; Nothing -> CoreDoNothing },
#ifdef OLD_STRICTNESS
CoreDoOldStrictness
CoreDoOldStrictness,
#endif
if strictness then CoreDoStrictness else CoreDoNothing,
CoreDoWorkerWrapper,
CoreDoGlomBinds,
CoreDoSimplify (SimplPhase 0) [
MaxSimplifierIterations max_iter
],
if full_laziness then
CoreDoFloatOutwards (FloatOutSw False -- Not lambdas
True) -- Float constants
else CoreDoNothing,
runWhen strictness (CoreDoPasses [
CoreDoStrictness,
CoreDoWorkerWrapper,
CoreDoGlomBinds,
CoreDoSimplify (SimplPhase 0) [
MaxSimplifierIterations max_iter
]]),
runWhen full_laziness
(CoreDoFloatOutwards (FloatOutSw False -- Not lambdas
True)), -- Float constants
-- nofib/spectral/hartel/wang doubles in speed if you
-- do full laziness late in the day. It only happens
-- after fusion and other stuff, so the early pass doesn't
......@@ -760,38 +757,29 @@ getCoreToDo dflags
-- f_el22 (f_el21 r_midblock)
-- We want CSE to follow the final full-laziness pass, because it may
-- succeed in commoning up things floated out by full laziness.
-- CSE used to rely on the no-shadowing invariant, but it doesn't any more
if cse then CoreCSE else CoreDoNothing,
runWhen cse CoreCSE,
-- We want CSE to follow the final full-laziness pass, because it may
-- succeed in commoning up things floated out by full laziness.
-- CSE used to rely on the no-shadowing invariant, but it doesn't any more
CoreDoFloatInwards,
-- Case-liberation for -O2. This should be after
-- strictness analysis and the simplification which follows it.
case rule_check of { Just pat -> CoreDoRuleCheck 0 pat; Nothing -> CoreDoNothing }
]
++
case rule_check of { Just pat -> CoreDoRuleCheck 0 pat; Nothing -> CoreDoNothing },
(if opt_level >= 2 then
[ CoreLiberateCase,
CoreDoSimplify (SimplPhase 0) [
-- Case-liberation for -O2. This should be after
-- strictness analysis and the simplification which follows it.
runWhen liberate_case (CoreDoPasses [
CoreLiberateCase,
CoreDoSimplify (SimplPhase 0) [
MaxSimplifierIterations max_iter
], -- Run the simplifier after LiberateCase to vastly
] ]), -- Run the simplifier after LiberateCase to vastly
-- reduce the possiblility of shadowing
-- Reason: see Note [Shadowing] in SpecConstr.lhs
CoreDoSpecConstr
]
else
[])
++
runWhen spec_constr CoreDoSpecConstr,
-- Final clean-up simplification:
[ CoreDoSimplify (SimplPhase 0) [
CoreDoSimplify (SimplPhase 0) [
MaxSimplifierIterations max_iter
]
]
......@@ -995,7 +983,11 @@ dynamic_flags = [
, ( "fmax-simplifier-iterations", IntSuffix (\n ->
upd (\dfs -> dfs{ maxSimplIterations = n })) )
, ( "fliberate-case-threshold", IntSuffix (\n -> upd (\dfs -> dfs{ libCaseThreshold = n })))
-- liberate-case-threshold is an old flag for '-fspec-threshold'
, ( "fspec-threshold", IntSuffix (\n -> upd (\dfs -> dfs{ specThreshold = n })))
, ( "fliberate-case-threshold", IntSuffix (\n -> upd (\dfs -> dfs{ specThreshold = n })))
, ( "frule-check", SepArg (\s -> upd (\dfs -> dfs{ ruleCheck = Just s })))
, ( "fcontext-stack" , IntSuffix $ \n -> upd $ \dfs -> dfs{ ctxtStkDepth = n })
......@@ -1055,6 +1047,8 @@ fFlags = [
( "generics", Opt_Generics ),
( "strictness", Opt_Strictness ),
( "full-laziness", Opt_FullLaziness ),
( "liberate-case", Opt_LiberateCase ),
( "spec-constr", Opt_SpecConstr ),
( "cse", Opt_CSE ),
( "ignore-interface-pragmas", Opt_IgnoreInterfacePragmas ),
( "omit-interface-pragmas", Opt_OmitInterfacePragmas ),
......
......@@ -410,7 +410,7 @@ data LibCaseEnv
initEnv :: DynFlags -> FamInstEnvs -> LibCaseEnv
initEnv dflags fams
= LibCaseEnv { lc_size = libCaseThreshold dflags,
= LibCaseEnv { lc_size = specThreshold dflags,
lc_lvl = 0,
lc_lvl_env = emptyVarEnv,
lc_rec_env = emptyVarEnv,
......
......@@ -126,12 +126,17 @@ doCorePasses :: HscEnv
doCorePasses hsc_env rb us stats guts []
= return (stats, guts)
doCorePasses hsc_env rb us stats guts (CoreDoPasses to_dos1 : to_dos2)
= doCorePasses hsc_env rb us stats guts (to_dos1 ++ to_dos2)
doCorePasses hsc_env rb us stats guts (to_do : to_dos)
= do
let (us1, us2) = splitUniqSupply us
(stats1, guts1) <- doCorePass to_do hsc_env us1 rb guts
doCorePasses hsc_env rb us2 (stats `plusSimplCount` stats1) guts1 to_dos
doCorePass :: CoreToDo -> HscEnv -> UniqSupply -> RuleBase
-> ModGuts -> IO (SimplCount, ModGuts)
doCorePass (CoreDoSimplify mode sws) = _scc_ "Simplify" simplifyPgm mode sws
doCorePass CoreCSE = _scc_ "CommonSubExpr" trBinds cseProgram
doCorePass CoreLiberateCase = _scc_ "LiberateCase" liberateCase
......@@ -151,6 +156,7 @@ doCorePass CoreDoOldStrictness = _scc_ "OldStrictness" trBinds doOldStric
#else
doCorePass CoreDoOldStrictness = panic "CoreDoOldStrictness"
#endif
doCorePass (CoreDoPasses _) = panic "CoreDoPasses"
#ifdef OLD_STRICTNESS
doOldStrictness dfs binds
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment