Commit 2112f43c authored by Simon Peyton Jones's avatar Simon Peyton Jones

Be a little less aggressive about inlining (fixes Trac #5623)

When inlining, we are making a copy of the expression, so we have to
be careful about duplicating work.  Previously we were using
exprIsCheap for that, but it is willing to duplicate a cheap primop --
and that is terribly bad if it happens inside some inner array loop
(Trac #5623).  So now we use a new function exprIsWorkFree.  Even
then there is some wiggle room:
   see Note [exprIsWorkFree] in CoreUtils

This commit does make wheel-sieve1 allocate a lot more, but we decided
that's just tough; it's more important for inlining to be robust
about not duplicating work.
parent 75336070
......@@ -600,7 +600,7 @@ data Unfolding
-- a `seq` on this variable
uf_is_conlike :: Bool, -- True <=> applicn of constructor or CONLIKE function
-- Cached version of exprIsConLike
uf_is_cheap :: Bool, -- True <=> doesn't waste (much) work to expand
uf_is_work_free :: Bool, -- True <=> doesn't waste (much) work to expand
-- inside an inlining
-- Cached version of exprIsCheap
uf_expandable :: Bool, -- True <=> can expand in RULE matching
......@@ -618,8 +618,8 @@ data Unfolding
-- uf_is_value: 'exprIsHNF' template (cached); it is ok to discard a 'seq' on
-- this variable
--
-- uf_is_cheap: Does this waste only a little work if we expand it inside an inlining?
-- Basically this is a cached version of 'exprIsCheap'
-- uf_is_work_free: Does this waste only a little work if we expand it inside an inlining?
-- Basically this is a cached version of 'exprIsWorkFree'
--
-- uf_guidance: Tells us about the /size/ of the unfolding template
......@@ -738,7 +738,7 @@ mkOtherCon = OtherCon
seqUnfolding :: Unfolding -> ()
seqUnfolding (CoreUnfolding { uf_tmpl = e, uf_is_top = top,
uf_is_value = b1, uf_is_cheap = b2,
uf_is_value = b1, uf_is_work_free = b2,
uf_expandable = b3, uf_is_conlike = b4,
uf_arity = a, uf_guidance = g})
= seqExpr e `seq` top `seq` b1 `seq` a `seq` b2 `seq` b3 `seq` b4 `seq` seqGuidance g
......@@ -801,8 +801,8 @@ isConLikeUnfolding _ = False
-- | Is the thing we will unfold into certainly cheap?
isCheapUnfolding :: Unfolding -> Bool
isCheapUnfolding (CoreUnfolding { uf_is_cheap = is_cheap }) = is_cheap
isCheapUnfolding _ = False
isCheapUnfolding (CoreUnfolding { uf_is_work_free = is_wf }) = is_wf
isCheapUnfolding _ = False
isExpandableUnfolding :: Unfolding -> Bool
isExpandableUnfolding (CoreUnfolding { uf_expandable = is_expable }) = is_expable
......
......@@ -145,15 +145,15 @@ mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr
-> Arity -> UnfoldingGuidance -> Unfolding
-- Occurrence-analyses the expression before capturing it
mkCoreUnfolding src top_lvl expr arity guidance
= CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
uf_src = src,
uf_arity = arity,
uf_is_top = top_lvl,
uf_is_value = exprIsHNF expr,
uf_is_conlike = exprIsConLike expr,
uf_is_cheap = exprIsCheap expr,
uf_expandable = exprIsExpandable expr,
uf_guidance = guidance }
= CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
uf_src = src,
uf_arity = arity,
uf_is_top = top_lvl,
uf_is_value = exprIsHNF expr,
uf_is_conlike = exprIsConLike expr,
uf_is_work_free = exprIsWorkFree expr,
uf_expandable = exprIsExpandable expr,
uf_guidance = guidance }
mkUnfolding :: UnfoldingSource -> Bool -> Bool -> CoreExpr -> Unfolding
-- Calculates unfolding guidance
......@@ -163,15 +163,15 @@ mkUnfolding src top_lvl is_bottoming expr
, not (exprIsTrivial expr)
= NoUnfolding -- See Note [Do not inline top-level bottoming functions]
| otherwise
= CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
uf_src = src,
uf_arity = arity,
uf_is_top = top_lvl,
uf_is_value = exprIsHNF expr,
uf_is_conlike = exprIsConLike expr,
uf_expandable = exprIsExpandable expr,
uf_is_cheap = exprIsCheap expr,
uf_guidance = guidance }
= CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
uf_src = src,
uf_arity = arity,
uf_is_top = top_lvl,
uf_is_value = exprIsHNF expr,
uf_is_conlike = exprIsConLike expr,
uf_expandable = exprIsExpandable expr,
uf_is_work_free = exprIsWorkFree expr,
uf_guidance = guidance }
where
(arity, guidance) = calcUnfoldingGuidance expr
-- Sometimes during simplification, there's a large let-bound thing
......@@ -918,11 +918,11 @@ callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info
-- Things with an INLINE pragma may have an unfolding *and*
-- be a loop breaker (maybe the knot is not yet untied)
CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top
, uf_is_cheap = is_cheap, uf_arity = uf_arity
, uf_is_work_free = is_wf, uf_arity = uf_arity
, uf_guidance = guidance, uf_expandable = is_exp }
| active_unfolding -> tryUnfolding dflags id lone_variable
arg_infos cont_info unf_template is_top
is_cheap is_exp uf_arity guidance
is_wf is_exp uf_arity guidance
| dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags
-> pprTrace "Inactive unfolding:" (ppr id) Nothing
| otherwise -> Nothing
......@@ -935,7 +935,7 @@ tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt
-> Maybe CoreExpr
tryUnfolding dflags id lone_variable
arg_infos cont_info unf_template is_top
is_cheap is_exp uf_arity guidance
is_wf is_exp uf_arity guidance
-- uf_arity will typically be equal to (idArity id),
-- but may be less for InlineRules
| dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags
......@@ -945,7 +945,7 @@ tryUnfolding dflags id lone_variable
text "interesting continuation" <+> ppr cont_info,
text "some_benefit" <+> ppr some_benefit,
text "is exp:" <+> ppr is_exp,
text "is cheap:" <+> ppr is_cheap,
text "is work-free:" <+> ppr is_wf,
text "guidance" <+> ppr guidance,
extra_doc,
text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
......@@ -979,7 +979,7 @@ tryUnfolding dflags id lone_variable
interesting_saturated_call
= case cont_info of
BoringCtxt -> not is_top && uf_arity > 0 -- Note [Nested functions]
CaseCtxt -> not (lone_variable && is_cheap) -- Note [Lone variables]
CaseCtxt -> not (lone_variable && is_wf) -- Note [Lone variables]
ArgCtxt {} -> uf_arity > 0 -- Note [Inlining in ArgCtxt]
ValAppCtxt -> True -- Note [Cast then apply]
......@@ -993,7 +993,7 @@ tryUnfolding dflags id lone_variable
enough_args = saturated || (unsat_ok && n_val_args > 0)
UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
-> ( is_cheap && some_benefit && small_enough
-> ( is_wf && some_benefit && small_enough
, (text "discounted size =" <+> int discounted_size) )
where
discounted_size = size - discount
......@@ -1105,7 +1105,7 @@ call is at least CONLIKE. At least for the cases where we use ArgCtxt
for the RHS of a 'let', we only profit from the inlining if we get a
CONLIKE thing (modulo lets).
Note [Lone variables] See also Note [Interaction of exprIsCheap and lone variables]
Note [Lone variables] See also Note [Interaction of exprIsWorkFree and lone variables]
~~~~~~~~~~~~~~~~~~~~~ which appears below
The "lone-variable" case is important. I spent ages messing about
with unsatisfactory varaints, but this is nice. The idea is that if a
......@@ -1152,7 +1152,7 @@ However, watch out:
So the non-inlining of lone_variables should only apply if the
unfolding is regarded as cheap; because that is when exprIsConApp_maybe
looks through the unfolding. Hence the "&& is_cheap" in the
looks through the unfolding. Hence the "&& is_wf" in the
InlineRule branch.
* Even a type application or coercion isn't a lone variable.
......@@ -1167,7 +1167,7 @@ However, watch out:
There's no advantage in inlining f here, and perhaps
a significant disadvantage. Hence some_val_args in the Stop case
Note [Interaction of exprIsCheap and lone variables]
Note [Interaction of exprIsWorkFree and lone variables]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The lone-variable test says "don't inline if a case expression
scrutines a lone variable whose unfolding is cheap". It's very
......@@ -1178,9 +1178,9 @@ consider
to be cheap, and that's good because exprIsConApp_maybe doesn't
think that expression is a constructor application.
I used to test is_value rather than is_cheap, which was utterly
wrong, because the above expression responds True to exprIsHNF,
which is what sets is_value.
In the 'not (lone_variable && is_wf)' test, I used to test is_value
rather than is_wf, which was utterly wrong, because the above
expression responds True to exprIsHNF, which is what sets is_value.
This kind of thing can occur if you have
......
......@@ -22,7 +22,7 @@ module CoreUtils (
exprType, coreAltType, coreAltsType,
exprIsDupable, exprIsTrivial, getIdFromTrivialExpr, exprIsBottom,
exprIsCheap, exprIsExpandable, exprIsCheap', CheapAppFun,
exprIsHNF, exprOkForSpeculation, exprOkForSideEffects,
exprIsHNF, exprOkForSpeculation, exprOkForSideEffects, exprIsWorkFree,
exprIsBig, exprIsConLike,
rhsIsStatic, isCheapApp, isExpandableApp,
......@@ -640,6 +640,68 @@ dupAppSize = 8 -- Size of term we are prepared to duplicate
%* *
%************************************************************************
Note [exprIsWorkFree]
~~~~~~~~~~~~~~~~~~~~~
exprIsWorkFree is used when deciding whether to inline something; we
don't inline it if doing so might duplicate work, by peeling off a
complete copy of the expression. Here we do not want even to
duplicate a primop (Trac #5623):
eg let x = a #+ b in x +# x
we do not want to inline/duplicate x
Previously we were a bit more liberal, which led to the primop-duplicating
problem. However, being more conservative did lead to a big regression in
one nofib benchmark, wheel-sieve1. The situation looks like this:
let noFactor_sZ3 :: GHC.Types.Int -> GHC.Types.Bool
noFactor_sZ3 = case s_adJ of _ { GHC.Types.I# x_aRs ->
case GHC.Prim.<=# x_aRs 2 of _ {
GHC.Types.False -> notDivBy ps_adM qs_adN;
GHC.Types.True -> lvl_r2Eb }}
go = \x. ...(noFactor (I# y))....(go x')...
The function 'noFactor' is heap-allocated and then called. Turns out
that 'notDivBy' is strict in its THIRD arg, but that is invisible to
the caller of noFactor, which therefore cannot do w/w and
heap-allocates noFactor's argument. At the moment (May 12) we are just
going to put up with this, because the previous more aggressive inlining
(which treated 'noFactor' as work-free) was duplicating primops, which
in turn was making inner loops of array calculations runs slow (#5623)
\begin{code}
exprIsWorkFree :: CoreExpr -> Bool
-- See Note [exprIsWorkFree]
exprIsWorkFree e = go 0 e
where -- n is the number of value arguments
go _ (Lit {}) = True
go _ (Type {}) = True
go _ (Coercion {}) = True
go n (Cast e _) = go n e
go n (Case scrut _ _ alts) = foldl (&&) (exprIsWorkFree scrut)
[ go n rhs | (_,_,rhs) <- alts ]
-- See Note [Case expressions are work-free]
go _ (Let {}) = False
go n (Var v) = n==0 || n < idArity v
go n (Tick t e) | tickishCounts t = False
| otherwise = go n e
go n (Lam x e) | isRuntimeVar x = n==0 || go (n-1) e
| otherwise = go n e
go n (App f e) | isRuntimeArg e = exprIsWorkFree e && go (n+1) f
| otherwise = go n f
\end{code}
Note [Case expressions are work-free]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Are case-expressions work-free? Consider
let v = case x of (p,q) -> p
go = \y -> ...case v of ...
Should we inline 'v' at its use site inside the loop? At the moment
we do. I experimented with saying that case are *not* work-free, but
that increased allocation slightly. It's a fairly small effect, and at
the moment we go for the slightly more aggressive version which treats
(case x of ....) as work-free if the alterantives are.
Note [exprIsCheap] See also Note [Interaction of exprIsCheap and lone variables]
~~~~~~~~~~~~~~~~~~ in CoreUnfold.lhs
@exprIsCheap@ looks at a Core expression and returns \tr{True} if
......
......@@ -419,7 +419,7 @@ instance Outputable Unfolding where
<+> ppr con <+> brackets (pprWithCommas ppr ops)
ppr (CoreUnfolding { uf_src = src
, uf_tmpl=rhs, uf_is_top=top, uf_is_value=hnf
, uf_is_conlike=conlike, uf_is_cheap=cheap
, uf_is_conlike=conlike, uf_is_work_free=wf
, uf_expandable=exp, uf_guidance=g, uf_arity=arity})
= ptext (sLit "Unf") <> braces (pp_info $$ pp_rhs)
where
......@@ -429,7 +429,7 @@ instance Outputable Unfolding where
, ptext (sLit "Arity=") <> int arity
, ptext (sLit "Value=") <> ppr hnf
, ptext (sLit "ConLike=") <> ppr conlike
, ptext (sLit "Cheap=") <> ppr cheap
, ptext (sLit "WorkFree=") <> ppr wf
, ptext (sLit "Expandable=") <> ppr exp
, ptext (sLit "Guidance=") <> ppr g ]
pp_tmpl = ptext (sLit "Tmpl=") <+> ppr rhs
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment