CoreUnfold.hs 65.4 KB
Newer Older
Austin Seipp's avatar
Austin Seipp committed
1 2 3 4
{-
(c) The University of Glasgow 2006
(c) The AQUA Project, Glasgow University, 1994-1998

Simon Marlow's avatar
Simon Marlow committed
5 6

Core-syntax unfoldings
7 8 9 10 11 12 13 14 15

Unfoldings (which can travel across module boundaries) are in Core
syntax (namely @CoreExpr@s).

The type @Unfolding@ sits ``above'' simply-Core-expressions
unfoldings, capturing ``higher-level'' things we know about a binding,
usually things that the simplifier found out (e.g., ``it's a
literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
find, unsurprisingly, a Core expression.
Austin Seipp's avatar
Austin Seipp committed
16
-}
17

18
{-# LANGUAGE CPP #-}
Ian Lynagh's avatar
Ian Lynagh committed
19

20
module CoreUnfold (
21
        Unfolding, UnfoldingGuidance,   -- Abstract types
22

23
        noUnfolding, mkImplicitUnfolding,
24
        mkUnfolding, mkCoreUnfolding,
25
        mkTopUnfolding, mkSimpleUnfolding, mkWorkerUnfolding,
26 27
        mkInlineUnfolding, mkInlineUnfoldingWithArity,
        mkInlinableUnfolding, mkWwInlineRule,
28
        mkCompulsoryUnfolding, mkDFunUnfolding,
Simon Peyton Jones's avatar
Simon Peyton Jones committed
29
        specUnfolding,
30

31
        ArgSummary(..),
32

33 34
        couldBeSmallEnoughToInline, inlineBoringOk,
        certainlyWillInline, smallEnoughToInline,
35

36
        callSiteInline, CallCtxt(..),
37

38 39
        -- Reexport from CoreSubst (it only live there so it can be used
        -- by the Very Simple Optimiser)
40
        exprIsConApp_maybe, exprIsLiteral_maybe
41 42
    ) where

43 44
#include "HsVersions.h"

45 46
import GhcPrelude

Simon Marlow's avatar
Simon Marlow committed
47
import DynFlags
48
import CoreSyn
49
import OccurAnal        ( occurAnalyseExpr_NoBinderSwap )
50
import CoreOpt
51
import CoreArity       ( manifestArity )
Simon Marlow's avatar
Simon Marlow committed
52 53
import CoreUtils
import Id
54
import Demand          ( isBottomingSig )
Simon Marlow's avatar
Simon Marlow committed
55 56 57 58
import DataCon
import Literal
import PrimOp
import IdInfo
59
import BasicTypes       ( Arity, InlineSpec(..), inlinePragmaSpec )
60
import Type
Simon Marlow's avatar
Simon Marlow committed
61
import PrelNames
62
import TysPrim          ( realWorldStatePrimTy )
63
import Bag
64
import Util
65
import Outputable
66
import ForeignCall
67
import Name
68

69
import qualified Data.ByteString as BS
70
import Data.List
71

Austin Seipp's avatar
Austin Seipp committed
72 73 74
{-
************************************************************************
*                                                                      *
75
\subsection{Making unfoldings}
Austin Seipp's avatar
Austin Seipp committed
76 77 78
*                                                                      *
************************************************************************
-}
79

80
mkTopUnfolding :: DynFlags -> Bool -> CoreExpr -> Unfolding
81 82
mkTopUnfolding dflags is_bottoming rhs
  = mkUnfolding dflags InlineRhs True is_bottoming rhs
83

84
mkImplicitUnfolding :: DynFlags -> CoreExpr -> Unfolding
85
-- For implicit Ids, do a tiny bit of optimising first
86
mkImplicitUnfolding dflags expr
87
  = mkTopUnfolding dflags False (simpleOptExpr dflags expr)
Simon Marlow's avatar
Simon Marlow committed
88

89 90 91 92 93
-- Note [Top-level flag on inline rules]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Slight hack: note that mk_inline_rules conservatively sets the
-- top-level flag to True.  It gets set more accurately by the simplifier
-- Simplify.simplUnfolding.
94

95
mkSimpleUnfolding :: DynFlags -> CoreExpr -> Unfolding
96 97
mkSimpleUnfolding dflags rhs
  = mkUnfolding dflags InlineRhs False False rhs
98

99
mkDFunUnfolding :: [Var] -> DataCon -> [CoreExpr] -> Unfolding
100 101 102
mkDFunUnfolding bndrs con ops
  = DFunUnfolding { df_bndrs = bndrs
                  , df_con = con
103
                  , df_args = map occurAnalyseExpr_NoBinderSwap ops }
104
                  -- See Note [Occurrence analysis of unfoldings]
Simon Marlow's avatar
Simon Marlow committed
105

106 107
mkWwInlineRule :: DynFlags -> CoreExpr -> Arity -> Unfolding
mkWwInlineRule dflags expr arity
108
  = mkCoreUnfolding InlineStable True
109
                   (simpleOptExpr dflags expr)
Simon Peyton Jones's avatar
Simon Peyton Jones committed
110 111
                   (UnfWhen { ug_arity = arity, ug_unsat_ok = unSaturatedOk
                            , ug_boring_ok = boringCxtNotOk })
112

113
mkCompulsoryUnfolding :: CoreExpr -> Unfolding
114
mkCompulsoryUnfolding expr         -- Used for things that absolutely must be unfolded
115
  = mkCoreUnfolding InlineCompulsory True
116
                    (simpleOptExpr unsafeGlobalDynFlags expr)
Simon Peyton Jones's avatar
Simon Peyton Jones committed
117 118
                    (UnfWhen { ug_arity = 0    -- Arity of unfolding doesn't matter
                             , ug_unsat_ok = unSaturatedOk, ug_boring_ok = boringCxtOk })
119

120 121 122 123 124 125 126 127
mkWorkerUnfolding :: DynFlags -> (CoreExpr -> CoreExpr) -> Unfolding -> Unfolding
-- See Note [Worker-wrapper for INLINABLE functions] in WorkWrap
mkWorkerUnfolding dflags work_fn
                  (CoreUnfolding { uf_src = src, uf_tmpl = tmpl
                                 , uf_is_top = top_lvl })
  | isStableSource src
  = mkCoreUnfolding src top_lvl new_tmpl guidance
  where
128
    new_tmpl = simpleOptExpr dflags (work_fn tmpl)
129
    guidance = calcUnfoldingGuidance dflags False new_tmpl
130 131 132

mkWorkerUnfolding _ _ _ = noUnfolding

133 134 135 136 137 138
-- | Make an unfolding that may be used unsaturated
-- (ug_unsat_ok = unSaturatedOk) and that is reported as having its
-- manifest arity (the number of outer lambdas applications will
-- resolve before doing any work).
mkInlineUnfolding :: CoreExpr -> Unfolding
mkInlineUnfolding expr
139
  = mkCoreUnfolding InlineStable
140
                    True         -- Note [Top-level flag on inline rules]
Simon Peyton Jones's avatar
Simon Peyton Jones committed
141
                    expr' guide
142
  where
143
    expr' = simpleOptExpr unsafeGlobalDynFlags expr
144 145 146 147 148 149 150 151 152 153 154 155 156
    guide = UnfWhen { ug_arity = manifestArity expr'
                    , ug_unsat_ok = unSaturatedOk
                    , ug_boring_ok = boring_ok }
    boring_ok = inlineBoringOk expr'

-- | Make an unfolding that will be used once the RHS has been saturated
-- to the given arity.
mkInlineUnfoldingWithArity :: Arity -> CoreExpr -> Unfolding
mkInlineUnfoldingWithArity arity expr
  = mkCoreUnfolding InlineStable
                    True         -- Note [Top-level flag on inline rules]
                    expr' guide
  where
157
    expr' = simpleOptExpr unsafeGlobalDynFlags expr
158 159 160
    guide = UnfWhen { ug_arity = arity
                    , ug_unsat_ok = needSaturated
                    , ug_boring_ok = boring_ok }
161 162 163 164
    -- See Note [INLINE pragmas and boring contexts] as to why we need to look
    -- at the arity here.
    boring_ok | arity == 0 = True
              | otherwise  = inlineBoringOk expr'
165

166 167
mkInlinableUnfolding :: DynFlags -> CoreExpr -> Unfolding
mkInlinableUnfolding dflags expr
168
  = mkUnfolding dflags InlineStable False False expr'
169
  where
170
    expr' = simpleOptExpr dflags expr
Simon Peyton Jones's avatar
Simon Peyton Jones committed
171

172 173
specUnfolding :: DynFlags -> [Var] -> (CoreExpr -> CoreExpr) -> Arity
              -> Unfolding -> Unfolding
Simon Peyton Jones's avatar
Simon Peyton Jones committed
174
-- See Note [Specialising unfoldings]
175 176
-- specUnfolding spec_bndrs spec_app arity_decrease unf
--   = \spec_bndrs. spec_app( unf )
177
--
178
specUnfolding dflags spec_bndrs spec_app arity_decrease
179 180 181 182 183 184 185 186 187 188
              df@(DFunUnfolding { df_bndrs = old_bndrs, df_con = con, df_args = args })
  = ASSERT2( arity_decrease == count isId old_bndrs - count isId spec_bndrs, ppr df )
    mkDFunUnfolding spec_bndrs con (map spec_arg args)
      -- There is a hard-to-check assumption here that the spec_app has
      -- enough applications to exactly saturate the old_bndrs
      -- For DFunUnfoldings we transform
      --       \old_bndrs. MkD <op1> ... <opn>
      -- to
      --       \new_bndrs. MkD (spec_app(\old_bndrs. <op1>)) ... ditto <opn>
      -- The ASSERT checks the value part of that
Simon Peyton Jones's avatar
Simon Peyton Jones committed
189
  where
190
    spec_arg arg = simpleOptExpr dflags (spec_app (mkLams old_bndrs arg))
191 192
                   -- The beta-redexes created by spec_app will be
                   -- simplified away by simplOptExpr
Simon Peyton Jones's avatar
Simon Peyton Jones committed
193

194
specUnfolding dflags spec_bndrs spec_app arity_decrease
Simon Peyton Jones's avatar
Simon Peyton Jones committed
195 196 197 198
              (CoreUnfolding { uf_src = src, uf_tmpl = tmpl
                             , uf_is_top = top_lvl
                             , uf_guidance = old_guidance })
 | isStableSource src  -- See Note [Specialising unfoldings]
199 200
 , UnfWhen { ug_arity     = old_arity
           , ug_unsat_ok  = unsat_ok
Simon Peyton Jones's avatar
Simon Peyton Jones committed
201
           , ug_boring_ok = boring_ok } <- old_guidance
202 203
 = let guidance = UnfWhen { ug_arity     = old_arity - arity_decrease
                          , ug_unsat_ok  = unsat_ok
Simon Peyton Jones's avatar
Simon Peyton Jones committed
204
                          , ug_boring_ok = boring_ok }
205
       new_tmpl = simpleOptExpr dflags (mkLams spec_bndrs (spec_app tmpl))
206 207
                   -- The beta-redexes created by spec_app will be
                   -- simplified away by simplOptExpr
Simon Peyton Jones's avatar
Simon Peyton Jones committed
208 209 210

   in mkCoreUnfolding src top_lvl new_tmpl guidance

211
specUnfolding _ _ _ _ _ = noUnfolding
Simon Peyton Jones's avatar
Simon Peyton Jones committed
212

213 214
{- Note [Specialising unfoldings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Simon Peyton Jones's avatar
Simon Peyton Jones committed
215 216 217 218 219 220 221 222 223 224
When we specialise a function for some given type-class arguments, we use
specUnfolding to specialise its unfolding.  Some important points:

* If the original function has a DFunUnfolding, the specialised one
  must do so too!  Otherwise we lose the magic rules that make it
  interact with ClassOps

* There is a bit of hack for INLINABLE functions:
     f :: Ord a => ....
     f = <big-rhs>
225
     {- INLINABLE f #-}
Simon Peyton Jones's avatar
Simon Peyton Jones committed
226
  Now if we specialise f, should the specialised version still have
227
  an INLINABLE pragma?  If it does, we'll capture a specialised copy
Simon Peyton Jones's avatar
Simon Peyton Jones committed
228 229 230 231 232
  of <big-rhs> as its unfolding, and that probaby won't inline.  But
  if we don't, the specialised version of <big-rhs> might be small
  enough to inline at a call site. This happens with Control.Monad.liftM3,
  and can cause a lot more allocation as a result (nofib n-body shows this).

233
  Moreover, keeping the INLINABLE thing isn't much help, because
Simon Peyton Jones's avatar
Simon Peyton Jones committed
234 235 236 237 238 239
  the specialised function (probaby) isn't overloaded any more.

  Conclusion: drop the INLINEALE pragma.  In practice what this means is:
     if a stable unfolding has UnfoldingGuidance of UnfWhen,
        we keep it (so the specialised thing too will always inline)
     if a stable unfolding has UnfoldingGuidance of UnfIfGoodArgs
240
        (which arises from INLINABLE), we discard it
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261

Note [Honour INLINE on 0-ary bindings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider

   x = <expensive>
   {-# INLINE x #-}

   f y = ...x...

The semantics of an INLINE pragma is

  inline x at every call site, provided it is saturated;
  that is, applied to at least as many arguments as appear
  on the LHS of the Haskell source definition.

(This soure-code-derived arity is stored in the `ug_arity` field of
the `UnfoldingGuidance`.)

In the example, x's ug_arity is 0, so we should inline it at every use
site.  It's rare to have such an INLINE pragma (usually INLINE Is on
262
functions), but it's occasionally very important (#15578, #15519).
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
In #15519 we had something like
   x = case (g a b) of I# r -> T r
   {-# INLINE x #-}
   f y = ...(h x)....

where h is strict.  So we got
   f y = ...(case g a b of I# r -> h (T r))...

and that in turn allowed SpecConstr to ramp up performance.

How do we deliver on this?  By adjusting the ug_boring_ok
flag in mkInlineUnfoldingWithArity; see
Note [INLINE pragmas and boring contexts]

NB: there is a real risk that full laziness will float it right back
out again. Consider again
  x = factorial 200
  {-# INLINE x #-}
  f y = ...x...

After inlining we get
  f y = ...(factorial 200)...

but it's entirely possible that full laziness will do
  lvl23 = factorial 200
  f y = ...lvl23...

That's a problem for another day.

Note [INLINE pragmas and boring contexts]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An INLINE pragma uses mkInlineUnfoldingWithArity to build the
unfolding.  That sets the ug_boring_ok flag to False if the function
Gabor Greif's avatar
Gabor Greif committed
296
is not tiny (inlineBoringOK), so that even INLINE functions are not
297 298 299 300 301 302 303 304 305 306
inlined in an utterly boring context.  E.g.
     \x y. Just (f y x)
Nothing is gained by inlining f here, even if it has an INLINE
pragma.

But for 0-ary bindings, we want to inline regardless; see
Note [Honour INLINE on 0-ary bindings].

I'm a bit worried that it's possible for the same kind of problem
to arise for non-0-ary functions too, but let's wait and see.
Austin Seipp's avatar
Austin Seipp committed
307
-}
Simon Peyton Jones's avatar
Simon Peyton Jones committed
308

309
mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr
Simon Peyton Jones's avatar
Simon Peyton Jones committed
310
                -> UnfoldingGuidance -> Unfolding
311
-- Occurrence-analyses the expression before capturing it
Simon Peyton Jones's avatar
Simon Peyton Jones committed
312
mkCoreUnfolding src top_lvl expr guidance
313
  = CoreUnfolding { uf_tmpl         = occurAnalyseExpr_NoBinderSwap expr,
314
                      -- See Note [Occurrence analysis of unfoldings]
315 316 317
                    uf_src          = src,
                    uf_is_top       = top_lvl,
                    uf_is_value     = exprIsHNF        expr,
318
                    uf_is_conlike   = exprIsConLike    expr,
319 320 321
                    uf_is_work_free = exprIsWorkFree   expr,
                    uf_expandable   = exprIsExpandable expr,
                    uf_guidance     = guidance }
322

323 324 325 326 327
mkUnfolding :: DynFlags -> UnfoldingSource
            -> Bool       -- Is top-level
            -> Bool       -- Definitely a bottoming binding
                          -- (only relevant for top-level bindings)
            -> CoreExpr
328
            -> Unfolding
329 330
-- Calculates unfolding guidance
-- Occurrence-analyses the expression before capturing it
331
mkUnfolding dflags src is_top_lvl is_bottoming expr
332
  = CoreUnfolding { uf_tmpl         = occurAnalyseExpr_NoBinderSwap expr,
333
                      -- See Note [Occurrence analysis of unfoldings]
334
                    uf_src          = src,
335
                    uf_is_top       = is_top_lvl,
336
                    uf_is_value     = exprIsHNF        expr,
337
                    uf_is_conlike   = exprIsConLike    expr,
338 339 340
                    uf_expandable   = exprIsExpandable expr,
                    uf_is_work_free = exprIsWorkFree   expr,
                    uf_guidance     = guidance }
341
  where
342 343
    is_top_bottoming = is_top_lvl && is_bottoming
    guidance         = calcUnfoldingGuidance dflags is_top_bottoming expr
344
        -- NB: *not* (calcUnfoldingGuidance (occurAnalyseExpr_NoBinderSwap expr))!
345
        -- See Note [Calculate unfolding guidance on the non-occ-anal'd expression]
346

Austin Seipp's avatar
Austin Seipp committed
347
{-
348 349 350 351 352 353 354 355 356 357 358
Note [Occurrence analysis of unfoldings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We do occurrence-analysis of unfoldings once and for all, when the
unfolding is built, rather than each time we inline them.

But given this decision it's vital that we do
*always* do it.  Consider this unfolding
    \x -> letrec { f = ...g...; g* = f } in body
where g* is (for some strange reason) the loop breaker.  If we don't
occ-anal it when reading it in, we won't mark g as a loop breaker, and
we may inline g entirely in body, dropping its binding, and leaving
359
the occurrence in f out of scope. This happened in #8892, where
360 361 362 363 364 365
the unfolding in question was a DFun unfolding.

But more generally, the simplifier is designed on the
basis that it is looking at occurrence-analysed expressions, so better
ensure that they acutally are.

366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
We use occurAnalyseExpr_NoBinderSwap instead of occurAnalyseExpr;
see Note [No binder swap in unfoldings].

Note [No binder swap in unfoldings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The binder swap can temporarily violate Core Lint, by assinging
a LocalId binding to a GlobalId. For example, if A.foo{r872}
is a GlobalId with unique r872, then

 case A.foo{r872} of bar {
   K x -> ...(A.foo{r872})...
 }

gets transformed to

  case A.foo{r872} of bar {
    K x -> let foo{r872} = bar
           in ...(A.foo{r872})...

This is usually not a problem, because the simplifier will transform
this to:

  case A.foo{r872} of bar {
    K x -> ...(bar)...

However, after occurrence analysis but before simplification, this extra 'let'
violates the Core Lint invariant that we do not have local 'let' bindings for
GlobalIds.  That seems (just) tolerable for the occurrence analysis that happens
just before the Simplifier, but not for unfoldings, which are Linted
independently.
As a quick workaround, we disable binder swap in this module.
397
See #16288 and #16296 for further plans.
398

399 400 401 402 403 404 405 406 407 408
Note [Calculate unfolding guidance on the non-occ-anal'd expression]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Notice that we give the non-occur-analysed expression to
calcUnfoldingGuidance.  In some ways it'd be better to occur-analyse
first; for example, sometimes during simplification, there's a large
let-bound thing which has been substituted, and so is now dead; so
'expr' contains two copies of the thing while the occurrence-analysed
expression doesn't.

Nevertheless, we *don't* and *must not* occ-analyse before computing
409
the size because
410 411 412 413

a) The size computation bales out after a while, whereas occurrence
   analysis does not.

414 415
b) Residency increases sharply if you occ-anal first.  I'm not
   100% sure why, but it's a large effect.  Compiling Cabal went
416 417 418 419 420 421 422
   from residency of 534M to over 800M with this one change.

This can occasionally mean that the guidance is very pessimistic;
it gets fixed up next round.  And it should be rare, because large
let-bound things that are dead are usually caught by preInlineUnconditionally


Austin Seipp's avatar
Austin Seipp committed
423 424
************************************************************************
*                                                                      *
425
\subsection{The UnfoldingGuidance type}
Austin Seipp's avatar
Austin Seipp committed
426 427 428
*                                                                      *
************************************************************************
-}
429

430 431
inlineBoringOk :: CoreExpr -> Bool
-- See Note [INLINE for small functions]
432
-- True => the result of inlining the expression is
433 434 435 436 437 438 439 440 441 442 443 444
--         no bigger than the expression itself
--     eg      (\x y -> f y x)
-- This is a quick and dirty version. It doesn't attempt
-- to deal with  (\x y z -> x (y z))
-- The really important one is (x `cast` c)
inlineBoringOk e
  = go 0 e
  where
    go :: Int -> CoreExpr -> Bool
    go credit (Lam x e) | isId x           = go (credit+1) e
                        | otherwise        = go credit e
    go credit (App f (Type {}))            = go credit f
445
    go credit (App f a) | credit > 0
446
                        , exprIsTrivial a  = go (credit-1) f
447
    go credit (Tick _ e)                 = go credit e -- dubious
448 449 450
    go credit (Cast e _)                   = go credit e
    go _      (Var {})                     = boringCxtOk
    go _      _                            = boringCxtNotOk
451

452
calcUnfoldingGuidance
453
        :: DynFlags
454 455
        -> Bool          -- Definitely a top-level, bottoming binding
        -> CoreExpr      -- Expression to look at
Simon Peyton Jones's avatar
Simon Peyton Jones committed
456
        -> UnfoldingGuidance
457
calcUnfoldingGuidance dflags is_top_bottoming (Tick t expr)
Peter Wortmann's avatar
Peter Wortmann committed
458
  | not (tickishIsCode t)  -- non-code ticks don't matter for unfolding
459 460
  = calcUnfoldingGuidance dflags is_top_bottoming expr
calcUnfoldingGuidance dflags is_top_bottoming expr
461
  = case sizeExpr dflags bOMB_OUT_SIZE val_bndrs body of
Simon Peyton Jones's avatar
Simon Peyton Jones committed
462 463
      TooBig -> UnfNever
      SizeIs size cased_bndrs scrut_discount
464
        | uncondInline expr n_val_bndrs size
Simon Peyton Jones's avatar
Simon Peyton Jones committed
465 466 467
        -> UnfWhen { ug_unsat_ok = unSaturatedOk
                   , ug_boring_ok =  boringCxtOk
                   , ug_arity = n_val_bndrs }   -- Note [INLINE for small functions]
468 469 470 471

        | is_top_bottoming
        -> UnfNever   -- See Note [Do not inline top-level bottoming functions]

Simon Peyton Jones's avatar
Simon Peyton Jones committed
472 473
        | otherwise
        -> UnfIfGoodArgs { ug_args  = map (mk_discount cased_bndrs) val_bndrs
474 475
                         , ug_size  = size
                         , ug_res   = scrut_discount }
Simon Peyton Jones's avatar
Simon Peyton Jones committed
476 477 478 479 480 481 482 483 484 485

  where
    (bndrs, body) = collectBinders expr
    bOMB_OUT_SIZE = ufCreationThreshold dflags
           -- Bomb out if size gets bigger than this
    val_bndrs   = filter isId bndrs
    n_val_bndrs = length val_bndrs

    mk_discount :: Bag (Id,Int) -> Id -> Int
    mk_discount cbs bndr = foldlBag combine 0 cbs
486
           where
Simon Peyton Jones's avatar
Simon Peyton Jones committed
487
             combine acc (bndr', disc)
488 489
               | bndr == bndr' = acc `plus_disc` disc
               | otherwise     = acc
Simon Peyton Jones's avatar
Simon Peyton Jones committed
490

491 492 493 494
             plus_disc :: Int -> Int -> Int
             plus_disc | isFunTy (idType bndr) = max
                       | otherwise             = (+)
             -- See Note [Function and non-function discounts]
495

Austin Seipp's avatar
Austin Seipp committed
496
{-
497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514
Note [Computing the size of an expression]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The basic idea of sizeExpr is obvious enough: count nodes.  But getting the
heuristics right has taken a long time.  Here's the basic strategy:

    * Variables, literals: 0
      (Exception for string literals, see litSize.)

    * Function applications (f e1 .. en): 1 + #value args

    * Constructor applications: 1, regardless of #args

    * Let(rec): 1 + size of components

    * Note, cast: 0

Examples

515
  Size  Term
516
  --------------
517 518
    0     42#
    0     x
519
    0     True
520 521 522
    2     f x
    1     Just x
    4     f (g x)
523 524

Notice that 'x' counts 0, while (f x) counts 2.  That's deliberate: there's
525
a function call to account for.  Notice also that constructor applications
526 527
are very cheap, because exposing them to a caller is so valuable.

528 529 530 531
[25/5/11] All sizes are now multiplied by 10, except for primops
(which have sizes like 1 or 4.  This makes primops look fantastically
cheap, and seems to be almost unversally beneficial.  Done partly as a
result of #4978.
532 533 534

Note [Do not inline top-level bottoming functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
535
The FloatOut pass has gone to some trouble to float out calls to 'error'
536 537 538 539
and similar friends.  See Note [Bottoming floats] in SetLevels.
Do not re-inline them!  But we *do* still inline if they are very small
(the uncondInline stuff).

540 541
Note [INLINE for small functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
542
Consider        {-# INLINE f #-}
543 544 545 546 547 548 549 550 551
                f x = Just x
                g y = f y
Then f's RHS is no larger than its LHS, so we should inline it into
even the most boring context.  In general, f the function is
sufficiently small that its body is as small as the call itself, the
inline unconditionally, regardless of how boring the context is.

Things to note:

552 553
(1) We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
    than the thing it's replacing.  Notice that
554 555 556 557 558
      (f x) --> (g 3)             -- YES, unconditionally
      (f x) --> x : []            -- YES, *even though* there are two
                                  --      arguments to the cons
      x     --> g 3               -- NO
      x     --> Just v            -- NO
559

560 561 562 563 564 565 566 567 568 569 570
    It's very important not to unconditionally replace a variable by
    a non-atomic term.

(2) We do this even if the thing isn't saturated, else we end up with the
    silly situation that
       f x y = x
       ...map (f 3)...
    doesn't inline.  Even in a boring context, inlining without being
    saturated will give a lambda instead of a PAP, and will be more
    efficient at runtime.

Simon Peyton Jones's avatar
Simon Peyton Jones committed
571
(3) However, when the function's arity > 0, we do insist that it
572 573 574 575 576 577 578 579 580 581 582 583 584 585 586
    has at least one value argument at the call site.  (This check is
    made in the UnfWhen case of callSiteInline.) Otherwise we find this:
         f = /\a \x:a. x
         d = /\b. MkD (f b)
    If we inline f here we get
         d = /\b. MkD (\x:b. x)
    and then prepareRhs floats out the argument, abstracting the type
    variables, so we end up with the original again!

(4) We must be much more cautious about arity-zero things. Consider
       let x = y +# z in ...
    In *size* terms primops look very small, because the generate a
    single instruction, but we do not want to unconditionally replace
    every occurrence of x with (y +# z).  So we only do the
    unconditional-inline thing for *trivial* expressions.
Simon Peyton Jones's avatar
Simon Peyton Jones committed
587

588 589 590
    NB: you might think that PostInlineUnconditionally would do this
    but it doesn't fire for top-level things; see SimplUtils
    Note [Top level and postInlineUnconditionally]
Austin Seipp's avatar
Austin Seipp committed
591
-}
592

593
uncondInline :: CoreExpr -> Arity -> Int -> Bool
594 595
-- Inline unconditionally if there no size increase
-- Size of call is arity (+1 for the function)
596
-- See Note [INLINE for small functions]
597
uncondInline rhs arity size
598 599
  | arity > 0 = size <= 10 * (arity + 1) -- See Note [INLINE for small functions] (1)
  | otherwise = exprIsTrivial rhs        -- See Note [INLINE for small functions] (4)
600

601
sizeExpr :: DynFlags
602
         -> Int             -- Bomb out if it gets bigger than this
603 604 605 606
         -> [Id]            -- Arguments; we're interested in which of these
                            -- get case'd
         -> CoreExpr
         -> ExprSize
607

608 609
-- Note [Computing the size of an expression]

610
sizeExpr dflags bOMB_OUT_SIZE top_args expr
611 612
  = size_up expr
  where
613
    size_up (Cast e _) = size_up e
614
    size_up (Tick _ e) = size_up e
615
    size_up (Type _)   = sizeZero           -- Types cost nothing
616
    size_up (Coercion _) = sizeZero
617
    size_up (Lit lit)  = sizeN (litSize lit)
618 619 620 621
    size_up (Var f) | isRealWorldId f = sizeZero
                      -- Make sure we get constructor discounts even
                      -- on nullary constructors
                    | otherwise       = size_up_call f [] 0
Simon Marlow's avatar
Simon Marlow committed
622

623 624 625 626
    size_up (App fun arg)
      | isTyCoArg arg = size_up fun
      | otherwise     = size_up arg  `addSizeNSD`
                        size_up_app fun [arg] (if isRealWorldExpr arg then 1 else 0)
627

628 629 630
    size_up (Lam b e)
      | isId b && not (isRealWorldId b) = lamScrutDiscount dflags (size_up e `addSizeN` 10)
      | otherwise = size_up e
631 632

    size_up (Let (NonRec binder rhs) body)
lukemaurer's avatar
lukemaurer committed
633 634 635
      = size_up_rhs (binder, rhs) `addSizeNSD`
        size_up body              `addSizeN`
        size_up_alloc binder
636 637

    size_up (Let (Rec pairs) body)
lukemaurer's avatar
lukemaurer committed
638 639
      = foldr (addSizeNSD . size_up_rhs)
              (size_up body `addSizeN` sum (map (size_up_alloc . fst) pairs))
640
              pairs
641

642
    size_up (Case e _ _ alts)
643 644 645 646 647
        | null alts
        = size_up e    -- case e of {} never returns, so take size of scrutinee

    size_up (Case e _ _ alts)
        -- Now alts is non-empty
648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671
        | Just v <- is_top_arg e -- We are scrutinising an argument variable
        = let
            alt_sizes = map size_up_alt alts

                  -- alts_size tries to compute a good discount for
                  -- the case when we are scrutinising an argument variable
            alts_size (SizeIs tot tot_disc tot_scrut)
                          -- Size of all alternatives
                      (SizeIs max _        _)
                          -- Size of biggest alternative
                  = SizeIs tot (unitBag (v, 20 + tot - max)
                      `unionBags` tot_disc) tot_scrut
                          -- If the variable is known, we produce a
                          -- discount that will take us back to 'max',
                          -- the size of the largest alternative The
                          -- 1+ is a little discount for reduced
                          -- allocation in the caller
                          --
                          -- Notice though, that we return tot_disc,
                          -- the total discount from all branches.  I
                          -- think that's right.

            alts_size tot_size _ = tot_size
          in
672 673
          alts_size (foldr1 addAltSize alt_sizes)  -- alts is non-empty
                    (foldr1 maxSize    alt_sizes)
674 675 676 677
                -- Good to inline if an arg is scrutinised, because
                -- that may eliminate allocation in the caller
                -- And it eliminates the case itself
        where
678 679 680 681
          is_top_arg (Var v) | v `elem` top_args = Just v
          is_top_arg (Cast e _) = is_top_arg e
          is_top_arg _ = Nothing

682

Simon Marlow's avatar
Simon Marlow committed
683
    size_up (Case e _ _ alts) = size_up e  `addSizeNSD`
684 685 686
                                foldr (addAltSize . size_up_alt) case_size alts
      where
          case_size
687
           | is_inline_scrut e, lengthAtMost alts 1 = sizeN (-10)
688 689 690 691 692 693 694 695 696 697 698 699 700 701
           | otherwise = sizeZero
                -- Normally we don't charge for the case itself, but
                -- we charge one per alternative (see size_up_alt,
                -- below) to account for the cost of the info table
                -- and comparisons.
                --
                -- However, in certain cases (see is_inline_scrut
                -- below), no code is generated for the case unless
                -- there are multiple alts.  In these cases we
                -- subtract one, making the first alt free.
                -- e.g. case x# +# y# of _ -> ...   should cost 1
                --      case touch# x# of _ -> ...  should cost 0
                -- (see #4978)
                --
702
                -- I would like to not have the "lengthAtMost alts 1"
703 704 705 706 707 708
                -- condition above, but without that some programs got worse
                -- (spectral/hartel/event and spectral/para).  I don't fully
                -- understand why. (SDM 24/5/11)

                -- unboxed variables, inline primops and unsafe foreign calls
                -- are all "inline" things:
709
          is_inline_scrut (Var v) = isUnliftedType (idType v)
710 711 712 713 714 715 716 717
          is_inline_scrut scrut
              | (Var f, _) <- collectArgs scrut
                = case idDetails f of
                    FCallId fc  -> not (isSafeForeignCall fc)
                    PrimOpId op -> not (primOpOutOfLine op)
                    _other      -> False
              | otherwise
                = False
718

lukemaurer's avatar
lukemaurer committed
719 720 721 722 723 724 725 726
    size_up_rhs (bndr, rhs)
      | Just join_arity <- isJoinId_maybe bndr
        -- Skip arguments to join point
      , (_bndrs, body) <- collectNBinders join_arity rhs
      = size_up body
      | otherwise
      = size_up rhs

727
    ------------
728
    -- size_up_app is used when there's ONE OR MORE value args
729
    size_up_app (App fun arg) args voids
730 731 732
        | isTyCoArg arg                  = size_up_app fun args voids
        | isRealWorldExpr arg            = size_up_app fun (arg:args) (voids + 1)
        | otherwise                      = size_up arg  `addSizeNSD`
733 734
                                           size_up_app fun (arg:args) voids
    size_up_app (Var fun)     args voids = size_up_call fun args voids
Peter Wortmann's avatar
Peter Wortmann committed
735
    size_up_app (Tick _ expr) args voids = size_up_app expr args voids
736 737 738 739 740 741
    size_up_app (Cast expr _) args voids = size_up_app expr args voids
    size_up_app other         args voids = size_up other `addSizeN`
                                           callSize (length args) voids
       -- if the lhs is not an App or a Var, or an invisible thing like a
       -- Tick or Cast, then we should charge for a complete call plus the
       -- size of the lhs itself.
742

743
    ------------
744 745
    size_up_call :: Id -> [CoreExpr] -> Int -> ExprSize
    size_up_call fun val_args voids
746
       = case idDetails fun of
747
           FCallId _        -> sizeN (callSize (length val_args) voids)
748 749
           DataConWorkId dc -> conSize    dc (length val_args)
           PrimOpId op      -> primOpSize op (length val_args)
750 751
           ClassOpId _      -> classOpSize dflags top_args val_args
           _                -> funSize dflags top_args fun (length val_args) voids
752

753
    ------------
754
    size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 10
755 756 757
        -- Don't charge for args, so that wrappers look cheap
        -- (See comments about wrappers with Case)
        --
Gabor Greif's avatar
Gabor Greif committed
758
        -- IMPORTANT: *do* charge 1 for the alternative, else we
759
        -- find that giant case nests are treated as practically free
760
        -- A good example is Foreign.C.Error.errnoToIOError
761

lukemaurer's avatar
lukemaurer committed
762 763 764 765 766
    ------------
    -- Cost to allocate binding with given binder
    size_up_alloc bndr
      |  isTyVar bndr                 -- Doesn't exist at runtime
      || isJoinId bndr                -- Not allocated at all
767
      || isUnliftedType (idType bndr) -- Doesn't live in heap
lukemaurer's avatar
lukemaurer committed
768 769 770 771
      = 0
      | otherwise
      = 10

772
    ------------
773 774
        -- These addSize things have to be here because
        -- I don't want to give them bOMB_OUT_SIZE as an argument
775
    addSizeN TooBig          _  = TooBig
776
    addSizeN (SizeIs n xs d) m  = mkSizeIs bOMB_OUT_SIZE (n + m) xs d
777

778
        -- addAltSize is used to add the sizes of case alternatives
779 780 781
    addAltSize TooBig            _      = TooBig
    addAltSize _                 TooBig = TooBig
    addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
782
        = mkSizeIs bOMB_OUT_SIZE (n1 + n2)
783
                                 (xs `unionBags` ys)
784
                                 (d1 + d2) -- Note [addAltSize result discounts]
785 786

        -- This variant ignores the result discount from its LEFT argument
787 788 789 790
        -- It's used when the second argument isn't part of the result
    addSizeNSD TooBig            _      = TooBig
    addSizeNSD _                 TooBig = TooBig
    addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2)
791
        = mkSizeIs bOMB_OUT_SIZE (n1 + n2)
792
                                 (xs `unionBags` ys)
793
                                 d2  -- Ignore d1
794 795 796 797

    isRealWorldId id = idType id `eqType` realWorldStatePrimTy

    -- an expression of type State# RealWorld must be a variable
Peter Wortmann's avatar
Peter Wortmann committed
798 799 800
    isRealWorldExpr (Var id)   = isRealWorldId id
    isRealWorldExpr (Tick _ e) = isRealWorldExpr e
    isRealWorldExpr _          = False
801

802 803 804
-- | Finds a nominal size of a string literal.
litSize :: Literal -> Int
-- Used by CoreUnfold.sizeExpr
805 806
litSize (LitNumber LitNumInteger _ _) = 100   -- Note [Size of literal integers]
litSize (LitNumber LitNumNatural _ _) = 100
Sylvain Henry's avatar
Sylvain Henry committed
807
litSize (LitString str) = 10 + 10 * ((BS.length str + 3) `div` 4)
808 809 810
        -- If size could be 0 then @f "x"@ might be too small
        -- [Sept03: make literal strings a bit bigger to avoid fruitless
        --  duplication of little strings]
811
litSize _other = 0    -- Must match size of nullary constructors
812 813
                      -- Key point: if  x |-> 4, then x must inline unconditionally
                      --            (eg via case binding)
814

815
classOpSize :: DynFlags -> [Id] -> [CoreExpr] -> ExprSize
816
-- See Note [Conlike is interesting]
817
classOpSize _ _ []
818
  = sizeZero
819
classOpSize dflags top_args (arg1 : other_args)
820
  = SizeIs size arg_discount 0
821
  where
822
    size = 20 + (10 * length other_args)
823 824 825 826
    -- If the class op is scrutinising a lambda bound dictionary then
    -- give it a discount, to encourage the inlining of this function
    -- The actual discount is rather arbitrarily chosen
    arg_discount = case arg1 of
827 828 829 830
                     Var dict | dict `elem` top_args
                              -> unitBag (dict, ufDictDiscount dflags)
                     _other   -> emptyBag

831 832 833 834 835 836
-- | The size of a function call
callSize
 :: Int  -- ^ number of value args
 -> Int  -- ^ number of value args that are void
 -> Int
callSize n_val_args voids = 10 * (1 + n_val_args - voids)
Simon Peyton Jones's avatar
Simon Peyton Jones committed
837 838 839
        -- The 1+ is for the function itself
        -- Add 1 for each non-trivial arg;
        -- the allocation cost, as in let(rec)
840

lukemaurer's avatar
lukemaurer committed
841 842 843 844 845 846 847 848 849 850 851
-- | The size of a jump to a join point
jumpSize
 :: Int  -- ^ number of value args
 -> Int  -- ^ number of value args that are void
 -> Int
jumpSize n_val_args voids = 2 * (1 + n_val_args - voids)
  -- A jump is 20% the size of a function call. Making jumps free reopens
  -- bug #6048, but making them any more expensive loses a 21% improvement in
  -- spectral/puzzle. TODO Perhaps adjusting the default threshold would be a
  -- better solution?

852
funSize :: DynFlags -> [Id] -> Id -> Int -> Int -> ExprSize
853 854
-- Size for functions that are not constructors or primops
-- Note [Function applications]
855
funSize dflags top_args fun n_val_args voids
856 857
  | fun `hasKey` buildIdKey   = buildSize
  | fun `hasKey` augmentIdKey = augmentSize
858
  | otherwise = SizeIs size arg_discount res_discount
859 860
  where
    some_val_args = n_val_args > 0
lukemaurer's avatar
lukemaurer committed
861
    is_join = isJoinId fun
862

lukemaurer's avatar
lukemaurer committed
863 864 865
    size | is_join              = jumpSize n_val_args voids
         | not some_val_args    = 0
         | otherwise            = callSize n_val_args voids
866

867
        --                  DISCOUNTS
868 869
        --  See Note [Function and non-function discounts]
    arg_discount | some_val_args && fun `elem` top_args
870 871 872 873
                 = unitBag (fun, ufFunAppDiscount dflags)
                 | otherwise = emptyBag
        -- If the function is an argument and is applied
        -- to some values, give it an arg-discount
874

875
    res_discount | idArity fun > n_val_args = ufFunAppDiscount dflags
876
                 | otherwise                = 0
877
        -- If the function is partially applied, show a result discount
Gabor Greif's avatar
Gabor Greif committed
878
-- XXX maybe behave like ConSize for eval'd variable
879

880 881
conSize :: DataCon -> Int -> ExprSize
conSize dc n_val_args
882
  | n_val_args == 0 = SizeIs 0 emptyBag 10    -- Like variables
883

884
-- See Note [Unboxed tuple size and result discount]
885
  | isUnboxedTupleCon dc = SizeIs 0 emptyBag (10 * (1 + n_val_args))
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
886

887
-- See Note [Constructor size and result discount]
888
  | otherwise = SizeIs 10 emptyBag (10 * (1 + n_val_args))
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
889

890 891
-- XXX still looks to large to me

Austin Seipp's avatar
Austin Seipp committed
892
{-
893 894 895 896 897 898 899 900 901 902 903 904 905 906 907
Note [Constructor size and result discount]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Treat a constructors application as size 10, regardless of how many
arguments it has; we are keen to expose them (and we charge separately
for their args).  We can't treat them as size zero, else we find that
(Just x) has size 0, which is the same as a lone variable; and hence
'v' will always be replaced by (Just x), where v is bound to Just x.

The "result discount" is applied if the result of the call is
scrutinised (say by a case).  For a constructor application that will
mean the constructor application will disappear, so we don't need to
charge it to the function.  So the discount should at least match the
cost of the constructor application, namely 10.  But to give a bit
of extra incentive we give a discount of 10*(1 + n_val_args).

908
Simon M tried a MUCH bigger discount: (10 * (10 + n_val_args)),
909
and said it was an "unambiguous win", but its terribly dangerous
Gabor Greif's avatar
Gabor Greif committed
910
because a function with many many case branches, each finishing with
911
a constructor, can have an arbitrarily large discount.  This led to
912
terrible code bloat: see #6099.
913 914 915

Note [Unboxed tuple size and result discount]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
916 917
However, unboxed tuples count as size zero. I found occasions where we had
        f x y z = case op# x y z of { s -> (# s, () #) }
918 919 920 921 922 923 924 925 926 927 928 929 930
and f wasn't getting inlined.

I tried giving unboxed tuples a *result discount* of zero (see the
commented-out line).  Why?  When returned as a result they do not
allocate, so maybe we don't want to charge so much for them If you
have a non-zero discount here, we find that workers often get inlined
back into wrappers, because it look like
    f x = case $wf x of (# a,b #) -> (a,b)
and we are keener because of the case.  However while this change
shrank binary sizes by 0.5% it also made spectral/boyer allocate 5%
more. All other changes were very small. So it's not a big deal but I
didn't adopt the idea.

931 932
Note [Function and non-function discounts]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
933 934 935 936 937
We want a discount if the function is applied. A good example is
monadic combinators with continuation arguments, where inlining is
quite important.

But we don't want a big discount when a function is called many times
938
(see the detailed comments with #6048) because if the function is
939 940
big it won't be inlined at its many call sites and no benefit results.
Indeed, we can get exponentially big inlinings this way; that is what
941
#6048 is about.
942

943 944 945 946 947 948 949 950 951
On the other hand, for data-valued arguments, if there are lots of
case expressions in the body, each one will get smaller if we apply
the function to a constructor application, so we *want* a big discount
if the argument is scrutinised by many case expressions.

Conclusion:
  - For functions, take the max of the discounts
  - For data values, take the sum of the discounts

952

953 954 955
Note [Literal integer size]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Literal integers *can* be big (mkInteger [...coefficients...]), but
956
need not be (S# n).  We just use an arbitrary big-ish constant here
957 958
so that, in particular, we don't inline top-level defns like
   n = S# 5
Gabor Greif's avatar
Gabor Greif committed
959
There's no point in doing so -- any optimisations will see the S#
960 961 962
through n's unfolding.  Nor will a big size inhibit unfoldings functions
that mention a literal Integer, because the float-out pass will float
all those constants to top level.
Austin Seipp's avatar
Austin Seipp committed
963
-}
964

965
primOpSize :: PrimOp -> Int -> ExprSize
966
primOpSize op n_val_args
967 968 969 970 971
 = if primOpOutOfLine op
      then sizeN (op_size + n_val_args)
      else sizeN op_size
 where
   op_size = primOpCodeSize op
972

973

974
buildSize :: ExprSize
975
buildSize = SizeIs 0 emptyBag 40
976 977
        -- We really want to inline applications of build
        -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
Gabor Greif's avatar
Gabor Greif committed
978
        -- Indeed, we should add a result_discount because build is
979 980 981
        -- very like a constructor.  We don't bother to check that the
        -- build is saturated (it usually is).  The "-2" discounts for the \c n,
        -- The "4" is rather arbitrary.
982

983
augmentSize :: ExprSize
984
augmentSize = SizeIs 0 emptyBag 40
985 986
        -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
        -- e plus ys. The -2 accounts for the \cn
987

988
-- When we return a lambda, give a discount if it's used (applied)
989
lamScrutDiscount :: DynFlags -> ExprSize -> ExprSize
990
lamScrutDiscount dflags (SizeIs n vs _) = SizeIs n vs (ufFunAppDiscount dflags)
991
lamScrutDiscount _      TooBig          = TooBig
992

Austin Seipp's avatar
Austin Seipp committed
993
{-
994 995 996 997 998
Note [addAltSize result discounts]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When adding the size of alternatives, we *add* the result discounts
too, rather than take the *maximum*.  For a multi-branch case, this
gives a discount for each branch that returns a constructor, making us
999
keener to inline.  I did try using 'max' instead, but it makes nofib
1000 1001 1002
'rewrite' and 'puzzle' allocate significantly more, and didn't make
binary sizes shrink significantly either.

1003 1004
Note [Discounts and thresholds]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1005 1006
Constants for discounts and thesholds are defined in main/DynFlags,
all of form ufXxxx.   They are:
1007

1008
ufCreationThreshold
1009 1010 1011
     At a definition site, if the unfolding is bigger than this, we
     may discard it altogether

1012
ufUseThreshold
1013 1014 1015
     At a call site, if the unfolding, less discounts, is smaller than
     this, then it's small enough inline

1016
ufKeenessFactor
1017
     Factor by which the discounts are multiplied before
1018 1019
     subtracting from size

1020
ufDictDiscount
1021 1022 1023 1024
     The discount for each occurrence of a dictionary argument
     as an argument of a class method.  Should be pretty small
     else big functions may get inlined

1025
ufFunAppDiscount
1026 1027 1028
     Discount for a function argument that is applied.  Quite
     large, because if we inline we avoid the higher-order call.

1029
ufDearOp
1030 1031
     The size of a foreign call or not-dupable PrimOp

1032 1033 1034 1035 1036
ufVeryAggressive
     If True, the compiler ignores all the thresholds and inlines very
     aggressively. It still adheres to arity, simplifier phase control and
     loop breakers.

1037

1038 1039 1040 1041
Note [Function applications]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In a function application (f a b)

1042
  - If 'f' is an argument to the function being analysed,
1043 1044 1045 1046
    and there's at least one value arg, record a FunAppDiscount for f

  - If the application if a PAP (arity > 2 in this example)
    record a *result* discount (because inlining
1047
    with "extra" args in the call may mean that we now
1048 1049 1050
    get a saturated application)

Code for manipulating sizes
Austin Seipp's avatar
Austin Seipp committed
1051
-}
1052

Gabor Greif's avatar
Gabor Greif committed
1053
-- | The size of a candidate expression for unfolding
1054 1055 1056 1057 1058 1059 1060 1061 1062
data ExprSize
    = TooBig
    | SizeIs { _es_size_is  :: {-# UNPACK #-} !Int -- ^ Size found
             , _es_args     :: !(Bag (Id,Int))
               -- ^ Arguments cased herein, and discount for each such
             , _es_discount :: {-# UNPACK #-} !Int
               -- ^ Size to subtract if result is scrutinised by a case
               -- expression
             }
1063 1064

instance Outputable ExprSize where
1065
  ppr TooBig         = text "TooBig"
1066
  ppr (SizeIs a _ c) = brackets (int a <+> int c)
1067 1068 1069

-- subtract the discount before deciding whether to bale out. eg. we
-- want to inline a large constructor application into a selector:
1070 1071
--      tup = (a_1, ..., a_99)
--      x = case tup of ...
1072
--
1073 1074 1075
mkSizeIs :: Int -> Int -> Bag (Id, Int) -> Int -> ExprSize
mkSizeIs max n xs d | (n - d) > max = TooBig
                    | otherwise     = SizeIs n xs d
1076

1077
maxSize :: ExprSize -> ExprSize -> ExprSize
1078 1079
maxSize TooBig         _                                  = TooBig
maxSize _              TooBig                             = TooBig
1080
maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 > n2   = s1
1081
                                              | otherwise = s2
1082