CoreUnfold.hs 59.4 KB
Newer Older
Austin Seipp's avatar
Austin Seipp committed
1 2 3 4
{-
(c) The University of Glasgow 2006
(c) The AQUA Project, Glasgow University, 1994-1998

Simon Marlow's avatar
Simon Marlow committed
5 6

Core-syntax unfoldings
7 8 9 10 11 12 13 14 15

Unfoldings (which can travel across module boundaries) are in Core
syntax (namely @CoreExpr@s).

The type @Unfolding@ sits ``above'' simply-Core-expressions
unfoldings, capturing ``higher-level'' things we know about a binding,
usually things that the simplifier found out (e.g., ``it's a
literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
find, unsurprisingly, a Core expression.
Austin Seipp's avatar
Austin Seipp committed
16
-}
17

18
{-# LANGUAGE CPP #-}
Ian Lynagh's avatar
Ian Lynagh committed
19

20
module CoreUnfold (
21
        Unfolding, UnfoldingGuidance,   -- Abstract types
22

23
        noUnfolding, mkImplicitUnfolding,
24
        mkUnfolding, mkCoreUnfolding,
25
        mkTopUnfolding, mkSimpleUnfolding, mkWorkerUnfolding,
26 27
        mkInlineUnfolding, mkInlineUnfoldingWithArity,
        mkInlinableUnfolding, mkWwInlineRule,
28
        mkCompulsoryUnfolding, mkDFunUnfolding,
Simon Peyton Jones's avatar
Simon Peyton Jones committed
29
        specUnfolding,
30

31
        ArgSummary(..),
32

33 34
        couldBeSmallEnoughToInline, inlineBoringOk,
        certainlyWillInline, smallEnoughToInline,
35

36
        callSiteInline, CallCtxt(..),
37

38 39
        -- Reexport from CoreSubst (it only live there so it can be used
        -- by the Very Simple Optimiser)
40
        exprIsConApp_maybe, exprIsLiteral_maybe
41 42
    ) where

43 44
#include "HsVersions.h"

Simon Marlow's avatar
Simon Marlow committed
45
import DynFlags
46
import CoreSyn
47
import PprCore          ()      -- Instances
48
import OccurAnal        ( occurAnalyseExpr )
49
import CoreSubst hiding( substTy )
50
import CoreArity       ( manifestArity )
Simon Marlow's avatar
Simon Marlow committed
51 52 53 54 55 56
import CoreUtils
import Id
import DataCon
import Literal
import PrimOp
import IdInfo
57
import BasicTypes       ( Arity )
58
import Type
Simon Marlow's avatar
Simon Marlow committed
59
import PrelNames
60
import TysPrim          ( realWorldStatePrimTy )
61
import Bag
62
import Util
63
import Outputable
64 65
import ForeignCall

66
import qualified Data.ByteString as BS
67

Austin Seipp's avatar
Austin Seipp committed
68 69 70
{-
************************************************************************
*                                                                      *
71
\subsection{Making unfoldings}
Austin Seipp's avatar
Austin Seipp committed
72 73 74
*                                                                      *
************************************************************************
-}
75

76
mkTopUnfolding :: DynFlags -> Bool -> CoreExpr -> Unfolding
77 78
mkTopUnfolding dflags is_bottoming rhs
  = mkUnfolding dflags InlineRhs True is_bottoming rhs
79

80
mkImplicitUnfolding :: DynFlags -> CoreExpr -> Unfolding
81
-- For implicit Ids, do a tiny bit of optimising first
82
mkImplicitUnfolding dflags expr
83
  = mkTopUnfolding dflags False (simpleOptExpr expr)
Simon Marlow's avatar
Simon Marlow committed
84

85 86 87 88 89
-- Note [Top-level flag on inline rules]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Slight hack: note that mk_inline_rules conservatively sets the
-- top-level flag to True.  It gets set more accurately by the simplifier
-- Simplify.simplUnfolding.
90

91
mkSimpleUnfolding :: DynFlags -> CoreExpr -> Unfolding
92 93
mkSimpleUnfolding dflags rhs
  = mkUnfolding dflags InlineRhs False False rhs
94

95
mkDFunUnfolding :: [Var] -> DataCon -> [CoreExpr] -> Unfolding
96 97 98 99 100
mkDFunUnfolding bndrs con ops
  = DFunUnfolding { df_bndrs = bndrs
                  , df_con = con
                  , df_args = map occurAnalyseExpr ops }
                  -- See Note [Occurrrence analysis of unfoldings]
Simon Marlow's avatar
Simon Marlow committed
101

102 103
mkWwInlineRule :: CoreExpr -> Arity -> Unfolding
mkWwInlineRule expr arity
104
  = mkCoreUnfolding InlineStable True
Simon Peyton Jones's avatar
Simon Peyton Jones committed
105 106 107
                   (simpleOptExpr expr)
                   (UnfWhen { ug_arity = arity, ug_unsat_ok = unSaturatedOk
                            , ug_boring_ok = boringCxtNotOk })
108

twanvl's avatar
twanvl committed
109
mkCompulsoryUnfolding :: CoreExpr -> Unfolding
110
mkCompulsoryUnfolding expr         -- Used for things that absolutely must be unfolded
111
  = mkCoreUnfolding InlineCompulsory True
Simon Peyton Jones's avatar
Simon Peyton Jones committed
112 113 114
                    (simpleOptExpr expr)
                    (UnfWhen { ug_arity = 0    -- Arity of unfolding doesn't matter
                             , ug_unsat_ok = unSaturatedOk, ug_boring_ok = boringCxtOk })
115

116 117 118 119 120 121 122 123 124
mkWorkerUnfolding :: DynFlags -> (CoreExpr -> CoreExpr) -> Unfolding -> Unfolding
-- See Note [Worker-wrapper for INLINABLE functions] in WorkWrap
mkWorkerUnfolding dflags work_fn
                  (CoreUnfolding { uf_src = src, uf_tmpl = tmpl
                                 , uf_is_top = top_lvl })
  | isStableSource src
  = mkCoreUnfolding src top_lvl new_tmpl guidance
  where
    new_tmpl = simpleOptExpr (work_fn tmpl)
125
    guidance = calcUnfoldingGuidance dflags False new_tmpl
126 127 128

mkWorkerUnfolding _ _ _ = noUnfolding

129 130 131 132 133 134
-- | Make an unfolding that may be used unsaturated
-- (ug_unsat_ok = unSaturatedOk) and that is reported as having its
-- manifest arity (the number of outer lambdas applications will
-- resolve before doing any work).
mkInlineUnfolding :: CoreExpr -> Unfolding
mkInlineUnfolding expr
135
  = mkCoreUnfolding InlineStable
136
                    True         -- Note [Top-level flag on inline rules]
Simon Peyton Jones's avatar
Simon Peyton Jones committed
137
                    expr' guide
138 139
  where
    expr' = simpleOptExpr expr
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
    guide = UnfWhen { ug_arity = manifestArity expr'
                    , ug_unsat_ok = unSaturatedOk
                    , ug_boring_ok = boring_ok }
    boring_ok = inlineBoringOk expr'

-- | Make an unfolding that will be used once the RHS has been saturated
-- to the given arity.
mkInlineUnfoldingWithArity :: Arity -> CoreExpr -> Unfolding
mkInlineUnfoldingWithArity arity expr
  = mkCoreUnfolding InlineStable
                    True         -- Note [Top-level flag on inline rules]
                    expr' guide
  where
    expr' = simpleOptExpr expr
    guide = UnfWhen { ug_arity = arity
                    , ug_unsat_ok = needSaturated
                    , ug_boring_ok = boring_ok }
157
    boring_ok = inlineBoringOk expr'
158

159 160
mkInlinableUnfolding :: DynFlags -> CoreExpr -> Unfolding
mkInlinableUnfolding dflags expr
161
  = mkUnfolding dflags InlineStable False False expr'
162
  where
163
    expr' = simpleOptExpr expr
Simon Peyton Jones's avatar
Simon Peyton Jones committed
164

Simon Peyton Jones's avatar
Simon Peyton Jones committed
165
specUnfolding :: [Var] -> (CoreExpr -> CoreExpr) -> Arity -> Unfolding -> Unfolding
Simon Peyton Jones's avatar
Simon Peyton Jones committed
166
-- See Note [Specialising unfoldings]
Simon Peyton Jones's avatar
Simon Peyton Jones committed
167 168
-- specUnfolding spec_bndrs spec_app arity_decrease unf
--   = \spec_bndrs. spec_app( unf )
169
--
Simon Peyton Jones's avatar
Simon Peyton Jones committed
170 171 172 173 174 175 176 177 178 179 180
specUnfolding spec_bndrs spec_app arity_decrease
              df@(DFunUnfolding { df_bndrs = old_bndrs, df_con = con, df_args = args })
  = ASSERT2( arity_decrease == count isId old_bndrs - count isId spec_bndrs, ppr df )
    mkDFunUnfolding spec_bndrs con (map spec_arg args)
      -- There is a hard-to-check assumption here that the spec_app has
      -- enough applications to exactly saturate the old_bndrs
      -- For DFunUnfoldings we transform
      --       \old_bndrs. MkD <op1> ... <opn>
      -- to
      --       \new_bndrs. MkD (spec_app(\old_bndrs. <op1>)) ... ditto <opn>
      -- The ASSERT checks the value part of that
Simon Peyton Jones's avatar
Simon Peyton Jones committed
181
  where
Simon Peyton Jones's avatar
Simon Peyton Jones committed
182 183 184
    spec_arg arg = simpleOptExpr (spec_app (mkLams old_bndrs arg))
                   -- The beta-redexes created by spec_app will be
                   -- simplified away by simplOptExpr
Simon Peyton Jones's avatar
Simon Peyton Jones committed
185

Simon Peyton Jones's avatar
Simon Peyton Jones committed
186
specUnfolding spec_bndrs spec_app arity_decrease
Simon Peyton Jones's avatar
Simon Peyton Jones committed
187 188 189 190
              (CoreUnfolding { uf_src = src, uf_tmpl = tmpl
                             , uf_is_top = top_lvl
                             , uf_guidance = old_guidance })
 | isStableSource src  -- See Note [Specialising unfoldings]
Simon Peyton Jones's avatar
Simon Peyton Jones committed
191 192
 , UnfWhen { ug_arity     = old_arity
           , ug_unsat_ok  = unsat_ok
Simon Peyton Jones's avatar
Simon Peyton Jones committed
193
           , ug_boring_ok = boring_ok } <- old_guidance
Simon Peyton Jones's avatar
Simon Peyton Jones committed
194 195
 = let guidance = UnfWhen { ug_arity     = old_arity - arity_decrease
                          , ug_unsat_ok  = unsat_ok
Simon Peyton Jones's avatar
Simon Peyton Jones committed
196
                          , ug_boring_ok = boring_ok }
Simon Peyton Jones's avatar
Simon Peyton Jones committed
197 198 199
       new_tmpl = simpleOptExpr (mkLams spec_bndrs (spec_app tmpl))
                   -- The beta-redexes created by spec_app will be
                   -- simplified away by simplOptExpr
Simon Peyton Jones's avatar
Simon Peyton Jones committed
200 201 202

   in mkCoreUnfolding src top_lvl new_tmpl guidance

Simon Peyton Jones's avatar
Simon Peyton Jones committed
203
specUnfolding _ _ _ _ = noUnfolding
Simon Peyton Jones's avatar
Simon Peyton Jones committed
204

Simon Peyton Jones's avatar
Simon Peyton Jones committed
205 206
{- Note [Specialising unfoldings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Simon Peyton Jones's avatar
Simon Peyton Jones committed
207 208 209 210 211 212 213 214 215 216
When we specialise a function for some given type-class arguments, we use
specUnfolding to specialise its unfolding.  Some important points:

* If the original function has a DFunUnfolding, the specialised one
  must do so too!  Otherwise we lose the magic rules that make it
  interact with ClassOps

* There is a bit of hack for INLINABLE functions:
     f :: Ord a => ....
     f = <big-rhs>
217
     {- INLINABLE f #-}
Simon Peyton Jones's avatar
Simon Peyton Jones committed
218
  Now if we specialise f, should the specialised version still have
219
  an INLINABLE pragma?  If it does, we'll capture a specialised copy
Simon Peyton Jones's avatar
Simon Peyton Jones committed
220 221 222 223 224
  of <big-rhs> as its unfolding, and that probaby won't inline.  But
  if we don't, the specialised version of <big-rhs> might be small
  enough to inline at a call site. This happens with Control.Monad.liftM3,
  and can cause a lot more allocation as a result (nofib n-body shows this).

225
  Moreover, keeping the INLINABLE thing isn't much help, because
Simon Peyton Jones's avatar
Simon Peyton Jones committed
226 227 228 229 230 231
  the specialised function (probaby) isn't overloaded any more.

  Conclusion: drop the INLINEALE pragma.  In practice what this means is:
     if a stable unfolding has UnfoldingGuidance of UnfWhen,
        we keep it (so the specialised thing too will always inline)
     if a stable unfolding has UnfoldingGuidance of UnfIfGoodArgs
232
        (which arises from INLINABLE), we discard it
Austin Seipp's avatar
Austin Seipp committed
233
-}
Simon Peyton Jones's avatar
Simon Peyton Jones committed
234

235
mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr
Simon Peyton Jones's avatar
Simon Peyton Jones committed
236
                -> UnfoldingGuidance -> Unfolding
237
-- Occurrence-analyses the expression before capturing it
Simon Peyton Jones's avatar
Simon Peyton Jones committed
238
mkCoreUnfolding src top_lvl expr guidance
239
  = CoreUnfolding { uf_tmpl         = occurAnalyseExpr expr,
240
                      -- See Note [Occurrrence analysis of unfoldings]
241 242 243
                    uf_src          = src,
                    uf_is_top       = top_lvl,
                    uf_is_value     = exprIsHNF        expr,
244
                    uf_is_conlike   = exprIsConLike    expr,
245 246 247
                    uf_is_work_free = exprIsWorkFree   expr,
                    uf_expandable   = exprIsExpandable expr,
                    uf_guidance     = guidance }
248

249 250 251 252 253
mkUnfolding :: DynFlags -> UnfoldingSource
            -> Bool       -- Is top-level
            -> Bool       -- Definitely a bottoming binding
                          -- (only relevant for top-level bindings)
            -> CoreExpr
254
            -> Unfolding
255 256
-- Calculates unfolding guidance
-- Occurrence-analyses the expression before capturing it
257
mkUnfolding dflags src is_top_lvl is_bottoming expr
258
  = CoreUnfolding { uf_tmpl         = occurAnalyseExpr expr,
259
                      -- See Note [Occurrrence analysis of unfoldings]
260
                    uf_src          = src,
261
                    uf_is_top       = is_top_lvl,
262
                    uf_is_value     = exprIsHNF        expr,
263
                    uf_is_conlike   = exprIsConLike    expr,
264 265 266
                    uf_expandable   = exprIsExpandable expr,
                    uf_is_work_free = exprIsWorkFree   expr,
                    uf_guidance     = guidance }
267
  where
268 269
    is_top_bottoming = is_top_lvl && is_bottoming
    guidance         = calcUnfoldingGuidance dflags is_top_bottoming expr
270
        -- NB: *not* (calcUnfoldingGuidance (occurAnalyseExpr expr))!
271
        -- See Note [Calculate unfolding guidance on the non-occ-anal'd expression]
272

Austin Seipp's avatar
Austin Seipp committed
273
{-
274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
Note [Occurrence analysis of unfoldings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We do occurrence-analysis of unfoldings once and for all, when the
unfolding is built, rather than each time we inline them.

But given this decision it's vital that we do
*always* do it.  Consider this unfolding
    \x -> letrec { f = ...g...; g* = f } in body
where g* is (for some strange reason) the loop breaker.  If we don't
occ-anal it when reading it in, we won't mark g as a loop breaker, and
we may inline g entirely in body, dropping its binding, and leaving
the occurrence in f out of scope. This happened in Trac #8892, where
the unfolding in question was a DFun unfolding.

But more generally, the simplifier is designed on the
basis that it is looking at occurrence-analysed expressions, so better
ensure that they acutally are.

292 293 294 295 296 297 298 299 300 301
Note [Calculate unfolding guidance on the non-occ-anal'd expression]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Notice that we give the non-occur-analysed expression to
calcUnfoldingGuidance.  In some ways it'd be better to occur-analyse
first; for example, sometimes during simplification, there's a large
let-bound thing which has been substituted, and so is now dead; so
'expr' contains two copies of the thing while the occurrence-analysed
expression doesn't.

Nevertheless, we *don't* and *must not* occ-analyse before computing
302
the size because
303 304 305 306

a) The size computation bales out after a while, whereas occurrence
   analysis does not.

307 308
b) Residency increases sharply if you occ-anal first.  I'm not
   100% sure why, but it's a large effect.  Compiling Cabal went
309 310 311 312 313 314 315
   from residency of 534M to over 800M with this one change.

This can occasionally mean that the guidance is very pessimistic;
it gets fixed up next round.  And it should be rare, because large
let-bound things that are dead are usually caught by preInlineUnconditionally


Austin Seipp's avatar
Austin Seipp committed
316 317
************************************************************************
*                                                                      *
318
\subsection{The UnfoldingGuidance type}
Austin Seipp's avatar
Austin Seipp committed
319 320 321
*                                                                      *
************************************************************************
-}
322

323 324
inlineBoringOk :: CoreExpr -> Bool
-- See Note [INLINE for small functions]
325
-- True => the result of inlining the expression is
326 327 328 329 330 331 332 333 334 335 336 337
--         no bigger than the expression itself
--     eg      (\x y -> f y x)
-- This is a quick and dirty version. It doesn't attempt
-- to deal with  (\x y z -> x (y z))
-- The really important one is (x `cast` c)
inlineBoringOk e
  = go 0 e
  where
    go :: Int -> CoreExpr -> Bool
    go credit (Lam x e) | isId x           = go (credit+1) e
                        | otherwise        = go credit e
    go credit (App f (Type {}))            = go credit f
338
    go credit (App f a) | credit > 0
339
                        , exprIsTrivial a  = go (credit-1) f
340
    go credit (Tick _ e)                 = go credit e -- dubious
341 342 343
    go credit (Cast e _)                   = go credit e
    go _      (Var {})                     = boringCxtOk
    go _      _                            = boringCxtNotOk
344

345
calcUnfoldingGuidance
346
        :: DynFlags
347 348
        -> Bool          -- Definitely a top-level, bottoming binding
        -> CoreExpr      -- Expression to look at
Simon Peyton Jones's avatar
Simon Peyton Jones committed
349
        -> UnfoldingGuidance
350
calcUnfoldingGuidance dflags is_top_bottoming (Tick t expr)
Peter Wortmann's avatar
Peter Wortmann committed
351
  | not (tickishIsCode t)  -- non-code ticks don't matter for unfolding
352 353
  = calcUnfoldingGuidance dflags is_top_bottoming expr
calcUnfoldingGuidance dflags is_top_bottoming expr
354
  = case sizeExpr dflags bOMB_OUT_SIZE val_bndrs body of
Simon Peyton Jones's avatar
Simon Peyton Jones committed
355 356
      TooBig -> UnfNever
      SizeIs size cased_bndrs scrut_discount
357
        | uncondInline expr n_val_bndrs size
Simon Peyton Jones's avatar
Simon Peyton Jones committed
358 359 360
        -> UnfWhen { ug_unsat_ok = unSaturatedOk
                   , ug_boring_ok =  boringCxtOk
                   , ug_arity = n_val_bndrs }   -- Note [INLINE for small functions]
361 362 363 364

        | is_top_bottoming
        -> UnfNever   -- See Note [Do not inline top-level bottoming functions]

Simon Peyton Jones's avatar
Simon Peyton Jones committed
365 366
        | otherwise
        -> UnfIfGoodArgs { ug_args  = map (mk_discount cased_bndrs) val_bndrs
367 368
                         , ug_size  = size
                         , ug_res   = scrut_discount }
Simon Peyton Jones's avatar
Simon Peyton Jones committed
369 370 371 372 373 374 375 376 377 378

  where
    (bndrs, body) = collectBinders expr
    bOMB_OUT_SIZE = ufCreationThreshold dflags
           -- Bomb out if size gets bigger than this
    val_bndrs   = filter isId bndrs
    n_val_bndrs = length val_bndrs

    mk_discount :: Bag (Id,Int) -> Id -> Int
    mk_discount cbs bndr = foldlBag combine 0 cbs
379
           where
Simon Peyton Jones's avatar
Simon Peyton Jones committed
380
             combine acc (bndr', disc)
381 382
               | bndr == bndr' = acc `plus_disc` disc
               | otherwise     = acc
Simon Peyton Jones's avatar
Simon Peyton Jones committed
383

384 385 386 387
             plus_disc :: Int -> Int -> Int
             plus_disc | isFunTy (idType bndr) = max
                       | otherwise             = (+)
             -- See Note [Function and non-function discounts]
388

Austin Seipp's avatar
Austin Seipp committed
389
{-
390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407
Note [Computing the size of an expression]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The basic idea of sizeExpr is obvious enough: count nodes.  But getting the
heuristics right has taken a long time.  Here's the basic strategy:

    * Variables, literals: 0
      (Exception for string literals, see litSize.)

    * Function applications (f e1 .. en): 1 + #value args

    * Constructor applications: 1, regardless of #args

    * Let(rec): 1 + size of components

    * Note, cast: 0

Examples

408
  Size  Term
409
  --------------
410 411
    0     42#
    0     x
412
    0     True
413 414 415
    2     f x
    1     Just x
    4     f (g x)
416 417

Notice that 'x' counts 0, while (f x) counts 2.  That's deliberate: there's
418
a function call to account for.  Notice also that constructor applications
419 420
are very cheap, because exposing them to a caller is so valuable.

421 422 423 424
[25/5/11] All sizes are now multiplied by 10, except for primops
(which have sizes like 1 or 4.  This makes primops look fantastically
cheap, and seems to be almost unversally beneficial.  Done partly as a
result of #4978.
425 426 427

Note [Do not inline top-level bottoming functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
428
The FloatOut pass has gone to some trouble to float out calls to 'error'
429 430 431 432
and similar friends.  See Note [Bottoming floats] in SetLevels.
Do not re-inline them!  But we *do* still inline if they are very small
(the uncondInline stuff).

433 434
Note [INLINE for small functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
435
Consider        {-# INLINE f #-}
436 437 438 439 440 441 442 443 444
                f x = Just x
                g y = f y
Then f's RHS is no larger than its LHS, so we should inline it into
even the most boring context.  In general, f the function is
sufficiently small that its body is as small as the call itself, the
inline unconditionally, regardless of how boring the context is.

Things to note:

445 446
(1) We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
    than the thing it's replacing.  Notice that
447 448 449 450 451
      (f x) --> (g 3)             -- YES, unconditionally
      (f x) --> x : []            -- YES, *even though* there are two
                                  --      arguments to the cons
      x     --> g 3               -- NO
      x     --> Just v            -- NO
452

453 454 455 456 457 458 459 460 461 462 463
    It's very important not to unconditionally replace a variable by
    a non-atomic term.

(2) We do this even if the thing isn't saturated, else we end up with the
    silly situation that
       f x y = x
       ...map (f 3)...
    doesn't inline.  Even in a boring context, inlining without being
    saturated will give a lambda instead of a PAP, and will be more
    efficient at runtime.

Simon Peyton Jones's avatar
Simon Peyton Jones committed
464
(3) However, when the function's arity > 0, we do insist that it
465 466 467 468 469 470 471 472 473 474 475 476 477 478 479
    has at least one value argument at the call site.  (This check is
    made in the UnfWhen case of callSiteInline.) Otherwise we find this:
         f = /\a \x:a. x
         d = /\b. MkD (f b)
    If we inline f here we get
         d = /\b. MkD (\x:b. x)
    and then prepareRhs floats out the argument, abstracting the type
    variables, so we end up with the original again!

(4) We must be much more cautious about arity-zero things. Consider
       let x = y +# z in ...
    In *size* terms primops look very small, because the generate a
    single instruction, but we do not want to unconditionally replace
    every occurrence of x with (y +# z).  So we only do the
    unconditional-inline thing for *trivial* expressions.
Simon Peyton Jones's avatar
Simon Peyton Jones committed
480

481 482 483
    NB: you might think that PostInlineUnconditionally would do this
    but it doesn't fire for top-level things; see SimplUtils
    Note [Top level and postInlineUnconditionally]
Austin Seipp's avatar
Austin Seipp committed
484
-}
485

486
uncondInline :: CoreExpr -> Arity -> Int -> Bool
487 488
-- Inline unconditionally if there no size increase
-- Size of call is arity (+1 for the function)
489
-- See Note [INLINE for small functions]
490
uncondInline rhs arity size
491 492
  | arity > 0 = size <= 10 * (arity + 1) -- See Note [INLINE for small functions] (1)
  | otherwise = exprIsTrivial rhs        -- See Note [INLINE for small functions] (4)
493

494
sizeExpr :: DynFlags
495
         -> Int             -- Bomb out if it gets bigger than this
496 497 498 499
         -> [Id]            -- Arguments; we're interested in which of these
                            -- get case'd
         -> CoreExpr
         -> ExprSize
500

501 502
-- Note [Computing the size of an expression]

503
sizeExpr dflags bOMB_OUT_SIZE top_args expr
504 505
  = size_up expr
  where
506
    size_up (Cast e _) = size_up e
507
    size_up (Tick _ e) = size_up e
508
    size_up (Type _)   = sizeZero           -- Types cost nothing
509
    size_up (Coercion _) = sizeZero
510
    size_up (Lit lit)  = sizeN (litSize lit)
511 512 513 514
    size_up (Var f) | isRealWorldId f = sizeZero
                      -- Make sure we get constructor discounts even
                      -- on nullary constructors
                    | otherwise       = size_up_call f [] 0
Simon Marlow's avatar
Simon Marlow committed
515

516 517 518 519
    size_up (App fun arg)
      | isTyCoArg arg = size_up fun
      | otherwise     = size_up arg  `addSizeNSD`
                        size_up_app fun [arg] (if isRealWorldExpr arg then 1 else 0)
520

521 522 523
    size_up (Lam b e)
      | isId b && not (isRealWorldId b) = lamScrutDiscount dflags (size_up e `addSizeN` 10)
      | otherwise = size_up e
524 525

    size_up (Let (NonRec binder rhs) body)
526 527
      = size_up rhs             `addSizeNSD`
        size_up body            `addSizeN`
528
        (if isUnliftedType (idType binder) then 0 else 10)
529 530
                -- For the allocation
                -- If the binder has an unlifted type there is no allocation
531 532

    size_up (Let (Rec pairs) body)
533
      = foldr (addSizeNSD . size_up . snd)
534
              (size_up body `addSizeN` (10 * length pairs))     -- (length pairs) for the allocation
535
              pairs
536

Simon Marlow's avatar
Simon Marlow committed
537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562
    size_up (Case e _ _ alts)
        | Just v <- is_top_arg e -- We are scrutinising an argument variable
        = let
            alt_sizes = map size_up_alt alts

                  -- alts_size tries to compute a good discount for
                  -- the case when we are scrutinising an argument variable
            alts_size (SizeIs tot tot_disc tot_scrut)
                          -- Size of all alternatives
                      (SizeIs max _        _)
                          -- Size of biggest alternative
                  = SizeIs tot (unitBag (v, 20 + tot - max)
                      `unionBags` tot_disc) tot_scrut
                          -- If the variable is known, we produce a
                          -- discount that will take us back to 'max',
                          -- the size of the largest alternative The
                          -- 1+ is a little discount for reduced
                          -- allocation in the caller
                          --
                          -- Notice though, that we return tot_disc,
                          -- the total discount from all branches.  I
                          -- think that's right.

            alts_size tot_size _ = tot_size
          in
          alts_size (foldr addAltSize sizeZero alt_sizes)
563 564 565 566 567
                    (foldr maxSize    sizeZero alt_sizes)
                -- Good to inline if an arg is scrutinised, because
                -- that may eliminate allocation in the caller
                -- And it eliminates the case itself
        where
Simon Marlow's avatar
Simon Marlow committed
568 569 570 571
          is_top_arg (Var v) | v `elem` top_args = Just v
          is_top_arg (Cast e _) = is_top_arg e
          is_top_arg _ = Nothing

572

Simon Marlow's avatar
Simon Marlow committed
573
    size_up (Case e _ _ alts) = size_up e  `addSizeNSD`
574 575 576
                                foldr (addAltSize . size_up_alt) case_size alts
      where
          case_size
577
           | is_inline_scrut e, not (lengthExceeds alts 1)  = sizeN (-10)
578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598
           | otherwise = sizeZero
                -- Normally we don't charge for the case itself, but
                -- we charge one per alternative (see size_up_alt,
                -- below) to account for the cost of the info table
                -- and comparisons.
                --
                -- However, in certain cases (see is_inline_scrut
                -- below), no code is generated for the case unless
                -- there are multiple alts.  In these cases we
                -- subtract one, making the first alt free.
                -- e.g. case x# +# y# of _ -> ...   should cost 1
                --      case touch# x# of _ -> ...  should cost 0
                -- (see #4978)
                --
                -- I would like to not have the "not (lengthExceeds alts 1)"
                -- condition above, but without that some programs got worse
                -- (spectral/hartel/event and spectral/para).  I don't fully
                -- understand why. (SDM 24/5/11)

                -- unboxed variables, inline primops and unsafe foreign calls
                -- are all "inline" things:
599
          is_inline_scrut (Var v) = isUnliftedType (idType v)
600 601 602 603 604 605 606 607
          is_inline_scrut scrut
              | (Var f, _) <- collectArgs scrut
                = case idDetails f of
                    FCallId fc  -> not (isSafeForeignCall fc)
                    PrimOpId op -> not (primOpOutOfLine op)
                    _other      -> False
              | otherwise
                = False
608

609
    ------------
610
    -- size_up_app is used when there's ONE OR MORE value args
611
    size_up_app (App fun arg) args voids
612 613 614
        | isTyCoArg arg                  = size_up_app fun args voids
        | isRealWorldExpr arg            = size_up_app fun (arg:args) (voids + 1)
        | otherwise                      = size_up arg  `addSizeNSD`
615 616
                                           size_up_app fun (arg:args) voids
    size_up_app (Var fun)     args voids = size_up_call fun args voids
Peter Wortmann's avatar
Peter Wortmann committed
617
    size_up_app (Tick _ expr) args voids = size_up_app expr args voids
Simon Marlow's avatar
Simon Marlow committed
618 619 620 621 622 623
    size_up_app (Cast expr _) args voids = size_up_app expr args voids
    size_up_app other         args voids = size_up other `addSizeN`
                                           callSize (length args) voids
       -- if the lhs is not an App or a Var, or an invisible thing like a
       -- Tick or Cast, then we should charge for a complete call plus the
       -- size of the lhs itself.
624

625
    ------------
626 627
    size_up_call :: Id -> [CoreExpr] -> Int -> ExprSize
    size_up_call fun val_args voids
628
       = case idDetails fun of
Simon Marlow's avatar
Simon Marlow committed
629
           FCallId _        -> sizeN (callSize (length val_args) voids)
630 631
           DataConWorkId dc -> conSize    dc (length val_args)
           PrimOpId op      -> primOpSize op (length val_args)
632 633
           ClassOpId _      -> classOpSize dflags top_args val_args
           _                -> funSize dflags top_args fun (length val_args) voids
634

635
    ------------
636
    size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 10
637 638 639 640 641 642
        -- Don't charge for args, so that wrappers look cheap
        -- (See comments about wrappers with Case)
        --
        -- IMPORATANT: *do* charge 1 for the alternative, else we
        -- find that giant case nests are treated as practically free
        -- A good example is Foreign.C.Error.errrnoToIOError
643 644

    ------------
645 646
        -- These addSize things have to be here because
        -- I don't want to give them bOMB_OUT_SIZE as an argument
647
    addSizeN TooBig          _  = TooBig
648
    addSizeN (SizeIs n xs d) m  = mkSizeIs bOMB_OUT_SIZE (n + m) xs d
649

650
        -- addAltSize is used to add the sizes of case alternatives
651 652 653
    addAltSize TooBig            _      = TooBig
    addAltSize _                 TooBig = TooBig
    addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
654
        = mkSizeIs bOMB_OUT_SIZE (n1 + n2)
655
                                 (xs `unionBags` ys)
656
                                 (d1 + d2) -- Note [addAltSize result discounts]
657 658

        -- This variant ignores the result discount from its LEFT argument
659 660 661 662
        -- It's used when the second argument isn't part of the result
    addSizeNSD TooBig            _      = TooBig
    addSizeNSD _                 TooBig = TooBig
    addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2)
663
        = mkSizeIs bOMB_OUT_SIZE (n1 + n2)
664
                                 (xs `unionBags` ys)
665
                                 d2  -- Ignore d1
666 667 668 669

    isRealWorldId id = idType id `eqType` realWorldStatePrimTy

    -- an expression of type State# RealWorld must be a variable
Peter Wortmann's avatar
Peter Wortmann committed
670 671 672
    isRealWorldExpr (Var id)   = isRealWorldId id
    isRealWorldExpr (Tick _ e) = isRealWorldExpr e
    isRealWorldExpr _          = False
673

674 675 676
-- | Finds a nominal size of a string literal.
litSize :: Literal -> Int
-- Used by CoreUnfold.sizeExpr
677
litSize (LitInteger {}) = 100   -- Note [Size of literal integers]
678
litSize (MachStr str)   = 10 + 10 * ((BS.length str + 3) `div` 4)
679 680 681
        -- If size could be 0 then @f "x"@ might be too small
        -- [Sept03: make literal strings a bit bigger to avoid fruitless
        --  duplication of little strings]
682
litSize _other = 0    -- Must match size of nullary constructors
683 684
                      -- Key point: if  x |-> 4, then x must inline unconditionally
                      --            (eg via case binding)
685

686
classOpSize :: DynFlags -> [Id] -> [CoreExpr] -> ExprSize
687
-- See Note [Conlike is interesting]
688
classOpSize _ _ []
689
  = sizeZero
690
classOpSize dflags top_args (arg1 : other_args)
691
  = SizeIs size arg_discount 0
692
  where
693
    size = 20 + (10 * length other_args)
694 695 696 697
    -- If the class op is scrutinising a lambda bound dictionary then
    -- give it a discount, to encourage the inlining of this function
    -- The actual discount is rather arbitrarily chosen
    arg_discount = case arg1 of
698 699 700 701
                     Var dict | dict `elem` top_args
                              -> unitBag (dict, ufDictDiscount dflags)
                     _other   -> emptyBag

Simon Marlow's avatar
Simon Marlow committed
702 703 704 705 706 707 708
-- | The size of a function call
callSize
 :: Int  -- ^ number of value args
 -> Int  -- ^ number of value args that are void
 -> Int
callSize n_val_args voids = 10 * (1 + n_val_args - voids)

709
funSize :: DynFlags -> [Id] -> Id -> Int -> Int -> ExprSize
710 711
-- Size for functions that are not constructors or primops
-- Note [Function applications]
712
funSize dflags top_args fun n_val_args voids
713 714
  | fun `hasKey` buildIdKey   = buildSize
  | fun `hasKey` augmentIdKey = augmentSize
715
  | otherwise = SizeIs size arg_discount res_discount
716 717 718
  where
    some_val_args = n_val_args > 0

Simon Marlow's avatar
Simon Marlow committed
719
    size | some_val_args = callSize n_val_args voids
720
         | otherwise     = 0
721 722 723 724
        -- The 1+ is for the function itself
        -- Add 1 for each non-trivial arg;
        -- the allocation cost, as in let(rec)

725
        --                  DISCOUNTS
726 727
        --  See Note [Function and non-function discounts]
    arg_discount | some_val_args && fun `elem` top_args
728 729 730 731
                 = unitBag (fun, ufFunAppDiscount dflags)
                 | otherwise = emptyBag
        -- If the function is an argument and is applied
        -- to some values, give it an arg-discount
732

733
    res_discount | idArity fun > n_val_args = ufFunAppDiscount dflags
734
                 | otherwise                = 0
735 736
        -- If the function is partially applied, show a result discount

737 738
conSize :: DataCon -> Int -> ExprSize
conSize dc n_val_args
739
  | n_val_args == 0 = SizeIs 0 emptyBag 10    -- Like variables
740

741
-- See Note [Unboxed tuple size and result discount]
742
  | isUnboxedTupleCon dc = SizeIs 0 emptyBag (10 * (1 + n_val_args))
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
743

744
-- See Note [Constructor size and result discount]
745
  | otherwise = SizeIs 10 emptyBag (10 * (1 + n_val_args))
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
746

Austin Seipp's avatar
Austin Seipp committed
747
{-
748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
Note [Constructor size and result discount]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Treat a constructors application as size 10, regardless of how many
arguments it has; we are keen to expose them (and we charge separately
for their args).  We can't treat them as size zero, else we find that
(Just x) has size 0, which is the same as a lone variable; and hence
'v' will always be replaced by (Just x), where v is bound to Just x.

The "result discount" is applied if the result of the call is
scrutinised (say by a case).  For a constructor application that will
mean the constructor application will disappear, so we don't need to
charge it to the function.  So the discount should at least match the
cost of the constructor application, namely 10.  But to give a bit
of extra incentive we give a discount of 10*(1 + n_val_args).

763
Simon M tried a MUCH bigger discount: (10 * (10 + n_val_args)),
764
and said it was an "unambiguous win", but its terribly dangerous
Gabor Greif's avatar
Gabor Greif committed
765
because a function with many many case branches, each finishing with
766 767 768 769 770
a constructor, can have an arbitrarily large discount.  This led to
terrible code bloat: see Trac #6099.

Note [Unboxed tuple size and result discount]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
771 772
However, unboxed tuples count as size zero. I found occasions where we had
        f x y z = case op# x y z of { s -> (# s, () #) }
773 774 775 776 777 778 779 780 781 782 783 784 785
and f wasn't getting inlined.

I tried giving unboxed tuples a *result discount* of zero (see the
commented-out line).  Why?  When returned as a result they do not
allocate, so maybe we don't want to charge so much for them If you
have a non-zero discount here, we find that workers often get inlined
back into wrappers, because it look like
    f x = case $wf x of (# a,b #) -> (a,b)
and we are keener because of the case.  However while this change
shrank binary sizes by 0.5% it also made spectral/boyer allocate 5%
more. All other changes were very small. So it's not a big deal but I
didn't adopt the idea.

786 787
Note [Function and non-function discounts]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
788 789 790 791 792
We want a discount if the function is applied. A good example is
monadic combinators with continuation arguments, where inlining is
quite important.

But we don't want a big discount when a function is called many times
793
(see the detailed comments with Trac #6048) because if the function is
794 795 796 797
big it won't be inlined at its many call sites and no benefit results.
Indeed, we can get exponentially big inlinings this way; that is what
Trac #6048 is about.

798 799 800 801 802 803 804 805 806
On the other hand, for data-valued arguments, if there are lots of
case expressions in the body, each one will get smaller if we apply
the function to a constructor application, so we *want* a big discount
if the argument is scrutinised by many case expressions.

Conclusion:
  - For functions, take the max of the discounts
  - For data values, take the sum of the discounts

807

808 809 810 811 812 813
Note [Literal integer size]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Literal integers *can* be big (mkInteger [...coefficients...]), but
need not be (S# n).  We just use an aribitrary big-ish constant here
so that, in particular, we don't inline top-level defns like
   n = S# 5
Gabor Greif's avatar
Gabor Greif committed
814
There's no point in doing so -- any optimisations will see the S#
815 816 817
through n's unfolding.  Nor will a big size inhibit unfoldings functions
that mention a literal Integer, because the float-out pass will float
all those constants to top level.
Austin Seipp's avatar
Austin Seipp committed
818
-}
819

twanvl's avatar
twanvl committed
820
primOpSize :: PrimOp -> Int -> ExprSize
821
primOpSize op n_val_args
822 823 824 825 826
 = if primOpOutOfLine op
      then sizeN (op_size + n_val_args)
      else sizeN op_size
 where
   op_size = primOpCodeSize op
827

828

twanvl's avatar
twanvl committed
829
buildSize :: ExprSize
830
buildSize = SizeIs 0 emptyBag 40
831 832
        -- We really want to inline applications of build
        -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
Gabor Greif's avatar
Gabor Greif committed
833
        -- Indeed, we should add a result_discount because build is
834 835 836
        -- very like a constructor.  We don't bother to check that the
        -- build is saturated (it usually is).  The "-2" discounts for the \c n,
        -- The "4" is rather arbitrary.
837

twanvl's avatar
twanvl committed
838
augmentSize :: ExprSize
839
augmentSize = SizeIs 0 emptyBag 40
840 841
        -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
        -- e plus ys. The -2 accounts for the \cn
twanvl's avatar
twanvl committed
842

843
-- When we return a lambda, give a discount if it's used (applied)
844
lamScrutDiscount :: DynFlags -> ExprSize -> ExprSize
845
lamScrutDiscount dflags (SizeIs n vs _) = SizeIs n vs (ufFunAppDiscount dflags)
846
lamScrutDiscount _      TooBig          = TooBig
847

Austin Seipp's avatar
Austin Seipp committed
848
{-
849 850 851 852 853
Note [addAltSize result discounts]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When adding the size of alternatives, we *add* the result discounts
too, rather than take the *maximum*.  For a multi-branch case, this
gives a discount for each branch that returns a constructor, making us
854
keener to inline.  I did try using 'max' instead, but it makes nofib
855 856 857
'rewrite' and 'puzzle' allocate significantly more, and didn't make
binary sizes shrink significantly either.

858 859
Note [Discounts and thresholds]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
860 861
Constants for discounts and thesholds are defined in main/DynFlags,
all of form ufXxxx.   They are:
862

863
ufCreationThreshold
864 865 866
     At a definition site, if the unfolding is bigger than this, we
     may discard it altogether

867
ufUseThreshold
868 869 870
     At a call site, if the unfolding, less discounts, is smaller than
     this, then it's small enough inline

871
ufKeenessFactor
872
     Factor by which the discounts are multiplied before
873 874
     subtracting from size

875
ufDictDiscount
876 877 878 879
     The discount for each occurrence of a dictionary argument
     as an argument of a class method.  Should be pretty small
     else big functions may get inlined

880
ufFunAppDiscount
881 882 883
     Discount for a function argument that is applied.  Quite
     large, because if we inline we avoid the higher-order call.

884
ufDearOp
885 886
     The size of a foreign call or not-dupable PrimOp

887

888 889 890 891
Note [Function applications]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In a function application (f a b)

892
  - If 'f' is an argument to the function being analysed,
893 894 895 896
    and there's at least one value arg, record a FunAppDiscount for f

  - If the application if a PAP (arity > 2 in this example)
    record a *result* discount (because inlining
897
    with "extra" args in the call may mean that we now
898 899 900
    get a saturated application)

Code for manipulating sizes
Austin Seipp's avatar
Austin Seipp committed
901
-}
902

903 904 905 906 907 908 909 910 911 912
-- | The size of an candidate expression for unfolding
data ExprSize
    = TooBig
    | SizeIs { _es_size_is  :: {-# UNPACK #-} !Int -- ^ Size found
             , _es_args     :: !(Bag (Id,Int))
               -- ^ Arguments cased herein, and discount for each such
             , _es_discount :: {-# UNPACK #-} !Int
               -- ^ Size to subtract if result is scrutinised by a case
               -- expression
             }
913 914

instance Outputable ExprSize where
915
  ppr TooBig         = text "TooBig"
916
  ppr (SizeIs a _ c) = brackets (int a <+> int c)
917 918 919

-- subtract the discount before deciding whether to bale out. eg. we
-- want to inline a large constructor application into a selector:
920 921
--      tup = (a_1, ..., a_99)
--      x = case tup of ...
922
--
923 924 925
mkSizeIs :: Int -> Int -> Bag (Id, Int) -> Int -> ExprSize
mkSizeIs max n xs d | (n - d) > max = TooBig
                    | otherwise     = SizeIs n xs d