CoreUnfold.hs 56.4 KB
Newer Older
Austin Seipp's avatar
Austin Seipp committed
1 2 3 4
{-
(c) The University of Glasgow 2006
(c) The AQUA Project, Glasgow University, 1994-1998

Simon Marlow's avatar
Simon Marlow committed
5 6

Core-syntax unfoldings
7 8 9 10 11 12 13 14 15

Unfoldings (which can travel across module boundaries) are in Core
syntax (namely @CoreExpr@s).

The type @Unfolding@ sits ``above'' simply-Core-expressions
unfoldings, capturing ``higher-level'' things we know about a binding,
usually things that the simplifier found out (e.g., ``it's a
literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
find, unsurprisingly, a Core expression.
Austin Seipp's avatar
Austin Seipp committed
16
-}
17

18
{-# LANGUAGE CPP #-}
Ian Lynagh's avatar
Ian Lynagh committed
19

20
module CoreUnfold (
21
        Unfolding, UnfoldingGuidance,   -- Abstract types
22

23
        noUnfolding, mkImplicitUnfolding,
24
        mkUnfolding, mkCoreUnfolding,
25 26 27
        mkTopUnfolding, mkSimpleUnfolding, mkWorkerUnfolding,
        mkInlineUnfolding, mkInlinableUnfolding, mkWwInlineRule,
        mkCompulsoryUnfolding, mkDFunUnfolding,
Simon Peyton Jones's avatar
Simon Peyton Jones committed
28
        specUnfolding,
29

30
        ArgSummary(..),
31

32 33
        couldBeSmallEnoughToInline, inlineBoringOk,
        certainlyWillInline, smallEnoughToInline,
34

35
        callSiteInline, CallCtxt(..),
36

37 38
        -- Re-exported from CoreSubst (they only live there so that they
        -- can be used by the Very Simple Optimiser)
39
        exprIsConApp_maybe, exprIsLiteral_maybe
40 41
    ) where

42 43
#include "HsVersions.h"

Simon Marlow's avatar
Simon Marlow committed
44
import DynFlags
45
import CoreSyn
46
import PprCore          ()      -- Instances
47
import OccurAnal        ( occurAnalyseExpr )
48
import CoreSubst hiding( substTy )
49
import CoreArity       ( manifestArity, exprBotStrictness_maybe )
Simon Marlow's avatar
Simon Marlow committed
50 51 52 53 54 55
import CoreUtils
import Id
import DataCon
import Literal
import PrimOp
import IdInfo
56
import BasicTypes       ( Arity )
57
import Type
Simon Marlow's avatar
Simon Marlow committed
58
import PrelNames
59
import TysPrim          ( realWorldStatePrimTy )
60
import Bag
61
import Util
62
import Outputable
63 64
import ForeignCall

65
import qualified Data.ByteString as BS
66
import Data.Maybe
67

Austin Seipp's avatar
Austin Seipp committed
68 69 70
{-
************************************************************************
*                                                                      *
71
\subsection{Making unfoldings}
Austin Seipp's avatar
Austin Seipp committed
72 73 74
*                                                                      *
************************************************************************
-}
75

76 77
mkTopUnfolding :: DynFlags -> Bool -> CoreExpr -> Unfolding
-- Unfolding for a top-level right-hand side: defers to mkUnfolding with
-- source InlineRhs and the top-level flag set.  The Bool argument is
-- handed on as mkUnfolding's is_bottoming flag.
mkTopUnfolding dflags is_bottoming rhs
  = mkUnfolding dflags InlineRhs is_top_level is_bottoming rhs
  where
    is_top_level = True
78

79
mkImplicitUnfolding :: DynFlags -> CoreExpr -> Unfolding
-- For implicit Ids: run the expression through simpleOptExpr (a tiny bit
-- of optimising) and then make a top-level, non-bottoming unfolding
mkImplicitUnfolding dflags expr
  = mkTopUnfolding dflags not_bottoming optimised
  where
    optimised     = simpleOptExpr expr
    not_bottoming = False
Simon Marlow's avatar
Simon Marlow committed
83

84 85 86 87 88
-- Note [Top-level flag on inline rules]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Slight hack: note that mk_inline_rules conservatively sets the
-- top-level flag to True.  It gets set more accurately by the simplifier
-- Simplify.simplUnfolding.
89

90 91
mkSimpleUnfolding :: DynFlags -> CoreExpr -> Unfolding
-- An InlineRhs unfolding flagged as neither top-level nor bottoming
mkSimpleUnfolding dflags rhs
  = mkUnfolding dflags InlineRhs not_top_level not_bottoming rhs
  where
    not_top_level = False
    not_bottoming = False
92

93
mkDFunUnfolding :: [Var] -> DataCon -> [CoreExpr] -> Unfolding
-- Builds a DFunUnfolding, occurrence-analysing each argument expression
-- before it is captured
-- See Note [Occurrence analysis of unfoldings]
mkDFunUnfolding bndrs con ops
  = DFunUnfolding { df_bndrs = bndrs, df_con = con, df_args = occ_anald_ops }
  where
    occ_anald_ops = [ occurAnalyseExpr op | op <- ops ]
Simon Marlow's avatar
Simon Marlow committed
99

100 101
mkWwInlineRule :: CoreExpr -> Arity -> Unfolding
-- A stable (InlineStable) unfolding for the simpleOptExpr'd expression,
-- with UnfWhen guidance at the given arity: unsaturated use is OK,
-- but a boring context is not
mkWwInlineRule expr arity
  = mkCoreUnfolding InlineStable True opt_expr guide
  where
    opt_expr = simpleOptExpr expr
    guide    = UnfWhen { ug_arity     = arity
                       , ug_unsat_ok  = unSaturatedOk
                       , ug_boring_ok = boringCxtNotOk }
106

twanvl's avatar
twanvl committed
107
mkCompulsoryUnfolding :: CoreExpr -> Unfolding
-- Used for things that absolutely must be unfolded: an InlineCompulsory
-- unfolding that is OK unsaturated and OK in a boring context
mkCompulsoryUnfolding expr
  = mkCoreUnfolding InlineCompulsory True opt_expr always
  where
    opt_expr = simpleOptExpr expr
    always   = UnfWhen { ug_arity     = 0    -- Arity of unfolding doesn't matter
                       , ug_unsat_ok  = unSaturatedOk
                       , ug_boring_ok = boringCxtOk }
113

114 115 116 117 118 119 120 121 122 123 124 125 126
mkWorkerUnfolding :: DynFlags -> (CoreExpr -> CoreExpr) -> Unfolding -> Unfolding
-- See Note [Worker-wrapper for INLINABLE functions] in WorkWrap
-- Given a CoreUnfolding with a stable source, apply work_fn to its template,
-- simpleOptExpr the result, and recompute the guidance for the new template.
-- Anything else yields noUnfolding.
mkWorkerUnfolding dflags work_fn unf
  = case unf of
      CoreUnfolding { uf_src = src, uf_tmpl = tmpl, uf_is_top = top_lvl }
        | isStableSource src
        -> mkCoreUnfolding src top_lvl new_tmpl guidance
        where
          new_tmpl = simpleOptExpr (work_fn tmpl)
          guidance = calcUnfoldingGuidance dflags new_tmpl
      _ -> noUnfolding

127
mkInlineUnfolding :: Maybe Arity -> CoreExpr -> Unfolding
-- A stable unfolding with UnfWhen guidance for the simpleOptExpr'd expression.
--   Nothing    => use the manifest arity of the optimised expression,
--                 and allow unsaturated inlining
--   Just arity => use the given arity, and insist on saturation
-- In both cases ug_boring_ok comes from inlineBoringOk.
mkInlineUnfolding mb_arity expr
  = mkCoreUnfolding InlineStable
                    True         -- Note [Top-level flag on inline rules]
                    opt_expr guide
  where
    opt_expr = simpleOptExpr expr

    guide = case mb_arity of
              Nothing    -> unf_when (manifestArity opt_expr) unSaturatedOk
              Just arity -> unf_when arity                    needSaturated

    unf_when arity unsat_ok
      = UnfWhen { ug_arity     = arity
                , ug_unsat_ok  = unsat_ok
                , ug_boring_ok = inlineBoringOk opt_expr }
142

143 144 145
mkInlinableUnfolding :: DynFlags -> CoreExpr -> Unfolding
-- An InlineStable unfolding for the simpleOptExpr'd expression, built by
-- mkUnfolding with the top-level flag set.  The is_bottoming flag is True
-- exactly when exprBotStrictness_maybe returns a Just for the optimised
-- expression.
mkInlinableUnfolding dflags expr
  = mkUnfolding dflags InlineStable True bottoming opt_expr
  where
    opt_expr  = simpleOptExpr expr
    bottoming = case exprBotStrictness_maybe opt_expr of
                  Just _  -> True
                  Nothing -> False
Simon Peyton Jones's avatar
Simon Peyton Jones committed
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182

specUnfolding :: DynFlags -> Subst -> [Var] -> [CoreExpr] -> Unfolding -> Unfolding
-- See Note [Specialising unfoldings]
-- Specialise an unfolding, given the substitution, the binders of the
-- specialised function (new_bndrs) and the arguments being specialised on
-- (spec_args).  Three cases:
--   * DFunUnfolding:  produces another DFunUnfolding
--   * CoreUnfolding with a stable source and UnfWhen guidance:
--                     produces a CoreUnfolding with adjusted arity
--   * anything else:  noUnfolding

-- DFun case
specUnfolding _ subst new_bndrs spec_args
              df@(DFunUnfolding { df_bndrs = bndrs, df_con = con , df_args = args })
  = ASSERT2( length bndrs >= length spec_args, ppr df $$ ppr spec_args $$ ppr new_bndrs )
    mkDFunUnfolding (new_bndrs ++ extra_bndrs) con
                    (map (substExpr spec_doc subst2) args)
  where
    -- Pair the DFun's binders with the specialising arguments
    subst1 = extendSubstList subst (bndrs `zip` spec_args)
    -- The binders left over (presumably those beyond the first
    -- (length spec_args); depends on dropList) stay as binders of the result
    (subst2, extra_bndrs) = substBndrs subst1 (dropList spec_args bndrs)

-- Stable CoreUnfolding with UnfWhen guidance
specUnfolding _dflags subst new_bndrs spec_args
              (CoreUnfolding { uf_src = src, uf_tmpl = tmpl
                             , uf_is_top = top_lvl
                             , uf_guidance = old_guidance })
 | isStableSource src  -- See Note [Specialising unfoldings]
 , UnfWhen { ug_arity = old_arity
           , ug_unsat_ok = unsat_ok
           , ug_boring_ok = boring_ok } <- old_guidance
 = let -- New arity: drop the value arguments we have supplied,
       -- add the value binders we have abstracted over
       guidance = UnfWhen { ug_arity = old_arity - count isValArg spec_args
                                     + count isId new_bndrs
                          , ug_unsat_ok = unsat_ok
                          , ug_boring_ok = boring_ok }
       new_tmpl = simpleOptExpr $ mkLams new_bndrs $
                  mkApps (substExpr spec_doc subst tmpl) spec_args
                   -- The beta-redexes created here will be simplified
                   -- away by the simpleOptExpr applied just above

   in mkCoreUnfolding src top_lvl new_tmpl guidance

-- Everything else (including stable unfoldings whose guidance is not UnfWhen)
specUnfolding _ _ _ _ _ = noUnfolding

spec_doc :: SDoc
183
-- The SDoc that specUnfolding passes as the first argument of substExpr
spec_doc = text "specUnfolding"
184

Austin Seipp's avatar
Austin Seipp committed
185
{-
Simon Peyton Jones's avatar
Simon Peyton Jones committed
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
Note [Specialising unfoldings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When we specialise a function for some given type-class arguments, we use
specUnfolding to specialise its unfolding.  Some important points:

* If the original function has a DFunUnfolding, the specialised one
  must do so too!  Otherwise we lose the magic rules that make it
  interact with ClassOps

* There is a bit of hack for INLINABLE functions:
     f :: Ord a => ....
     f = <big-rhs>
     {-# INLINEABLE f #-}
  Now if we specialise f, should the specialised version still have
  an INLINEABLE pragma?  If it does, we'll capture a specialised copy
  of <big-rhs> as its unfolding, and that probably won't inline.  But
  if we don't, the specialised version of <big-rhs> might be small
  enough to inline at a call site. This happens with Control.Monad.liftM3,
  and can cause a lot more allocation as a result (nofib n-body shows this).

  Moreover, keeping the INLINEABLE thing isn't much help, because
  the specialised function (probably) isn't overloaded any more.

  Conclusion: drop the INLINEABLE pragma.  In practice what this means is:
     if a stable unfolding has UnfoldingGuidance of UnfWhen,
        we keep it (so the specialised thing too will always inline)
     if a stable unfolding has UnfoldingGuidance of UnfIfGoodArgs
        (which arises from INLINEABLE), we discard it
Austin Seipp's avatar
Austin Seipp committed
214
-}
Simon Peyton Jones's avatar
Simon Peyton Jones committed
215

216
mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr
                -> UnfoldingGuidance -> Unfolding
-- Packages an expression up as a CoreUnfolding with the supplied guidance.
-- The template is the occurrence-analysed expression; the cached flags
-- (value, conlike, work-free, expandable) are computed from the expression
-- as given.
mkCoreUnfolding src top_lvl expr guidance
  = CoreUnfolding
      { uf_tmpl         = occ_anald_expr
      , uf_src          = src
      , uf_is_top       = top_lvl
      , uf_is_value     = exprIsHNF        expr
      , uf_is_conlike   = exprIsConLike    expr
      , uf_is_work_free = exprIsWorkFree   expr
      , uf_expandable   = exprIsExpandable expr
      , uf_guidance     = guidance
      }
  where
    occ_anald_expr = occurAnalyseExpr expr
        -- See Note [Occurrence analysis of unfoldings]
229

230 231
mkUnfolding :: DynFlags -> UnfoldingSource -> Bool -> Bool -> CoreExpr
            -> Unfolding
-- Calculates unfolding guidance and builds the unfolding.
-- Arguments: flags, unfolding source, top-level flag, is-bottoming flag,
-- and the expression.
--
-- A top-level bottoming expression that is not trivial gets NoUnfolding.
-- Otherwise the unfolding is built by mkCoreUnfolding, which
-- occurrence-analyses the expression before capturing it and fills in
-- the cached flags; sharing that code keeps the two constructors of
-- CoreUnfolding records in step.
mkUnfolding dflags src top_lvl is_bottoming expr
  | top_lvl && is_bottoming
  , not (exprIsTrivial expr)
  = NoUnfolding    -- See Note [Do not inline top-level bottoming functions]
  | otherwise
  = mkCoreUnfolding src top_lvl expr guidance
  where
    guidance = calcUnfoldingGuidance dflags expr
        -- NB: *not* (calcUnfoldingGuidance (occurAnalyseExpr expr))!
        -- See Note [Calculate unfolding guidance on the non-occ-anal'd expression]
252

Austin Seipp's avatar
Austin Seipp committed
253
{-
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
Note [Occurrence analysis of unfoldings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We do occurrence-analysis of unfoldings once and for all, when the
unfolding is built, rather than each time we inline them.

But given this decision it's vital that we
*always* do it.  Consider this unfolding
    \x -> letrec { f = ...g...; g* = f } in body
where g* is (for some strange reason) the loop breaker.  If we don't
occ-anal it when reading it in, we won't mark g as a loop breaker, and
we may inline g entirely in body, dropping its binding, and leaving
the occurrence in f out of scope. This happened in Trac #8892, where
the unfolding in question was a DFun unfolding.

But more generally, the simplifier is designed on the
basis that it is looking at occurrence-analysed expressions, so better
ensure that they actually are.

272 273 274 275 276 277 278 279 280 281
Note [Calculate unfolding guidance on the non-occ-anal'd expression]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Notice that we give the non-occur-analysed expression to
calcUnfoldingGuidance.  In some ways it'd be better to occur-analyse
first; for example, sometimes during simplification, there's a large
let-bound thing which has been substituted, and so is now dead; so
'expr' contains two copies of the thing while the occurrence-analysed
expression doesn't.

Nevertheless, we *don't* and *must not* occ-analyse before computing
282
the size because
283 284 285 286

a) The size computation bales out after a while, whereas occurrence
   analysis does not.

287 288
b) Residency increases sharply if you occ-anal first.  I'm not
   100% sure why, but it's a large effect.  Compiling Cabal went
289 290 291 292 293 294 295
   from residency of 534M to over 800M with this one change.

This can occasionally mean that the guidance is very pessimistic;
it gets fixed up next round.  And it should be rare, because large
let-bound things that are dead are usually caught by preInlineUnconditionally


Austin Seipp's avatar
Austin Seipp committed
296 297
************************************************************************
*                                                                      *
298
\subsection{The UnfoldingGuidance type}
Austin Seipp's avatar
Austin Seipp committed
299 300 301
*                                                                      *
************************************************************************
-}
302

303 304
inlineBoringOk :: CoreExpr -> Bool
-- See Note [INLINE for small functions]
-- True => the result of inlining the expression is
--         no bigger than the expression itself
--     eg      (\x y -> f y x)
-- This is a quick and dirty version. It doesn't attempt
-- to deal with  (\x y z -> x (y z))
-- The really important one is (x `cast` c)
--
-- Each value lambda earns one credit; each trivial value argument
-- spends one.  Type lambdas, type arguments, ticks and casts are free.
inlineBoringOk rhs
  = walk 0 rhs
  where
    walk :: Int -> CoreExpr -> Bool
    walk budget expr
      = case expr of
          Lam bndr body
            | isId bndr                   -> walk (budget + 1) body
            | otherwise                   -> walk budget body
          App fun (Type {})               -> walk budget fun
          App fun arg
            | budget > 0
            , exprIsTrivial arg           -> walk (budget - 1) fun
          Tick _ inner                    -> walk budget inner   -- dubious
          Cast inner _                    -> walk budget inner
          Var {}                          -> boringCxtOk
          _                               -> boringCxtNotOk
324

325
calcUnfoldingGuidance
        :: DynFlags
        -> CoreExpr    -- Expression to look at
        -> UnfoldingGuidance
-- Sizes the body of the expression (under its leading binders) with
-- sizeExpr and turns the answer into guidance:
--   TooBig                       => UnfNever
--   small enough (uncondInline)  => UnfWhen
--   otherwise                    => UnfIfGoodArgs, with one discount per
--                                   value binder
calcUnfoldingGuidance dflags (Tick t expr)
  | not (tickishIsCode t)  -- non-code ticks don't matter for unfolding
  = calcUnfoldingGuidance dflags expr
calcUnfoldingGuidance dflags expr
  = case sizeExpr dflags size_limit val_bndrs body of
      TooBig -> UnfNever
      SizeIs size cased_bndrs scrut_discount
        | uncondInline expr n_val_bndrs size
        -> always_inline        -- Note [INLINE for small functions]
        | otherwise
        -> UnfIfGoodArgs { ug_args = [ discount_for cased_bndrs b | b <- val_bndrs ]
                         , ug_size = size
                         , ug_res  = scrut_discount }
  where
    (bndrs, body) = collectBinders expr
    size_limit    = ufCreationThreshold dflags
           -- Bomb out if size gets bigger than this
    val_bndrs     = [ b | b <- bndrs, isId b ]
    n_val_bndrs   = length val_bndrs

    always_inline = UnfWhen { ug_arity     = n_val_bndrs
                            , ug_unsat_ok  = unSaturatedOk
                            , ug_boring_ok = boringCxtOk }

    -- Total discount recorded against one binder
    discount_for :: Bag (Id,Int) -> Id -> Int
    discount_for discounts bndr = foldlBag accumulate 0 discounts
      where
        accumulate so_far (cased, disc)
          | bndr == cased = merge so_far disc
          | otherwise     = so_far

        -- Function-typed binders take the largest discount,
        -- everything else adds them up
        -- See Note [Function and non-function discounts]
        merge :: Int -> Int -> Int
        merge | isFunTy (idType bndr) = max
              | otherwise             = (+)
363

Austin Seipp's avatar
Austin Seipp committed
364
{-
365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
Note [Computing the size of an expression]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The basic idea of sizeExpr is obvious enough: count nodes.  But getting the
heuristics right has taken a long time.  Here's the basic strategy:

    * Variables, literals: 0
      (Exception for string literals, see litSize.)

    * Function applications (f e1 .. en): 1 + #value args

    * Constructor applications: 1, regardless of #args

    * Let(rec): 1 + size of components

    * Note, cast: 0

Examples

383
  Size  Term
384
  --------------
385 386
    0     42#
    0     x
387
    0     True
388 389 390
    2     f x
    1     Just x
    4     f (g x)
391 392

Notice that 'x' counts 0, while (f x) counts 2.  That's deliberate: there's
393
a function call to account for.  Notice also that constructor applications
394 395
are very cheap, because exposing them to a caller is so valuable.

396 397 398 399
[25/5/11] All sizes are now multiplied by 10, except for primops
(which have sizes like 1 or 4).  This makes primops look fantastically
cheap, and seems to be almost universally beneficial.  Done partly as a
result of #4978.
400 401 402

Note [Do not inline top-level bottoming functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
403
The FloatOut pass has gone to some trouble to float out calls to 'error'
404 405 406 407
and similar friends.  See Note [Bottoming floats] in SetLevels.
Do not re-inline them!  But we *do* still inline if they are very small
(the uncondInline stuff).

408 409
Note [INLINE for small functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
410
Consider        {-# INLINE f #-}
411 412 413 414 415 416 417 418 419
                f x = Just x
                g y = f y
Then f's RHS is no larger than its LHS, so we should inline it into
even the most boring context.  In general, if the function is
sufficiently small that its body is as small as the call itself, then
inline unconditionally, regardless of how boring the context is.

Things to note:

420 421
(1) We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
    than the thing it's replacing.  Notice that
422 423 424 425 426
      (f x) --> (g 3)             -- YES, unconditionally
      (f x) --> x : []            -- YES, *even though* there are two
                                  --      arguments to the cons
      x     --> g 3               -- NO
      x     --> Just v            -- NO
427

428 429 430 431 432 433 434 435 436 437 438
    It's very important not to unconditionally replace a variable by
    a non-atomic term.

(2) We do this even if the thing isn't saturated, else we end up with the
    silly situation that
       f x y = x
       ...map (f 3)...
    doesn't inline.  Even in a boring context, inlining without being
    saturated will give a lambda instead of a PAP, and will be more
    efficient at runtime.

Simon Peyton Jones's avatar
Simon Peyton Jones committed
439
(3) However, when the function's arity > 0, we do insist that it
440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
    has at least one value argument at the call site.  (This check is
    made in the UnfWhen case of callSiteInline.) Otherwise we find this:
         f = /\a \x:a. x
         d = /\b. MkD (f b)
    If we inline f here we get
         d = /\b. MkD (\x:b. x)
    and then prepareRhs floats out the argument, abstracting the type
    variables, so we end up with the original again!

(4) We must be much more cautious about arity-zero things. Consider
       let x = y +# z in ...
    In *size* terms primops look very small, because they generate a
    single instruction, but we do not want to unconditionally replace
    every occurrence of x with (y +# z).  So we only do the
    unconditional-inline thing for *trivial* expressions.
Simon Peyton Jones's avatar
Simon Peyton Jones committed
455

456 457 458
    NB: you might think that PostInlineUnconditionally would do this
    but it doesn't fire for top-level things; see SimplUtils
    Note [Top level and postInlineUnconditionally]
Austin Seipp's avatar
Austin Seipp committed
459
-}
460

461
uncondInline :: CoreExpr -> Arity -> Int -> Bool
-- Inline unconditionally if there is no size increase
-- Size of call is arity (+1 for the function), scaled by 10
-- See Note [INLINE for small functions]
uncondInline rhs arity size
  = if arity > 0
      then size <= call_size       -- See Note [INLINE for small functions] (1)
      else exprIsTrivial rhs       -- See Note [INLINE for small functions] (4)
  where
    call_size = 10 * (arity + 1)
468

469
sizeExpr :: DynFlags
470
         -> Int             -- Bomb out if it gets bigger than this
471 472 473 474
         -> [Id]            -- Arguments; we're interested in which of these
                            -- get case'd
         -> CoreExpr
         -> ExprSize
475

476 477
-- Note [Computing the size of an expression]

478
sizeExpr dflags bOMB_OUT_SIZE top_args expr
479 480
  = size_up expr
  where
481
    size_up (Cast e _) = size_up e
482
    size_up (Tick _ e) = size_up e
483
    size_up (Type _)   = sizeZero           -- Types cost nothing
484
    size_up (Coercion _) = sizeZero
485
    size_up (Lit lit)  = sizeN (litSize lit)
486 487 488 489
    size_up (Var f) | isRealWorldId f = sizeZero
                      -- Make sure we get constructor discounts even
                      -- on nullary constructors
                    | otherwise       = size_up_call f [] 0
Simon Marlow's avatar
Simon Marlow committed
490

491 492 493 494
    size_up (App fun arg)
      | isTyCoArg arg = size_up fun
      | otherwise     = size_up arg  `addSizeNSD`
                        size_up_app fun [arg] (if isRealWorldExpr arg then 1 else 0)
495

496 497 498
    size_up (Lam b e)
      | isId b && not (isRealWorldId b) = lamScrutDiscount dflags (size_up e `addSizeN` 10)
      | otherwise = size_up e
499 500

    size_up (Let (NonRec binder rhs) body)
501 502
      = size_up rhs             `addSizeNSD`
        size_up body            `addSizeN`
503
        (if isUnLiftedType (idType binder) then 0 else 10)
504 505
                -- For the allocation
                -- If the binder has an unlifted type there is no allocation
506 507

    size_up (Let (Rec pairs) body)
508
      = foldr (addSizeNSD . size_up . snd)
509
              (size_up body `addSizeN` (10 * length pairs))     -- (length pairs) for the allocation
510
              pairs
511

512 513 514 515 516 517 518 519 520 521 522 523 524 525
    size_up (Case (Var v) _ _ alts)
        | v `elem` top_args             -- We are scrutinising an argument variable
        = alts_size (foldr addAltSize sizeZero alt_sizes)
                    (foldr maxSize    sizeZero alt_sizes)
                -- Good to inline if an arg is scrutinised, because
                -- that may eliminate allocation in the caller
                -- And it eliminates the case itself
        where
          alt_sizes = map size_up_alt alts

                -- alts_size tries to compute a good discount for
                -- the case when we are scrutinising an argument variable
          alts_size (SizeIs tot tot_disc tot_scrut)  -- Size of all alternatives
                    (SizeIs max _        _)          -- Size of biggest alternative
526
                = SizeIs tot (unitBag (v, 20 + tot - max) `unionBags` tot_disc) tot_scrut
527 528 529 530 531 532
                        -- If the variable is known, we produce a discount that
                        -- will take us back to 'max', the size of the largest alternative
                        -- The 20+ is a little discount for reduced allocation in the caller
                        --
                        -- Notice though, that we return tot_disc, the total discount from
                        -- all branches.  I think that's right.
533

534
          alts_size tot_size _ = tot_size
535

Simon Marlow's avatar
Simon Marlow committed
536
    size_up (Case e _ _ alts) = size_up e  `addSizeNSD`
537 538 539
                                foldr (addAltSize . size_up_alt) case_size alts
      where
          case_size
540
           | is_inline_scrut e, not (lengthExceeds alts 1)  = sizeN (-10)
541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570
           | otherwise = sizeZero
                -- Normally we don't charge for the case itself, but
                -- we charge one per alternative (see size_up_alt,
                -- below) to account for the cost of the info table
                -- and comparisons.
                --
                -- However, in certain cases (see is_inline_scrut
                -- below), no code is generated for the case unless
                -- there are multiple alts.  In these cases we
                -- subtract one, making the first alt free.
                -- e.g. case x# +# y# of _ -> ...   should cost 1
                --      case touch# x# of _ -> ...  should cost 0
                -- (see #4978)
                --
                -- I would like to not have the "not (lengthExceeds alts 1)"
                -- condition above, but without that some programs got worse
                -- (spectral/hartel/event and spectral/para).  I don't fully
                -- understand why. (SDM 24/5/11)

                -- unboxed variables, inline primops and unsafe foreign calls
                -- are all "inline" things:
          is_inline_scrut (Var v) = isUnLiftedType (idType v)
          is_inline_scrut scrut
              | (Var f, _) <- collectArgs scrut
                = case idDetails f of
                    FCallId fc  -> not (isSafeForeignCall fc)
                    PrimOpId op -> not (primOpOutOfLine op)
                    _other      -> False
              | otherwise
                = False
571

572
    ------------
573
    -- size_up_app is used when there's ONE OR MORE value args
574
    size_up_app (App fun arg) args voids
575 576 577
        | isTyCoArg arg                  = size_up_app fun args voids
        | isRealWorldExpr arg            = size_up_app fun (arg:args) (voids + 1)
        | otherwise                      = size_up arg  `addSizeNSD`
578 579
                                           size_up_app fun (arg:args) voids
    size_up_app (Var fun)     args voids = size_up_call fun args voids
Peter Wortmann's avatar
Peter Wortmann committed
580
    size_up_app (Tick _ expr) args voids = size_up_app expr args voids
581
    size_up_app other         args voids = size_up other `addSizeN` (length args - voids)
582

583
    ------------
584 585
    size_up_call :: Id -> [CoreExpr] -> Int -> ExprSize
    size_up_call fun val_args voids
586
       = case idDetails fun of
587
           FCallId _        -> sizeN (10 * (1 + length val_args))
588 589
           DataConWorkId dc -> conSize    dc (length val_args)
           PrimOpId op      -> primOpSize op (length val_args)
590 591
           ClassOpId _      -> classOpSize dflags top_args val_args
           _                -> funSize dflags top_args fun (length val_args) voids
592

593
    ------------
594
    size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 10
595 596 597 598 599 600
        -- Don't charge for args, so that wrappers look cheap
        -- (See comments about wrappers with Case)
        --
        -- IMPORTANT: *do* charge 10 for the alternative, else we
        -- find that giant case nests are treated as practically free
        -- A good example is Foreign.C.Error.errnoToIOError
601 602

    ------------
603 604
        -- These addSize things have to be here because
        -- I don't want to give them bOMB_OUT_SIZE as an argument
605
    addSizeN TooBig          _  = TooBig
606
    addSizeN (SizeIs n xs d) m  = mkSizeIs bOMB_OUT_SIZE (n + m) xs d
607

608
        -- addAltSize is used to add the sizes of case alternatives
609 610 611
    addAltSize TooBig            _      = TooBig
    addAltSize _                 TooBig = TooBig
    addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
612
        = mkSizeIs bOMB_OUT_SIZE (n1 + n2)
613
                                 (xs `unionBags` ys)
614
                                 (d1 + d2) -- Note [addAltSize result discounts]
615 616

        -- This variant ignores the result discount from its LEFT argument
617 618 619 620
        -- It's used when the second argument isn't part of the result
    addSizeNSD TooBig            _      = TooBig
    addSizeNSD _                 TooBig = TooBig
    addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2)
621
        = mkSizeIs bOMB_OUT_SIZE (n1 + n2)
622
                                 (xs `unionBags` ys)
623
                                 d2  -- Ignore d1
624 625 626 627

    isRealWorldId id = idType id `eqType` realWorldStatePrimTy

    -- an expression of type State# RealWorld must be a variable
Peter Wortmann's avatar
Peter Wortmann committed
628 629 630
    isRealWorldExpr (Var id)   = isRealWorldId id
    isRealWorldExpr (Tick _ e) = isRealWorldExpr e
    isRealWorldExpr _          = False
631

632 633 634
-- | Finds a nominal size of a literal.
litSize :: Literal -> Int
-- Used by CoreUnfold.sizeExpr
litSize lit
  = case lit of
      LitInteger {} -> 100   -- Note [Size of literal integers]
      MachStr str   -> 10 + 10 * ((BS.length str + 3) `div` 4)
        -- If size could be 0 then @f "x"@ might be too small
        -- [Sept03: make literal strings a bit bigger to avoid fruitless
        --  duplication of little strings]
      _other        -> 0
        -- Must match size of nullary constructors
        -- Key point: if  x |-> 4, then x must inline unconditionally
        --            (eg via case binding)
643

644
classOpSize :: DynFlags -> [Id] -> [CoreExpr] -> ExprSize
-- See Note [Conlike is interesting]
-- No arguments: size zero.  Otherwise 20, plus 10 for every argument
-- after the first, with no result discount.
classOpSize dflags top_args args
  = case args of
      []                -> sizeZero
      (dict_arg : rest) -> SizeIs (op_size rest) (dict_discount dict_arg) 0
  where
    op_size rest = 20 + (10 * length rest)

    -- If the class op is scrutinising a lambda bound dictionary then
    -- give it a discount, to encourage the inlining of this function
    -- The actual discount is rather arbitrarily chosen
    dict_discount (Var dict)
      | dict `elem` top_args = unitBag (dict, ufDictDiscount dflags)
    dict_discount _other     = emptyBag

660
funSize :: DynFlags -> [Id] -> Id -> Int -> Int -> ExprSize
-- Size for functions that are not constructors or primops
-- Note [Function applications]
funSize dflags top_args fun n_val_args voids
  | fun `hasKey` buildIdKey   = buildSize
  | fun `hasKey` augmentIdKey = augmentSize
  | otherwise                 = SizeIs app_size arg_discount res_discount
  where
    has_val_args     = n_val_args > 0
    fun_app_discount = ufFunAppDiscount dflags

        -- The 1+ is for the function itself
        -- Add 1 for each non-trivial arg;
        -- the allocation cost, as in let(rec)
    app_size = if has_val_args
                 then 10 * (1 + n_val_args - voids)
                 else 0

        --                  DISCOUNTS
        --  See Note [Function and non-function discounts]
        -- If the function is an argument and is applied
        -- to some values, give it an arg-discount
    arg_discount = if has_val_args && fun `elem` top_args
                     then unitBag (fun, fun_app_discount)
                     else emptyBag

        -- If the function is partially applied, show a result discount
    res_discount = if idArity fun > n_val_args
                     then fun_app_discount
                     else 0

688 689
conSize :: DataCon -> Int -> ExprSize
conSize dc n_val_args
  -- Nullary constructor: like variables
  | n_val_args == 0      = SizeIs 0  emptyBag 10
  -- See Note [Unboxed tuple size and result discount]
  | isUnboxedTupleCon dc = SizeIs 0  emptyBag scrut_discount
  -- See Note [Constructor size and result discount]
  | otherwise            = SizeIs 10 emptyBag scrut_discount
  where
    -- Result discount shared by both saturated-constructor cases
    scrut_discount = 10 * (1 + n_val_args)
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
697

Austin Seipp's avatar
Austin Seipp committed
698
{-
699 700 701 702 703 704 705 706 707 708 709 710 711 712 713
Note [Constructor size and result discount]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Treat a constructors application as size 10, regardless of how many
arguments it has; we are keen to expose them (and we charge separately
for their args).  We can't treat them as size zero, else we find that
(Just x) has size 0, which is the same as a lone variable; and hence
'v' will always be replaced by (Just x), where v is bound to Just x.

The "result discount" is applied if the result of the call is
scrutinised (say by a case).  For a constructor application that will
mean the constructor application will disappear, so we don't need to
charge it to the function.  So the discount should at least match the
cost of the constructor application, namely 10.  But to give a bit
of extra incentive we give a discount of 10*(1 + n_val_args).

714
Simon M tried a MUCH bigger discount: (10 * (10 + n_val_args)),
715
and said it was an "unambiguous win", but it's terribly dangerous
Gabor Greif's avatar
Gabor Greif committed
716
because a function with many many case branches, each finishing with
717 718 719 720 721
a constructor, can have an arbitrarily large discount.  This led to
terrible code bloat: see Trac #6099.

Note [Unboxed tuple size and result discount]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
722 723
However, unboxed tuples count as size zero. I found occasions where we had
        f x y z = case op# x y z of { s -> (# s, () #) }
724 725 726 727 728 729 730 731 732 733 734 735 736
and f wasn't getting inlined.

I tried giving unboxed tuples a *result discount* of zero (see the
commented-out line).  Why?  When returned as a result they do not
allocate, so maybe we don't want to charge so much for them.  If you
have a non-zero discount here, we find that workers often get inlined
back into wrappers, because it looks like
    f x = case $wf x of (# a,b #) -> (a,b)
and we are keener because of the case.  However while this change
shrank binary sizes by 0.5% it also made spectral/boyer allocate 5%
more. All other changes were very small. So it's not a big deal but I
didn't adopt the idea.

737 738
Note [Function and non-function discounts]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
739 740 741 742 743
We want a discount if the function is applied. A good example is
monadic combinators with continuation arguments, where inlining is
quite important.

But we don't want a big discount when a function is called many times
744
(see the detailed comments with Trac #6048) because if the function is
745 746 747 748
big it won't be inlined at its many call sites and no benefit results.
Indeed, we can get exponentially big inlinings this way; that is what
Trac #6048 is about.

749 750 751 752 753 754 755 756 757
On the other hand, for data-valued arguments, if there are lots of
case expressions in the body, each one will get smaller if we apply
the function to a constructor application, so we *want* a big discount
if the argument is scrutinised by many case expressions.

Conclusion:
  - For functions, take the max of the discounts
  - For data values, take the sum of the discounts

758

759 760 761 762 763 764
Note [Literal integer size]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Literal integers *can* be big (mkInteger [...coefficients...]), but
need not be (S# n).  We just use an arbitrary big-ish constant here
so that, in particular, we don't inline top-level defns like
   n = S# 5
Gabor Greif's avatar
Gabor Greif committed
765
There's no point in doing so -- any optimisations will see the S#
766 767 768
through n's unfolding.  Nor will a big size inhibit unfoldings of functions
that mention a literal Integer, because the float-out pass will float
all those constants to top level.
Austin Seipp's avatar
Austin Seipp committed
769
-}
770

twanvl's avatar
twanvl committed
771
primOpSize :: PrimOp -> Int -> ExprSize
-- An out-of-line primop is charged its code size plus one per value
-- argument; any other primop is charged its code size only.
primOpSize op n_val_args
  | primOpOutOfLine op = sizeN (base_cost + n_val_args)
  | otherwise          = sizeN base_cost
  where
    base_cost = primOpCodeSize op
778

779

twanvl's avatar
twanvl committed
780
buildSize :: ExprSize
buildSize = SizeIs { _es_size_is  = 0
                   , _es_args     = emptyBag
                   , _es_discount = 40 }
782 783 784 785 786 787
        -- We really want to inline applications of build
        -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
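        -- Indeed, we should add a result_discount because build is
        -- very like a constructor.  We don't bother to check that the
        -- build is saturated (it usually is).  The "-2" discounts for the \c n,
        -- The "4" is rather arbitrary.
        -- NOTE(review): the "-2" and "4" above do not match the values in
        -- the definition (size 0, result discount 40) -- confirm and update.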
788

twanvl's avatar
twanvl committed
789
augmentSize :: ExprSize
augmentSize = SizeIs { _es_size_is  = 0
                     , _es_args     = emptyBag
                     , _es_discount = 40 }
791 792
        -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
        -- e plus ys. The -2 accounts for the \cn
twanvl's avatar
twanvl committed
793

794
-- When we return a lambda, give a discount if it's used (applied):
-- the result discount is replaced by the function-application discount.
lamScrutDiscount :: DynFlags -> ExprSize -> ExprSize
lamScrutDiscount dflags sz = case sz of
    SizeIs n vs _ -> SizeIs n vs (ufFunAppDiscount dflags)
    TooBig        -> TooBig
798

Austin Seipp's avatar
Austin Seipp committed
799
{-
800 801 802 803 804
Note [addAltSize result discounts]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When adding the size of alternatives, we *add* the result discounts
too, rather than take the *maximum*.  For a multi-branch case, this
gives a discount for each branch that returns a constructor, making us
805
keener to inline.  I did try using 'max' instead, but it makes nofib
806 807 808
'rewrite' and 'puzzle' allocate significantly more, and didn't make
binary sizes shrink significantly either.

809 810
Note [Discounts and thresholds]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
811 812
Constants for discounts and thresholds are defined in main/DynFlags,
all of form ufXxxx.   They are:
813

814
ufCreationThreshold
815 816 817
     At a definition site, if the unfolding is bigger than this, we
     may discard it altogether

818
ufUseThreshold
819 820 821
     At a call site, if the unfolding, less discounts, is smaller than
     this, then it's small enough to inline

822
ufKeenessFactor
823
     Factor by which the discounts are multiplied before
824 825
     subtracting from size

826
ufDictDiscount
827 828 829 830
     The discount for each occurrence of a dictionary argument
     as an argument of a class method.  Should be pretty small
     else big functions may get inlined

831
ufFunAppDiscount
832 833 834
     Discount for a function argument that is applied.  Quite
     large, because if we inline we avoid the higher-order call.

835
ufDearOp
836 837
     The size of a foreign call or not-dupable PrimOp

838

839 840 841 842
Note [Function applications]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In a function application (f a b)

843
  - If 'f' is an argument to the function being analysed,
844 845 846 847
    and there's at least one value arg, record a FunAppDiscount for f

  - If the application is a PAP (arity > 2 in this example)
    record a *result* discount (because inlining
848
    with "extra" args in the call may mean that we now
849 850 851
    get a saturated application)

Code for manipulating sizes
Austin Seipp's avatar
Austin Seipp committed
852
-}
853

854 855 856 857 858 859 860 861 862 863
-- | The size of a candidate expression for unfolding.
--
-- 'TooBig' is what 'mkSizeIs' returns once the size, less the result
-- discount, exceeds the limit it is given; otherwise a 'SizeIs' carries
-- the size together with its argument and result discounts.
data ExprSize
    = TooBig
    | SizeIs { _es_size_is  :: {-# UNPACK #-} !Int -- ^ Size found
             , _es_args     :: !(Bag (Id,Int))
               -- ^ Arguments cased herein, and discount for each such
             , _es_discount :: {-# UNPACK #-} !Int
               -- ^ Size to subtract if result is scrutinised by a case
               -- expression
             }
864 865

-- Prints "TooBig", or the size and result discount in brackets
-- (the per-argument discounts are not shown).
instance Outputable ExprSize where
  ppr sz = case sz of
    TooBig          -> text "TooBig"
    SizeIs n _ disc -> brackets (int n <+> int disc)
868 869 870

-- Build an ExprSize, baling out with TooBig when the size less the
-- result discount exceeds the limit.  The discount is subtracted
-- before the comparison because, e.g., we want to inline a large
-- constructor application into a selector:
--      tup = (a_1, ..., a_99)
--      x = case tup of ...
--
mkSizeIs :: Int -> Int -> Bag (Id, Int) -> Int -> ExprSize
mkSizeIs limit n xs d =
    if n - d > limit
       then TooBig
       else SizeIs n xs d
877

878
-- Picks whichever argument has the strictly larger size field (the
-- second one on a tie); TooBig if either argument is TooBig.
maxSize :: ExprSize -> ExprSize -> ExprSize
maxSize a b = case (a, b) of
    (SizeIs n1 _ _, SizeIs n2 _ _) -> if n1 > n2 then a else b
    _                              -> TooBig
883

884
-- A size of zero with no discounts at all
sizeZero :: ExprSize
sizeZero = sizeN 0

-- A plain size of n with no argument or result discounts
sizeN :: Int -> ExprSize
sizeN n = SizeIs n emptyBag 0
889

Austin Seipp's avatar
Austin Seipp committed
890 891 892
{-
************************************************************************
*                                                                      *
893
\subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
Austin Seipp's avatar
Austin Seipp committed
894 895
*                                                                      *
************************************************************************
896

897 898 899 900
We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
we ``couldn't possibly use'' on the other side.  Can be overridden w/
flaggery.  Just the same as smallEnoughToInline, except that it has no
actual arguments.
Austin Seipp's avatar
Austin Seipp committed
901
-}
902

903
couldBeSmallEnoughToInline :: DynFlags -> Int -> CoreExpr -> Bool
-- Sizes the body of the rhs (its leading binders stripped) against the
-- threshold, with no argument Ids, and reports whether it stayed
-- under the bomb-out limit.
couldBeSmallEnoughToInline dflags threshold rhs
  | TooBig <- sizeExpr dflags threshold [] body = False
  | otherwise                                   = True
  where
    body = snd (collectBinders rhs)
910

911
----------------
912 913 914 915
smallEnoughToInline :: DynFlags -> Unfolding -> Bool
-- Only a CoreUnfolding with UnfIfGoodArgs guidance can qualify, and then
-- only if its recorded size is within the use threshold.
smallEnoughToInline dflags unf = case unf of
    CoreUnfolding { uf_guidance = UnfIfGoodArgs { ug_size = size } }
        -> size <= ufUseThreshold dflags
    _   -> False
917 918

----------------
919 920 921 922
certainlyWillInline :: DynFlags -> Unfolding -> Maybe Unfolding
-- Sees if the unfolding is pretty certain to inline
-- If so, return a *stable* unfolding for it, that will always inline
certainlyWillInline dflags unf@(CoreUnfolding { uf_guidance = guidance, uf_tmpl = expr })
923
  = case guidance of
924 925 926 927 928 929
      UnfNever   -> Nothing
      UnfWhen {} -> Just (unf { uf_src = InlineStable })

      -- The UnfIfGoodArgs case seems important.  If we w/w small functions
      -- binary sizes go up by 10%!  (This is with SplitObjs.)  I'm not totally
      -- sure why.
Simon Peyton Jones's avatar
Simon Peyton Jones committed
930
      UnfIfGoodArgs { ug_size = size, ug_args = args }
931 932 933 934 935 936 937 938 939 940 941 942 943 944 945
         | not (null args)  -- See Note [certainlyWillInline: be careful of thunks]
         , let arity = length args
         , size - (10 * (arity + 1)) <= ufUseThreshold dflags
         -> Just (unf { uf_src      = InlineStable
                      , uf_guidance = UnfWhen { ug_arity     = arity
                                              , ug_unsat_ok  = unSaturatedOk
                                              , ug_boring_ok = inlineBoringOk expr } })
                -- Note the "unsaturatedOk". A function like  f = \ab. a
                -- will certainly inline, even if partially applied (f e), so we'd