CoreUnfold.hs 56.5 KB
Newer Older
Austin Seipp's avatar
Austin Seipp committed
1 2 3 4
{-
(c) The University of Glasgow 2006
(c) The AQUA Project, Glasgow University, 1994-1998

Simon Marlow's avatar
Simon Marlow committed
5 6

Core-syntax unfoldings
7 8 9 10 11 12 13 14 15

Unfoldings (which can travel across module boundaries) are in Core
syntax (namely @CoreExpr@s).

The type @Unfolding@ sits ``above'' simply-Core-expressions
unfoldings, capturing ``higher-level'' things we know about a binding,
usually things that the simplifier found out (e.g., ``it's a
literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
find, unsurprisingly, a Core expression.
Austin Seipp's avatar
Austin Seipp committed
16
-}
17

18
{-# LANGUAGE CPP #-}
Ian Lynagh's avatar
Ian Lynagh committed
19

20
module CoreUnfold (
21
        Unfolding, UnfoldingGuidance,   -- Abstract types
22

23
        noUnfolding, mkImplicitUnfolding,
24
        mkUnfolding, mkCoreUnfolding,
25 26 27
        mkTopUnfolding, mkSimpleUnfolding, mkWorkerUnfolding,
        mkInlineUnfolding, mkInlinableUnfolding, mkWwInlineRule,
        mkCompulsoryUnfolding, mkDFunUnfolding,
Simon Peyton Jones's avatar
Simon Peyton Jones committed
28
        specUnfolding,
29

30
        ArgSummary(..),
31

32 33
        couldBeSmallEnoughToInline, inlineBoringOk,
        certainlyWillInline, smallEnoughToInline,
34

35
        callSiteInline, CallCtxt(..),
36

37 38
        -- Reexport from CoreSubst (it only lives there so it can be used
        -- by the Very Simple Optimiser)
39
        exprIsConApp_maybe, exprIsLiteral_maybe
40 41
    ) where

42 43
#include "HsVersions.h"

Simon Marlow's avatar
Simon Marlow committed
44
import DynFlags
45
import CoreSyn
46
import PprCore          ()      -- Instances
47
import OccurAnal        ( occurAnalyseExpr )
48
import CoreSubst hiding( substTy )
49
import CoreArity       ( manifestArity, exprBotStrictness_maybe )
Simon Marlow's avatar
Simon Marlow committed
50 51 52 53 54 55
import CoreUtils
import Id
import DataCon
import Literal
import PrimOp
import IdInfo
56
import BasicTypes       ( Arity )
57
import Type
Simon Marlow's avatar
Simon Marlow committed
58
import PrelNames
59
import TysPrim          ( realWorldStatePrimTy )
60
import Bag
61
import Util
62
import FastString
63
import Outputable
64 65
import ForeignCall

66
import qualified Data.ByteString as BS
67
import Data.Maybe
68

Austin Seipp's avatar
Austin Seipp committed
69 70 71
{-
************************************************************************
*                                                                      *
72
\subsection{Making unfoldings}
Austin Seipp's avatar
Austin Seipp committed
73 74 75
*                                                                      *
************************************************************************
-}
76

77 78
-- | Unfolding for a top-level binding: an 'InlineRhs' unfolding with the
-- top-level flag set.  The 'Bool' is the "is bottoming" flag that
-- 'mkUnfolding' expects; the 'CoreExpr' is the right-hand side.
mkTopUnfolding :: DynFlags -> Bool -> CoreExpr -> Unfolding
mkTopUnfolding dflags is_bottoming rhs
  = mkUnfolding dflags InlineRhs True {- Top level -} is_bottoming rhs
79

80
-- | Unfolding for an implicit Id.
-- For implicit Ids, do a tiny bit of optimising first: the expression is
-- run through 'simpleOptExpr' and then handed to 'mkTopUnfolding' with the
-- "is bottoming" flag set to False.
mkImplicitUnfolding :: DynFlags -> CoreExpr -> Unfolding
mkImplicitUnfolding dflags expr
    = mkTopUnfolding dflags False (simpleOptExpr expr)
Simon Marlow's avatar
Simon Marlow committed
84

85 86 87 88 89
-- Note [Top-level flag on inline rules]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Slight hack: note that mk_inline_rules conservatively sets the
-- top-level flag to True.  It gets set more accurately by the simplifier
-- Simplify.simplUnfolding.
90

91 92
-- | An 'InlineRhs' unfolding for an expression that is flagged as neither
-- top-level nor bottoming; guidance is calculated by 'mkUnfolding'.
mkSimpleUnfolding :: DynFlags -> CoreExpr -> Unfolding
mkSimpleUnfolding dflags rhs
  = mkUnfolding dflags InlineRhs not_top_lvl not_bottoming rhs
  where
    not_top_lvl   = False
    not_bottoming = False
93

94
-- | Build a 'DFunUnfolding' from its binders, data constructor and
-- argument expressions.  Every argument is occurrence-analysed before
-- being stored.
mkDFunUnfolding :: [Var] -> DataCon -> [CoreExpr] -> Unfolding
mkDFunUnfolding bndrs con ops
  = DFunUnfolding { df_bndrs = bndrs
                  , df_con = con
                  , df_args = map occurAnalyseExpr ops }
                  -- See Note [Occurrence analysis of unfoldings]
Simon Marlow's avatar
Simon Marlow committed
100

101 102
-- | An 'InlineStable' unfolding, flagged top-level, whose template is the
-- 'simpleOptExpr'-optimised expression.  The guidance is 'UnfWhen' with the
-- supplied arity, 'unSaturatedOk' and 'boringCxtNotOk'.
mkWwInlineRule :: CoreExpr -> Arity -> Unfolding
mkWwInlineRule expr arity
  = mkCoreUnfolding InlineStable True
                   (simpleOptExpr expr)
                   (UnfWhen { ug_arity = arity, ug_unsat_ok = unSaturatedOk
                            , ug_boring_ok = boringCxtNotOk })
107

twanvl's avatar
twanvl committed
108
-- | An 'InlineCompulsory' unfolding, flagged top-level, whose template is
-- the 'simpleOptExpr'-optimised expression.  The guidance allows inlining
-- when unsaturated and in boring contexts.
mkCompulsoryUnfolding :: CoreExpr -> Unfolding
mkCompulsoryUnfolding expr         -- Used for things that absolutely must be unfolded
  = mkCoreUnfolding InlineCompulsory True
                    (simpleOptExpr expr)
                    (UnfWhen { ug_arity = 0    -- Arity of unfolding doesn't matter
                             , ug_unsat_ok = unSaturatedOk, ug_boring_ok = boringCxtOk })
114

115 116 117 118 119 120 121 122 123 124 125 126 127
-- | Derive a worker's unfolding from an existing unfolding.
-- See Note [Worker-wrapper for INLINABLE functions] in WorkWrap
--
-- Only a 'CoreUnfolding' with a stable source is transformed: its template
-- is passed through the supplied function, re-optimised with
-- 'simpleOptExpr', and given freshly calculated guidance.  Anything else
-- yields 'noUnfolding'.
mkWorkerUnfolding :: DynFlags -> (CoreExpr -> CoreExpr) -> Unfolding -> Unfolding
mkWorkerUnfolding dflags work_fn unf
  = case unf of
      CoreUnfolding { uf_src = src, uf_tmpl = tmpl, uf_is_top = top_lvl }
        | isStableSource src
        -> let worker_tmpl     = simpleOptExpr (work_fn tmpl)
               worker_guidance = calcUnfoldingGuidance dflags worker_tmpl
           in mkCoreUnfolding src top_lvl worker_tmpl worker_guidance

      _ -> noUnfolding

128
-- | An 'InlineStable' unfolding with 'UnfWhen' guidance.
--
-- With @Nothing@ the arity is the manifest arity of the optimised
-- expression and unsaturated inlining is allowed; with @Just arity@ the
-- given arity is used and saturation is required.  In both cases the
-- boring-context flag comes from 'inlineBoringOk' on the optimised
-- expression.
mkInlineUnfolding :: Maybe Arity -> CoreExpr -> Unfolding
mkInlineUnfolding mb_arity expr
  = mkCoreUnfolding InlineStable
                    True         -- Note [Top-level flag on inline rules]
                    expr' guide
  where
    expr' = simpleOptExpr expr
    guide = case mb_arity of
              Nothing    -> UnfWhen { ug_arity = manifestArity expr'
                                    , ug_unsat_ok = unSaturatedOk
                                    , ug_boring_ok = boring_ok }
              Just arity -> UnfWhen { ug_arity = arity
                                    , ug_unsat_ok = needSaturated
                                    , ug_boring_ok = boring_ok }
    boring_ok = inlineBoringOk expr'
143

144 145 146
-- | An 'InlineStable' unfolding, flagged top-level, with guidance
-- calculated by 'mkUnfolding'.  The expression is first optimised with
-- 'simpleOptExpr'; it is flagged as bottoming exactly when
-- 'exprBotStrictness_maybe' returns a @Just@ for the optimised expression.
mkInlinableUnfolding :: DynFlags -> CoreExpr -> Unfolding
mkInlinableUnfolding dflags expr
  = mkUnfolding dflags InlineStable True is_bot expr'
  where
    expr' = simpleOptExpr expr
    is_bot = isJust (exprBotStrictness_maybe expr')
Simon Peyton Jones's avatar
Simon Peyton Jones committed
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184

-- | Specialise an unfolding to the given arguments.
-- See Note [Specialising unfoldings]
--
-- * A 'DFunUnfolding' stays a 'DFunUnfolding'.
-- * A stable 'CoreUnfolding' with 'UnfWhen' guidance is rebuilt around the
--   specialised template, with its arity adjusted.
-- * Every other unfolding is discarded ('noUnfolding').
specUnfolding :: DynFlags -> Subst -> [Var] -> [CoreExpr] -> Unfolding -> Unfolding
specUnfolding _ subst new_bndrs spec_args
              df@(DFunUnfolding { df_bndrs = bndrs, df_con = con , df_args = args })
  = ASSERT2( length bndrs >= length spec_args, ppr df $$ ppr spec_args $$ ppr new_bndrs )
    mkDFunUnfolding (new_bndrs ++ leftover_bndrs) con
                    [ substExpr spec_doc body_subst arg | arg <- args ]
  where
    -- Bind the leading dfun binders to the specialising arguments ...
    arg_subst = extendSubstList subst (zip bndrs spec_args)
    -- ... and push the substitution past the binders that were not
    -- instantiated
    (body_subst, leftover_bndrs)
       = substBndrs arg_subst (dropList spec_args bndrs)

specUnfolding _dflags subst new_bndrs spec_args
              (CoreUnfolding { uf_src = src, uf_tmpl = tmpl
                             , uf_is_top = top_lvl
                             , uf_guidance = old_guidance })
 | isStableSource src  -- See Note [Specialising unfoldings]
 , UnfWhen { ug_arity = old_arity } <- old_guidance
 = mkCoreUnfolding src top_lvl spec_tmpl
                   (old_guidance { ug_arity = spec_arity old_arity })
 where
   -- Value arguments supplied by the specialisation are used up;
   -- value binders abstracted over the result are new
   spec_arity old_arity = old_arity - count isValArg spec_args
                                    + count isId new_bndrs

   -- The beta-redexes created by mkApps are simplified away by the
   -- enclosing call to simpleOptExpr
   spec_tmpl = simpleOptExpr
                 (mkLams new_bndrs
                    (mkApps (substExpr spec_doc subst tmpl) spec_args))

specUnfolding _ _ _ _ _ = noUnfolding

-- | Label that 'specUnfolding' passes to 'substExpr'
spec_doc :: SDoc
spec_doc = ptext $ sLit "specUnfolding"
185

Austin Seipp's avatar
Austin Seipp committed
186
{-
Simon Peyton Jones's avatar
Simon Peyton Jones committed
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
Note [Specialising unfoldings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When we specialise a function for some given type-class arguments, we use
specUnfolding to specialise its unfolding.  Some important points:

* If the original function has a DFunUnfolding, the specialised one
  must do so too!  Otherwise we lose the magic rules that make it
  interact with ClassOps

* There is a bit of a hack for INLINABLE functions:
     f :: Ord a => ....
     f = <big-rhs>
     {-# INLINEABLE f #-}
  Now if we specialise f, should the specialised version still have
  an INLINEABLE pragma?  If it does, we'll capture a specialised copy
  of <big-rhs> as its unfolding, and that probably won't inline.  But
  if we don't, the specialised version of <big-rhs> might be small
  enough to inline at a call site. This happens with Control.Monad.liftM3,
  and can cause a lot more allocation as a result (nofib n-body shows this).

  Moreover, keeping the INLINEABLE thing isn't much help, because
  the specialised function (probably) isn't overloaded any more.

  Conclusion: drop the INLINEABLE pragma.  In practice what this means is:
     if a stable unfolding has UnfoldingGuidance of UnfWhen,
        we keep it (so the specialised thing too will always inline)
     if a stable unfolding has UnfoldingGuidance of UnfIfGoodArgs
        (which arises from INLINEABLE), we discard it
Austin Seipp's avatar
Austin Seipp committed
215
-}
Simon Peyton Jones's avatar
Simon Peyton Jones committed
216

217
-- | Build a 'CoreUnfolding' from an expression and ready-made guidance.
--
-- The template stored in the unfolding is the occurrence-analysed
-- expression; the cached @uf_is_*@ / @uf_expandable@ flags are computed
-- from the expression as passed in.
mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr
                -> UnfoldingGuidance -> Unfolding
-- Occurrence-analyses the expression before capturing it
mkCoreUnfolding src top_lvl expr guidance
  = CoreUnfolding { uf_tmpl         = occurAnalyseExpr expr,
                      -- See Note [Occurrence analysis of unfoldings]
                    uf_src          = src,
                    uf_is_top       = top_lvl,
                    uf_is_value     = exprIsHNF        expr,
                    uf_is_conlike   = exprIsConLike    expr,
                    uf_is_work_free = exprIsWorkFree   expr,
                    uf_expandable   = exprIsExpandable expr,
                    uf_guidance     = guidance }
230

231 232
-- | Build an unfolding, calculating its guidance from the expression.
--
-- Arguments, in order: dynamic flags, the unfolding source, the top-level
-- flag, the "is bottoming" flag, and the expression itself.
--
-- A non-trivial, top-level, bottoming expression gets 'NoUnfolding';
-- everything else gets a 'CoreUnfolding' built by 'mkCoreUnfolding'.
mkUnfolding :: DynFlags -> UnfoldingSource -> Bool -> Bool -> CoreExpr
            -> Unfolding
-- Calculates unfolding guidance
-- Occurrence-analyses the expression before capturing it
mkUnfolding dflags src top_lvl is_bottoming expr
  | top_lvl && is_bottoming
  , not (exprIsTrivial expr)
  = NoUnfolding    -- See Note [Do not inline top-level bottoming functions]
  | otherwise
  = mkCoreUnfolding src top_lvl expr guidance
        -- mkCoreUnfolding does the occurrence analysis and fills in the
        -- cached flags; see Note [Occurrence analysis of unfoldings]
  where
    guidance = calcUnfoldingGuidance dflags expr
        -- NB: *not* (calcUnfoldingGuidance (occurAnalyseExpr expr))!
        -- See Note [Calculate unfolding guidance on the non-occ-anal'd expression]
253

Austin Seipp's avatar
Austin Seipp committed
254
{-
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
Note [Occurrence analysis of unfoldings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We do occurrence-analysis of unfoldings once and for all, when the
unfolding is built, rather than each time we inline them.

But given this decision it's vital that we
*always* do it.  Consider this unfolding
    \x -> letrec { f = ...g...; g* = f } in body
where g* is (for some strange reason) the loop breaker.  If we don't
occ-anal it when reading it in, we won't mark g as a loop breaker, and
we may inline g entirely in body, dropping its binding, and leaving
the occurrence in f out of scope. This happened in Trac #8892, where
the unfolding in question was a DFun unfolding.

But more generally, the simplifier is designed on the
basis that it is looking at occurrence-analysed expressions, so better
ensure that they actually are.

273 274 275 276 277 278 279 280 281 282
Note [Calculate unfolding guidance on the non-occ-anal'd expression]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Notice that we give the non-occur-analysed expression to
calcUnfoldingGuidance.  In some ways it'd be better to occur-analyse
first; for example, sometimes during simplification, there's a large
let-bound thing which has been substituted, and so is now dead; so
'expr' contains two copies of the thing while the occurrence-analysed
expression doesn't.

Nevertheless, we *don't* and *must not* occ-analyse before computing
283
the size because
284 285 286 287

a) The size computation bales out after a while, whereas occurrence
   analysis does not.

288 289
b) Residency increases sharply if you occ-anal first.  I'm not
   100% sure why, but it's a large effect.  Compiling Cabal went
290 291 292 293 294 295 296
   from residency of 534M to over 800M with this one change.

This can occasionally mean that the guidance is very pessimistic;
it gets fixed up next round.  And it should be rare, because large
let-bound things that are dead are usually caught by preInlineUnconditionally


Austin Seipp's avatar
Austin Seipp committed
297 298
************************************************************************
*                                                                      *
299
\subsection{The UnfoldingGuidance type}
Austin Seipp's avatar
Austin Seipp committed
300 301 302
*                                                                      *
************************************************************************
-}
303

304 305
inlineBoringOk :: CoreExpr -> Bool
-- See Note [INLINE for small functions]
-- True => the result of inlining the expression is
--         no bigger than the expression itself
--     eg      (\x y -> f y x)
-- This is a quick and dirty version. It doesn't attempt
-- to deal with  (\x y z -> x (y z))
-- The really important one is (x `cast` c)
inlineBoringOk e
  = go 0 e
  where
    -- 'credit' goes up by one for each value lambda, and one unit is
    -- spent on each trivial value argument of an application
    go :: Int -> CoreExpr -> Bool
    go credit (Lam x e) | isId x           = go (credit+1) e
                        | otherwise        = go credit e
    go credit (App f (Type {}))            = go credit f
    go credit (App f a) | credit > 0
                        , exprIsTrivial a  = go (credit-1) f
    go credit (Tick _ e)                   = go credit e -- dubious
    go credit (Cast e _)                   = go credit e
    go _      (Var {})                     = boringCxtOk
    go _      _                            = boringCxtNotOk
325

326
-- | Calculate the 'UnfoldingGuidance' for an expression from its size.
--
-- * 'UnfNever' if 'sizeExpr' reports 'TooBig' (the threshold is
--   'ufCreationThreshold').
-- * 'UnfWhen' if 'uncondInline' accepts the expression.
-- * 'UnfIfGoodArgs' otherwise, with one discount per value binder.
calcUnfoldingGuidance
        :: DynFlags
        -> CoreExpr    -- Expression to look at
        -> UnfoldingGuidance
calcUnfoldingGuidance dflags (Tick t expr)
  | not (tickishIsCode t)  -- non-code ticks don't matter for unfolding
  = calcUnfoldingGuidance dflags expr
calcUnfoldingGuidance dflags expr
  = case sizeExpr dflags bOMB_OUT_SIZE val_bndrs body of
      TooBig -> UnfNever
      SizeIs size cased_bndrs scrut_discount
        | uncondInline expr n_val_bndrs size
        -> UnfWhen { ug_unsat_ok = unSaturatedOk
                   , ug_boring_ok =  boringCxtOk
                   , ug_arity = n_val_bndrs }   -- Note [INLINE for small functions]
        | otherwise
        -> UnfIfGoodArgs { ug_args  = map (mk_discount cased_bndrs) val_bndrs
                         , ug_size  = size
                         , ug_res   = scrut_discount }

  where
    (bndrs, body) = collectBinders expr
    bOMB_OUT_SIZE = ufCreationThreshold dflags
           -- Bomb out if size gets bigger than this
    val_bndrs   = filter isId bndrs
    n_val_bndrs = length val_bndrs

    -- Combine all the discounts recorded for one binder
    mk_discount :: Bag (Id,Int) -> Id -> Int
    mk_discount cbs bndr = foldlBag combine 0 cbs
           where
             combine acc (bndr', disc)
               | bndr == bndr' = acc `plus_disc` disc
               | otherwise     = acc

             -- Function-typed binders take the largest discount;
             -- all others add their discounts up
             plus_disc :: Int -> Int -> Int
             plus_disc | isFunTy (idType bndr) = max
                       | otherwise             = (+)
             -- See Note [Function and non-function discounts]
364

Austin Seipp's avatar
Austin Seipp committed
365
{-
366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
Note [Computing the size of an expression]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The basic idea of sizeExpr is obvious enough: count nodes.  But getting the
heuristics right has taken a long time.  Here's the basic strategy:

    * Variables, literals: 0
      (Exception for string literals, see litSize.)

    * Function applications (f e1 .. en): 1 + #value args

    * Constructor applications: 1, regardless of #args

    * Let(rec): 1 + size of components

    * Note, cast: 0

Examples

384
  Size  Term
385
  --------------
386 387
    0     42#
    0     x
388
    0     True
389 390 391
    2     f x
    1     Just x
    4     f (g x)
392 393

Notice that 'x' counts 0, while (f x) counts 2.  That's deliberate: there's
394
a function call to account for.  Notice also that constructor applications
395 396
are very cheap, because exposing them to a caller is so valuable.

397 398 399 400
[25/5/11] All sizes are now multiplied by 10, except for primops
(which have sizes like 1 or 4).  This makes primops look fantastically
cheap, and seems to be almost universally beneficial.  Done partly as a
result of #4978.
401 402 403

Note [Do not inline top-level bottoming functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
404
The FloatOut pass has gone to some trouble to float out calls to 'error'
405 406 407 408
and similar friends.  See Note [Bottoming floats] in SetLevels.
Do not re-inline them!  But we *do* still inline if they are very small
(the uncondInline stuff).

409 410
Note [INLINE for small functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
411
Consider        {-# INLINE f #-}
412 413 414 415 416 417 418 419 420
                f x = Just x
                g y = f y
Then f's RHS is no larger than its LHS, so we should inline it into
even the most boring context.  In general, if the function is
sufficiently small that its body is as small as the call itself, then
inline unconditionally, regardless of how boring the context is.

Things to note:

421 422
(1) We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
    than the thing it's replacing.  Notice that
423 424 425 426 427
      (f x) --> (g 3)             -- YES, unconditionally
      (f x) --> x : []            -- YES, *even though* there are two
                                  --      arguments to the cons
      x     --> g 3               -- NO
      x     --> Just v            -- NO
428

429 430 431 432 433 434 435 436 437 438 439
    It's very important not to unconditionally replace a variable by
    a non-atomic term.

(2) We do this even if the thing isn't saturated, else we end up with the
    silly situation that
       f x y = x
       ...map (f 3)...
    doesn't inline.  Even in a boring context, inlining without being
    saturated will give a lambda instead of a PAP, and will be more
    efficient at runtime.

Simon Peyton Jones's avatar
Simon Peyton Jones committed
440
(3) However, when the function's arity > 0, we do insist that it
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
    has at least one value argument at the call site.  (This check is
    made in the UnfWhen case of callSiteInline.) Otherwise we find this:
         f = /\a \x:a. x
         d = /\b. MkD (f b)
    If we inline f here we get
         d = /\b. MkD (\x:b. x)
    and then prepareRhs floats out the argument, abstracting the type
    variables, so we end up with the original again!

(4) We must be much more cautious about arity-zero things. Consider
       let x = y +# z in ...
    In *size* terms primops look very small, because they generate a
    single instruction, but we do not want to unconditionally replace
    every occurrence of x with (y +# z).  So we only do the
    unconditional-inline thing for *trivial* expressions.
Simon Peyton Jones's avatar
Simon Peyton Jones committed
456

457 458 459
    NB: you might think that PostInlineUnconditionally would do this
    but it doesn't fire for top-level things; see SimplUtils
    Note [Top level and postInlineUnconditionally]
Austin Seipp's avatar
Austin Seipp committed
460
-}
461

462
uncondInline :: CoreExpr -> Arity -> Int -> Bool
-- Inline unconditionally if there is no size increase
-- Size of call is arity (+1 for the function), in units of 10
-- See Note [INLINE for small functions]
uncondInline rhs arity size
  | arity > 0 = size <= 10 * (arity + 1) -- See Note [INLINE for small functions] (1)
  | otherwise = exprIsTrivial rhs        -- See Note [INLINE for small functions] (4)
469

470
sizeExpr :: DynFlags
         -> Int             -- Bomb out if it gets bigger than this
         -> [Id]            -- Arguments; we're interested in which of these
                            -- get case'd
         -> CoreExpr
         -> ExprSize

-- Note [Computing the size of an expression]

sizeExpr dflags bOMB_OUT_SIZE top_args expr
  = size_up expr
  where
    size_up (Cast e _) = size_up e
    size_up (Tick _ e) = size_up e
    size_up (Type _)   = sizeZero           -- Types cost nothing
    size_up (Coercion _) = sizeZero
    size_up (Lit lit)  = sizeN (litSize lit)
    size_up (Var f) | isRealWorldId f = sizeZero
                      -- Make sure we get constructor discounts even
                      -- on nullary constructors
                    | otherwise       = size_up_call f [] 0

    size_up (App fun arg)
      | isTyCoArg arg = size_up fun
      | otherwise     = size_up arg  `addSizeNSD`
                        size_up_app fun [arg] (if isRealWorldExpr arg then 1 else 0)

    size_up (Lam b e)
      | isId b && not (isRealWorldId b) = lamScrutDiscount dflags (size_up e `addSizeN` 10)
      | otherwise = size_up e

    size_up (Let (NonRec binder rhs) body)
      = size_up rhs             `addSizeNSD`
        size_up body            `addSizeN`
        (if isUnLiftedType (idType binder) then 0 else 10)
                -- For the allocation
                -- If the binder has an unlifted type there is no allocation

    size_up (Let (Rec pairs) body)
      = foldr (addSizeNSD . size_up . snd)
              (size_up body `addSizeN` (10 * length pairs))     -- (length pairs) for the allocation
              pairs

    size_up (Case (Var v) _ _ alts)
        | v `elem` top_args             -- We are scrutinising an argument variable
        = alts_size (foldr addAltSize sizeZero alt_sizes)
                    (foldr maxSize    sizeZero alt_sizes)
                -- Good to inline if an arg is scrutinised, because
                -- that may eliminate allocation in the caller
                -- And it eliminates the case itself
        where
          alt_sizes = map size_up_alt alts

                -- alts_size tries to compute a good discount for
                -- the case when we are scrutinising an argument variable
          alts_size (SizeIs tot tot_disc tot_scrut)  -- Size of all alternatives
                    (SizeIs max _        _)          -- Size of biggest alternative
                = SizeIs tot (unitBag (v, 20 + tot - max) `unionBags` tot_disc) tot_scrut
                        -- If the variable is known, we produce a discount that
                        -- will take us back to 'max', the size of the largest alternative
                        -- The 20+ is a little discount for reduced allocation in the caller
                        --
                        -- Notice though, that we return tot_disc, the total discount from
                        -- all branches.  I think that's right.

          alts_size tot_size _ = tot_size

    size_up (Case e _ _ alts) = size_up e  `addSizeNSD`
                                foldr (addAltSize . size_up_alt) case_size alts
      where
          case_size
           | is_inline_scrut e, not (lengthExceeds alts 1)  = sizeN (-10)
           | otherwise = sizeZero
                -- Normally we don't charge for the case itself, but
                -- we charge 10 per alternative (see size_up_alt,
                -- below) to account for the cost of the info table
                -- and comparisons.
                --
                -- However, in certain cases (see is_inline_scrut
                -- below), no code is generated for the case unless
                -- there are multiple alts.  In these cases we
                -- subtract the charge for one alternative (10),
                -- making the first alt free.
                -- e.g. case x# +# y# of _ -> ...   should cost 1
                --      case touch# x# of _ -> ...  should cost 0
                -- (see #4978)
                --
                -- I would like to not have the "not (lengthExceeds alts 1)"
                -- condition above, but without that some programs got worse
                -- (spectral/hartel/event and spectral/para).  I don't fully
                -- understand why. (SDM 24/5/11)

                -- unboxed variables, inline primops and unsafe foreign calls
                -- are all "inline" things:
          is_inline_scrut (Var v) = isUnLiftedType (idType v)
          is_inline_scrut scrut
              | (Var f, _) <- collectArgs scrut
                = case idDetails f of
                    FCallId fc  -> not (isSafeForeignCall fc)
                    PrimOpId op -> not (primOpOutOfLine op)
                    _other      -> False
              | otherwise
                = False

    ------------
    -- size_up_app is used when there's ONE OR MORE value args
    -- 'voids' counts the State# RealWorld arguments seen so far
    size_up_app (App fun arg) args voids
        | isTyCoArg arg                  = size_up_app fun args voids
        | isRealWorldExpr arg            = size_up_app fun (arg:args) (voids + 1)
        | otherwise                      = size_up arg  `addSizeNSD`
                                           size_up_app fun (arg:args) voids
    size_up_app (Var fun)     args voids = size_up_call fun args voids
    size_up_app (Tick _ expr) args voids = size_up_app expr args voids
    size_up_app other         args voids = size_up other `addSizeN` (length args - voids)

    ------------
    size_up_call :: Id -> [CoreExpr] -> Int -> ExprSize
    size_up_call fun val_args voids
       = case idDetails fun of
           FCallId _        -> sizeN (10 * (1 + length val_args))
           DataConWorkId dc -> conSize    dc (length val_args)
           PrimOpId op      -> primOpSize op (length val_args)
           ClassOpId _      -> classOpSize dflags top_args val_args
           _                -> funSize dflags top_args fun (length val_args) voids

    ------------
    size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 10
        -- Don't charge for args, so that wrappers look cheap
        -- (See comments about wrappers with Case)
        --
        -- IMPORTANT: *do* charge 10 for the alternative, else we
        -- find that giant case nests are treated as practically free
        -- A good example is Foreign.C.Error.errnoToIOError

    ------------
        -- These addSize things have to be here because
        -- I don't want to give them bOMB_OUT_SIZE as an argument
    addSizeN TooBig          _  = TooBig
    addSizeN (SizeIs n xs d) m  = mkSizeIs bOMB_OUT_SIZE (n + m) xs d

        -- addAltSize is used to add the sizes of case alternatives
    addAltSize TooBig            _      = TooBig
    addAltSize _                 TooBig = TooBig
    addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
        = mkSizeIs bOMB_OUT_SIZE (n1 + n2)
                                 (xs `unionBags` ys)
                                 (d1 + d2) -- Note [addAltSize result discounts]

        -- This variant ignores the result discount from its LEFT argument
        -- It's used when the first argument isn't part of the result
    addSizeNSD TooBig            _      = TooBig
    addSizeNSD _                 TooBig = TooBig
    addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2)
        = mkSizeIs bOMB_OUT_SIZE (n1 + n2)
                                 (xs `unionBags` ys)
                                 d2  -- Ignore d1

    isRealWorldId id = idType id `eqType` realWorldStatePrimTy

    -- an expression of type State# RealWorld must be a variable
    isRealWorldExpr (Var id)   = isRealWorldId id
    isRealWorldExpr (Tick _ e) = isRealWorldExpr e
    isRealWorldExpr _          = False
632

633 634 635
-- | Finds a nominal size of a literal.
litSize :: Literal -> Int
-- Used by CoreUnfold.sizeExpr
litSize (LitInteger {}) = 100   -- Note [Size of literal integers]
litSize (MachStr str)   = 10 + 10 * ((BS.length str + 3) `div` 4)
        -- If size could be 0 then @f "x"@ might be too small
        -- [Sept03: make literal strings a bit bigger to avoid fruitless
        --  duplication of little strings]
litSize _other = 0    -- Must match size of nullary constructors
                      -- Key point: if  x |-> 4, then x must inline unconditionally
                      --            (eg via case binding)
644

645
-- | Size of a call to a class operation, given the enclosing function's
-- arguments and the value arguments of the call.
classOpSize :: DynFlags -> [Id] -> [CoreExpr] -> ExprSize
-- See Note [Conlike is interesting]
classOpSize _ _ []
  = sizeZero
classOpSize dflags top_args (arg1 : other_args)
  = SizeIs size arg_discount 0
  where
    size = 20 + (10 * length other_args)
    -- If the class op is scrutinising a lambda bound dictionary then
    -- give it a discount, to encourage the inlining of this function
    -- The actual discount is rather arbitrarily chosen
    arg_discount = case arg1 of
                     Var dict | dict `elem` top_args
                              -> unitBag (dict, ufDictDiscount dflags)
                     _other   -> emptyBag

661
funSize :: DynFlags -> [Id] -> Id -> Int -> Int -> ExprSize
-- Size for functions that are not constructors or primops
-- See Note [Function applications]
funSize dflags top_args fun n_val_args voids
  | fun `hasKey` buildIdKey   = buildSize
  | fun `hasKey` augmentIdKey = augmentSize
  | otherwise                 = SizeIs call_cost fun_discount pap_discount
  where
    has_val_args = n_val_args > 0

        -- With no value arguments the application is free.  Otherwise
        -- charge 10 for the function itself plus 10 for each value
        -- argument, less 10 for each of the 'voids' (presumably the
        -- void-typed value arguments -- TODO confirm against the caller)
    call_cost = if has_val_args
                  then 10 * (1 + n_val_args - voids)
                  else 0

        --                  DISCOUNTS
        --  See Note [Function and non-function discounts]
        -- If the function is an argument and is applied
        -- to some values, give it an arg-discount
    fun_discount
      | has_val_args, fun `elem` top_args
      = unitBag (fun, ufFunAppDiscount dflags)
      | otherwise
      = emptyBag

        -- If the function is partially applied, show a result discount
    pap_discount = if idArity fun > n_val_args
                     then ufFunAppDiscount dflags
                     else 0

689 690
conSize :: DataCon -> Int -> ExprSize
-- Size of a data constructor applied to n_val_args value arguments
conSize dc n_val_args
  | n_val_args == 0 = SizeIs 0 emptyBag 10    -- Like variables
  | otherwise       = SizeIs con_cost emptyBag scrut_discount
  where
    -- Unboxed tuples are free;
    --    see Note [Unboxed tuple size and result discount]
    -- Every other constructor application costs 10;
    --    see Note [Constructor size and result discount]
    con_cost | isUnboxedTupleCon dc = 0
             | otherwise            = 10

    -- Both kinds of constructor get the same result discount
    scrut_discount = 10 * (1 + n_val_args)
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
698

Austin Seipp's avatar
Austin Seipp committed
699
{-
700 701 702 703 704 705 706 707 708 709 710 711 712 713 714
Note [Constructor size and result discount]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Treat a constructors application as size 10, regardless of how many
arguments it has; we are keen to expose them (and we charge separately
for their args).  We can't treat them as size zero, else we find that
(Just x) has size 0, which is the same as a lone variable; and hence
'v' will always be replaced by (Just x), where v is bound to Just x.

The "result discount" is applied if the result of the call is
scrutinised (say by a case).  For a constructor application that will
mean the constructor application will disappear, so we don't need to
charge it to the function.  So the discount should at least match the
cost of the constructor application, namely 10.  But to give a bit
of extra incentive we give a discount of 10*(1 + n_val_args).

715
Simon M tried a MUCH bigger discount: (10 * (10 + n_val_args)),
and said it was an "unambiguous win", but it's terribly dangerous
because a function with many many case branches, each finishing with
a constructor, can have an arbitrarily large discount.  This led to
terrible code bloat: see Trac #6099.

Note [Unboxed tuple size and result discount]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
723 724
However, unboxed tuples count as size zero. I found occasions where we had
        f x y z = case op# x y z of { s -> (# s, () #) }
725 726 727 728 729 730 731 732 733 734 735 736 737
and f wasn't getting inlined.

I tried giving unboxed tuples a *result discount* of zero (see the
commented-out line).  Why?  When returned as a result they do not
allocate, so maybe we don't want to charge so much for them.  If you
have a non-zero discount here, we find that workers often get inlined
back into wrappers, because it looks like
    f x = case $wf x of (# a,b #) -> (a,b)
and we are keener because of the case.  However while this change
shrank binary sizes by 0.5% it also made spectral/boyer allocate 5%
more. All other changes were very small. So it's not a big deal but I
didn't adopt the idea.

738 739
Note [Function and non-function discounts]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
740 741 742 743 744
We want a discount if the function is applied. A good example is
monadic combinators with continuation arguments, where inlining is
quite important.

But we don't want a big discount when a function is called many times
745
(see the detailed comments with Trac #6048) because if the function is
746 747 748 749
big it won't be inlined at its many call sites and no benefit results.
Indeed, we can get exponentially big inlinings this way; that is what
Trac #6048 is about.

750 751 752 753 754 755 756 757 758
On the other hand, for data-valued arguments, if there are lots of
case expressions in the body, each one will get smaller if we apply
the function to a constructor application, so we *want* a big discount
if the argument is scrutinised by many case expressions.

Conclusion:
  - For functions, take the max of the discounts
  - For data values, take the sum of the discounts

759

760 761 762 763 764 765
Note [Literal integer size]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Literal integers *can* be big (mkInteger [...coefficients...]), but
need not be (S# n).  We just use an arbitrary big-ish constant here
so that, in particular, we don't inline top-level defns like
   n = S# 5
There's no point in doing so -- any optimisations will see the S#
through n's unfolding.  Nor will a big size inhibit unfoldings of functions
that mention a literal Integer, because the float-out pass will float
all those constants to top level.
Austin Seipp's avatar
Austin Seipp committed
770
-}
771

twanvl's avatar
twanvl committed
772
primOpSize :: PrimOp -> Int -> ExprSize
-- The size of a primop is its primOpCodeSize; an out-of-line primop
-- is additionally charged 1 for each value argument
primOpSize op n_val_args
  | primOpOutOfLine op = sizeN (code_size + n_val_args)
  | otherwise          = sizeN code_size
  where
    code_size = primOpCodeSize op
779

780

twanvl's avatar
twanvl committed
781
buildSize :: ExprSize
buildSize = SizeIs build_cost emptyBag build_discount
  where
        -- We really want to inline applications of build
        -- build t (\cn -> e) should cost only the cost of e (because build
        -- will be inlined later), so the application itself is free
    build_cost     = 0

        -- Indeed, we should add a result_discount because build is
        -- very like a constructor.  We don't bother to check that the
        -- build is saturated (it usually is).
        -- The value 40 is rather arbitrary.
    build_discount = 40
789

twanvl's avatar
twanvl committed
790
augmentSize :: ExprSize
augmentSize = SizeIs augment_cost emptyBag augment_discount
  where
        -- As for build: (augment t (\cn -> e) ys) should cost only the
        -- cost of e plus ys, so the application itself is free
    augment_cost     = 0

        -- Same (rather arbitrary) result discount as for build
    augment_discount = 40
twanvl's avatar
twanvl committed
794

795
-- When we return a lambda, give a discount if it's used (applied):
-- the existing result discount is replaced by ufFunAppDiscount.
-- TooBig stays TooBig.
lamScrutDiscount :: DynFlags -> ExprSize -> ExprSize
lamScrutDiscount dflags body_size
  = case body_size of
      TooBig        -> TooBig
      SizeIs n vs _ -> SizeIs n vs (ufFunAppDiscount dflags)
799

Austin Seipp's avatar
Austin Seipp committed
800
{-
801 802 803 804 805
Note [addAltSize result discounts]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When adding the size of alternatives, we *add* the result discounts
too, rather than take the *maximum*.  For a multi-branch case, this
gives a discount for each branch that returns a constructor, making us
806
keener to inline.  I did try using 'max' instead, but it makes nofib
807 808 809
'rewrite' and 'puzzle' allocate significantly more, and didn't make
binary sizes shrink significantly either.

810 811
Note [Discounts and thresholds]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
812 813
Constants for discounts and thresholds are defined in main/DynFlags,
all of form ufXxxx.   They are:
814

815
ufCreationThreshold
816 817 818
     At a definition site, if the unfolding is bigger than this, we
     may discard it altogether

819
ufUseThreshold
820 821 822
     At a call site, if the unfolding, less discounts, is smaller than
     this, then it's small enough to inline

823
ufKeenessFactor
824
     Factor by which the discounts are multiplied before
825 826
     subtracting from size

827
ufDictDiscount
828 829 830 831
     The discount for each occurrence of a dictionary argument
     as an argument of a class method.  Should be pretty small
     else big functions may get inlined

832
ufFunAppDiscount
833 834 835
     Discount for a function argument that is applied.  Quite
     large, because if we inline we avoid the higher-order call.

836
ufDearOp
837 838
     The size of a foreign call or not-dupable PrimOp

839

840 841 842 843
Note [Function applications]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In a function application (f a b)

844
  - If 'f' is an argument to the function being analysed,
845 846 847 848
    and there's at least one value arg, record a FunAppDiscount for f

  - If the application is a PAP (arity > 2 in this example)
    record a *result* discount (because inlining
    with "extra" args in the call may mean that we now
    get a saturated application)

Code for manipulating sizes
Austin Seipp's avatar
Austin Seipp committed
853
-}
854

855 856 857 858 859 860 861 862 863 864
-- | The size of a candidate expression for unfolding.
--
-- Either the expression is 'TooBig' to be worth considering, or we record
-- its size together with the discounts that may later be subtracted from it.
data ExprSize
    = TooBig
    | SizeIs { _es_size_is  :: {-# UNPACK #-} !Int -- ^ Size found
             , _es_args     :: !(Bag (Id,Int))
               -- ^ Arguments cased herein, and discount for each such
             , _es_discount :: {-# UNPACK #-} !Int
               -- ^ Size to subtract if result is scrutinised by a case
               -- expression
             }
865 866 867

-- Prints TooBig by name; a SizeIs is shown as its size and its result
-- discount inside square brackets (the argument discounts are omitted)
instance Outputable ExprSize where
  ppr sz = case sz of
             TooBig                -> ptext (sLit "TooBig")
             SizeIs size _ discount -> brackets (int size <+> int discount)
869 870 871

-- Build a SizeIs, unless the size is over the limit, in which case
-- return TooBig.  We subtract the (result) discount before deciding
-- whether to bale out.  eg. we want to inline a large constructor
-- application into a selector:
--      tup = (a_1, ..., a_99)
--      x = case tup of ...
--
mkSizeIs :: Int -> Int -> Bag (Id, Int) -> Int -> ExprSize
mkSizeIs limit n xs d
  = if n - d > limit
      then TooBig
      else SizeIs n xs d
878

879
maxSize :: ExprSize -> ExprSize -> ExprSize
-- TooBig if either argument is TooBig; otherwise whichever argument has
-- the strictly larger size, preferring the second when they are equal.
-- Only the sizes are compared, not the discounts.
maxSize sz_a sz_b
  = case (sz_a, sz_b) of
      (SizeIs n_a _ _, SizeIs n_b _ _)
        | n_a > n_b -> sz_a
        | otherwise -> sz_b
      _ -> TooBig
884

885
-- A size of n with no argument discounts and no result discount
sizeN :: Int -> ExprSize
sizeN n = SizeIs n emptyBag 0

-- The zero size: sizeN at 0
sizeZero :: ExprSize
sizeZero = sizeN 0
890

Austin Seipp's avatar
Austin Seipp committed
891 892 893
{-
************************************************************************
*                                                                      *
894
\subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
Austin Seipp's avatar
Austin Seipp committed
895 896
*                                                                      *
************************************************************************
897

898 899 900 901
We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
we ``couldn't possibly use'' on the other side.  Can be overridden w/
flaggery.  Just the same as smallEnoughToInline, except that it has no
actual arguments.
Austin Seipp's avatar
Austin Seipp committed
902
-}
903

904
couldBeSmallEnoughToInline :: DynFlags -> Int -> CoreExpr -> Bool
-- Strip the leading binders off the right-hand side (collectBinders) and
-- size what is left, passing the threshold and an empty list to sizeExpr.
-- Anything short of TooBig counts as possibly small enough.
couldBeSmallEnoughToInline dflags threshold rhs
  = not (too_big (sizeExpr dflags threshold [] (snd (collectBinders rhs))))
  where
    too_big TooBig = True
    too_big _      = False
911

912
----------------
913 914 915 916
smallEnoughToInline :: DynFlags -> Unfolding -> Bool
-- True only for a CoreUnfolding whose guidance is UnfIfGoodArgs with a
-- size no bigger than ufUseThreshold; any other unfolding gives False
smallEnoughToInline dflags unf
  = case unf of
      CoreUnfolding { uf_guidance = UnfIfGoodArgs { ug_size = sz } }
        -> sz <= ufUseThreshold dflags
      _ -> False
918 919

----------------
920 921 922 923
certainlyWillInline :: DynFlags -> Unfolding -> Maybe Unfolding
-- Sees if the unfolding is pretty certain to inline
-- If so, return a *stable* unfolding for it, that will always inline
certainlyWillInline dflags unf@(CoreUnfolding { uf_guidance = guidance, uf_tmpl = expr })
924
  = case guidance of
925 926 927 928 929 930
      UnfNever   -> Nothing
      UnfWhen {} -> Just (unf { uf_src = InlineStable })

      -- The UnfIfGoodArgs case seems important.  If we w/w small functions
      -- binary sizes go up by 10%!  (This is with SplitObjs.)  I'm not totally
      -- sure why.
Simon Peyton Jones's avatar
Simon Peyton Jones committed
931
      UnfIfGoodArgs { ug_size = size, ug_args = args }
932 933 934 935 936 937 938 939 940 941 942 943 944 945 946
         | not (null args)  -- See Note [certainlyWillInline: be careful of thunks]
         , let arity = length args
         , size - (10 * (arity + 1)) <= ufUseThreshold dflags
         -> Just (unf { uf_src      = InlineStable
                      , uf_guidance = UnfWhen { ug_arity     = arity
                                              , ug_unsat_ok  = unSaturatedOk
                                              , ug_boring_ok = inlineBoringOk expr } })
                -- Note the "unsaturatedOk". A function like  f = \ab. a
                -- will certainly inline, even if partially applied (f e), so we'd
                -- better make sure that the transformed inlining has the same property

      _  -> Nothing

certainlyWillInline _ unf@(DFunUnfolding {})
  = Just unf