CoreUnfold.hs 56.7 KB
Newer Older
Austin Seipp's avatar
Austin Seipp committed
1 2 3 4
{-
(c) The University of Glasgow 2006
(c) The AQUA Project, Glasgow University, 1994-1998

Simon Marlow's avatar
Simon Marlow committed
5 6

Core-syntax unfoldings
7 8 9 10 11 12 13 14 15

Unfoldings (which can travel across module boundaries) are in Core
syntax (namely @CoreExpr@s).

The type @Unfolding@ sits ``above'' simply-Core-expressions
unfoldings, capturing ``higher-level'' things we know about a binding,
usually things that the simplifier found out (e.g., ``it's a
literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
find, unsurprisingly, a Core expression.
Austin Seipp's avatar
Austin Seipp committed
16
-}
17

18
{-# LANGUAGE CPP #-}
Ian Lynagh's avatar
Ian Lynagh committed
19

20
module CoreUnfold (
21
        Unfolding, UnfoldingGuidance,   -- Abstract types
22

23
        noUnfolding, mkImplicitUnfolding,
24
        mkUnfolding, mkCoreUnfolding,
25 26 27
        mkTopUnfolding, mkSimpleUnfolding, mkWorkerUnfolding,
        mkInlineUnfolding, mkInlinableUnfolding, mkWwInlineRule,
        mkCompulsoryUnfolding, mkDFunUnfolding,
Simon Peyton Jones's avatar
Simon Peyton Jones committed
28
        specUnfolding,
29

30
        ArgSummary(..),
31

32 33
        couldBeSmallEnoughToInline, inlineBoringOk,
        certainlyWillInline, smallEnoughToInline,
34

35
        callSiteInline, CallCtxt(..),
36

37 38
        -- Reexport from CoreSubst (it only lives there so it can be used
        -- by the Very Simple Optimiser)
39
        exprIsConApp_maybe, exprIsLiteral_maybe
40 41
    ) where

42 43
#include "HsVersions.h"

Simon Marlow's avatar
Simon Marlow committed
44
import DynFlags
45
import CoreSyn
46
import PprCore          ()      -- Instances
47
import OccurAnal        ( occurAnalyseExpr )
48
import CoreSubst hiding( substTy )
49
import CoreArity       ( manifestArity, exprBotStrictness_maybe )
Simon Marlow's avatar
Simon Marlow committed
50 51 52 53 54 55
import CoreUtils
import Id
import DataCon
import Literal
import PrimOp
import IdInfo
56
import BasicTypes       ( Arity )
57
import Type
Simon Marlow's avatar
Simon Marlow committed
58
import PrelNames
59
import TysPrim          ( realWorldStatePrimTy )
60
import Bag
61
import Util
62
import FastTypes
63
import FastString
64
import Outputable
65 66
import ForeignCall

67
import qualified Data.ByteString as BS
68
import Data.Maybe
69

Austin Seipp's avatar
Austin Seipp committed
70 71 72
{-
************************************************************************
*                                                                      *
73
\subsection{Making unfoldings}
Austin Seipp's avatar
Austin Seipp committed
74 75 76
*                                                                      *
************************************************************************
-}
77

78 79
mkTopUnfolding :: DynFlags -> Bool -> CoreExpr -> Unfolding
-- Ordinary (InlineRhs) unfolding for a top-level binding.
-- The Bool is handed to mkUnfolding as its "is bottoming" flag.
mkTopUnfolding dflags is_bottoming rhs
  = mkUnfolding dflags InlineRhs True {- Top level -} is_bottoming rhs
80

81
mkImplicitUnfolding :: DynFlags -> CoreExpr -> Unfolding
-- Unfolding for an implicit Id: the expression is first run through
-- the simple optimiser, then treated as a top-level, non-bottoming RHS
mkImplicitUnfolding dflags expr
  = mkTopUnfolding dflags False optimised
  where
    optimised = simpleOptExpr expr
Simon Marlow's avatar
Simon Marlow committed
85

86 87 88 89 90
-- Note [Top-level flag on inline rules]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Slight hack: note that mk_inline_rules conservatively sets the
-- top-level flag to True.  It gets set more accurately by the simplifier
-- Simplify.simplUnfolding.
91

92 93
mkSimpleUnfolding :: DynFlags -> CoreExpr -> Unfolding
-- InlineRhs unfolding with both the top-level flag and the
-- is-bottoming flag switched off
mkSimpleUnfolding dflags rhs
  = mkUnfolding dflags InlineRhs False False rhs
94

95
mkDFunUnfolding :: [Var] -> DataCon -> [CoreExpr] -> Unfolding
-- Every argument expression is occurrence-analysed before it is stored;
-- see Note [Occurrence analysis of unfoldings]
mkDFunUnfolding bndrs con ops
  = DFunUnfolding { df_con   = con
                  , df_bndrs = bndrs
                  , df_args  = analysed_ops }
  where
    analysed_ops = map occurAnalyseExpr ops
Simon Marlow's avatar
Simon Marlow committed
101

102 103
mkWwInlineRule :: CoreExpr -> Arity -> Unfolding
-- InlineStable unfolding over the simple-optimised expression, with
-- UnfWhen guidance at the supplied arity (unSaturatedOk, boringCxtNotOk)
mkWwInlineRule expr arity
  = mkCoreUnfolding InlineStable True (simpleOptExpr expr) guide
  where
    guide = UnfWhen { ug_arity     = arity
                    , ug_unsat_ok  = unSaturatedOk
                    , ug_boring_ok = boringCxtNotOk }
108

twanvl's avatar
twanvl committed
109
mkCompulsoryUnfolding :: CoreExpr -> Unfolding
-- Used for things that absolutely must be unfolded
mkCompulsoryUnfolding expr
  = mkCoreUnfolding InlineCompulsory True (simpleOptExpr expr) guide
  where
    guide = UnfWhen { ug_arity     = 0    -- Arity of unfolding doesn't matter
                    , ug_unsat_ok  = unSaturatedOk
                    , ug_boring_ok = boringCxtOk }
115

116 117 118 119 120 121 122 123 124 125 126 127 128
mkWorkerUnfolding :: DynFlags -> (CoreExpr -> CoreExpr) -> Unfolding -> Unfolding
-- See Note [Worker-wrapper for INLINABLE functions] in WorkWrap
-- Only a CoreUnfolding with a stable source is carried over: its
-- template is transformed by work_fn, simple-optimised, and given
-- freshly calculated guidance.  Everything else becomes noUnfolding.
mkWorkerUnfolding dflags work_fn unf
  = case unf of
      CoreUnfolding { uf_src = src, uf_tmpl = tmpl, uf_is_top = top_lvl }
        | isStableSource src
        -> let worker_rhs = simpleOptExpr (work_fn tmpl)
               guide      = calcUnfoldingGuidance dflags worker_rhs
           in mkCoreUnfolding src top_lvl worker_rhs guide
      _ -> noUnfolding

129
mkInlineUnfolding :: Maybe Arity -> CoreExpr -> Unfolding
-- InlineStable unfolding with UnfWhen guidance.
--   Nothing    => arity is the manifest arity of the optimised
--                 expression, and unsaturated calls are OK
--   Just arity => use that arity, and insist on saturation
mkInlineUnfolding mb_arity expr
  = mkCoreUnfolding InlineStable
                    True         -- Note [Top-level flag on inline rules]
                    opt_rhs guide
  where
    opt_rhs = simpleOptExpr expr

    guide = case mb_arity of
              Nothing    -> when_guide (manifestArity opt_rhs) unSaturatedOk
              Just arity -> when_guide arity                   needSaturated

    -- Both alternatives share the same boring-context answer
    when_guide arity unsat_ok
      = UnfWhen { ug_arity     = arity
                , ug_unsat_ok  = unsat_ok
                , ug_boring_ok = inlineBoringOk opt_rhs }
144

145 146 147
mkInlinableUnfolding :: DynFlags -> CoreExpr -> Unfolding
-- InlineStable unfolding for the simple-optimised expression; the
-- is-bottoming flag is set when exprBotStrictness_maybe finds a result
mkInlinableUnfolding dflags expr
  = mkUnfolding dflags InlineStable True bottoming opt_rhs
  where
    opt_rhs   = simpleOptExpr expr
    bottoming = case exprBotStrictness_maybe opt_rhs of
                  Just _  -> True
                  Nothing -> False
Simon Peyton Jones's avatar
Simon Peyton Jones committed
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185

specUnfolding :: DynFlags -> Subst -> [Var] -> [CoreExpr] -> Unfolding -> Unfolding
-- See Note [Specialising unfoldings]
-- Specialise an unfolding with respect to 'spec_args', abstracting the
-- result over 'new_bndrs'.  Two shapes are handled:
--   * a DFunUnfolding, which stays a DFunUnfolding
--   * a stable CoreUnfolding whose guidance is UnfWhen
-- Every other unfolding is replaced by noUnfolding.
specUnfolding _ subst new_bndrs spec_args
              df@(DFunUnfolding { df_bndrs = bndrs, df_con = con , df_args = args })
  = ASSERT2( length bndrs >= length spec_args, ppr df $$ ppr spec_args $$ ppr new_bndrs )
    mkDFunUnfolding (new_bndrs ++ extra_bndrs) con
                    (map (substExpr spec_doc subst2) args)
  where
    -- Pair the dfun's binders with the specialising arguments ...
    subst1 = extendSubstList subst (bndrs `zip` spec_args)
    -- ... and push the substitution through the remaining binders
    -- (presumably dropList drops one binder per spec_arg -- confirm in Util)
    (subst2, extra_bndrs) = substBndrs subst1 (dropList spec_args bndrs)

specUnfolding _dflags subst new_bndrs spec_args
              (CoreUnfolding { uf_src = src, uf_tmpl = tmpl
                             , uf_is_top = top_lvl
                             , uf_guidance = old_guidance })
 | isStableSource src  -- See Note [Specialising unfoldings]
 , UnfWhen { ug_arity = old_arity
           , ug_unsat_ok = unsat_ok
           , ug_boring_ok = boring_ok } <- old_guidance
 = let -- New arity: subtract the value arguments being supplied and
       -- add the value binders being abstracted over
       guidance = UnfWhen { ug_arity = old_arity - count isValArg spec_args
                                     + count isId new_bndrs
                          , ug_unsat_ok = unsat_ok
                          , ug_boring_ok = boring_ok }
       new_tmpl = simpleOptExpr $ mkLams new_bndrs $
                  mkApps (substExpr spec_doc subst tmpl) spec_args
                   -- The beta-redexes created here are handed straight to
                   -- the simpleOptExpr call at the head of this binding

   in mkCoreUnfolding src top_lvl new_tmpl guidance

-- Anything else (including stable unfoldings whose guidance is not
-- UnfWhen): discard the unfolding
specUnfolding _ _ _ _ _ = noUnfolding

-- The SDoc that specUnfolding passes to substExpr
spec_doc :: SDoc
spec_doc = ptext (sLit "specUnfolding")
186

Austin Seipp's avatar
Austin Seipp committed
187
{-
Simon Peyton Jones's avatar
Simon Peyton Jones committed
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
Note [Specialising unfoldings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When we specialise a function for some given type-class arguments, we use
specUnfolding to specialise its unfolding.  Some important points:

* If the original function has a DFunUnfolding, the specialised one
  must do so too!  Otherwise we lose the magic rules that make it
  interact with ClassOps

* There is a bit of a hack for INLINABLE functions:
     f :: Ord a => ....
     f = <big-rhs>
     {-# INLINABLE f #-}
  Now if we specialise f, should the specialised version still have
  an INLINABLE pragma?  If it does, we'll capture a specialised copy
  of <big-rhs> as its unfolding, and that probably won't inline.  But
  if we don't, the specialised version of <big-rhs> might be small
  enough to inline at a call site. This happens with Control.Monad.liftM3,
  and can cause a lot more allocation as a result (nofib n-body shows this).

  Moreover, keeping the INLINABLE thing isn't much help, because
  the specialised function (probably) isn't overloaded any more.

  Conclusion: drop the INLINABLE pragma.  In practice what this means is:
     if a stable unfolding has UnfoldingGuidance of UnfWhen,
        we keep it (so the specialised thing too will always inline)
     if a stable unfolding has UnfoldingGuidance of UnfIfGoodArgs
        (which arises from INLINABLE), we discard it
Austin Seipp's avatar
Austin Seipp committed
216
-}
Simon Peyton Jones's avatar
Simon Peyton Jones committed
217

218
mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr
                -> UnfoldingGuidance -> Unfolding
-- Occurrence-analyses the expression before capturing it.
-- The cached flags are computed from the expression as given (i.e.
-- before occurrence analysis); the guidance is stored unchanged.
mkCoreUnfolding src top_lvl expr guidance
  = CoreUnfolding { uf_src          = src
                  , uf_is_top       = top_lvl
                  , uf_guidance     = guidance
                  , uf_tmpl         = occurAnalyseExpr expr
                      -- See Note [Occurrence analysis of unfoldings]
                  , uf_is_value     = exprIsHNF        expr
                  , uf_is_conlike   = exprIsConLike    expr
                  , uf_is_work_free = exprIsWorkFree   expr
                  , uf_expandable   = exprIsExpandable expr }
231

232 233
mkUnfolding :: DynFlags -> UnfoldingSource -> Bool -> Bool -> CoreExpr
            -> Unfolding
-- Calculates unfolding guidance, then lets mkCoreUnfolding build the
-- record (which occurrence-analyses the expression before capturing it).
-- A top-level, bottoming, non-trivial expression gets no unfolding at all.
mkUnfolding dflags src top_lvl is_bottoming expr
  | top_lvl && is_bottoming
  , not (exprIsTrivial expr)
  = NoUnfolding    -- See Note [Do not inline top-level bottoming functions]
  | otherwise
  = mkCoreUnfolding src top_lvl expr guidance
  where
    guidance = calcUnfoldingGuidance dflags expr
        -- NB: *not* (calcUnfoldingGuidance (occurAnalyseExpr expr))!
        -- See Note [Calculate unfolding guidance on the non-occ-anal'd expression]
254

Austin Seipp's avatar
Austin Seipp committed
255
{-
256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
Note [Occurrence analysis of unfoldings]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We do occurrence-analysis of unfoldings once and for all, when the
unfolding is built, rather than each time we inline them.

But given this decision it's vital that we
*always* do it.  Consider this unfolding
    \x -> letrec { f = ...g...; g* = f } in body
where g* is (for some strange reason) the loop breaker.  If we don't
occ-anal it when reading it in, we won't mark g as a loop breaker, and
we may inline g entirely in body, dropping its binding, and leaving
the occurrence in f out of scope. This happened in Trac #8892, where
the unfolding in question was a DFun unfolding.

But more generally, the simplifier is designed on the
basis that it is looking at occurrence-analysed expressions, so better
ensure that they actually are.

274 275 276 277 278 279 280 281 282 283
Note [Calculate unfolding guidance on the non-occ-anal'd expression]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Notice that we give the non-occur-analysed expression to
calcUnfoldingGuidance.  In some ways it'd be better to occur-analyse
first; for example, sometimes during simplification, there's a large
let-bound thing which has been substituted, and so is now dead; so
'expr' contains two copies of the thing while the occurrence-analysed
expression doesn't.

Nevertheless, we *don't* and *must not* occ-analyse before computing
284
the size because
285 286 287 288

a) The size computation bales out after a while, whereas occurrence
   analysis does not.

289 290
b) Residency increases sharply if you occ-anal first.  I'm not
   100% sure why, but it's a large effect.  Compiling Cabal went
291 292 293 294 295 296 297
   from residency of 534M to over 800M with this one change.

This can occasionally mean that the guidance is very pessimistic;
it gets fixed up next round.  And it should be rare, because large
let-bound things that are dead are usually caught by preInlineUnconditionally


Austin Seipp's avatar
Austin Seipp committed
298 299
************************************************************************
*                                                                      *
300
\subsection{The UnfoldingGuidance type}
Austin Seipp's avatar
Austin Seipp committed
301 302 303
*                                                                      *
************************************************************************
-}
304

305 306
inlineBoringOk :: CoreExpr -> Bool
-- See Note [INLINE for small functions]
-- True => the result of inlining the expression is
--         no bigger than the expression itself
--     eg      (\x y -> f y x)
-- This is a quick and dirty version. It doesn't attempt
-- to deal with  (\x y z -> x (y z))
-- The really important one is (x `cast` c)
inlineBoringOk rhs
  = walk 0 rhs
  where
    -- 'credit' counts value lambdas seen so far; each application to a
    -- trivial value argument spends one.  Alternatives are tried in
    -- order, and an alternative whose guards all fail falls through.
    walk :: Int -> CoreExpr -> Bool
    walk credit expr
      = case expr of
          Lam bndr body
            | isId bndr          -> walk (credit + 1) body
            | otherwise          -> walk credit body
          App fun (Type {})      -> walk credit fun
          App fun arg
            | credit > 0
            , exprIsTrivial arg  -> walk (credit - 1) fun
          Tick _ inner           -> walk credit inner -- dubious
          Cast inner _           -> walk credit inner
          Var {}                 -> boringCxtOk
          _                      -> boringCxtNotOk
326

327
calcUnfoldingGuidance
        :: DynFlags
        -> CoreExpr    -- Expression to look at
        -> UnfoldingGuidance
-- Sizes the body of the expression (with its value binders as the
-- interesting arguments) and turns the answer into guidance:
--   TooBig                        => UnfNever
--   small enough for uncondInline => UnfWhen
--   otherwise                     => UnfIfGoodArgs, one discount per value binder
calcUnfoldingGuidance dflags (Tick tickish inner)
  | not (tickishIsCode tickish)  -- non-code ticks don't matter for unfolding
  = calcUnfoldingGuidance dflags inner
calcUnfoldingGuidance dflags rhs
  = case sizeExpr dflags (iUnbox threshold) value_bndrs rhs_body of
      TooBig -> UnfNever
      SizeIs size arg_discounts res_discount
        | uncondInline rhs arity (iBox size)
        -> UnfWhen { ug_arity     = arity   -- Note [INLINE for small functions]
                   , ug_unsat_ok  = unSaturatedOk
                   , ug_boring_ok = boringCxtOk }
        | otherwise
        -> UnfIfGoodArgs { ug_args = [ discount_for arg_discounts b
                                     | b <- value_bndrs ]
                         , ug_size = iBox size
                         , ug_res  = iBox res_discount }
  where
    (all_bndrs, rhs_body) = collectBinders rhs

    -- Bomb out if size gets bigger than this
    threshold   = ufCreationThreshold dflags

    value_bndrs = filter isId all_bndrs
    arity       = length value_bndrs

    -- Fold together every discount recorded against one binder
    discount_for :: Bag (Id,Int) -> Id -> Int
    discount_for discounts bndr = foldlBag step 0 discounts
      where
        step acc (bndr', disc)
          | bndr == bndr' = merge acc disc
          | otherwise     = acc

        -- Function-typed binders take the maximum, others the sum
        -- See Note [Function and non-function discounts]
        merge :: Int -> Int -> Int
        merge | isFunTy (idType bndr) = max
              | otherwise             = (+)
365

Austin Seipp's avatar
Austin Seipp committed
366
{-
367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
Note [Computing the size of an expression]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The basic idea of sizeExpr is obvious enough: count nodes.  But getting the
heuristics right has taken a long time.  Here's the basic strategy:

    * Variables, literals: 0
      (Exception for string literals, see litSize.)

    * Function applications (f e1 .. en): 1 + #value args

    * Constructor applications: 1, regardless of #args

    * Let(rec): 1 + size of components

    * Note, cast: 0

Examples

385
  Size  Term
386
  --------------
387 388
    0     42#
    0     x
389
    0     True
390 391 392
    2     f x
    1     Just x
    4     f (g x)
393 394

Notice that 'x' counts 0, while (f x) counts 2.  That's deliberate: there's
395
a function call to account for.  Notice also that constructor applications
396 397
are very cheap, because exposing them to a caller is so valuable.

398 399 400 401
[25/5/11] All sizes are now multiplied by 10, except for primops
(which have sizes like 1 or 4).  This makes primops look fantastically
cheap, and seems to be almost universally beneficial.  Done partly as a
result of #4978.
402 403 404

Note [Do not inline top-level bottoming functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
405
The FloatOut pass has gone to some trouble to float out calls to 'error'
406 407 408 409
and similar friends.  See Note [Bottoming floats] in SetLevels.
Do not re-inline them!  But we *do* still inline if they are very small
(the uncondInline stuff).

410 411
Note [INLINE for small functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
412
Consider        {-# INLINE f #-}
413 414 415 416 417 418 419 420 421
                f x = Just x
                g y = f y
Then f's RHS is no larger than its LHS, so we should inline it into
even the most boring context.  In general, if the function is
sufficiently small that its body is as small as the call itself, then
inline unconditionally, regardless of how boring the context is.

Things to note:

422 423
(1) We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
    than the thing it's replacing.  Notice that
424 425 426 427 428
      (f x) --> (g 3)             -- YES, unconditionally
      (f x) --> x : []            -- YES, *even though* there are two
                                  --      arguments to the cons
      x     --> g 3               -- NO
      x     --> Just v            -- NO
429

430 431 432 433 434 435 436 437 438 439 440
    It's very important not to unconditionally replace a variable by
    a non-atomic term.

(2) We do this even if the thing isn't saturated, else we end up with the
    silly situation that
       f x y = x
       ...map (f 3)...
    doesn't inline.  Even in a boring context, inlining without being
    saturated will give a lambda instead of a PAP, and will be more
    efficient at runtime.

Simon Peyton Jones's avatar
Simon Peyton Jones committed
441
(3) However, when the function's arity > 0, we do insist that it
442 443 444 445 446 447 448 449 450 451 452 453 454 455 456
    has at least one value argument at the call site.  (This check is
    made in the UnfWhen case of callSiteInline.) Otherwise we find this:
         f = /\a \x:a. x
         d = /\b. MkD (f b)
    If we inline f here we get
         d = /\b. MkD (\x:b. x)
    and then prepareRhs floats out the argument, abstracting the type
    variables, so we end up with the original again!

(4) We must be much more cautious about arity-zero things. Consider
       let x = y +# z in ...
    In *size* terms primops look very small, because they generate a
    single instruction, but we do not want to unconditionally replace
    every occurrence of x with (y +# z).  So we only do the
    unconditional-inline thing for *trivial* expressions.
Simon Peyton Jones's avatar
Simon Peyton Jones committed
457

458 459 460
    NB: you might think that PostInlineUnconditionally would do this
    but it doesn't fire for top-level things; see SimplUtils
    Note [Top level and postInlineUnconditionally]
Austin Seipp's avatar
Austin Seipp committed
461
-}
462

463
uncondInline :: CoreExpr -> Arity -> Int -> Bool
-- Inline unconditionally if there is no size increase
-- Size of call is arity (+1 for the function), scaled by 10
-- See Note [INLINE for small functions]
uncondInline rhs arity size
  | arity <= 0 = exprIsTrivial rhs   -- See Note [INLINE for small functions] (4)
  | otherwise  = size <= call_size   -- See Note [INLINE for small functions] (1)
  where
    call_size = 10 * (arity + 1)
470

471
-- Computes an ExprSize for an expression by structural recursion
-- (size_up).  Scrutinising or applying one of the 'top_args' records a
-- discount against that argument.  All additions go through mkSizeIs
-- with bOMB_OUT_SIZE (presumably that is where TooBig is produced once
-- the threshold is exceeded -- confirm against mkSizeIs).
sizeExpr :: DynFlags
472 473 474 475 476
         -> FastInt         -- Bomb out if it gets bigger than this
         -> [Id]            -- Arguments; we're interested in which of these
                            -- get case'd
         -> CoreExpr
         -> ExprSize
477

478 479
-- Note [Computing the size of an expression]

480
sizeExpr dflags bOMB_OUT_SIZE top_args expr
481 482
  = size_up expr
  where
483
    size_up (Cast e _) = size_up e
484
    size_up (Tick _ e) = size_up e
485
    size_up (Type _)   = sizeZero           -- Types cost nothing
486
    size_up (Coercion _) = sizeZero
487
    size_up (Lit lit)  = sizeN (litSize lit)
488 489 490 491
    size_up (Var f) | isRealWorldId f = sizeZero
                      -- Make sure we get constructor discounts even
                      -- on nullary constructors
                    | otherwise       = size_up_call f [] 0
Simon Marlow's avatar
Simon Marlow committed
492

493 494 495 496
    -- Type/coercion arguments are free.  For a value argument, the last
    -- parameter of size_up_app counts the State# RealWorld arguments
    -- seen so far ("voids"); they are subtracted again further down.
    size_up (App fun arg)
      | isTyCoArg arg = size_up fun
      | otherwise     = size_up arg  `addSizeNSD`
                        size_up_app fun [arg] (if isRealWorldExpr arg then 1 else 0)
497

498 499 500
    -- A value lambda costs 10, unless it binds a State# RealWorld token
    size_up (Lam b e)
      | isId b && not (isRealWorldId b) = lamScrutDiscount dflags (size_up e `addSizeN` 10)
      | otherwise = size_up e
501 502

    size_up (Let (NonRec binder rhs) body)
503 504
      = size_up rhs             `addSizeNSD`
        size_up body            `addSizeN`
505
        (if isUnLiftedType (idType binder) then 0 else 10)
506 507
                -- For the allocation
                -- If the binder has an unlifted type there is no allocation
508 509

    size_up (Let (Rec pairs) body)
510
      = foldr (addSizeNSD . size_up . snd)
511
              (size_up body `addSizeN` (10 * length pairs))     -- (length pairs) for the allocation
512
              pairs
513

514 515 516 517 518 519 520 521 522 523 524 525 526 527
    size_up (Case (Var v) _ _ alts)
        | v `elem` top_args             -- We are scrutinising an argument variable
        = alts_size (foldr addAltSize sizeZero alt_sizes)
                    (foldr maxSize    sizeZero alt_sizes)
                -- Good to inline if an arg is scrutinised, because
                -- that may eliminate allocation in the caller
                -- And it eliminates the case itself
        where
          alt_sizes = map size_up_alt alts

                -- alts_size tries to compute a good discount for
                -- the case when we are scrutinising an argument variable
          alts_size (SizeIs tot tot_disc tot_scrut)  -- Size of all alternatives
                    (SizeIs max _        _)          -- Size of biggest alternative
528
                = SizeIs tot (unitBag (v, iBox (_ILIT(20) +# tot -# max)) `unionBags` tot_disc) tot_scrut
529 530 531 532 533 534
                        -- If the variable is known, we produce a discount that
                        -- will take us back to 'max', the size of the largest alternative
                        -- The extra 20 is a little discount for reduced allocation in the caller
                        --
                        -- Notice though, that we return tot_disc, the total discount from
                        -- all branches.  I think that's right.
535

536
          -- Either argument was TooBig: just pass the total along
          alts_size tot_size _ = tot_size
537

Simon Marlow's avatar
Simon Marlow committed
538
    -- General case; also reached when the guard of the equation above
    -- fails (the scrutinee is a variable that is not one of top_args)
    size_up (Case e _ _ alts) = size_up e  `addSizeNSD`
539 540 541
                                foldr (addAltSize . size_up_alt) case_size alts
      where
          case_size
542
           | is_inline_scrut e, not (lengthExceeds alts 1)  = sizeN (-10)
543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572
           | otherwise = sizeZero
                -- Normally we don't charge for the case itself, but
                -- we charge one per alternative (see size_up_alt,
                -- below) to account for the cost of the info table
                -- and comparisons.
                --
                -- However, in certain cases (see is_inline_scrut
                -- below), no code is generated for the case unless
                -- there are multiple alts.  In these cases we
                -- subtract one, making the first alt free.
                -- e.g. case x# +# y# of _ -> ...   should cost 1
                --      case touch# x# of _ -> ...  should cost 0
                -- (see #4978)
                -- NOTE(review): "one", "1" and "0" above look like unscaled
                -- units; the code charges 10 per alternative and subtracts
                -- 10 here -- confirm and update the figures.
                --
                -- I would like to not have the "not (lengthExceeds alts 1)"
                -- condition above, but without that some programs got worse
                -- (spectral/hartel/event and spectral/para).  I don't fully
                -- understand why. (SDM 24/5/11)

                -- unboxed variables, inline primops and unsafe foreign calls
                -- are all "inline" things:
          is_inline_scrut (Var v) = isUnLiftedType (idType v)
          is_inline_scrut scrut
              | (Var f, _) <- collectArgs scrut
                = case idDetails f of
                    FCallId fc  -> not (isSafeForeignCall fc)
                    PrimOpId op -> not (primOpOutOfLine op)
                    _other      -> False
              | otherwise
                = False
573

574
    ------------
575
    -- size_up_app is used when there's ONE OR MORE value args
    -- It walks down the application spine, collecting the value
    -- arguments and the count of State# RealWorld arguments (voids)
576
    size_up_app (App fun arg) args voids
577 578 579
        | isTyCoArg arg                  = size_up_app fun args voids
        | isRealWorldExpr arg            = size_up_app fun (arg:args) (voids + 1)
        | otherwise                      = size_up arg  `addSizeNSD`
580 581
                                           size_up_app fun (arg:args) voids
    size_up_app (Var fun)     args voids = size_up_call fun args voids
Peter Wortmann's avatar
Peter Wortmann committed
582
    size_up_app (Tick _ expr) args voids = size_up_app expr args voids
583
    size_up_app other         args voids = size_up other `addSizeN` (length args - voids)
584

585
    ------------
586 587
    -- Dispatch on the kind of Id at the head of the call
    size_up_call :: Id -> [CoreExpr] -> Int -> ExprSize
    size_up_call fun val_args voids
588
       = case idDetails fun of
589
           FCallId _        -> sizeN (10 * (1 + length val_args))
590 591
           DataConWorkId dc -> conSize    dc (length val_args)
           PrimOpId op      -> primOpSize op (length val_args)
592 593
           ClassOpId _      -> classOpSize dflags top_args val_args
           _                -> funSize dflags top_args fun (length val_args) voids
594

595
    ------------
596
    size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 10
597 598 599 600 601 602
        -- Don't charge for args, so that wrappers look cheap
        -- (See comments about wrappers with Case)
        --
        -- IMPORTANT: *do* charge for the alternative (10 here), else we
        -- find that giant case nests are treated as practically free
        -- A good example is Foreign.C.Error.errnoToIOError
603 604

    ------------
605 606
        -- These addSize things have to be here because
        -- I don't want to give them bOMB_OUT_SIZE as an argument
607
    addSizeN TooBig          _  = TooBig
608 609
    addSizeN (SizeIs n xs d) m  = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d

610
        -- addAltSize is used to add the sizes of case alternatives
611 612 613 614 615
    addAltSize TooBig            _      = TooBig
    addAltSize _                 TooBig = TooBig
    addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
        = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
                                 (xs `unionBags` ys)
616 617 618
                                 (d1 +# d2)   -- Note [addAltSize result discounts]

        -- This variant ignores the result discount from its LEFT argument
619 620 621 622 623 624
        -- It's used when the second argument isn't part of the result
    addSizeNSD TooBig            _      = TooBig
    addSizeNSD _                 TooBig = TooBig
    addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2)
        = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
                                 (xs `unionBags` ys)
625
                                 d2  -- Ignore d1
626 627 628 629

    -- True for an Id whose type is State# RealWorld
    isRealWorldId id = idType id `eqType` realWorldStatePrimTy

    -- an expression of type State# RealWorld must be a variable
Peter Wortmann's avatar
Peter Wortmann committed
630 631 632
    isRealWorldExpr (Var id)   = isRealWorldId id
    isRealWorldExpr (Tick _ e) = isRealWorldExpr e
    isRealWorldExpr _          = False
633

634 635 636
-- | Finds a nominal size of a literal.
litSize :: Literal -> Int
-- Used by CoreUnfold.sizeExpr
litSize lit
  = case lit of
      LitInteger {} -> 100   -- Note [Size of literal integers]

      -- 10, plus 10 for every (started) group of four bytes
      -- If size could be 0 then @f "x"@ might be too small
      -- [Sept03: make literal strings a bit bigger to avoid fruitless
      --  duplication of little strings]
      MachStr str   -> 10 + 10 * ((BS.length str + 3) `div` 4)

      -- Must match size of nullary constructors
      -- Key point: if  x |-> 4, then x must inline unconditionally
      --            (eg via case binding)
      _other        -> 0

646
classOpSize :: DynFlags -> [Id] -> [CoreExpr] -> ExprSize
-- See Note [Conlike is interesting]
-- No value arguments: size zero.  Otherwise 20, plus 10 for every
-- argument after the first, with no result discount.
classOpSize dflags top_args val_args
  = case val_args of
      []                -> sizeZero
      (dict_arg : rest) -> SizeIs (iUnbox (20 + (10 * length rest)))
                                  (dict_discount dict_arg)
                                  (_ILIT(0))
  where
    -- If the class op is scrutinising a lambda bound dictionary then
    -- give it a discount, to encourage the inlining of this function
    -- The actual discount is rather arbitrarily chosen
    dict_discount (Var dict)
      | dict `elem` top_args = unitBag (dict, ufDictDiscount dflags)
    dict_discount _          = emptyBag

662
-- | Size for functions that are not constructors or primops.
-- Applications of @build@ and @augment@ get their own fixed sizes.
-- See Note [Function applications]
funSize :: DynFlags -> [Id] -> Id -> Int -> Int -> ExprSize
funSize dflags top_args fun n_val_args voids
  | fun `hasKey` buildIdKey   = buildSize
  | fun `hasKey` augmentIdKey = augmentSize
  | otherwise
  = SizeIs (iUnbox call_cost) fun_arg_discount (iUnbox pap_discount)
  where
    has_val_args = n_val_args > 0
    app_discount = ufFunAppDiscount dflags

    -- The 1+ is for the function itself
    -- Add 1 for each non-trivial arg;
    -- the allocation cost, as in let(rec)
    -- NOTE(review): 'voids' is subtracted from the argument count;
    -- presumably it is the number of void (zero-width) value arguments --
    -- confirm at the call site.
    call_cost = if has_val_args
                  then 10 * (1 + n_val_args - voids)
                  else 0

        --                  DISCOUNTS
        --  See Note [Function and non-function discounts]

    -- If the function is an argument and is applied
    -- to some values, give it an arg-discount
    fun_arg_discount = if has_val_args && fun `elem` top_args
                         then unitBag (fun, app_discount)
                         else emptyBag

    -- If the function is partially applied, show a result discount
    pap_discount = if idArity fun > n_val_args
                     then app_discount
                     else 0

690 691
-- | Size of a data constructor applied to @n_val_args@ value arguments.
conSize :: DataCon -> Int -> ExprSize
conSize dc n_val_args
  -- Nullary constructors: like variables
  | n_val_args == 0      = SizeIs (_ILIT(0))  emptyBag (_ILIT(10))

  -- See Note [Unboxed tuple size and result discount]
  | isUnboxedTupleCon dc = SizeIs (_ILIT(0))  emptyBag (iUnbox scrut_discount)

  -- See Note [Constructor size and result discount]
  | otherwise            = SizeIs (_ILIT(10)) emptyBag (iUnbox scrut_discount)
  where
    -- Result discount shared by the two non-nullary cases
    scrut_discount = 10 * (1 + n_val_args)
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
699

Austin Seipp's avatar
Austin Seipp committed
700
{-
701 702 703 704 705 706 707 708 709 710 711 712 713 714 715
Note [Constructor size and result discount]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Treat a constructors application as size 10, regardless of how many
arguments it has; we are keen to expose them (and we charge separately
for their args).  We can't treat them as size zero, else we find that
(Just x) has size 0, which is the same as a lone variable; and hence
'v' will always be replaced by (Just x), where v is bound to Just x.

The "result discount" is applied if the result of the call is
scrutinised (say by a case).  For a constructor application that will
mean the constructor application will disappear, so we don't need to
charge it to the function.  So the discount should at least match the
cost of the constructor application, namely 10.  But to give a bit
of extra incentive we give a discount of 10*(1 + n_val_args).

716
Simon M tried a MUCH bigger discount: (10 * (10 + n_val_args)),
717
and said it was an "unambiguous win", but it's terribly dangerous
Gabor Greif's avatar
Gabor Greif committed
718
because a function with many many case branches, each finishing with
719 720 721 722 723
a constructor, can have an arbitrarily large discount.  This led to
terrible code bloat: see Trac #6099.

Note [Unboxed tuple size and result discount]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
724 725
However, unboxed tuples count as size zero. I found occasions where we had
        f x y z = case op# x y z of { s -> (# s, () #) }
726 727 728 729 730 731 732 733 734 735 736 737 738
and f wasn't getting inlined.

I tried giving unboxed tuples a *result discount* of zero (see the
commented-out line).  Why?  When returned as a result they do not
allocate, so maybe we don't want to charge so much for them.  If you
have a non-zero discount here, we find that workers often get inlined
back into wrappers, because it looks like
    f x = case $wf x of (# a,b #) -> (a,b)
and we are keener because of the case.  However while this change
shrank binary sizes by 0.5% it also made spectral/boyer allocate 5%
more. All other changes were very small. So it's not a big deal but I
didn't adopt the idea.

739 740
Note [Function and non-function discounts]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
741 742 743 744 745
We want a discount if the function is applied. A good example is
monadic combinators with continuation arguments, where inlining is
quite important.

But we don't want a big discount when a function is called many times
746
(see the detailed comments with Trac #6048) because if the function is
747 748 749 750
big it won't be inlined at its many call sites and no benefit results.
Indeed, we can get exponentially big inlinings this way; that is what
Trac #6048 is about.

751 752 753 754 755 756 757 758 759
On the other hand, for data-valued arguments, if there are lots of
case expressions in the body, each one will get smaller if we apply
the function to a constructor application, so we *want* a big discount
if the argument is scrutinised by many case expressions.

Conclusion:
  - For functions, take the max of the discounts
  - For data values, take the sum of the discounts

760

761 762 763 764 765 766
Note [Literal integer size]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Literal integers *can* be big (mkInteger [...coefficients...]), but
need not be (S# n).  We just use an arbitrary big-ish constant here
so that, in particular, we don't inline top-level defns like
   n = S# 5
Gabor Greif's avatar
Gabor Greif committed
767
There's no point in doing so -- any optimisations will see the S#
768 769 770
through n's unfolding.  Nor will a big size inhibit the unfolding of functions
that mention a literal Integer, because the float-out pass will float
all those constants to top level.
Austin Seipp's avatar
Austin Seipp committed
771
-}
772

twanvl's avatar
twanvl committed
773
-- | Size of a primop application.  The base cost is the primop's code
-- size; an out-of-line primop is additionally charged 1 per value argument.
primOpSize :: PrimOp -> Int -> ExprSize
primOpSize op n_val_args
  | primOpOutOfLine op = sizeN (base_cost + n_val_args)
  | otherwise          = sizeN base_cost
  where
    base_cost = primOpCodeSize op
780

781

twanvl's avatar
twanvl committed
782
buildSize :: ExprSize
783
buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40))
        -- We really want to inline applications of build
        -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later),
        -- so the application itself is given size 0.
        -- Indeed, we should add a result_discount because build is
        -- very like a constructor.  We don't bother to check that the
        -- build is saturated (it usually is).  The result discount of 40
        -- is rather arbitrary.
        -- NOTE(review): this comment used to refer to a "-2" (discounting the
        -- \c n binders) and a "4"; neither figure matches the values above.
790

twanvl's avatar
twanvl committed
791
augmentSize :: ExprSize
792
augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40))
        -- As for build: (augment t (\cn -> e) ys) should cost only the cost of
        -- e plus ys, so the application itself is given size 0, with a
        -- result discount of 40.
        -- NOTE(review): this comment used to mention a "-2" accounting for
        -- the \cn binders; that figure does not match the values above.
twanvl's avatar
twanvl committed
795

796
-- | When we return a lambda, give a discount if it's used (applied).
-- The existing result discount is replaced by the function-application
-- discount; 'TooBig' stays 'TooBig'.
lamScrutDiscount :: DynFlags -> ExprSize -> ExprSize
lamScrutDiscount dflags sz
  = case sz of
      TooBig         -> TooBig
      SizeIs n vs _  -> SizeIs n vs (iUnbox (ufFunAppDiscount dflags))
800

Austin Seipp's avatar
Austin Seipp committed
801
{-
802 803 804 805 806
Note [addAltSize result discounts]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When adding the size of alternatives, we *add* the result discounts
too, rather than take the *maximum*.  For a multi-branch case, this
gives a discount for each branch that returns a constructor, making us
807
keener to inline.  I did try using 'max' instead, but it makes nofib
808 809 810
'rewrite' and 'puzzle' allocate significantly more, and didn't make
binary sizes shrink significantly either.

811 812
Note [Discounts and thresholds]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
813 814
Constants for discounts and thresholds are defined in main/DynFlags,
all of form ufXxxx.   They are:
815

816
ufCreationThreshold
817 818 819
     At a definition site, if the unfolding is bigger than this, we
     may discard it altogether

820
ufUseThreshold
821 822 823
     At a call site, if the unfolding, less discounts, is smaller than
     this, then it's small enough to inline

824
ufKeenessFactor
825
     Factor by which the discounts are multiplied before
826 827
     subtracting from size

828
ufDictDiscount
829 830 831 832
     The discount for each occurrence of a dictionary argument
     as an argument of a class method.  Should be pretty small
     else big functions may get inlined

833
ufFunAppDiscount
834 835 836
     Discount for a function argument that is applied.  Quite
     large, because if we inline we avoid the higher-order call.

837
ufDearOp
838 839
     The size of a foreign call or not-dupable PrimOp

840

841 842 843 844
Note [Function applications]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In a function application (f a b)

845
  - If 'f' is an argument to the function being analysed,
846 847 848 849
    and there's at least one value arg, record a FunAppDiscount for f

  - If the application is a PAP (arity > 2 in this example)
    record a *result* discount (because inlining
850
    with "extra" args in the call may mean that we now
851 852 853
    get a saturated application)

Code for manipulating sizes
Austin Seipp's avatar
Austin Seipp committed
854
-}
855 856

-- | The result of measuring an expression: either it is too big to be
-- worth recording, or a size together with its discounts.
data ExprSize
  = TooBig
  | SizeIs
      FastInt           -- Size found
      !(Bag (Id,Int))   -- Arguments cased herein, and discount for each such
      FastInt           -- Size to subtract if result is scrutinised
                        -- by a case expression
861 862 863 864 865 866 867

-- Prints the size and the result discount in brackets; the per-argument
-- discounts are not shown.
instance Outputable ExprSize where
  ppr sz = case sz of
    TooBig                  -> ptext (sLit "TooBig")
    SizeIs size _ res_disc  -> brackets (int (iBox size) <+> int (iBox res_disc))

-- | Build a 'SizeIs', or bale out with 'TooBig' when the size, less the
-- result discount, exceeds the given limit.
--
-- We subtract the discount before deciding whether to bale out. eg. we
-- want to inline a large constructor application into a selector:
--      tup = (a_1, ..., a_99)
--      x = case tup of ...
--
mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize
mkSizeIs limit n xs d
  = if (n -# d) ># limit
      then TooBig
      else SizeIs n xs d

875
-- | The larger of two sizes, compared on the size component only.
-- 'TooBig' on either side gives 'TooBig'; on a tie the second argument
-- is returned.
maxSize :: ExprSize -> ExprSize -> ExprSize
maxSize s1 s2
  = case s1 of
      TooBig        -> TooBig
      SizeIs n1 _ _ ->
        case s2 of
          TooBig        -> TooBig
          SizeIs n2 _ _ -> if n1 ># n2 then s1 else s2
880

881
-- | A size of zero, with no argument discounts and no result discount.
sizeZero :: ExprSize
sizeZero = SizeIs (_ILIT(0)) emptyBag (_ILIT(0))

-- | A size of @n@, with no argument discounts and no result discount.
sizeN :: Int -> ExprSize
sizeN n = SizeIs (iUnbox n) emptyBag (_ILIT(0))

Austin Seipp's avatar
Austin Seipp committed
887 888 889
{-
************************************************************************
*                                                                      *
890
\subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
Austin Seipp's avatar
Austin Seipp committed
891 892
*                                                                      *
************************************************************************
893

894 895 896 897
We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
we ``couldn't possibly use'' on the other side.  Can be overridden w/
flaggery.  Just the same as smallEnoughToInline, except that it has no
actual arguments.
Austin Seipp's avatar
Austin Seipp committed
898
-}
899

900
-- | True unless the body of the right-hand side (with its leading binders
-- stripped) is measured as 'TooBig' against the given threshold.  The
-- measurement is done with an empty list of arguments of interest.
couldBeSmallEnoughToInline :: DynFlags -> Int -> CoreExpr -> Bool
couldBeSmallEnoughToInline dflags threshold rhs
  = not (isTooBig (sizeExpr dflags (iUnbox threshold) [] rhs_body))
  where
    rhs_body = snd (collectBinders rhs)

    isTooBig TooBig = True
    isTooBig _      = False
907

908
----------------
909 910 911 912
-- | True only for a 'CoreUnfolding' whose guidance is 'UnfIfGoodArgs' with
-- a size no bigger than the use threshold; False for everything else.
smallEnoughToInline :: DynFlags -> Unfolding -> Bool
smallEnoughToInline dflags unf
  = case unf of
      CoreUnfolding { uf_guidance = UnfIfGoodArgs { ug_size = size } }
        -> size <= ufUseThreshold dflags
      _ -> False
914 915

----------------
916 917 918 919
-- | Sees if the unfolding is pretty certain to inline.
-- If so, return a *stable* unfolding for it, that will always inline.
certainlyWillInline :: DynFlags -> Unfolding -> Maybe Unfolding
certainlyWillInline dflags unf
  = case unf of
      CoreUnfolding { uf_guidance = guidance, uf_tmpl = expr }
                       -> fromGuidance expr guidance
      DFunUnfolding {} -> Just unf
      _                -> Nothing
  where
    fromGuidance expr guidance
      = case guidance of
          UnfWhen {} -> Just (unf { uf_src = InlineStable })

          -- The UnfIfGoodArgs case seems important.  If we w/w small functions
          -- binary sizes go up by 10%!  (This is with SplitObjs.)  I'm not totally
          -- sure why.
          UnfIfGoodArgs { ug_size = size, ug_args = args }
             | not (null args)  -- See Note [certainlyWillInline: be careful of thunks]
             , let n_args = length args
             , size - (10 * (n_args + 1)) <= ufUseThreshold dflags
             -> Just (unf { uf_src      = InlineStable
                          , uf_guidance = stableGuidance n_args expr })

          -- UnfNever, and any UnfIfGoodArgs that failed the guards above
          _ -> Nothing

    -- Note the "unsaturatedOk". A function like  f = \ab. a
    -- will certainly inline, even if partially applied (f e), so we'd
    -- better make sure that the transformed inlining has the same property
    stableGuidance n_args expr
      = UnfWhen { ug_arity     = n_args
                , ug_unsat_ok  = unSaturatedOk
                , ug_boring_ok = inlineBoringOk expr }
946

Austin Seipp's avatar
Austin Seipp committed
947
{-
948
Note [certainlyWillInline: be careful of thunks]
949 950 951 952 953 954
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Don't claim that thunks will certainly inline, because that risks work
duplication.  Even if the work duplication is not great (eg is_cheap
holds), it can make a big difference in an inner loop.  In Trac #5623 we
found that the WorkWrap phase thought that
       y = case x of F# v -> F# (v +# v)
955
was certainlyWillInline, so the addition got duplicated.
956 957


Austin Seipp's avatar
Austin Seipp committed
958 959
************************************************************************
*                                                                      *
960
\subsection{callSiteInline}
Austin Seipp's avatar
Austin Seipp committed
961 962
*                                                                      *
************************************************************************
963 964 965 966 967 968

This is the key function.  It decides whether to inline a variable at a call site

callSiteInline is used at call sites, so it is a bit more generous.
It's a very important function that embodies lots of heuristics.
A non-WHNF can be inlined if it doesn't occur inside a lambda,