DmdAnal.hs 48.1 KB
Newer Older
1 2 3
{-
(c) The GRASP/AQUA Project, Glasgow University, 1993-1998

4

5 6 7
                        -----------------
                        A demand analysis
                        -----------------
8
-}
9

10
{-# LANGUAGE CPP #-}
Ian Lynagh's avatar
Ian Lynagh committed
11

12
module DmdAnal ( dmdAnalProgram ) where
13 14 15

#include "HsVersions.h"

16
import DynFlags
17
import WwLib            ( findTypeShape, deepSplitProductType_maybe )
18
import Demand   -- All of it
19
import CoreSyn
20
import Outputable
21
import VarEnv
22
import BasicTypes
23 24
import FastString
import Data.List
25
import DataCon
26
import Id
27
import CoreUtils        ( exprIsHNF, exprType, exprIsTrivial )
28
import TyCon
29
import Type
30
import FamInstEnv
31
import Util
32 33 34
import Maybes           ( isJust )
import TysWiredIn       ( unboxedPairDataCon )
import TysPrim          ( realWorldStatePrimTy )
Joachim Breitner's avatar
Joachim Breitner committed
35
import ErrUtils         ( dumpIfSet_dyn )
36 37
import Name             ( getName, stableNameCmp )
import Data.Function    ( on )
38

39 40 41
{-
************************************************************************
*                                                                      *
42
\subsection{Top level stuff}
43 44 45
*                                                                      *
************************************************************************
-}
46

47 48
dmdAnalProgram :: DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram
dmdAnalProgram dflags fam_envs binds
49
  = do {
50
        let { binds_plus_dmds = do_prog binds } ;
Joachim Breitner's avatar
Joachim Breitner committed
51 52
        dumpIfSet_dyn dflags Opt_D_dump_strsigs "Strictness signatures" $
            dumpStrSig binds_plus_dmds ;
53
        return binds_plus_dmds
54 55
    }
  where
56
    do_prog :: CoreProgram -> CoreProgram
57
    do_prog binds = snd $ mapAccumL dmdAnalTopBind (emptyAnalEnv dflags fam_envs) binds
58

59
-- Analyse a (group of) top-level binding(s)
60
dmdAnalTopBind :: AnalEnv
61 62
               -> CoreBind
               -> (AnalEnv, CoreBind)
63 64
dmdAnalTopBind sigs (NonRec id rhs)
  = (extendAnalEnv TopLevel sigs id sig, NonRec id2 rhs2)
65
  where
66 67
    (  _, _, _,   rhs1) = dmdAnalRhs TopLevel Nothing sigs             id rhs
    (sig, _, id2, rhs2) = dmdAnalRhs TopLevel Nothing (nonVirgin sigs) id rhs1
68 69 70
        -- Do two passes to improve CPR information
        -- See comments with ignore_cpr_info in mk_sig_ty
        -- and with extendSigsWithLam
71

72
dmdAnalTopBind sigs (Rec pairs)
73 74
  = (sigs', Rec pairs')
  where
75
    (sigs', _, pairs')  = dmdFix TopLevel sigs pairs
76 77
                -- We get two iterations automatically
                -- c.f. the NonRec case above
78

79 80 81
{-
************************************************************************
*                                                                      *
82
\subsection{The analyser itself}
83 84
*                                                                      *
************************************************************************
85

86 87 88
Note [Ensure demand is strict]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It's important not to analyse e with a lazy demand because
89 90 91 92
a) When we encounter   case s of (a,b) ->
        we demand s with U(d1d2)... but if the overall demand is lazy
        that is wrong, and we'd need to reduce the demand on s,
        which is inconvenient
93
b) More important, consider
94
        f (let x = R in x+x), where f is lazy
95 96 97 98 99 100
   We still want to mark x as demanded, because it will be when we
   enter the let.  If we analyse f's arg with a Lazy demand, we'll
   just mark x as Lazy
c) The application rule wouldn't be right either
   Evaluating (f x) in a L demand does *not* cause
   evaluation of f in a C(L) demand!
101
-}
102

103 104 105 106 107 108
-- If e is complicated enough to become a thunk, its contents will be evaluated
-- at most once, so oneify it.
dmdTransformThunkDmd :: CoreExpr -> Demand -> Demand
dmdTransformThunkDmd e
  | exprIsTrivial e = id
  | otherwise       = oneifyDmd
109 110 111 112

-- Do not process absent demands
-- Otherwise act like in a normal demand analysis
-- See |-* relation in the companion paper
113 114
dmdAnalStar :: AnalEnv
            -> Demand   -- This one takes a *Demand*
115
            -> CoreExpr -> (BothDmdArg, CoreExpr)
116
dmdAnalStar env dmd e
117
  | (cd, defer_and_use) <- toCleanDmd dmd (exprType e)
118 119
  , (dmd_ty, e')        <- dmdAnal env cd e
  = (postProcessDmdTypeM defer_and_use dmd_ty, e')
120 121

-- Main Demand Analsysis machinery
Simon Peyton Jones's avatar
Simon Peyton Jones committed
122
dmdAnal, dmdAnal' :: AnalEnv
123
        -> CleanDemand         -- The main one takes a *CleanDemand*
124 125 126 127 128
        -> CoreExpr -> (DmdType, CoreExpr)

-- The CleanDemand is always strict and not absent
--    See Note [Ensure demand is strict]

Simon Peyton Jones's avatar
Simon Peyton Jones committed
129 130
dmdAnal env d e = -- pprTrace "dmdAnal" (ppr d <+> ppr e) $
                  dmdAnal' env d e
131

Simon Peyton Jones's avatar
Simon Peyton Jones committed
132
dmdAnal' _ _ (Lit lit)     = (nopDmdType, Lit lit)
133
dmdAnal' _ _ (Type ty)     = (nopDmdType, Type ty)      -- Doesn't happen, in fact
Simon Peyton Jones's avatar
Simon Peyton Jones committed
134 135 136
dmdAnal' _ _ (Coercion co) = (nopDmdType, Coercion co)

dmdAnal' env dmd (Var var)
137
  = (dmdTransform env var dmd, Var var)
138

Simon Peyton Jones's avatar
Simon Peyton Jones committed
139
dmdAnal' env dmd (Cast e co)
140 141
  = (dmd_ty, Cast e' co)
  where
142 143 144
    (dmd_ty, e') = dmdAnal env dmd e

{-       ----- I don't get this, so commenting out -------
145
    to_co        = pSnd (coercionKind co)
146
    dmd'
147
      | Just tc <- tyConAppTyCon_maybe to_co
148
      , isRecursiveTyCon tc = cleanEvalDmd
149
      | otherwise           = dmd
150
        -- This coerce usually arises from a recursive
151
        -- newtype, and we don't want to look inside them
152 153 154
        -- for exactly the same reason that we don't look
        -- inside recursive products -- we might not reach
        -- a fixpoint.  So revert to a vanilla Eval demand
155
-}
156

Simon Peyton Jones's avatar
Simon Peyton Jones committed
157
dmdAnal' env dmd (Tick t e)
158
  = (dmd_ty, Tick t e')
159
  where
160
    (dmd_ty, e') = dmdAnal env dmd e
161

Simon Peyton Jones's avatar
Simon Peyton Jones committed
162
dmdAnal' env dmd (App fun (Type ty))
163
  = (fun_ty, App fun' (Type ty))
164
  where
165
    (fun_ty, fun') = dmdAnal env dmd fun
166

Simon Peyton Jones's avatar
Simon Peyton Jones committed
167
dmdAnal' sigs dmd (App fun (Coercion co))
168 169
  = (fun_ty, App fun' (Coercion co))
  where
170
    (fun_ty, fun') = dmdAnal sigs dmd fun
171

172 173
-- Lots of the other code is there to make this
-- beautiful, compositional, application rule :-)
174 175
dmdAnal' env dmd (App fun arg)  -- Non-type arguments
  = let                         -- [Type arg handled above]
176
        call_dmd          = mkCallDmd dmd
177 178 179
        (fun_ty, fun')    = dmdAnal env call_dmd fun
        (arg_dmd, res_ty) = splitDmdTy fun_ty
        (arg_ty, arg')    = dmdAnalStar env (dmdTransformThunkDmd arg arg_dmd) arg
180
    in
181 182 183 184 185 186 187 188 189
--    pprTrace "dmdAnal:app" (vcat
--         [ text "dmd =" <+> ppr dmd
--         , text "expr =" <+> ppr (App fun arg)
--         , text "fun dmd_ty =" <+> ppr fun_ty
--         , text "arg dmd =" <+> ppr arg_dmd
--         , text "arg dmd_ty =" <+> ppr arg_ty
--         , text "res dmd_ty =" <+> ppr res_ty
--         , text "overall res dmd_ty =" <+> ppr (res_ty `bothDmdType` arg_ty) ])
    (res_ty `bothDmdType` arg_ty, App fun' arg')
190

191
-- this is an anonymous lambda, since @dmdAnalRhs@ uses @collectBinders@
Simon Peyton Jones's avatar
Simon Peyton Jones committed
192
dmdAnal' env dmd (Lam var body)
193
  | isTyVar var
194 195
  = let
        (body_ty, body') = dmdAnal env dmd body
196
    in
197
    (body_ty, Lam var body')
198

199
  | otherwise
200
  = let (body_dmd, defer_and_use@(_,one_shot)) = peelCallDmd dmd
201 202 203
          -- body_dmd  - a demand to analyze the body
          -- one_shot  - one-shotness of the lambda
          --             hence, cardinality of its free vars
204

205 206 207
        env'             = extendSigsWithLam env var
        (body_ty, body') = dmdAnal env' body_dmd body
        (lam_ty, var')   = annotateLamIdBndr env notArgOfDfun body_ty one_shot var
208
    in
209
    (postProcessUnsat defer_and_use lam_ty, Lam var' body')
210

211
dmdAnal' env dmd (Case scrut case_bndr ty [(DataAlt dc, bndrs, rhs)])
212
  -- Only one alternative with a product constructor
213
  | let tycon = dataConTyCon dc
214
  , isProductTyCon tycon
215
  , Just rec_tc' <- checkRecTc (ae_rec_tc env) tycon
216
  = let
217 218 219 220
        env_w_tc      = env { ae_rec_tc = rec_tc' }
        env_alt       = extendAnalEnv NotTopLevel env_w_tc case_bndr case_bndr_sig
        case_bndr_sig = cprProdSig (dataConRepArity dc)
                -- cprProdSig: inside the alternative, the case binder has the CPR property.
221 222 223 224 225 226 227 228 229 230
                -- Meaning that a case on it will successfully cancel.
                -- Example:
                --      f True  x = case x of y { I# x' -> if x' ==# 3 then y else I# 8 }
                --      f False x = I# 3
                --
                -- We want f to have the CPR property:
                --      f b x = case fw b x of { r -> I# r }
                --      fw True  x = case x of y { I# x' -> if x' ==# 3 then x' else 8 }
                --      fw False x = 3

231 232 233 234 235 236
        (rhs_ty, rhs')                  = dmdAnal env_alt dmd rhs
        (alt_ty1, dmds)                 = findBndrsDmds env rhs_ty bndrs
        (alt_ty2, case_bndr_dmd)        = findBndrDmd env False alt_ty1 case_bndr
        id_dmds                         = addCaseBndrDmd case_bndr_dmd dmds
        alt_ty3 | io_hack_reqd dc bndrs = deferAfterIO alt_ty2
                | otherwise             = alt_ty2
237

238 239 240
        -- Compute demand on the scrutinee
        -- See Note [Demand on scrutinee of a product case]
        scrut_dmd          = mkProdDmd (addDataConStrictness dc id_dmds)
241
        (scrut_ty, scrut') = dmdAnal env scrut_dmd scrut
242 243 244
        res_ty             = alt_ty3 `bothDmdType` toBothDmdArg scrut_ty
        case_bndr'         = setIdDemandInfo case_bndr case_bndr_dmd
        bndrs'             = setBndrsDemandInfo bndrs id_dmds
245
    in
246
--    pprTrace "dmdAnal:Case1" (vcat [ text "scrut" <+> ppr scrut
247 248 249 250
--                                   , text "dmd" <+> ppr dmd
--                                   , text "case_bndr_dmd" <+> ppr (idDemandInfo case_bndr')
--                                   , text "scrut_dmd" <+> ppr scrut_dmd
--                                   , text "scrut_ty" <+> ppr scrut_ty
251
--                                   , text "alt_ty" <+> ppr alt_ty2
252
--                                   , text "res_ty" <+> ppr res_ty ]) $
253
    (res_ty, Case scrut' case_bndr' ty [(DataAlt dc, bndrs', rhs')])
254

Simon Peyton Jones's avatar
Simon Peyton Jones committed
255
dmdAnal' env dmd (Case scrut case_bndr ty alts)
256
  = let      -- Case expression with multiple alternatives
257
        (alt_tys, alts')     = mapAndUnzip (dmdAnalAlt env dmd case_bndr) alts
258 259
        (scrut_ty, scrut')   = dmdAnal env cleanEvalDmd scrut
        (alt_ty, case_bndr') = annotateBndr env (foldr lubDmdType botDmdType alt_tys) case_bndr
260
        res_ty               = alt_ty `bothDmdType` toBothDmdArg scrut_ty
261
    in
262 263
--    pprTrace "dmdAnal:Case2" (vcat [ text "scrut" <+> ppr scrut
--                                   , text "scrut_ty" <+> ppr scrut_ty
Simon Peyton Jones's avatar
Simon Peyton Jones committed
264
--                                   , text "alt_tys" <+> ppr alt_tys
265 266 267
--                                   , text "alt_ty" <+> ppr alt_ty
--                                   , text "res_ty" <+> ppr res_ty ]) $
    (res_ty, Case scrut' case_bndr' ty alts')
268

Simon Peyton Jones's avatar
Simon Peyton Jones committed
269
dmdAnal' env dmd (Let (NonRec id rhs) body)
270
  = (body_ty2, Let (NonRec id2 annotated_rhs) body')
271 272
  where
    (sig, lazy_fv, id1, rhs') = dmdAnalRhs NotTopLevel Nothing env id rhs
273
    (body_ty, body')          = dmdAnal (extendAnalEnv NotTopLevel env id sig) dmd body
274
    (body_ty1, id2)           = annotateBndr env body_ty id1
275
    body_ty2                  = addLazyFVs body_ty1 lazy_fv
276 277

    -- Annotate top-level lambdas at RHS basing on the aggregated demand info
278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
    -- See Note [Annotating lambdas at right-hand side]
    annotated_rhs = annLamWithShotness (idDemandInfo id2) rhs'

        -- If the actual demand is better than the vanilla call
        -- demand, you might think that we might do better to re-analyse
        -- the RHS with the stronger demand.
        -- But (a) That seldom happens, because it means that *every* path in
        --         the body of the let has to use that stronger demand
        -- (b) It often happens temporarily in when fixpointing, because
        --     the recursive function at first seems to place a massive demand.
        --     But we don't want to go to extra work when the function will
        --     probably iterate to something less demanding.
        -- In practice, all the times the actual demand on id2 is more than
        -- the vanilla call demand seem to be due to (b).  So we don't
        -- bother to re-analyse the RHS.
293

Simon Peyton Jones's avatar
Simon Peyton Jones committed
294
dmdAnal' env dmd (Let (Rec pairs) body)
295
  = let
296 297
        (env', lazy_fv, pairs') = dmdFix NotTopLevel env pairs
        (body_ty, body')        = dmdAnal env' dmd body
298
        body_ty1                = deleteFVs body_ty (map fst pairs)
299
        body_ty2                = addLazyFVs body_ty1 lazy_fv
300
    in
301
    body_ty2 `seq`
302
    (body_ty2,  Let (Rec pairs') body')
303

304 305 306 307 308 309 310
io_hack_reqd :: DataCon -> [Var] -> Bool
-- Note [IO hack in the demand analyser]
--
-- There's a hack here for I/O operations.  Consider
--      case foo x s of { (# s, r #) -> y }
-- Is this strict in 'y'.  Normally yes, but what if 'foo' is an I/O
-- operation that simply terminates the program (not in an erroneous way)?
311
-- In that case we should not evaluate 'y' before the call to 'foo'.
312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
-- Hackish solution: spot the IO-like situation and add a virtual branch,
-- as if we had
--      case foo x s of
--         (# s, r #) -> y
--         other      -> return ()
-- So the 'y' isn't necessarily going to be evaluated
--
-- A more complete example (Trac #148, #1592) where this shows up is:
--      do { let len = <expensive> ;
--         ; when (...) (exitWith ExitSuccess)
--         ; print len }
io_hack_reqd con bndrs
  | (bndr:_) <- bndrs
  = con == unboxedPairDataCon &&
    idType bndr `eqType` realWorldStatePrimTy
  | otherwise
  = False

330 331 332 333 334 335 336 337 338
annLamWithShotness :: Demand -> CoreExpr -> CoreExpr
annLamWithShotness d e
  | Just u <- cleanUseDmd_maybe d
  = go u e
  | otherwise = e
  where
    go u e
      | Just (c, u') <- peelUseCall u
      , Lam bndr body <- e
339
      = if isTyVar bndr
340 341 342 343 344 345 346 347
        then Lam bndr                    (go u  body)
        else Lam (setOneShotness c bndr) (go u' body)
      | otherwise
      = e

setOneShotness :: Count -> Id -> Id
setOneShotness One  bndr = setOneShotLambda bndr
setOneShotness Many bndr = bndr
348

349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373
dmdAnalAlt :: AnalEnv -> CleanDemand -> Id -> Alt Var -> (DmdType, Alt Var)
dmdAnalAlt env dmd case_bndr (con,bndrs,rhs)
  | null bndrs    -- Literals, DEFAULT, and nullary constructors
  , (rhs_ty, rhs') <- dmdAnal env dmd rhs
  = (rhs_ty, (con, [], rhs'))

  | otherwise     -- Non-nullary data constructors
  , (rhs_ty, rhs') <- dmdAnal env dmd rhs
  , (alt_ty, dmds) <- findBndrsDmds env rhs_ty bndrs
  , let case_bndr_dmd = findIdDemand alt_ty case_bndr
        id_dmds       = addCaseBndrDmd case_bndr_dmd dmds
  = (alt_ty, (con, setBndrsDemandInfo bndrs id_dmds, rhs'))

{- Note [Demand on the scrutinee of a product case]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When figuring out the demand on the scrutinee of a product case,
we use the demands of the case alternative, i.e. id_dmds.
But note that these include the demand on the case binder;
see Note [Demand on case-alternative binders] in Demand.hs.
This is crucial. Example:
   f x = case x of y { (a,b) -> k y a }
If we just take scrut_demand = U(L,A), then we won't pass x to the
worker, so the worker will rebuild
     x = (a, absent-error)
and that'll crash.
374

375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
Note [Aggregated demand for cardinality]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We use different strategies for strictness and usage/cardinality to
"unleash" demands captured on free variables by bindings. Let us
consider the example:

f1 y = let {-# NOINLINE h #-}
           h = y
       in  (h, h)

We are interested in obtaining cardinality demand U1 on |y|, as it is
used only in a thunk, and, therefore, is not going to be updated any
more. Therefore, the demand on |y|, captured and unleashed by usage of
|h| is U1. However, if we unleash this demand every time |h| is used,
and then sum up the effects, the ultimate demand on |y| will be U1 +
U1 = U. In order to avoid it, we *first* collect the aggregate demand
on |h| in the body of let-expression, and only then apply the demand
transformer:

transf[x](U) = {y |-> U1}

396
so the resulting demand on |y| is U1.
397 398 399 400 401

The situation is, however, different for strictness, where this
aggregating approach exhibits worse results because of the nature of
|both| operation for strictness. Consider the example:

402
f y c =
403
  let h x = y |seq| x
404
   in case of
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
        True  -> h True
        False -> y

It is clear that |f| is strict in |y|, however, the suggested analysis
will infer from the body of |let| that |h| is used lazily (as it is
used in one branch only), therefore lazy demand will be put on its
free variable |y|. Conversely, if the demand on |h| is unleashed right
on the spot, we will get the desired result, namely, that |f| is
strict in |y|.

Note [Annotating lambdas at right-hand side]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Let us take a look at the following example:

g f = let x = 100
          h = \y -> f x y
       in h 5

One can see that |h| is called just once, therefore the RHS of h can
be annotated as a one-shot lambda. This is done by the function
annLamWithShotness *a posteriori*, i.e., basing on the aggregated
usage demand on |h| from the body of |let|-expression, which is C1(U)
in this case.

In other words, for locally-bound lambdas we can infer
430
one-shotness.
431
-}
432

433

434
{-
435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
Note [Add demands for strict constructors]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider this program (due to Roman):

    data X a = X !a

    foo :: X Int -> Int -> Int
    foo (X a) n = go 0
     where
       go i | i < n     = a + go (i+1)
            | otherwise = 0

We want the worker for 'foo' too look like this:

    $wfoo :: Int# -> Int# -> Int#

with the first argument unboxed, so that it is not eval'd each time
452 453
around the 'go' loop (which would otherwise happen, since 'foo' is not
strict in 'a').  It is sound for the wrapper to pass an unboxed arg
454 455 456 457 458 459 460 461
because X is strict, so its argument must be evaluated.  And if we
*don't* pass an unboxed argument, we can't even repair it by adding a
`seq` thus:

    foo (X a) n = a `seq` go 0

because the seq is discarded (very early) since X is strict!

462
There is the usual danger of reboxing, which as usual we ignore. But
463 464 465 466
if X is monomorphic, and has an UNPACK pragma, then this optimisation
is even more important.  We don't want the wrapper to rebox an unboxed
argument, and pass an Int to $wfoo!

467 468 469 470 471 472 473
We add these extra strict demands to the demand on the *scrutinee* of
the case expression; hence the use of addDataConStrictness when
forming scrut_dmd.  The case alternatives aren't strict in their
sub-components, but simply evaluating the scrutinee to HNF does force
those sub-components.


474 475
************************************************************************
*                                                                      *
476
                    Demand transformer
477 478 479
*                                                                      *
************************************************************************
-}
480

481 482 483 484 485 486
dmdTransform :: AnalEnv         -- The strictness environment
             -> Id              -- The function
             -> CleanDemand     -- The demand on the function
             -> DmdType         -- The demand type of the function in this context
        -- Returned DmdEnv includes the demand on
        -- this function plus demand on its free variables
487

488
dmdTransform env var dmd
489
  | isDataConWorkId var                          -- Data constructor
490
  = dmdTransformDataConSig (idArity var) (idStrictness var) dmd
491

492 493 494 495
  | gopt Opt_DmdTxDictSel (ae_dflags env),
    Just _ <- isClassOpId_maybe var -- Dictionary component selector
  = dmdTransformDictSelSig (idStrictness var) dmd

496
  | isGlobalId var                               -- Imported function
497
  = let res = dmdTransformSig (idStrictness var) dmd in
498
--    pprTrace "dmdTransform" (vcat [ppr var, ppr (idStrictness var), ppr dmd, ppr res])
499
    res
500 501 502

  | Just (sig, top_lvl) <- lookupSigEnv env var  -- Local letrec bound thing
  , let fn_ty = dmdTransformSig sig dmd
503 504
  = -- pprTrace "dmdTransform" (vcat [ppr var, ppr sig, ppr dmd, ppr fn_ty]) $
    if isTopLevel top_lvl
505 506
    then fn_ty   -- Don't record top level things
    else addVarDmd fn_ty var (mkOnceUsedDmd dmd)
507

508
  | otherwise                                    -- Local non-letrec-bound thing
509
  = unitVarDmd var (mkOnceUsedDmd dmd)
510

511 512 513
{-
************************************************************************
*                                                                      *
514
\subsection{Bindings}
515 516 517
*                                                                      *
************************************************************************
-}
518 519

-- Recursive bindings
520
dmdFix :: TopLevelFlag
521
       -> AnalEnv               -- Does not include bindings for this binding
522
       -> [(Id,CoreExpr)]
523
       -> (AnalEnv, DmdEnv,
524
           [(Id,CoreExpr)])     -- Binders annotated with stricness info
525

526 527 528
dmdFix top_lvl env orig_pairs
  = (updSigEnv env (sigEnv final_env), lazy_fv, pairs')
     -- Return to original virgin state, keeping new signatures
529
  where
530
    bndrs        = map fst orig_pairs
531
    initial_env = addInitialSigs top_lvl env bndrs
532
    (final_env, lazy_fv, pairs') = loop 1 initial_env orig_pairs
533

534
    loop :: Int
535 536 537
         -> AnalEnv                     -- Already contains the current sigs
         -> [(Id,CoreExpr)]
         -> (AnalEnv, DmdEnv, [(Id,CoreExpr)])
538 539 540 541 542
    loop n env pairs
      = -- pprTrace "dmd loop" (ppr n <+> ppr bndrs $$ ppr env) $
        loop' n env pairs

    loop' n env pairs
543
      | found_fixpoint
544
      = (env', lazy_fv, pairs')
545 546 547 548
                -- Note: return pairs', not pairs.   pairs' is the result of
                -- processing the RHSs with sigs (= sigs'), whereas pairs
                -- is the result of processing the RHSs with the *previous*
                -- iteration of sigs.
549

Gabor Greif's avatar
Gabor Greif committed
550
      | n >= 10
551 552 553
      = -- pprTrace "dmdFix loop" (ppr n <+> (vcat
        --                 [ text "Sigs:" <+> ppr [ (id,lookupVarEnv (sigEnv env) id,
        --                                              lookupVarEnv (sigEnv env') id)
Gabor Greif's avatar
Gabor Greif committed
554 555 556
        --                                          | (id,_) <- pairs],
        --                   text "env:" <+> ppr env,
        --                   text "binds:" <+> pprCoreBinding (Rec pairs)]))
557 558 559 560 561 562
        (env, lazy_fv, orig_pairs)      -- Safe output
                -- The lazy_fv part is really important!  orig_pairs has no strictness
                -- info, including nothing about free vars.  But if we have
                --      letrec f = ....y..... in ...f...
                -- where 'y' is free in f, we must record that y is mentioned,
                -- otherwise y will get recorded as absent altogether
563 564

      | otherwise
565
      = loop (n+1) (nonVirgin env') pairs'
566
      where
567 568 569 570 571 572
        found_fixpoint = all (same_sig (sigEnv env) (sigEnv env')) bndrs

        ((env',lazy_fv), pairs') = mapAccumL my_downRhs (env, emptyDmdEnv) pairs
                -- mapAccumL: Use the new signature to do the next pair
                -- The occurrence analyser has arranged them in a good order
                -- so this can significantly reduce the number of iterations needed
573

574 575
        my_downRhs (env, lazy_fv) (id,rhs)
          = ((env', lazy_fv'), (id', rhs'))
576
          where
577 578
            (sig, lazy_fv1, id', rhs') = dmdAnalRhs top_lvl (Just bndrs) env id rhs
            lazy_fv'                   = plusVarEnv_C bothDmd lazy_fv lazy_fv1
579
            env'                       = extendAnalEnv top_lvl env id sig
580

581
    same_sig sigs sigs' var = lookup sigs var == lookup sigs' var
582
    lookup sigs var = case lookupVarEnv sigs var of
583
                        Just (sig,_) -> sig
584
                        Nothing      -> pprPanic "dmdFix" (ppr var)
585

586
-- Non-recursive bindings
587
dmdAnalRhs :: TopLevelFlag
588
           -> Maybe [Id]   -- Just bs <=> recursive, Nothing <=> non-recursive
589 590
           -> AnalEnv -> Id -> CoreExpr
           -> (StrictSig,  DmdEnv, Id, CoreExpr)
591 592
-- Process the RHS of the binding, add the strictness signature
-- to the Id, and augment the environment with the signature as well.
593
dmdAnalRhs top_lvl rec_flag env id rhs
594
  | Just fn <- unpackTrivial rhs   -- See Note [Demand analysis for trivial right-hand sides]
595
  , let fn_str = getStrictness env fn
596 597 598 599 600 601 602
        fn_fv | isLocalId fn = unitVarEnv fn topDmd
              | otherwise    = emptyDmdEnv
        -- Note [Remember to demand the function itself]
        -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        -- fn_fv: don't forget to produce a demand for fn itself
        -- Lacking this caused Trac #9128
        -- The demand is very conservative (topDmd), but that doesn't
603
        -- matter; trivial bindings are usually inlined, so it only
604 605
        -- kicks in for top-level bindings and NOINLINE bindings
  = (fn_str, fn_fv, set_idStrictness env id fn_str, rhs)
606 607

  | otherwise
608 609
  = (sig_ty, lazy_fv, id', mkLams bndrs' body')
  where
610 611 612 613 614 615 616
    (bndrs, body)    = collectBinders rhs
    env_body         = foldl extendSigsWithLam env bndrs
    (body_ty, body') = dmdAnal env_body body_dmd body
    body_ty'         = removeDmdTyArgs body_ty -- zap possible deep CPR info
    (DmdType rhs_fv rhs_dmds rhs_res, bndrs')
                     = annotateLamBndrs env (isDFunId id) body_ty' bndrs
    sig_ty           = mkStrictSig (mkDmdType sig_fv rhs_dmds rhs_res')
617 618
    id'              = set_idStrictness env id sig_ty
        -- See Note [NOINLINE and strictness]
619 620

    -- See Note [Product demands for function body]
621
    body_dmd = case deepSplitProductType_maybe (ae_fam_envs env) (exprType body) of
622 623 624 625 626 627
                 Nothing            -> cleanEvalDmd
                 Just (dc, _, _, _) -> cleanEvalProdDmd (dataConRepArity dc)

    -- See Note [Lazy and unleashable free variables]
    -- See Note [Aggregated demand for cardinality]
    rhs_fv1 = case rec_flag of
628
                Just bs -> reuseEnv (delVarEnvList rhs_fv bs)
629 630 631 632
                Nothing -> rhs_fv

    (lazy_fv, sig_fv) = splitFVs is_thunk rhs_fv1

633 634 635
    rhs_res'  = trimCPRInfo trim_all trim_sums rhs_res
    trim_all  = is_thunk && not_strict
    trim_sums = not (isTopLevel top_lvl) -- See Note [CPR for sum types]
636

637 638
    -- See Note [CPR for thunks]
    is_thunk = not (exprIsHNF rhs)
639 640
    not_strict
       =  isTopLevel top_lvl  -- Top level and recursive things don't
641 642 643
       || isJust rec_flag     -- get their demandInfo set at all
       || not (isStrictDmd (idDemandInfo id) || ae_virgin env)
          -- See Note [Optimistic CPR in the "virgin" case]
644 645 646

unpackTrivial :: CoreExpr -> Maybe Id
-- Returns (Just v) if the arg is really equal to v, modulo
647
-- casts, type applications etc
648
-- See Note [Demand analysis for trivial right-hand sides]
649 650 651 652 653
unpackTrivial (Var v)                 = Just v
unpackTrivial (Cast e _)              = unpackTrivial e
unpackTrivial (Lam v e) | isTyVar v   = unpackTrivial e
unpackTrivial (App e a) | isTypeArg a = unpackTrivial e
unpackTrivial _                       = Nothing
654

655
{-
656 657
Note [Demand analysis for trivial right-hand sides]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
658
Consider
659
        foo = plusInt |> co
660 661
where plusInt is an arity-2 function with known strictness.  Clearly
we want plusInt's strictness to propagate to foo!  But because it has
662 663
no manifest lambdas, it won't do so automatically, and indeed 'co' might
have type (Int->Int->Int) ~ T, so we *can't* eta-expand.  So we have a
664
special case for right-hand sides that are "trivial", namely variables,
665 666 667 668 669 670 671 672
casts, type applications, and the like.

Note that this can mean that 'foo' has an arity that is smaller than that
indicated by its demand info.  e.g. if co :: (Int->Int->Int) ~ T, then
foo's arity will be zero (see Note [exprArity invariant] in CoreArity),
but its demand signature will be that of plusInt. A small example is the
test case of Trac #8963.

673

674 675 676 677 678
Note [Product demands for function body]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This example comes from shootout/binary_trees:

    Main.check' = \ b z ds. case z of z' { I# ip ->
679 680 681 682 683 684 685 686 687 688 689
                                case ds_d13s of
                                  Main.Nil -> z'
                                  Main.Node s14k s14l s14m ->
                                    Main.check' (not b)
                                      (Main.check' b
                                         (case b {
                                            False -> I# (-# s14h s14k);
                                            True  -> I# (+# s14h s14k)
                                          })
                                         s14l)
                                     s14m   }   }   }
690 691 692

Here we *really* want to unbox z, even though it appears to be used boxed in
the Nil case.  Partly the Nil case is not a hot path.  But more specifically,
693
the whole function gets the CPR property if we do.
694 695 696 697

So for the demand on the body of a RHS we use a product demand if it's
a product type.

698 699
************************************************************************
*                                                                      *
700
\subsection{Strictness signatures and types}
701 702 703
*                                                                      *
************************************************************************
-}
704

705
unitVarDmd :: Var -> Demand -> DmdType
706
unitVarDmd var dmd
707 708 709 710 711 712 713
  = DmdType (unitVarEnv var dmd) [] topRes

addVarDmd :: DmdType -> Var -> Demand -> DmdType
addVarDmd (DmdType fv ds res) var dmd
  = DmdType (extendVarEnv_C bothDmd fv var dmd) ds res

addLazyFVs :: DmdType -> DmdEnv -> DmdType
714
addLazyFVs dmd_ty lazy_fvs
715
  = dmd_ty `bothDmdType` mkBothDmdArg lazy_fvs
716
        -- Using bothDmdType (rather than just both'ing the envs)
717
        -- is vital.  Consider
718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738
        --      let f = \x -> (x,y)
        --      in  error (f 3)
        -- Here, y is treated as a lazy-fv of f, but we must `bothDmd` that L
        -- demand with the bottom coming up from 'error'
        --
        -- I got a loop in the fixpointer without this, due to an interaction
        -- with the lazy_fv filtering in dmdAnalRhs.  Roughly, it was
        --      letrec f n x
        --          = letrec g y = x `fatbar`
        --                         letrec h z = z + ...g...
        --                         in h (f (n-1) x)
        --      in ...
        -- In the initial iteration for f, f=Bot
        -- Suppose h is found to be strict in z, but the occurrence of g in its RHS
        -- is lazy.  Now consider the fixpoint iteration for g, esp the demands it
        -- places on its free variables.  Suppose it places none.  Then the
        --      x `fatbar` ...call to h...
        -- will give a x->V demand for x.  That turns into a L demand for x,
        -- which floats out of the defn for h.  Without the modifyEnv, that
        -- L demand doesn't get both'd with the Bot coming up from the inner
        -- call to f.  So we just get an L demand for x for g.
739

740
{-
741
Note [Do not strictify the argument dictionaries of a dfun]
742 743 744 745
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The typechecker can tie recursive knots involving dfuns, so we do the
conservative thing and refrain from strictifying a dfun's argument
dictionaries.
746
-}
747

748 749 750 751 752 753 754
setBndrsDemandInfo :: [Var] -> [Demand] -> [Var]
setBndrsDemandInfo (b:bs) (d:ds)
  | isTyVar b = b : setBndrsDemandInfo bs (d:ds)
  | otherwise = setIdDemandInfo b d : setBndrsDemandInfo bs ds
setBndrsDemandInfo [] ds = ASSERT( null ds ) []
setBndrsDemandInfo bs _  = pprPanic "setBndrsDemandInfo" (ppr bs)

755
annotateBndr :: AnalEnv -> DmdType -> Var -> (DmdType, Var)
756 757 758 759
-- The returned env has the var deleted
-- The returned var is annotated with demand info
-- according to the result demand of the provided demand type
-- No effect on the argument demands
760
annotateBndr env dmd_ty var
761 762
  | isId var  = (dmd_ty', setIdDemandInfo var dmd)
  | otherwise = (dmd_ty, var)
763
  where
764
    (dmd_ty', dmd) = findBndrDmd env False dmd_ty var
765 766 767

annotateLamBndrs :: AnalEnv -> DFunFlag -> DmdType -> [Var] -> (DmdType, [Var])
annotateLamBndrs env args_of_dfun ty bndrs = mapAccumR annotate ty bndrs
768 769
  where
    annotate dmd_ty bndr
770
      | isId bndr = annotateLamIdBndr env args_of_dfun dmd_ty Many bndr
771 772 773
      | otherwise = (dmd_ty, bndr)

annotateLamIdBndr :: AnalEnv
774
                  -> DFunFlag   -- is this lambda at the top of the RHS of a dfun?
775
                  -> DmdType    -- Demand type of body
776
                  -> Count      -- One-shot-ness of the lambda
777 778 779
                  -> Id         -- Lambda binder
                  -> (DmdType,  -- Demand type of lambda
                      Id)       -- and binder annotated with demand
780

781
annotateLamIdBndr env arg_of_dfun dmd_ty one_shot id
782 783 784
-- For lambdas we add the demand to the argument demands
-- Only called for Ids
  = ASSERT( isId id )
Simon Peyton Jones's avatar
Simon Peyton Jones committed
785
    -- pprTrace "annLamBndr" (vcat [ppr id, ppr _dmd_ty]) $
786
    (final_ty, setOneShotness one_shot (setIdDemandInfo id dmd))
787 788 789 790 791 792
  where
      -- Watch out!  See note [Lambda-bound unfoldings]
    final_ty = case maybeUnfoldingTemplate (idUnfolding id) of
                 Nothing  -> main_ty
                 Just unf -> main_ty `bothDmdType` unf_ty
                          where
793
                             (unf_ty, _) = dmdAnalStar env dmd unf
794

795
    main_ty = addDemand dmd dmd_ty'
796
    (dmd_ty', dmd) = findBndrDmd env arg_of_dfun dmd_ty id
797

798 799 800
deleteFVs :: DmdType -> [Var] -> DmdType
deleteFVs (DmdType fvs dmds res) bndrs
  = DmdType (delVarEnvList fvs bndrs) dmds res
801

802
{-
803 804 805 806 807 808 809 810 811 812 813 814 815 816
Note [CPR for sum types]
~~~~~~~~~~~~~~~~~~~~~~~~
At the moment we do not do CPR for let-bindings that
   * non-top level
   * bind a sum type
Reason: I found that in some benchmarks we were losing let-no-escapes,
which messed it all up.  Example
   let j = \x. ....
   in case y of
        True  -> j False
        False -> j True
If we w/w this we get
   let j' = \x. ....
   in case y of
817 818
        True  -> case j' False of { (# a #) -> Just a }
        False -> case j' True of { (# a #) -> Just a }
819 820 821 822
Notice that j' is not a let-no-escape any more.

However this means in turn that the *enclosing* function
may be CPR'd (via the returned Justs).  But in the case of
Krzysztof Gogolewski's avatar
Typos  
Krzysztof Gogolewski committed
823
sums, there may be Nothing alternatives; and that messes
824 825 826 827 828
up the sum-type CPR.

Conclusion: only do this for products.  It's still not
guaranteed OK for products, but sums definitely lose sometimes.

829 830
Note [CPR for thunks]
~~~~~~~~~~~~~~~~~~~~~
831
If the rhs is a thunk, we usually forget the CPR info, because
832
it is presumably shared (else it would have been inlined, and
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
833 834
so we'd lose sharing if w/w'd it into a function).  E.g.

835 836 837
        let r = case expensive of
                  (a,b) -> (b,a)
        in ...
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
838 839 840

If we marked r as having the CPR property, then we'd w/w into

841 842 843 844 845
        let $wr = \() -> case expensive of
                            (a,b) -> (# b, a #)
            r = case $wr () of
                  (# b,a #) -> (b,a)
        in ...
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
846 847

But now r is a thunk, which won't be inlined, so we are no further ahead.
848
But consider
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
849

850 851
        f x = let r = case expensive of (a,b) -> (b,a)
              in if foo r then r else (x,x)
852 853

Does f have the CPR property?  Well, no.
854

855
However, if the strictness analyser has figured out (in a previous
856
iteration) that it's strict, then we DON'T need to forget the CPR info.
857
Instead we can retain the CPR info and do the thunk-splitting transform
858 859 860
(see WorkWrap.splitThunk).

This made a big difference to PrelBase.modInt, which had something like
861 862
        modInt = \ x -> let r = ... -> I# v in
                        ...body strict in r...
863 864 865
r's RHS isn't a value yet; but modInt returns r in various branches, so
if r doesn't have the CPR property then neither does modInt
Another case I found in practice (in Complex.magnitude), looks like this:
866 867
                let k = if ... then I# a else I# b
                in ... body strict in k ....
868
(For this example, it doesn't matter whether k is returned as part of
869
the overall result; but it does matter that k's RHS has the CPR property.)
870
Left to itself, the simplifier will make a join point thus:
871 872
                let $j k = ...body strict in k...
                if ... then $j (I# a) else $j (I# b)
873
With thunk-splitting, we get instead
874 875
                let $j x = let k = I#x in ...body strict in k...
                in if ... then $j a else $j b
876 877 878 879 880
This is much better; there's a good chance the I# won't get allocated.

The difficulty with this is that we need the strictness type to
look at the body... but we now need the body to calculate the demand
on the variable, so we can decide whether its strictness type should
881 882 883 884 885 886
have a CPR in it or not.  Simple solution:
        a) use strictness info from the previous iteration
        b) make sure we do at least 2 iterations, by doing a second
           round for top-level non-recs.  Top level recs will get at
           least 2 iterations except for totally-bottom functions
           which aren't very interesting anyway.
887 888 889

NB: strictly_demanded is never true of a top-level Id, or of a recursive Id.

890
Note [Optimistic CPR in the "virgin" case]
891
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
892 893 894 895
Demand and strictness info are initialized by top elements. However,
this prevents from inferring a CPR property in the first pass of the
analyser, so we keep an explicit flag ae_virgin in the AnalEnv
datatype.
896

897
We can't start with 'not-demanded' (i.e., top) because then consider
898 899 900 901
        f x = let
                  t = ... I# x
              in
              if ... then t else I# y else f x'
902 903 904

In the first iteration we'd have no demand info for x, so assume
not-demanded; then we'd get TopRes for f's CPR info.  Next iteration
905
we'd see that t was demanded, and so give it the CPR property, but by
906 907 908
now f has TopRes, so it will stay TopRes.  Instead, by checking the
ae_virgin flag at the first time round, we say 'yes t is demanded' the
first time.
909 910 911 912 913 914

However, this does mean that for non-recursive bindings we must
iterate twice to be sure of not getting over-optimistic CPR info,
in the case where t turns out to be not-demanded.  This is handled
by dmdAnalTopBind.

915

916 917 918
Note [NOINLINE and strictness]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The strictness analyser used to have a HACK which ensured that NOINLNE
919 920
things were not strictness-analysed.  The reason was unsafePerformIO.
Left to itself, the strictness analyser would discover this strictness