OccurAnal.hs 85.8 KB
Newer Older
Austin Seipp's avatar
Austin Seipp committed
1 2
{-
(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3

Austin Seipp's avatar
Austin Seipp committed
4 5
************************************************************************
*                                                                      *
6
\section[OccurAnal]{Occurrence analysis pass}
Austin Seipp's avatar
Austin Seipp committed
7 8
*                                                                      *
************************************************************************
9

10 11
The occurrence analyser re-typechecks a core expression, returning a new
core expression with (hopefully) improved usage information.
Austin Seipp's avatar
Austin Seipp committed
12
-}
13

14 15
{-# LANGUAGE CPP, BangPatterns #-}

16
module OccurAnal (
17
        occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18 19
    ) where

20
#include "HsVersions.h"
21 22

import CoreSyn
23
import CoreFVs
Peter Wortmann's avatar
Peter Wortmann committed
24 25
import CoreUtils        ( exprIsTrivial, isDefaultAlt, isExpandableApp,
                          stripTicksTopE, mkTicks )
26
import Id
27
import Name( localiseName )
twanvl's avatar
twanvl committed
28
import BasicTypes
29
import Module( Module )
30 31
import Coercion

32 33
import VarSet
import VarEnv
34
import Var
35
import Demand           ( argOneShots, argsOneShots )
Ian Lynagh's avatar
Ian Lynagh committed
36
import Maybes           ( orElse )
37 38 39
import Digraph          ( SCC(..), Node
                        , stronglyConnCompFromEdgedVerticesUniq
                        , stronglyConnCompFromEdgedVerticesUniqR )
40 41
import Unique
import UniqFM
42
import Util
43
import Outputable
44
import Data.List
Peter Wortmann's avatar
Peter Wortmann committed
45
import Control.Arrow    ( second )
46

Austin Seipp's avatar
Austin Seipp committed
47 48 49
{-
************************************************************************
*                                                                      *
50
    occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
Austin Seipp's avatar
Austin Seipp committed
51 52
*                                                                      *
************************************************************************
53 54

Here's the externally-callable interface:
Austin Seipp's avatar
Austin Seipp committed
55
-}
56

57
occurAnalysePgm :: Module       -- Used only in debug output
58
                -> (Activation -> Bool)
59
                -> [CoreRule] -> [CoreVect] -> VarSet
60
                -> CoreProgram -> CoreProgram
61
occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds
62
  | isEmptyVarEnv final_usage
63 64
  = occ_anald_binds

65
  | otherwise   -- See Note [Glomming]
66 67
  = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
                   2 (ppr final_usage ) )
68
    occ_anald_glommed_binds
69
  where
70 71
    init_env = initOccEnv active_rule
    (final_usage, occ_anald_binds) = go init_env binds
72
    (_, occ_anald_glommed_binds)   = occAnalRecBind init_env imp_rule_edges
73 74 75 76 77 78 79 80
                                                    (flattenBinds occ_anald_binds)
                                                    initial_uds
          -- It's crucial to re-analyse the glommed-together bindings
          -- so that we establish the right loop breakers. Otherwise
          -- we can easily create an infinite loop (Trac #9583 is an example)

    initial_uds = addIdOccs emptyDetails
                            (rulesFreeVars imp_rules `unionVarSet`
81 82 83 84
                             vectsFreeVars vects `unionVarSet`
                             vectVars)
    -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
    -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
Gabor Greif's avatar
Gabor Greif committed
85
    -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)
86

87
    -- Note [Preventing loops due to imported functions rules]
88
    imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
89 90
                            [ mapVarEnv (const maps_to) (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
                            | imp_rule <- imp_rules
91
                            , not (isBuiltinRule imp_rule)  -- See Note [Plugin rules]
92 93 94 95
                            , let maps_to = exprFreeIds (ru_rhs imp_rule)
                                             `delVarSetList` ru_bndrs imp_rule
                            , arg <- ru_args imp_rule ]

96
    go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
twanvl's avatar
twanvl committed
97
    go _ []
98
        = (initial_uds, [])
Ian Lynagh's avatar
Ian Lynagh committed
99 100 101 102
    go env (bind:binds)
        = (final_usage, bind' ++ binds')
        where
           (bs_usage, binds')   = go env binds
103
           (final_usage, bind') = occAnalBind env imp_rule_edges bind bs_usage
104

105
occurAnalyseExpr :: CoreExpr -> CoreExpr
Gabor Greif's avatar
Gabor Greif committed
106
        -- Do occurrence analysis, and discard occurrence info returned
107 108 109 110 111 112 113 114
occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap

occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap

occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
occurAnalyseExpr' enable_binder_swap expr
  = snd (occAnal env expr)
115
  where
116
    env = (initOccEnv all_active_rules) {occ_binder_swap = enable_binder_swap}
117
    -- To be conservative, we say that all inlines and rules are active
118
    all_active_rules = \_ -> True
119

120 121
{- Note [Plugin rules]
~~~~~~~~~~~~~~~~~~~~~~
Gabor Greif's avatar
Gabor Greif committed
122
Conal Elliott (Trac #11651) built a GHC plugin that added some
123 124 125 126 127 128 129 130 131 132
BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
do some domain-specific transformations that could not be expressed
with an ordinary pattern-matching CoreRule.  But then we can't extract
the dependencies (in imp_rule_edges) from ru_rhs etc, because a
BuiltinRule doesn't have any of that stuff.

So we simply assume that BuiltinRules have no dependencies, and filter
them out from the imp_rule_edges comprehension.
-}

Austin Seipp's avatar
Austin Seipp committed
133 134 135
{-
************************************************************************
*                                                                      *
136
                Bindings
Austin Seipp's avatar
Austin Seipp committed
137 138
*                                                                      *
************************************************************************
139

140 141 142 143 144
Note [Recursive bindings: the grand plan]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When we come across a binding group
  Rec { x1 = r1; ...; xn = rn }
we treat it like this (occAnalRecBind):
145

146 147
1. Occurrence-analyse each right hand side, and build a
   "Details" for each binding to capture the results.
148

149 150 151 152 153
   Wrap the details in a Node (details, node-id, dep-node-ids),
   where node-id is just the unique of the binder, and
   dep-node-ids lists all binders on which this binding depends.
   We'll call these the "scope edges".
   See Note [Forming the Rec groups].
154

155
   All this is done by makeNode.
156

157 158 159 160
2. Do SCC-analysis on these Nodes.  Each SCC will become a new Rec or
   NonRec.  The key property is that every free variable of a binding
   is accounted for by the scope edges, so that when we are done
   everything is still in scope.
161

162 163 164
3. For each Cyclic SCC of the scope-edge SCC-analysis in (2), we
   identify suitable loop-breakers to ensure that inlining terminates.
   This is done by occAnalRec.
165

166 167 168 169
4. To do so we form a new set of Nodes, with the same details, but
   different edges, the "loop-breaker nodes". The loop-breaker nodes
   have both more and fewer depedencies than the scope edges
   (see Note [Choosing loop breakers])
170

171 172
   More edges: if f calls g, and g has an active rule that mentions h
               then we add an edge from f -> h
173

174 175 176 177
   Fewer edges: we only include dependencies on active rules, on rule
                RHSs (not LHSs) and if there is an INLINE pragma only
                on the stable unfolding (and vice versa).  The scope
                edges must be much more inclusive.
178

179 180 181
5.  The "weak fvs" of a node are, by definition:
       the scope fvs - the loop-breaker fvs
    See Note [Weak loop breakers], and the nd_weak field of Details
182

183
6.  Having formed the loop-breaker nodes
184

185 186
Note [Dead code]
~~~~~~~~~~~~~~~~
187 188
Dropping dead code for a cyclic Strongly Connected Component is done
in a very simple way:
189

190 191
        the entire SCC is dropped if none of its binders are mentioned
        in the body; otherwise the whole thing is kept.
192

193 194 195
The key observation is that dead code elimination happens after
dependency analysis: so 'occAnalBind' processes SCCs instead of the
original term's binding groups.
196

197
Thus 'occAnalBind' does indeed drop 'f' in an example like
198

Ian Lynagh's avatar
Ian Lynagh committed
199
        letrec f = ...g...
200
               g = ...(...g...)...
Ian Lynagh's avatar
Ian Lynagh committed
201
        in
202
           ...g...
203

204 205 206
when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
'AcyclicSCC f', where 'body_usage' won't contain 'f'.
207

208 209 210 211 212 213 214
------------------------------------------------------------
Note [Forming Rec groups]
~~~~~~~~~~~~~~~~~~~~~~~~~
We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
and "g uses f", no matter how indirectly.  We do a SCC analysis
with an edge f -> g if "f uses g".

Krzysztof Gogolewski's avatar
Typos  
Krzysztof Gogolewski committed
215
More precisely, "f uses g" iff g should be in scope wherever f is.
216 217 218 219
That is, g is free in:
  a) the rhs 'ef'
  b) or the RHS of a rule for f (Note [Rules are extra RHSs])
  c) or the LHS or a rule for f (Note [Rule dependency info])
220

221 222 223 224
These conditions apply regardless of the activation of the RULE (eg it might be
inactive in this phase but become active later).  Once a Rec is broken up
it can never be put back together, so we must be conservative.

Gabor Greif's avatar
Gabor Greif committed
225
The principle is that, regardless of rule firings, every variable is
226
always in scope.
227 228 229 230 231 232 233 234 235 236

  * Note [Rules are extra RHSs]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
    keeps the specialised "children" alive.  If the parent dies
    (because it isn't referenced any more), then the children will die
    too (unless they are already referenced directly).

    To that end, we build a Rec group for each cyclic strongly
    connected component,
Ian Lynagh's avatar
Ian Lynagh committed
237
        *treating f's rules as extra RHSs for 'f'*.
238 239 240 241 242 243 244 245 246 247 248
    More concretely, the SCC analysis runs on a graph with an edge
    from f -> g iff g is mentioned in
        (a) f's rhs
        (b) f's RULES
    These are rec_edges.

    Under (b) we include variables free in *either* LHS *or* RHS of
    the rule.  The former might seems silly, but see Note [Rule
    dependency info].  So in Example [eftInt], eftInt and eftIntFB
    will be put in the same Rec, even though their 'main' RHSs are
    both non-recursive.
249

250 251
  * Note [Rule dependency info]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
252
    The VarSet in a RuleInfo is used for dependency analysis in the
253
    occurrence analyser.  We must track free vars in *both* lhs and rhs.
254 255 256 257 258 259 260 261
    Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
    Why both? Consider
        x = y
        RULE f x = v+4
    Then if we substitute y for x, we'd better do so in the
    rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
    as well as 'v'

262 263 264
  * Note [Rules are visible in their own rec group]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    We want the rules for 'f' to be visible in f's right-hand side.
265
    And we'd like them to be visible in other functions in f's Rec
266
    group.  E.g. in Note [Specialisation rules] we want f' rule
267 268 269 270 271 272
    to be visible in both f's RHS, and fs's RHS.

    This means that we must simplify the RULEs first, before looking
    at any of the definitions.  This is done by Simplify.simplRecBind,
    when it calls addLetIdInfo.

273 274 275 276 277 278
------------------------------------------------------------
Note [Choosing loop breakers]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Loop breaking is surprisingly subtle.  First read the section 4 of
"Secrets of the GHC inliner".  This describes our basic plan.
We avoid infinite inlinings by choosing loop breakers, and
279
ensuring that a loop breaker cuts each loop.
280

281 282 283
See also Note [Inlining and hs-boot files] in ToIface, which deals
with a closely related source of infinite loops.

284
Fundamentally, we do SCC analysis on a graph.  For each recursive
285
group we choose a loop breaker, delete all edges to that node,
286 287 288 289 290 291 292
re-analyse the SCC, and iterate.

But what is the graph?  NOT the same graph as was used for Note
[Forming Rec groups]!  In particular, a RULE is like an equation for
'f' that is *always* inlined if it is applicable.  We do *not* disable
rules for loop-breakers.  It's up to whoever makes the rules to make
sure that the rules themselves always terminate.  See Note [Rules for
293
recursive functions] in Simplify.hs
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313

Hence, if
    f's RHS (or its INLINE template if it has one) mentions g, and
    g has a RULE that mentions h, and
    h has a RULE that mentions f

then we *must* choose f to be a loop breaker.  Example: see Note
[Specialisation rules].

In general, take the free variables of f's RHS, and augment it with
all the variables reachable by RULES from those starting points.  That
is the whole reason for computing rule_fv_env in occAnalBind.  (Of
course we only consider free vars that are also binders in this Rec
group.)  See also Note [Finding rule RHS free vars]

Note that when we compute this rule_fv_env, we only consider variables
free in the *RHS* of the rule, in contrast to the way we build the
Rec group in the first place (Note [Rule dependency info])

Note that if 'g' has RHS that mentions 'w', we should add w to
314
g's loop-breaker edges.  More concretely there is an edge from f -> g
315
iff
316 317 318
        (a) g is mentioned in f's RHS `xor` f's INLINE rhs
            (see Note [Inline rules])
        (b) or h is mentioned in f's RHS, and
319 320
            g appears in the RHS of an active RULE of h
            or a transitive sequence of active rules starting with h
321

322 323 324 325 326 327 328 329 330
Why "active rules"?  See Note [Finding rule RHS free vars]

Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
chosen as a loop breaker, because their RHSs don't mention each other.
And indeed both can be inlined safely.

Note again that the edges of the graph we use for computing loop breakers
are not the same as the edges we use for computing the Rec blocks.
That's why we compute
331 332

- rec_edges          for the Rec block analysis
333
- loop_breaker_nodes for the loop breaker analysis
334

335 336 337
  * Note [Finding rule RHS free vars]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Consider this real example from Data Parallel Haskell
338 339 340
         tagZero :: Array Int -> Array Tag
         {-# INLINE [1] tagZeroes #-}
         tagZero xs = pmap (\x -> fromBool (x==0)) xs
341

342 343
         {-# RULES "tagZero" [~1] forall xs n.
             pmap fromBool <blah blah> = tagZero xs #-}
344 345 346 347 348
    So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
    However, tagZero can only be inlined in phase 1 and later, while
    the RULE is only active *before* phase 1.  So there's no problem.

    To make this work, we look for the RHS free vars only for
349
    *active* rules. That's the reason for the occ_rule_act field
350
    of the OccEnv.
351

352 353 354 355
  * Note [Weak loop breakers]
    ~~~~~~~~~~~~~~~~~~~~~~~~~
    There is a last nasty wrinkle.  Suppose we have

Ian Lynagh's avatar
Ian Lynagh committed
356
        Rec { f = f_rhs
357
              RULE f [] = g
Ian Lynagh's avatar
Ian Lynagh committed
358 359 360 361

              h = h_rhs
              g = h
              ...more...
362 363
        }

364
    Remember that we simplify the RULES before any RHS (see Note
365 366
    [Rules are visible in their own rec group] above).

367
    So we must *not* postInlineUnconditionally 'g', even though
368
    its RHS turns out to be trivial.  (I'm assuming that 'g' is
369 370 371
    not choosen as a loop breaker.)  Why not?  Because then we
    drop the binding for 'g', which leaves it out of scope in the
    RULE!
372

373 374 375 376 377 378
    Here's a somewhat different example of the same thing
        Rec { g = h
            ; h = ...f...
            ; f = f_rhs
              RULE f [] = g }
    Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
379
    g, because the RULE for f is active throughout.  So the RHS of h
380
    might rewrite to     h = ...g...
381
    So g must remain in scope in the output program!
382

383 384 385
    We "solve" this by:

        Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
386 387 388 389
        iff g is a "missing free variable" of the Rec group

    A "missing free variable" x is one that is mentioned in an RHS or
    INLINE or RULE of a binding in the Rec group, but where the
390
    dependency on x may not show up in the loop_breaker_nodes (see
391 392
    note [Choosing loop breakers} above).

393
    A normal "strong" loop breaker has IAmLoopBreaker False.  So
394

395 396 397 398
                                    Inline  postInlineUnconditionally
   strong   IAmLoopBreaker False    no      no
   weak     IAmLoopBreaker True     yes     no
            other                   yes     yes
399 400

    The **sole** reason for this kind of loop breaker is so that
401 402 403
    postInlineUnconditionally does not fire.  Ugh.  (Typically it'll
    inline via the usual callSiteInline stuff, so it'll be dead in the
    next pass, so the main Ugh is the tiresome complication.)
404

405 406 407 408 409
Note [Rules for imported functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider this
   f = /\a. B.g a
   RULE B.g Int = 1 + f Int
410 411
Note that
  * The RULE is for an imported function.
412 413 414
  * f is non-recursive
Now we
can get
415
   f Int --> B.g Int      Inlining f
416
         --> 1 + f Int    Firing RULE
417
and so the simplifier goes into an infinite loop. This
418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433
would not happen if the RULE was for a local function,
because we keep track of dependencies through rules.  But
that is pretty much impossible to do for imported Ids.  Suppose
f's definition had been
   f = /\a. C.h a
where (by some long and devious process), C.h eventually inlines to
B.g.  We could only spot such loops by exhaustively following
unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
f.

Note that RULES for imported functions are important in practice; they
occur a lot in the libraries.

We regard this potential infinite loop as a *programmer* error.
It's up the programmer not to write silly rules like
     RULE f x = f x
434
and the example above is just a more complicated version.
435

436 437 438 439
Note [Preventing loops due to imported functions rules]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider:
  import GHC.Base (foldr)
440

441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456
  {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
  filter p xs = build (\c n -> foldr (filterFB c p) n xs)
  filterFB c p = ...

  f = filter p xs

Note that filter is not a loop-breaker, so what happens is:
  f =          filter p xs
    = {inline} build (\c n -> foldr (filterFB c p) n xs)
    = {inline} foldr (filterFB (:) p) [] xs
    = {RULE}   filter p xs

We are in an infinite loop.

A more elaborate example (that I actually saw in practice when I went to
mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
457
  {-# LANGUAGE RankNTypes #-}
458
  module GHCList where
459

460 461
  import Prelude hiding (filter)
  import GHC.Base (build)
462

463 464 465 466
  {-# INLINABLE filter #-}
  filter :: (a -> Bool) -> [a] -> [a]
  filter p [] = []
  filter p (x:xs) = if p x then x : filter p xs else filter p xs
467

468 469 470 471
  {-# NOINLINE [0] filterFB #-}
  filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
  filterFB c p x r | p x       = x `c` r
                   | otherwise = r
472

473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
  {-# RULES
  "filter"     [~1] forall p xs.  filter p xs = build (\c n -> foldr
  (filterFB c p) n xs)
  "filterList" [1]  forall p.     foldr (filterFB (:) p) [] = filter p
   #-}

Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
are not), the unfolding given to "filter" in the interface file will be:
  filter p []     = []
  filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
                           else     build (\c n -> foldr (filterFB c p) n xs

Note that because this unfolding does not mention "filter", filter is not
marked as a strong loop breaker. Therefore at a use site in another module:
  filter p xs
    = {inline}
      case xs of []     -> []
                 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
                                  else     build (\c n -> foldr (filterFB c p) n xs)

  build (\c n -> foldr (filterFB c p) n xs)
    = {inline} foldr (filterFB (:) p) [] xs
    = {RULE}   filter p xs

And we are in an infinite loop again, except that this time the loop is producing an
infinitely large *term* (an unrolling of filter) and so the simplifier finally
dies with "ticks exhausted"

Because of this problem, we make a small change in the occurrence analyser
designed to mark functions like "filter" as strong loop breakers on the basis that:
  1. The RHS of filter mentions the local function "filterFB"
  2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS

506 507 508 509 510
So for each RULE for an *imported* function we are going to add
dependency edges between the *local* FVS of the rule LHS and the
*local* FVS of the rule RHS. We don't do anything special for RULES on
local functions because the standard occurrence analysis stuff is
pretty good at getting loop-breakerness correct there.
511 512 513 514 515 516

It is important to note that even with this extra hack we aren't always going to get
things right. For example, it might be that the rule LHS mentions an imported Id,
and another module has a RULE that can rewrite that imported Id to one of our local
Ids.

517
Note [Specialising imported functions] (referred to from Specialise)
518 519 520 521 522 523
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
BUT for *automatically-generated* rules, the programmer can't be
responsible for the "programmer error" in Note [Rules for imported
functions].  In paricular, consider specialising a recursive function
defined in another module.  If we specialise a recursive function B.g,
we get
524 525
         g_spec = .....(B.g Int).....
         RULE B.g Int = g_spec
526 527 528 529
Here, g_spec doesn't look recursive, but when the rule fires, it
becomes so.  And if B.g was mutually recursive, the loop might
not be as obvious as it is here.

530 531
To avoid this,
 * When specialising a function that is a loop breaker,
532 533 534 535 536
   give a NOINLINE pragma to the specialised function

Note [Glomming]
~~~~~~~~~~~~~~~
RULES for imported Ids can make something at the top refer to something at the bottom:
537 538 539 540
        f = \x -> B.g (q x)
        h = \y -> 3

        RULE:  B.g (q x) = h x
541 542 543 544 545 546 547 548 549 550 551 552 553

Applying this rule makes f refer to h, although f doesn't appear to
depend on h.  (And, as in Note [Rules for imported functions], the
dependency might be more indirect. For example, f might mention C.t
rather than B.g, where C.t eventually inlines to B.g.)

NOTICE that this cannot happen for rules whose head is a
locally-defined function, because we accurately track dependencies
through RULES.  It only happens for rules whose head is an imported
function (B.g in the example above).

Solution:
  - When simplifying, bring all top level identifiers into
554
    scope at the start, ignoring the Rec/NonRec structure, so
555 556 557 558 559 560 561 562
    that when 'h' pops up in f's rhs, we find it in the in-scope set
    (as the simplifier generally expects). This happens in simplTopBinds.

  - In the occurrence analyser, if there are any out-of-scope
    occurrences that pop out of the top, which will happen after
    firing the rule:      f = \x -> h x
                          h = \y -> 3
    then just glom all the bindings into a single Rec, so that
563
    the *next* iteration of the occurrence analyser will sort
564 565 566 567 568 569 570 571 572 573
    them all out.   This part happens in occurAnalysePgm.

------------------------------------------------------------
Note [Inline rules]
~~~~~~~~~~~~~~~~~~~
None of the above stuff about RULES applies to Inline Rules,
stored in a CoreUnfolding.  The unfolding, if any, is simplified
at the same time as the regular RHS of the function (ie *not* like
Note [Rules are visible in their own rec group]), so it should be
treated *exactly* like an extra RHS.
574

575 576 577 578 579
Or, rather, when computing loop-breaker edges,
  * If f has an INLINE pragma, and it is active, we treat the
    INLINE rhs as f's rhs
  * If it's inactive, we treat f as having no rhs
  * If it has no INLINE pragma, we look at f's actual rhs
580

581

582 583 584 585 586
There is a danger that we'll be sub-optimal if we see this
     f = ...f...
     [INLINE f = ..no f...]
where f is recursive, but the INLINE is not. This can just about
happen with a sufficiently odd set of rules; eg
587

588 589 590
        foo :: Int -> Int
        {-# INLINE [1] foo #-}
        foo x = x+1
591

592 593 594
        bar :: Int -> Int
        {-# INLINE [1] bar #-}
        bar x = foo x + 1
595

596
        {-# RULES "foo" [~1] forall x. foo x = bar x #-}
597

598 599 600 601 602 603 604 605 606 607 608
Here the RULE makes bar recursive; but it's INLINE pragma remains
non-recursive. It's tempting to then say that 'bar' should not be
a loop breaker, but an attempt to do so goes wrong in two ways:
   a) We may get
         $df = ...$cfoo...
         $cfoo = ...$df....
         [INLINE $cfoo = ...no-$df...]
      But we want $cfoo to depend on $df explicitly so that we
      put the bindings in the right order to inline $df in $cfoo
      and perhaps break the loop altogether.  (Maybe this
   b)
609

610

611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626
Example [eftInt]
~~~~~~~~~~~~~~~
Example (from GHC.Enum):

  eftInt :: Int# -> Int# -> [Int]
  eftInt x y = ...(non-recursive)...

  {-# INLINE [0] eftIntFB #-}
  eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
  eftIntFB c n x y = ...(non-recursive)...

  {-# RULES
  "eftInt"  [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
  "eftIntList"  [1] eftIntFB  (:) [] = eftInt
   #-}

627 628
Note [Specialisation rules]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
629 630 631 632 633 634 635 636 637
Consider this group, which is typical of what SpecConstr builds:

   fs a = ....f (C a)....
   f  x = ....f (C a)....
   {-# RULE f (C a) = fs a #-}

So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).

But watch out!  If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
638 639 640
  - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
  - fs is inlined (say it's small)
  - now there's another opportunity to apply the RULE
641 642

This showed up when compiling Control.Concurrent.Chan.getChanContents.
Austin Seipp's avatar
Austin Seipp committed
643
-}
644

645 646 647
------------------------------------------------------------------
--                 occAnalBind
------------------------------------------------------------------
648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695

occAnalBind :: OccEnv           -- The incoming OccEnv
            -> ImpRuleEdges
            -> CoreBind
            -> UsageDetails             -- Usage details of scope
            -> (UsageDetails,           -- Of the whole let(rec)
                [CoreBind])

occAnalBind env top_env (NonRec binder rhs) body_usage
  = occAnalNonRecBind env top_env binder rhs body_usage
occAnalBind env top_env (Rec pairs) body_usage
  = occAnalRecBind env top_env pairs body_usage

-----------------
occAnalNonRecBind :: OccEnv -> ImpRuleEdges -> Var -> CoreExpr
                  -> UsageDetails -> (UsageDetails, [CoreBind])
occAnalNonRecBind env imp_rule_edges binder rhs body_usage
  | isTyVar binder      -- A type let; we don't gather usage info
  = (body_usage, [NonRec binder rhs])

  | not (binder `usedIn` body_usage)    -- It's not mentioned
  = (body_usage, [])

  | otherwise                   -- It's mentioned in the body
  = (body_usage' +++ rhs_usage4, [NonRec tagged_binder rhs'])
  where
    (body_usage', tagged_binder) = tagBinder body_usage binder
    (rhs_usage1, rhs')           = occAnalNonRecRhs env tagged_binder rhs
    rhs_usage2 = addIdOccs rhs_usage1 (idUnfoldingVars binder)

    rhs_usage3 = addIdOccs rhs_usage2 (idRuleVars binder)
       -- See Note [Rules are extra RHSs] and Note [Rule dependency info]

    rhs_usage4 = maybe rhs_usage3 (addIdOccs rhs_usage3) $
                 lookupVarEnv imp_rule_edges binder
       -- See Note [Preventing loops due to imported functions rules]

-----------------
occAnalRecBind :: OccEnv -> ImpRuleEdges -> [(Var,CoreExpr)]
               -> UsageDetails -> (UsageDetails, [CoreBind])
occAnalRecBind env imp_rule_edges pairs body_usage
  = foldr occAnalRec (body_usage, []) sccs
        -- For a recursive group, we
        --      * occ-analyse all the RHSs
        --      * compute strongly-connected components
        --      * feed those components to occAnalRec
        -- See Note [Recursive bindings: the grand plan]
  where
696
    sccs :: [SCC Details]
697
    sccs = {-# SCC "occAnalBind.scc" #-}
698
           stronglyConnCompFromEdgedVerticesUniq nodes
699

700
    nodes :: [LetrecNode]
701
    nodes = {-# SCC "occAnalBind.assoc" #-}
702
            map (makeNode env imp_rule_edges bndr_set) pairs
703

704
    bndr_set = mkVarSet (map fst pairs)
705

706
-----------------------------
707
occAnalRec :: SCC Details
708
           -> (UsageDetails, [CoreBind])
709
           -> (UsageDetails, [CoreBind])
710

711
        -- The NonRec case is just like a Let (NonRec ...) above
712
occAnalRec (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs, nd_uds = rhs_uds}))
713
           (body_uds, binds)
714
  | not (bndr `usedIn` body_uds)
Simon Peyton Jones's avatar
Simon Peyton Jones committed
715
  = (body_uds, binds)           -- See Note [Dead code]
716

717 718
  | otherwise                   -- It's mentioned in the body
  = (body_uds' +++ rhs_uds,
719 720
     NonRec tagged_bndr rhs : binds)
  where
721
    (body_uds', tagged_bndr) = tagBinder body_uds bndr
722

723
        -- The Rec case is the interesting one
724
        -- See Note [Recursive bindings: the grand plan]
725
        -- See Note [Loop breaking]
726
occAnalRec (CyclicSCC details_s) (body_uds, binds)
727
  | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
Simon Peyton Jones's avatar
Simon Peyton Jones committed
728
  = (body_uds, binds)                   -- See Note [Dead code]
729

730
  | otherwise   -- At this point we always build a single Rec
731
  = -- pprTrace "occAnalRec" (vcat
732
    --  [ text "weak_fvs" <+> ppr weak_fvs
733 734
    --  , text "tagged details" <+> ppr tagged_details_s
    --  , text "lb nodes" <+> ppr loop_breaker_nodes])
735
    (final_uds, Rec pairs : binds)
736 737

  where
738 739
    bndrs    = map nd_bndr details_s
    bndr_set = mkVarSet bndrs
740 741

    ----------------------------
742
    -- Compute usage details
743
    total_uds = foldl add_uds body_uds details_s
744
    final_uds = total_uds `minusVarEnv` bndr_set
745
    add_uds usage_so_far nd = usage_so_far +++ nd_uds nd
746

747 748 749 750 751 752 753 754
    ------------------------------
        -- See Note [Choosing loop breakers] for loop_breaker_nodes
    loop_breaker_nodes :: [LetrecNode]
    loop_breaker_nodes = mkLoopBreakerNodes bndr_set total_uds details_s

    ------------------------------
    weak_fvs :: VarSet
    weak_fvs = mapUnionVarSet nd_weak details_s
755 756 757 758

    ---------------------------
    -- Now reconstruct the cycle
    pairs :: [(Id,CoreExpr)]
759 760 761 762 763 764 765 766
    pairs | isEmptyVarSet weak_fvs = reOrderNodes   0 bndr_set weak_fvs loop_breaker_nodes []
          | otherwise              = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_nodes []
          -- If weak_fvs is empty, the loop_breaker_nodes will include
          -- all the edges in the original scope edges [remember,
          -- weak_fvs is the difference between scope edges and
          -- lb-edges], so a fresh SCC computation would yield a
          -- single CyclicSCC result; and reOrderNodes deals with
          -- exactly that case
767

768

769 770 771 772 773
------------------------------------------------------------------
--                 Loop breaking
------------------------------------------------------------------

type Binding = (Id,CoreExpr)
sof's avatar
sof committed
774

775 776 777 778 779 780 781
loopBreakNodes :: Int
               -> VarSet        -- All binders
               -> VarSet        -- Binders whose dependencies may be "missing"
                                -- See Note [Weak loop breakers]
               -> [LetrecNode]
               -> [Binding]             -- Append these to the end
               -> [Binding]
Austin Seipp's avatar
Austin Seipp committed
782
{-
783 784 785
loopBreakNodes is applied to the list of nodes for a cyclic strongly
connected component (there's guaranteed to be a cycle).  It returns
the same nodes, but
Ian Lynagh's avatar
Ian Lynagh committed
786 787
        a) in a better order,
        b) with some of the Ids having a IAmALoopBreaker pragma
sof's avatar
sof committed
788

789
The "loop-breaker" Ids are sufficient to break all cycles in the SCC.  This means
sof's avatar
sof committed
790 791 792 793 794 795 796
that the simplifier can guarantee not to loop provided it never records an inlining
for these no-inline guys.

Furthermore, the order of the binds is such that if we neglect dependencies
on the no-inline Ids then the binds are topologically sorted.  This means
that the simplifier will generally do a good job if it works from top bottom,
recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
Austin Seipp's avatar
Austin Seipp committed
797
-}
sof's avatar
sof committed
798

799
-- Return the bindings sorted into a plausible order, and marked with loop breakers.
800
loopBreakNodes depth bndr_set weak_fvs nodes binds
niteria's avatar
niteria committed
801
  = go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds
sof's avatar
sof committed
802
  where
803 804
    go []         binds = binds
    go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
805

806
    loop_break_scc scc binds
807
      = case scc of
808 809
          AcyclicSCC node  -> mk_non_loop_breaker weak_fvs node : binds
          CyclicSCC nodes  -> reOrderNodes depth bndr_set weak_fvs nodes binds
810

811 812
----------------------------------
reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding]
813
    -- Choose a loop breaker, mark it no-inline,
814
    -- and call loopBreakNodes on the rest
815 816
reOrderNodes _ _ _ []     _     = panic "reOrderNodes"
reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds
817
reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
818
  = -- pprTrace "reOrderNodes" (text "unchosen" <+> ppr unchosen $$
819
    --                           text "chosen" <+> ppr chosen_nodes) $
820
    loopBreakNodes new_depth bndr_set weak_fvs unchosen $
821 822
    (map mk_loop_breaker chosen_nodes ++ binds)
  where
823 824 825
    (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb
                                                 (nd_score (fstOf3 node))
                                                 [node] [] nodes
826

827 828 829
    approximate_lb = depth >= 2
    new_depth | approximate_lb = 0
              | otherwise      = depth+1
830 831 832
        -- After two iterations (d=0, d=1) give up
        -- and approximate, returning to d=0

833 834 835
mk_loop_breaker :: LetrecNode -> Binding
mk_loop_breaker (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
  = (setIdOccInfo bndr strongLoopBreaker, rhs)
836

837 838 839 840 841
mk_non_loop_breaker :: VarSet -> LetrecNode -> Binding
-- See Note [Weak loop breakers]
mk_non_loop_breaker weak_fvs (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
  | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr weakLoopBreaker, rhs)
  | otherwise                  = (bndr, rhs)
842

843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870
----------------------------------
chooseLoopBreaker :: Bool             -- True <=> Too many iterations,
                                      --          so approximate
                  -> NodeScore            -- Best score so far
                  -> [LetrecNode]       -- Nodes with this score
                  -> [LetrecNode]       -- Nodes with higher scores
                  -> [LetrecNode]       -- Unprocessed nodes
                  -> ([LetrecNode], [LetrecNode])
    -- This loop looks for the bind with the lowest score
    -- to pick as the loop  breaker.  The rest accumulate in
chooseLoopBreaker _ _ loop_nodes acc []
  = (loop_nodes, acc)        -- Done

    -- If approximate_loop_breaker is True, we pick *all*
    -- nodes with lowest score, else just one
    -- See Note [Complexity of loop breaking]
chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes)
  | approx_lb
  , rank sc == rank loop_sc
  = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes

  | sc `betterLB` loop_sc  -- Better score so pick this new one
  = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes

  | otherwise              -- Worse score so don't pick it
  = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes
  where
    sc = nd_score (fstOf3 node)
871

Austin Seipp's avatar
Austin Seipp committed
872
{-
873 874
Note [Complexity of loop breaking]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
875
The loop-breaking algorithm knocks out one binder at a time, and
876 877 878 879 880 881 882
performs a new SCC analysis on the remaining binders.  That can
behave very badly in tightly-coupled groups of bindings; in the
worst case it can be (N**2)*log N, because it does a full SCC
on N, then N-1, then N-2 and so on.

To avoid this, we switch plans after 2 (or whatever) attempts:
  Plan A: pick one binder with the lowest score, make it
883
          a loop breaker, and try again
884
  Plan B: pick *all* binders with the lowest score, make them
885
          all loop breakers, and try again
886 887 888 889 890 891 892
Since there are only a small finite number of scores, this will
terminate in a constant number of iterations, rather than O(N)
iterations.

You might thing that it's very unlikely, but RULES make it much
more likely.  Here's a real example from Trac #1969:
  Rec { $dm = \d.\x. op d
893 894 895 896 897 898 899 900 901 902
        {-# RULES forall d. $dm Int d  = $s$dm1
                  forall d. $dm Bool d = $s$dm2 #-}

        dInt = MkD .... opInt ...
        dInt = MkD .... opBool ...
        opInt  = $dm dInt
        opBool = $dm dBool

        $s$dm1 = \x. op dInt
        $s$dm2 = \x. op dBool }
903 904 905 906 907
The RULES stuff means that we can't choose $dm as a loop breaker
(Note [Choosing loop breakers]), so we must choose at least (say)
opInt *and* opBool, and so on.  The number of loop breakders is
linear in the number of instance declarations.

908
Note [Loop breakers and INLINE/INLINABLE pragmas]
909
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
910
Avoid choosing a function with an INLINE pramga as the loop breaker!
911 912 913
If such a function is mutually-recursive with a non-INLINE thing,
then the latter should be the loop-breaker.

914
It's vital to distinguish between INLINE and INLINABLE (the
915
Bool returned by hasStableCoreUnfolding_maybe).  If we start with
916
   Rec { {-# INLINABLE f #-}
917 918
         f x = ...f... }
and then worker/wrapper it through strictness analysis, we'll get
919
   Rec { {-# INLINABLE $wf #-}
920
         $wf p q = let x = (p,q) in ...f...
Ian Lynagh's avatar
Ian Lynagh committed
921

922 923
         {-# INLINE f #-}
         f x = case x of (p,q) -> $wf p q }
924

925 926
Now it is vital that we choose $wf as the loop breaker, so we can
inline 'f' in '$wf'.
927

simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
928 929 930 931 932
Note [DFuns should not be loop breakers]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It's particularly bad to make a DFun into a loop breaker.  See
Note [How instance declarations are translated] in TcInstDcls

933
We give DFuns a higher score than ordinary CONLIKE things because
Gabor Greif's avatar
Gabor Greif committed
934
if there's a choice we want the DFun to be the non-loop breaker. Eg
935

936 937 938 939 940 941 942 943 944 945
rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)

      $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
      {-# DFUN #-}
      $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
    }

Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
if we can't unravel the DFun first.

946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961
Note [Constructor applications]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It's really really important to inline dictionaries.  Real
example (the Enum Ordering instance from GHC.Base):

     rec     f = \ x -> case d of (p,q,r) -> p x
             g = \ x -> case d of (p,q,r) -> q x
             d = (v, f, g)

Here, f and g occur just once; but we can't inline them into d.
On the other hand we *could* simplify those case expressions if
we didn't stupidly choose d as the loop breaker.
But we won't because constructor args are marked "Many".
Inlining dictionaries is really essential to unravelling
the loops in static numeric dictionaries, see GHC.Float.

962 963 964 965 966 967 968 969 970 971 972 973 974 975 976
Note [Closure conversion]
~~~~~~~~~~~~~~~~~~~~~~~~~
We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
The immediate motivation came from the result of a closure-conversion transformation
which generated code like this:

    data Clo a b = forall c. Clo (c -> a -> b) c

    ($:) :: Clo a b -> a -> b
    Clo f env $: x = f env x

    rec { plus = Clo plus1 ()

        ; plus1 _ n = Clo plus2 n

Ian Lynagh's avatar
Ian Lynagh committed
977 978
        ; plus2 Zero     n = n
        ; plus2 (Succ m) n = Succ (plus $: m $: n) }
979 980 981 982 983 984

If we inline 'plus' and 'plus1', everything unravels nicely.  But if
we choose 'plus1' as the loop breaker (which is entirely possible
otherwise), the loop does not unravel nicely.


985 986 987 988 989 990 991
@occAnalRhs@ deals with the question of bindings where the Id is marked
by an INLINE pragma.  For these we record that anything which occurs
in its RHS occurs many times.  This pessimistically assumes that ths
inlined binder also occurs many times in its scope, but if it doesn't
we'll catch it next time round.  At worst this costs an extra simplifier pass.
ToDo: try using the occurrence info for the inline'd binder.

992 993
[March 97] We do the same for atomic RHSs.  Reason: see notes with loopBreakSCC.
[June 98, SLPJ]  I've undone this change; I don't understand it.  See notes with loopBreakSCC.
994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267