OccurAnal.hs 76.1 KB
Newer Older
Austin Seipp's avatar
Austin Seipp committed
1 2
{-
(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3

Austin Seipp's avatar
Austin Seipp committed
4 5
************************************************************************
*                                                                      *
6
\section[OccurAnal]{Occurrence analysis pass}
Austin Seipp's avatar
Austin Seipp committed
7 8
*                                                                      *
************************************************************************
9

10 11
The occurrence analyser re-typechecks a core expression, returning a new
core expression with (hopefully) improved usage information.
Austin Seipp's avatar
Austin Seipp committed
12
-}
13

14 15
{-# LANGUAGE CPP, BangPatterns #-}

16
module OccurAnal (
17
        occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18 19
    ) where

20
#include "HsVersions.h"
21 22

import CoreSyn
23
import CoreFVs
Peter Wortmann's avatar
Peter Wortmann committed
24 25
import CoreUtils        ( exprIsTrivial, isDefaultAlt, isExpandableApp,
                          stripTicksTopE, mkTicks )
26
import Id
27
import Name( localiseName )
twanvl's avatar
twanvl committed
28
import BasicTypes
29
import Module( Module )
30 31
import Coercion

32 33
import VarSet
import VarEnv
34
import Var
35
import Demand           ( argOneShots, argsOneShots )
Ian Lynagh's avatar
Ian Lynagh committed
36
import Maybes           ( orElse )
niteria's avatar
niteria committed
37
import Digraph          ( SCC(..), stronglyConnCompFromEdgedVerticesUniqR )
38 39
import Unique
import UniqFM
40
import Util
41
import Outputable
42
import Data.List
Peter Wortmann's avatar
Peter Wortmann committed
43
import Control.Arrow    ( second )
44

Austin Seipp's avatar
Austin Seipp committed
45 46 47
{-
************************************************************************
*                                                                      *
48
\subsection[OccurAnal-main]{Counting occurrences: main function}
Austin Seipp's avatar
Austin Seipp committed
49 50
*                                                                      *
************************************************************************
51 52

Here's the externally-callable interface:
Austin Seipp's avatar
Austin Seipp committed
53
-}
54

55
occurAnalysePgm :: Module       -- Used only in debug output
56
                -> (Activation -> Bool)
57
                -> [CoreRule] -> [CoreVect] -> VarSet
58
                -> CoreProgram -> CoreProgram
59
occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds
60
  | isEmptyVarEnv final_usage
61 62
  = occ_anald_binds

63
  | otherwise   -- See Note [Glomming]
64 65
  = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
                   2 (ppr final_usage ) )
66
    occ_anald_glommed_binds
67
  where
68 69
    init_env = initOccEnv active_rule
    (final_usage, occ_anald_binds) = go init_env binds
70
    (_, occ_anald_glommed_binds)   = occAnalRecBind init_env imp_rule_edges
71 72 73 74 75 76 77 78
                                                    (flattenBinds occ_anald_binds)
                                                    initial_uds
          -- It's crucial to re-analyse the glommed-together bindings
          -- so that we establish the right loop breakers. Otherwise
          -- we can easily create an infinite loop (Trac #9583 is an example)

    initial_uds = addIdOccs emptyDetails
                            (rulesFreeVars imp_rules `unionVarSet`
79 80 81 82
                             vectsFreeVars vects `unionVarSet`
                             vectVars)
    -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
    -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
Gabor Greif's avatar
Gabor Greif committed
83
    -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)
84

85
    -- Note [Preventing loops due to imported functions rules]
86
    imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
87 88
                            [ mapVarEnv (const maps_to) (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
                            | imp_rule <- imp_rules
89
                            , not (isBuiltinRule imp_rule)  -- See Note [Plugin rules]
90 91 92 93
                            , let maps_to = exprFreeIds (ru_rhs imp_rule)
                                             `delVarSetList` ru_bndrs imp_rule
                            , arg <- ru_args imp_rule ]

94
    go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
twanvl's avatar
twanvl committed
95
    go _ []
96
        = (initial_uds, [])
Ian Lynagh's avatar
Ian Lynagh committed
97 98 99 100
    go env (bind:binds)
        = (final_usage, bind' ++ binds')
        where
           (bs_usage, binds')   = go env binds
101
           (final_usage, bind') = occAnalBind env imp_rule_edges bind bs_usage
102

103
occurAnalyseExpr :: CoreExpr -> CoreExpr
Gabor Greif's avatar
Gabor Greif committed
104
        -- Do occurrence analysis, and discard occurrence info returned
105 106 107 108 109 110 111 112
occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap

occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap

occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
occurAnalyseExpr' enable_binder_swap expr
  = snd (occAnal env expr)
113
  where
114
    env = (initOccEnv all_active_rules) {occ_binder_swap = enable_binder_swap}
115
    -- To be conservative, we say that all inlines and rules are active
116
    all_active_rules = \_ -> True
117

118 119
{- Note [Plugin rules]
~~~~~~~~~~~~~~~~~~~~~~
Gabor Greif's avatar
Gabor Greif committed
120
Conal Elliott (Trac #11651) built a GHC plugin that added some
121 122 123 124 125 126 127 128 129 130
BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
do some domain-specific transformations that could not be expressed
with an ordinary pattern-matching CoreRule.  But then we can't extract
the dependencies (in imp_rule_edges) from ru_rhs etc, because a
BuiltinRule doesn't have any of that stuff.

So we simply assume that BuiltinRules have no dependencies, and filter
them out from the imp_rule_edges comprehension.
-}

Austin Seipp's avatar
Austin Seipp committed
131 132 133
{-
************************************************************************
*                                                                      *
134
\subsection[OccurAnal-main]{Counting occurrences: main function}
Austin Seipp's avatar
Austin Seipp committed
135 136
*                                                                      *
************************************************************************
137 138 139

Bindings
~~~~~~~~
Austin Seipp's avatar
Austin Seipp committed
140
-}
141

142 143 144 145 146
type ImpRuleEdges = IdEnv IdSet     -- Mapping from FVs of imported RULE LHSs to RHS FVs

noImpRuleEdges :: ImpRuleEdges
noImpRuleEdges = emptyVarEnv

147
occAnalBind :: OccEnv           -- The incoming OccEnv
148
            -> ImpRuleEdges
Ian Lynagh's avatar
Ian Lynagh committed
149 150 151 152
            -> CoreBind
            -> UsageDetails             -- Usage details of scope
            -> (UsageDetails,           -- Of the whole let(rec)
                [CoreBind])
153

154 155 156 157
occAnalBind env top_env (NonRec binder rhs) body_usage
  = occAnalNonRecBind env top_env binder rhs body_usage
occAnalBind env top_env (Rec pairs) body_usage
  = occAnalRecBind env top_env pairs body_usage
158 159

-----------------
160
occAnalNonRecBind :: OccEnv -> ImpRuleEdges -> Var -> CoreExpr
161
                  -> UsageDetails -> (UsageDetails, [CoreBind])
162
occAnalNonRecBind env imp_rule_edges binder rhs body_usage
163
  | isTyVar binder      -- A type let; we don't gather usage info
164 165
  = (body_usage, [NonRec binder rhs])

166
  | not (binder `usedIn` body_usage)    -- It's not mentioned
167 168
  = (body_usage, [])

Ian Lynagh's avatar
Ian Lynagh committed
169
  | otherwise                   -- It's mentioned in the body
170
  = (body_usage' +++ rhs_usage4, [NonRec tagged_binder rhs'])
171
  where
172
    (body_usage', tagged_binder) = tagBinder body_usage binder
173
    (rhs_usage1, rhs')           = occAnalNonRecRhs env tagged_binder rhs
174
    rhs_usage2 = addIdOccs rhs_usage1 (idUnfoldingVars binder)
175

176 177
    rhs_usage3 = addIdOccs rhs_usage2 (idRuleVars binder)
       -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
178 179 180

    rhs_usage4 = maybe rhs_usage3 (addIdOccs rhs_usage3) $
                 lookupVarEnv imp_rule_edges binder
181
       -- See Note [Preventing loops due to imported functions rules]
182

183
-----------------
184
occAnalRecBind :: OccEnv -> ImpRuleEdges -> [(Var,CoreExpr)]
185
               -> UsageDetails -> (UsageDetails, [CoreBind])
186
occAnalRecBind env imp_rule_edges pairs body_usage
187
  = foldr occAnalRec (body_usage, []) sccs
188 189 190 191
        -- For a recursive group, we
        --      * occ-analyse all the RHSs
        --      * compute strongly-connected components
        --      * feed those components to occAnalRec
192 193 194 195
  where
    bndr_set = mkVarSet (map fst pairs)

    sccs :: [SCC (Node Details)]
niteria's avatar
niteria committed
196 197
    sccs = {-# SCC "occAnalBind.scc" #-}
      stronglyConnCompFromEdgedVerticesUniqR nodes
198 199

    nodes :: [Node Details]
niteria's avatar
niteria committed
200 201
    nodes = {-# SCC "occAnalBind.assoc" #-}
      map (makeNode env imp_rule_edges bndr_set) pairs
202

Austin Seipp's avatar
Austin Seipp committed
203
{-
204 205
Note [Dead code]
~~~~~~~~~~~~~~~~
206 207
Dropping dead code for a cyclic Strongly Connected Component is done
in a very simple way:
208

209 210
        the entire SCC is dropped if none of its binders are mentioned
        in the body; otherwise the whole thing is kept.
211

212 213 214
The key observation is that dead code elimination happens after
dependency analysis: so 'occAnalBind' processes SCCs instead of the
original term's binding groups.
215

216
Thus 'occAnalBind' does indeed drop 'f' in an example like
217

Ian Lynagh's avatar
Ian Lynagh committed
218
        letrec f = ...g...
219
               g = ...(...g...)...
Ian Lynagh's avatar
Ian Lynagh committed
220
        in
221
           ...g...
222

223 224 225
when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
'AcyclicSCC f', where 'body_usage' won't contain 'f'.
226

227 228 229 230 231 232 233
------------------------------------------------------------
Note [Forming Rec groups]
~~~~~~~~~~~~~~~~~~~~~~~~~
We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
and "g uses f", no matter how indirectly.  We do a SCC analysis
with an edge f -> g if "f uses g".

Krzysztof Gogolewski's avatar
Typos  
Krzysztof Gogolewski committed
234
More precisely, "f uses g" iff g should be in scope wherever f is.
235 236 237 238
That is, g is free in:
  a) the rhs 'ef'
  b) or the RHS of a rule for f (Note [Rules are extra RHSs])
  c) or the LHS or a rule for f (Note [Rule dependency info])
239

240 241 242 243
These conditions apply regardless of the activation of the RULE (eg it might be
inactive in this phase but become active later).  Once a Rec is broken up
it can never be put back together, so we must be conservative.

Gabor Greif's avatar
Gabor Greif committed
244
The principle is that, regardless of rule firings, every variable is
245
always in scope.
246 247 248 249 250 251 252 253 254 255

  * Note [Rules are extra RHSs]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
    keeps the specialised "children" alive.  If the parent dies
    (because it isn't referenced any more), then the children will die
    too (unless they are already referenced directly).

    To that end, we build a Rec group for each cyclic strongly
    connected component,
Ian Lynagh's avatar
Ian Lynagh committed
256
        *treating f's rules as extra RHSs for 'f'*.
257 258 259 260 261 262 263 264 265 266 267
    More concretely, the SCC analysis runs on a graph with an edge
    from f -> g iff g is mentioned in
        (a) f's rhs
        (b) f's RULES
    These are rec_edges.

    Under (b) we include variables free in *either* LHS *or* RHS of
    the rule.  The former might seems silly, but see Note [Rule
    dependency info].  So in Example [eftInt], eftInt and eftIntFB
    will be put in the same Rec, even though their 'main' RHSs are
    both non-recursive.
268

269 270
  * Note [Rule dependency info]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
271
    The VarSet in a RuleInfo is used for dependency analysis in the
272
    occurrence analyser.  We must track free vars in *both* lhs and rhs.
273 274 275 276 277 278 279 280
    Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
    Why both? Consider
        x = y
        RULE f x = v+4
    Then if we substitute y for x, we'd better do so in the
    rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
    as well as 'v'

281 282 283
  * Note [Rules are visible in their own rec group]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    We want the rules for 'f' to be visible in f's right-hand side.
284
    And we'd like them to be visible in other functions in f's Rec
285
    group.  E.g. in Note [Specialisation rules] we want f' rule
286 287 288 289 290 291
    to be visible in both f's RHS, and fs's RHS.

    This means that we must simplify the RULEs first, before looking
    at any of the definitions.  This is done by Simplify.simplRecBind,
    when it calls addLetIdInfo.

292 293 294 295 296 297
------------------------------------------------------------
Note [Choosing loop breakers]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Loop breaking is surprisingly subtle.  First read the section 4 of
"Secrets of the GHC inliner".  This describes our basic plan.
We avoid infinite inlinings by choosing loop breakers, and
298
ensuring that a loop breaker cuts each loop.
299 300

Fundamentally, we do SCC analysis on a graph.  For each recursive
301
group we choose a loop breaker, delete all edges to that node,
302 303 304 305 306 307 308
re-analyse the SCC, and iterate.

But what is the graph?  NOT the same graph as was used for Note
[Forming Rec groups]!  In particular, a RULE is like an equation for
'f' that is *always* inlined if it is applicable.  We do *not* disable
rules for loop-breakers.  It's up to whoever makes the rules to make
sure that the rules themselves always terminate.  See Note [Rules for
309
recursive functions] in Simplify.hs
310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329

Hence, if
    f's RHS (or its INLINE template if it has one) mentions g, and
    g has a RULE that mentions h, and
    h has a RULE that mentions f

then we *must* choose f to be a loop breaker.  Example: see Note
[Specialisation rules].

In general, take the free variables of f's RHS, and augment it with
all the variables reachable by RULES from those starting points.  That
is the whole reason for computing rule_fv_env in occAnalBind.  (Of
course we only consider free vars that are also binders in this Rec
group.)  See also Note [Finding rule RHS free vars]

Note that when we compute this rule_fv_env, we only consider variables
free in the *RHS* of the rule, in contrast to the way we build the
Rec group in the first place (Note [Rule dependency info])

Note that if 'g' has RHS that mentions 'w', we should add w to
330
g's loop-breaker edges.  More concretely there is an edge from f -> g
331
iff
332 333 334
        (a) g is mentioned in f's RHS `xor` f's INLINE rhs
            (see Note [Inline rules])
        (b) or h is mentioned in f's RHS, and
335 336
            g appears in the RHS of an active RULE of h
            or a transitive sequence of active rules starting with h
337

338 339 340 341 342 343 344 345 346
Why "active rules"?  See Note [Finding rule RHS free vars]

Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
chosen as a loop breaker, because their RHSs don't mention each other.
And indeed both can be inlined safely.

Note again that the edges of the graph we use for computing loop breakers
are not the same as the edges we use for computing the Rec blocks.
That's why we compute
347 348 349

- rec_edges          for the Rec block analysis
- loop_breaker_edges for the loop breaker analysis
350

351 352 353
  * Note [Finding rule RHS free vars]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Consider this real example from Data Parallel Haskell
354 355 356
         tagZero :: Array Int -> Array Tag
         {-# INLINE [1] tagZeroes #-}
         tagZero xs = pmap (\x -> fromBool (x==0)) xs
357

358 359
         {-# RULES "tagZero" [~1] forall xs n.
             pmap fromBool <blah blah> = tagZero xs #-}
360 361 362 363 364
    So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
    However, tagZero can only be inlined in phase 1 and later, while
    the RULE is only active *before* phase 1.  So there's no problem.

    To make this work, we look for the RHS free vars only for
365
    *active* rules. That's the reason for the occ_rule_act field
366
    of the OccEnv.
367

368 369 370 371
  * Note [Weak loop breakers]
    ~~~~~~~~~~~~~~~~~~~~~~~~~
    There is a last nasty wrinkle.  Suppose we have

Ian Lynagh's avatar
Ian Lynagh committed
372
        Rec { f = f_rhs
373
              RULE f [] = g
Ian Lynagh's avatar
Ian Lynagh committed
374 375 376 377

              h = h_rhs
              g = h
              ...more...
378 379
        }

380
    Remember that we simplify the RULES before any RHS (see Note
381 382
    [Rules are visible in their own rec group] above).

383
    So we must *not* postInlineUnconditionally 'g', even though
384
    its RHS turns out to be trivial.  (I'm assuming that 'g' is
385 386 387
    not choosen as a loop breaker.)  Why not?  Because then we
    drop the binding for 'g', which leaves it out of scope in the
    RULE!
388

389 390 391 392 393 394
    Here's a somewhat different example of the same thing
        Rec { g = h
            ; h = ...f...
            ; f = f_rhs
              RULE f [] = g }
    Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
395
    g, because the RULE for f is active throughout.  So the RHS of h
396
    might rewrite to     h = ...g...
397
    So g must remain in scope in the output program!
398

399 400 401
    We "solve" this by:

        Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
402 403 404 405 406 407 408
        iff g is a "missing free variable" of the Rec group

    A "missing free variable" x is one that is mentioned in an RHS or
    INLINE or RULE of a binding in the Rec group, but where the
    dependency on x may not show up in the loop_breaker_edges (see
    note [Choosing loop breakers} above).

409
    A normal "strong" loop breaker has IAmLoopBreaker False.  So
410

411 412 413 414
                                    Inline  postInlineUnconditionally
   strong   IAmLoopBreaker False    no      no
   weak     IAmLoopBreaker True     yes     no
            other                   yes     yes
415 416

    The **sole** reason for this kind of loop breaker is so that
417 418 419
    postInlineUnconditionally does not fire.  Ugh.  (Typically it'll
    inline via the usual callSiteInline stuff, so it'll be dead in the
    next pass, so the main Ugh is the tiresome complication.)
420

421 422 423 424 425
Note [Rules for imported functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider this
   f = /\a. B.g a
   RULE B.g Int = 1 + f Int
426 427
Note that
  * The RULE is for an imported function.
428 429 430
  * f is non-recursive
Now we
can get
431
   f Int --> B.g Int      Inlining f
432
         --> 1 + f Int    Firing RULE
433
and so the simplifier goes into an infinite loop. This
434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
would not happen if the RULE was for a local function,
because we keep track of dependencies through rules.  But
that is pretty much impossible to do for imported Ids.  Suppose
f's definition had been
   f = /\a. C.h a
where (by some long and devious process), C.h eventually inlines to
B.g.  We could only spot such loops by exhaustively following
unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
f.

Note that RULES for imported functions are important in practice; they
occur a lot in the libraries.

We regard this potential infinite loop as a *programmer* error.
It's up the programmer not to write silly rules like
     RULE f x = f x
450
and the example above is just a more complicated version.
451

452 453 454 455
Note [Preventing loops due to imported functions rules]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider:
  import GHC.Base (foldr)
456

457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472
  {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
  filter p xs = build (\c n -> foldr (filterFB c p) n xs)
  filterFB c p = ...

  f = filter p xs

Note that filter is not a loop-breaker, so what happens is:
  f =          filter p xs
    = {inline} build (\c n -> foldr (filterFB c p) n xs)
    = {inline} foldr (filterFB (:) p) [] xs
    = {RULE}   filter p xs

We are in an infinite loop.

A more elaborate example (that I actually saw in practice when I went to
mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
473
  {-# LANGUAGE RankNTypes #-}
474
  module GHCList where
475

476 477
  import Prelude hiding (filter)
  import GHC.Base (build)
478

479 480 481 482
  {-# INLINABLE filter #-}
  filter :: (a -> Bool) -> [a] -> [a]
  filter p [] = []
  filter p (x:xs) = if p x then x : filter p xs else filter p xs
483

484 485 486 487
  {-# NOINLINE [0] filterFB #-}
  filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
  filterFB c p x r | p x       = x `c` r
                   | otherwise = r
488

489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521
  {-# RULES
  "filter"     [~1] forall p xs.  filter p xs = build (\c n -> foldr
  (filterFB c p) n xs)
  "filterList" [1]  forall p.     foldr (filterFB (:) p) [] = filter p
   #-}

Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
are not), the unfolding given to "filter" in the interface file will be:
  filter p []     = []
  filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
                           else     build (\c n -> foldr (filterFB c p) n xs

Note that because this unfolding does not mention "filter", filter is not
marked as a strong loop breaker. Therefore at a use site in another module:
  filter p xs
    = {inline}
      case xs of []     -> []
                 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
                                  else     build (\c n -> foldr (filterFB c p) n xs)

  build (\c n -> foldr (filterFB c p) n xs)
    = {inline} foldr (filterFB (:) p) [] xs
    = {RULE}   filter p xs

And we are in an infinite loop again, except that this time the loop is producing an
infinitely large *term* (an unrolling of filter) and so the simplifier finally
dies with "ticks exhausted"

Because of this problem, we make a small change in the occurrence analyser
designed to mark functions like "filter" as strong loop breakers on the basis that:
  1. The RHS of filter mentions the local function "filterFB"
  2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS

522 523 524 525 526
So for each RULE for an *imported* function we are going to add
dependency edges between the *local* FVS of the rule LHS and the
*local* FVS of the rule RHS. We don't do anything special for RULES on
local functions because the standard occurrence analysis stuff is
pretty good at getting loop-breakerness correct there.
527 528 529 530 531 532

It is important to note that even with this extra hack we aren't always going to get
things right. For example, it might be that the rule LHS mentions an imported Id,
and another module has a RULE that can rewrite that imported Id to one of our local
Ids.

533 534 535 536 537 538 539
Note [Specialising imported functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
BUT for *automatically-generated* rules, the programmer can't be
responsible for the "programmer error" in Note [Rules for imported
functions].  In paricular, consider specialising a recursive function
defined in another module.  If we specialise a recursive function B.g,
we get
540 541
         g_spec = .....(B.g Int).....
         RULE B.g Int = g_spec
542 543 544 545
Here, g_spec doesn't look recursive, but when the rule fires, it
becomes so.  And if B.g was mutually recursive, the loop might
not be as obvious as it is here.

546 547
To avoid this,
 * When specialising a function that is a loop breaker,
548 549 550 551 552
   give a NOINLINE pragma to the specialised function

Note [Glomming]
~~~~~~~~~~~~~~~
RULES for imported Ids can make something at the top refer to something at the bottom:
553 554 555 556
        f = \x -> B.g (q x)
        h = \y -> 3

        RULE:  B.g (q x) = h x
557 558 559 560 561 562 563 564 565 566 567 568 569

Applying this rule makes f refer to h, although f doesn't appear to
depend on h.  (And, as in Note [Rules for imported functions], the
dependency might be more indirect. For example, f might mention C.t
rather than B.g, where C.t eventually inlines to B.g.)

NOTICE that this cannot happen for rules whose head is a
locally-defined function, because we accurately track dependencies
through RULES.  It only happens for rules whose head is an imported
function (B.g in the example above).

Solution:
  - When simplifying, bring all top level identifiers into
570
    scope at the start, ignoring the Rec/NonRec structure, so
571 572 573 574 575 576 577 578
    that when 'h' pops up in f's rhs, we find it in the in-scope set
    (as the simplifier generally expects). This happens in simplTopBinds.

  - In the occurrence analyser, if there are any out-of-scope
    occurrences that pop out of the top, which will happen after
    firing the rule:      f = \x -> h x
                          h = \y -> 3
    then just glom all the bindings into a single Rec, so that
579
    the *next* iteration of the occurrence analyser will sort
580 581 582 583 584 585 586 587 588 589
    them all out.   This part happens in occurAnalysePgm.

------------------------------------------------------------
Note [Inline rules]
~~~~~~~~~~~~~~~~~~~
None of the above stuff about RULES applies to Inline Rules,
stored in a CoreUnfolding.  The unfolding, if any, is simplified
at the same time as the regular RHS of the function (ie *not* like
Note [Rules are visible in their own rec group]), so it should be
treated *exactly* like an extra RHS.
590

591 592 593 594 595
Or, rather, when computing loop-breaker edges,
  * If f has an INLINE pragma, and it is active, we treat the
    INLINE rhs as f's rhs
  * If it's inactive, we treat f as having no rhs
  * If it has no INLINE pragma, we look at f's actual rhs
596

597

598 599 600 601 602
There is a danger that we'll be sub-optimal if we see this
     f = ...f...
     [INLINE f = ..no f...]
where f is recursive, but the INLINE is not. This can just about
happen with a sufficiently odd set of rules; eg
603

604 605 606
        foo :: Int -> Int
        {-# INLINE [1] foo #-}
        foo x = x+1
607

608 609 610
        bar :: Int -> Int
        {-# INLINE [1] bar #-}
        bar x = foo x + 1
611

612
        {-# RULES "foo" [~1] forall x. foo x = bar x #-}
613

614 615 616 617 618 619 620 621 622 623 624
Here the RULE makes bar recursive; but it's INLINE pragma remains
non-recursive. It's tempting to then say that 'bar' should not be
a loop breaker, but an attempt to do so goes wrong in two ways:
   a) We may get
         $df = ...$cfoo...
         $cfoo = ...$df....
         [INLINE $cfoo = ...no-$df...]
      But we want $cfoo to depend on $df explicitly so that we
      put the bindings in the right order to inline $df in $cfoo
      and perhaps break the loop altogether.  (Maybe this
   b)
625

626

627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
Example [eftInt]
~~~~~~~~~~~~~~~
Example (from GHC.Enum):

  eftInt :: Int# -> Int# -> [Int]
  eftInt x y = ...(non-recursive)...

  {-# INLINE [0] eftIntFB #-}
  eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
  eftIntFB c n x y = ...(non-recursive)...

  {-# RULES
  "eftInt"  [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
  "eftIntList"  [1] eftIntFB  (:) [] = eftInt
   #-}

643 644
Note [Specialisation rules]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
645 646 647 648 649 650 651 652 653
Consider this group, which is typical of what SpecConstr builds:

   fs a = ....f (C a)....
   f  x = ....f (C a)....
   {-# RULE f (C a) = fs a #-}

So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).

But watch out!  If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
654 655 656
  - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
  - fs is inlined (say it's small)
  - now there's another opportunity to apply the RULE
657 658

This showed up when compiling Control.Concurrent.Chan.getChanContents.
Austin Seipp's avatar
Austin Seipp committed
659
-}
660

661 662
type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique,
                                                -- which is gotten from the Id.
663 664 665
data Details
  = ND { nd_bndr :: Id          -- Binder
       , nd_rhs  :: CoreExpr    -- RHS, already occ-analysed
666

667
       , nd_uds  :: UsageDetails  -- Usage from RHS, and RULES, and stable unfoldings
668 669
                                  -- ignoring phase (ie assuming all are active)
                                  -- See Note [Forming Rec groups]
670

671
       , nd_inl  :: IdSet       -- Free variables of
672 673
                                --   the stable unfolding (if present and active)
                                --   or the RHS (if not)
674 675 676
                                -- but excluding any RULES
                                -- This is the IdSet that may be used if the Id is inlined

677
       , nd_weak :: IdSet       -- Binders of this Rec that are mentioned in nd_uds
678 679 680 681
                                -- but are *not* in nd_inl.  These are the ones whose
                                -- dependencies might not be respected by loop_breaker_edges
                                -- See Note [Weak loop breakers]

682
       , nd_active_rule_fvs :: IdSet   -- Free variables of the RHS of active RULES
683 684
  }

685
instance Outputable Details where
686 687 688 689 690 691
   ppr nd = text "ND" <> braces
             (sep [ text "bndr =" <+> ppr (nd_bndr nd)
                  , text "uds =" <+> ppr (nd_uds nd)
                  , text "inl =" <+> ppr (nd_inl nd)
                  , text "weak =" <+> ppr (nd_weak nd)
                  , text "rule =" <+> ppr (nd_active_rule_fvs nd)
692
             ])
693

694 695
makeNode :: OccEnv -> ImpRuleEdges -> VarSet -> (Var, CoreExpr) -> Node Details
makeNode env imp_rule_edges bndr_set (bndr, rhs)
niteria's avatar
niteria committed
696 697 698 699
  = (details, varUnique bndr, nonDetKeysUFM node_fvs)
    -- It's OK to use nonDetKeysUFM here as stronglyConnCompFromEdgedVerticesR
    -- is still deterministic with edges in nondeterministic order as
    -- explained in Note [Deterministic SCC] in Digraph.
700 701 702
  where
    details = ND { nd_bndr = bndr
                 , nd_rhs  = rhs'
703
                 , nd_uds  = rhs_usage3
704
                 , nd_weak = node_fvs `minusVarSet` inl_fvs
705
                 , nd_inl  = inl_fvs
706
                 , nd_active_rule_fvs = active_rule_fvs }
707 708 709

    -- Constructing the edges for the main Rec computation
    -- See Note [Forming Rec groups]
710
    (rhs_usage1, rhs') = occAnalRecRhs env rhs
711 712 713 714 715
    rhs_usage2 = addIdOccs rhs_usage1 all_rule_fvs   -- Note [Rules are extra RHSs]
                                                     -- Note [Rule dependency info]
    rhs_usage3 = case mb_unf_fvs of
                   Just unf_fvs -> addIdOccs rhs_usage2 unf_fvs
                   Nothing      -> rhs_usage2
716
    node_fvs = udFreeVars bndr_set rhs_usage3
717 718 719 720 721

    -- Finding the free variables of the rules
    is_active = occ_rule_act env :: Activation -> Bool
    rules = filterOut isBuiltinRule (idCoreRules bndr)
    rules_w_fvs :: [(Activation, VarSet)]    -- Find the RHS fvs
722
    rules_w_fvs = maybe id (\ids -> ((AlwaysActive, ids):)) (lookupVarEnv imp_rule_edges bndr)
723 724
                   -- See Note [Preventing loops due to imported functions rules]
                  [ (ru_act rule, fvs)
725 726
                  | rule <- rules
                  , let fvs = exprFreeVars (ru_rhs rule)
727
                              `delVarSetList` ru_bndrs rule
728
                  , not (isEmptyVarSet fvs) ]
729 730 731 732
    all_rule_fvs = rule_lhs_fvs `unionVarSet` rule_rhs_fvs
    rule_rhs_fvs = mapUnionVarSet snd rules_w_fvs
    rule_lhs_fvs = mapUnionVarSet (\ru -> exprsFreeVars (ru_args ru)
                                          `delVarSetList` ru_bndrs ru) rules
733 734 735 736
    active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_fvs, is_active a]

    -- Finding the free variables of the INLINE pragma (if any)
    unf        = realIdUnfolding bndr     -- Ignore any current loop-breaker flag
737
    mb_unf_fvs = stableUnfoldingVars unf
738 739 740

    -- Find the "nd_inl" free vars; for the loop-breaker phase
    inl_fvs = case mb_unf_fvs of
741 742
                Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
                Just unf_fvs -> unf_fvs
743
                      -- We could check for an *active* INLINE (returning
744 745 746
                      -- emptyVarSet for an inactive one), but is_active
                      -- isn't the right thing (it tells about
                      -- RULE activation), so we'd need more plumbing
747

748
-----------------------------
749
occAnalRec :: SCC (Node Details)
750
           -> (UsageDetails, [CoreBind])
751
           -> (UsageDetails, [CoreBind])
752

753
        -- The NonRec case is just like a Let (NonRec ...) above
754 755
occAnalRec (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs, nd_uds = rhs_uds}, _, _))
           (body_uds, binds)
756
  | not (bndr `usedIn` body_uds)
Simon Peyton Jones's avatar
Simon Peyton Jones committed
757
  = (body_uds, binds)           -- See Note [Dead code]
758

759 760
  | otherwise                   -- It's mentioned in the body
  = (body_uds' +++ rhs_uds,
761 762
     NonRec tagged_bndr rhs : binds)
  where
763
    (body_uds', tagged_bndr) = tagBinder body_uds bndr
764

765 766
        -- The Rec case is the interesting one
        -- See Note [Loop breaking]
767
occAnalRec (CyclicSCC nodes) (body_uds, binds)
768
  | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
Simon Peyton Jones's avatar
Simon Peyton Jones committed
769
  = (body_uds, binds)                   -- See Note [Dead code]
770

771
  | otherwise   -- At this point we always build a single Rec
772
  = -- pprTrace "occAnalRec" (vcat
773 774
    --   [ text "tagged nodes" <+> ppr tagged_nodes
    --   , text "lb edges" <+> ppr loop_breaker_edges])
775
    (final_uds, Rec pairs : binds)
776 777

  where
778
    bndrs    = [b | (ND { nd_bndr = b }, _, _) <- nodes]
779
    bndr_set = mkVarSet bndrs
Ian Lynagh's avatar
Ian Lynagh committed
780

781 782
        ----------------------------
        -- Tag the binders with their occurrence info
783 784 785 786 787 788 789
    tagged_nodes = map tag_node nodes
    total_uds = foldl add_uds body_uds nodes
    final_uds = total_uds `minusVarEnv` bndr_set
    add_uds usage_so_far (nd, _, _) = usage_so_far +++ nd_uds nd

    tag_node :: Node Details -> Node Details
    tag_node (details@ND { nd_bndr = bndr }, k, ks)
790 791
      | let bndr1 = setBinderOcc total_uds bndr
      = (details { nd_bndr = bndr1 }, k, ks)
792 793 794 795

    ---------------------------
    -- Now reconstruct the cycle
    pairs :: [(Id,CoreExpr)]
796 797
    pairs | isEmptyVarSet weak_fvs = reOrderNodes   0 bndr_set weak_fvs tagged_nodes       []
          | otherwise              = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_edges []
798 799 800
          -- If weak_fvs is empty, the loop_breaker_edges will include all
          -- the edges in tagged_nodes, so there isn't any point in doing
          -- a fresh SCC computation that will yield a single CyclicSCC result.
801 802

    weak_fvs :: VarSet
803
    weak_fvs = mapUnionVarSet (nd_weak . fstOf3) nodes
804

805
        -- See Note [Choosing loop breakers] for loop_breaker_edges
806
    loop_breaker_edges = map mk_node tagged_nodes
807
    mk_node (details@(ND { nd_inl = inl_fvs }), k, _)
niteria's avatar
niteria committed
808 809 810 811 812
      = (details, k, nonDetKeysUFM (extendFvs_ rule_fv_env inl_fvs))
        -- It's OK to use nonDetKeysUFM here as
        -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
        -- in nondeterministic order as explained in
        -- Note [Deterministic SCC] in Digraph.
Ian Lynagh's avatar
Ian Lynagh committed
813

814
    ------------------------------------
815 816
    rule_fv_env :: IdEnv IdSet
        -- Maps a variable f to the variables from this group
817 818
        --      mentioned in RHS of active rules for f
        -- Domain is *subset* of bound vars (others have no rule fvs)
819 820 821
    rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
    init_rule_fvs   -- See Note [Finding rule RHS free vars]
      = [ (b, trimmed_rule_fvs)
822
        | (ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs },_,_) <- nodes
823 824
        , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
        , not (isEmptyVarSet trimmed_rule_fvs)]
sof's avatar
sof committed
825

Austin Seipp's avatar
Austin Seipp committed
826
{-
827
@loopBreakSCC@ is applied to the list of (binder,rhs) pairs for a cyclic
sof's avatar
sof committed
828
strongly connected component (there's guaranteed to be a cycle).  It returns the
Ian Lynagh's avatar
Ian Lynagh committed
829 830 831
same pairs, but
        a) in a better order,
        b) with some of the Ids having a IAmALoopBreaker pragma
sof's avatar
sof committed
832

833
The "loop-breaker" Ids are sufficient to break all cycles in the SCC.  This means
sof's avatar
sof committed
834 835 836 837 838 839 840
that the simplifier can guarantee not to loop provided it never records an inlining
for these no-inline guys.

Furthermore, the order of the binds is such that if we neglect dependencies
on the no-inline Ids then the binds are topologically sorted.  This means
that the simplifier will generally do a good job if it works from top bottom,
recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
Austin Seipp's avatar
Austin Seipp committed
841
-}
sof's avatar
sof committed
842

843 844 845
type Binding = (Id,CoreExpr)

mk_loop_breaker :: Node Details -> Binding
846
mk_loop_breaker (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
847 848 849 850
  = (setIdOccInfo bndr strongLoopBreaker, rhs)

mk_non_loop_breaker :: VarSet -> Node Details -> Binding
-- See Note [Weak loop breakers]
851
mk_non_loop_breaker used_in_rules (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
852 853
  | bndr `elemVarSet` used_in_rules = (setIdOccInfo bndr weakLoopBreaker, rhs)
  | otherwise                       = (bndr, rhs)
854 855 856 857 858

udFreeVars :: VarSet -> UsageDetails -> VarSet
-- Find the subset of bndrs that are mentioned in uds
udFreeVars bndrs uds = intersectUFM_C (\b _ -> b) bndrs uds

859 860 861 862
loopBreakNodes :: Int
               -> VarSet        -- All binders
               -> VarSet        -- Binders whose dependencies may be "missing"
                                -- See Note [Weak loop breakers]
863
               -> [Node Details]
864
               -> [Binding]             -- Append these to the end
865
               -> [Binding]
866
-- Return the bindings sorted into a plausible order, and marked with loop breakers.
867
loopBreakNodes depth bndr_set weak_fvs nodes binds
niteria's avatar
niteria committed
868
  = go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds
sof's avatar
sof committed
869
  where
870 871
    go []         binds = binds
    go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
872

873
    loop_break_scc scc binds
874
      = case scc of
875
          AcyclicSCC node  -> mk_non_loop_breaker weak_fvs node : binds
876
          CyclicSCC [node] -> mk_loop_breaker node : binds
877
          CyclicSCC nodes  -> reOrderNodes depth bndr_set weak_fvs nodes binds
878 879 880 881 882

reOrderNodes :: Int -> VarSet -> VarSet -> [Node Details] -> [Binding] -> [Binding]
    -- Choose a loop breaker, mark it no-inline,
    -- do SCC analysis on the rest, and recursively sort them out
reOrderNodes _ _ _ [] _  = panic "reOrderNodes"
883
reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
884
  = -- pprTrace "reOrderNodes" (text "unchosen" <+> ppr unchosen $$
885
    --                           text "chosen" <+> ppr chosen_nodes) $
886
    loopBreakNodes new_depth bndr_set weak_fvs unchosen $
887 888 889
    (map mk_loop_breaker chosen_nodes ++ binds)
  where
    (chosen_nodes, unchosen) = choose_loop_breaker (score node) [node] [] nodes
890 891 892

    approximate_loop_breaker = depth >= 2
    new_depth | approximate_loop_breaker = 0
893 894 895 896 897 898 899 900
              | otherwise                = depth+1
        -- After two iterations (d=0, d=1) give up
        -- and approximate, returning to d=0

    choose_loop_breaker :: Int                  -- Best score so far
                        -> [Node Details]       -- Nodes with this score
                        -> [Node Details]       -- Nodes with higher scores
                        -> [Node Details]       -- Unprocessed nodes