OccurAnal.hs 75.4 KB
Newer Older
Austin Seipp's avatar
Austin Seipp committed
1 2
{-
(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3

Austin Seipp's avatar
Austin Seipp committed
4 5
************************************************************************
*                                                                      *
6
\section[OccurAnal]{Occurrence analysis pass}
Austin Seipp's avatar
Austin Seipp committed
7 8
*                                                                      *
************************************************************************
9

10 11
The occurrence analyser re-typechecks a core expression, returning a new
core expression with (hopefully) improved usage information.
Austin Seipp's avatar
Austin Seipp committed
12
-}
13

14 15
{-# LANGUAGE CPP, BangPatterns #-}

16
module OccurAnal (
17
        occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18 19
    ) where

20
#include "HsVersions.h"
21 22

import CoreSyn
23
import CoreFVs
Peter Wortmann's avatar
Peter Wortmann committed
24 25
import CoreUtils        ( exprIsTrivial, isDefaultAlt, isExpandableApp,
                          stripTicksTopE, mkTicks )
26
import Id
27
import Name( localiseName )
twanvl's avatar
twanvl committed
28
import BasicTypes
29
import Module( Module )
30 31
import Coercion

32 33
import VarSet
import VarEnv
34
import Var
35
import Demand           ( argOneShots, argsOneShots )
Ian Lynagh's avatar
Ian Lynagh committed
36
import Maybes           ( orElse )
37
import Digraph          ( SCC(..), stronglyConnCompFromEdgedVerticesR )
38 39
import Unique
import UniqFM
40
import Util
41
import Outputable
42
import Data.List
Peter Wortmann's avatar
Peter Wortmann committed
43
import Control.Arrow    ( second )
44

Austin Seipp's avatar
Austin Seipp committed
45 46 47
{-
************************************************************************
*                                                                      *
48
\subsection[OccurAnal-main]{Counting occurrences: main function}
Austin Seipp's avatar
Austin Seipp committed
49 50
*                                                                      *
************************************************************************
51 52

Here's the externally-callable interface:
Austin Seipp's avatar
Austin Seipp committed
53
-}
54

55
occurAnalysePgm :: Module       -- Used only in debug output
56
                -> (Activation -> Bool)
57
                -> [CoreRule] -> [CoreVect] -> VarSet
58
                -> CoreProgram -> CoreProgram
59
occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds
60
  | isEmptyVarEnv final_usage
61 62
  = occ_anald_binds

63
  | otherwise   -- See Note [Glomming]
64 65
  = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
                   2 (ppr final_usage ) )
66
    occ_anald_glommed_binds
67
  where
68 69
    init_env = initOccEnv active_rule
    (final_usage, occ_anald_binds) = go init_env binds
70
    (_, occ_anald_glommed_binds)   = occAnalRecBind init_env imp_rule_edges
71 72 73 74 75 76 77 78
                                                    (flattenBinds occ_anald_binds)
                                                    initial_uds
          -- It's crucial to re-analyse the glommed-together bindings
          -- so that we establish the right loop breakers. Otherwise
          -- we can easily create an infinite loop (Trac #9583 is an example)

    initial_uds = addIdOccs emptyDetails
                            (rulesFreeVars imp_rules `unionVarSet`
79 80 81 82
                             vectsFreeVars vects `unionVarSet`
                             vectVars)
    -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
    -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
Gabor Greif's avatar
Gabor Greif committed
83
    -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)
84

85
    -- Note [Preventing loops due to imported functions rules]
86
    imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
87 88
                            [ mapVarEnv (const maps_to) (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
                            | imp_rule <- imp_rules
89
                            , not (isBuiltinRule imp_rule)  -- See Note [Plugin rules]
90 91 92 93
                            , let maps_to = exprFreeIds (ru_rhs imp_rule)
                                             `delVarSetList` ru_bndrs imp_rule
                            , arg <- ru_args imp_rule ]

94
    go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
twanvl's avatar
twanvl committed
95
    go _ []
96
        = (initial_uds, [])
Ian Lynagh's avatar
Ian Lynagh committed
97 98 99 100
    go env (bind:binds)
        = (final_usage, bind' ++ binds')
        where
           (bs_usage, binds')   = go env binds
101
           (final_usage, bind') = occAnalBind env imp_rule_edges bind bs_usage
102

103
occurAnalyseExpr :: CoreExpr -> CoreExpr
Gabor Greif's avatar
Gabor Greif committed
104
        -- Do occurrence analysis, and discard occurrence info returned
105 106 107 108 109 110 111 112
occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap

occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap

occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
occurAnalyseExpr' enable_binder_swap expr
  = snd (occAnal env expr)
113
  where
114
    env = (initOccEnv all_active_rules) {occ_binder_swap = enable_binder_swap}
115
    -- To be conservative, we say that all inlines and rules are active
116
    all_active_rules = \_ -> True
117

118 119
{- Note [Plugin rules]
~~~~~~~~~~~~~~~~~~~~~~
Gabor Greif's avatar
Gabor Greif committed
120
Conal Elliott (Trac #11651) built a GHC plugin that added some
121 122 123 124 125 126 127 128 129 130
BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
do some domain-specific transformations that could not be expressed
with an ordinary pattern-matching CoreRule.  But then we can't extract
the dependencies (in imp_rule_edges) from ru_rhs etc, because a
BuiltinRule doesn't have any of that stuff.

So we simply assume that BuiltinRules have no dependencies, and filter
them out from the imp_rule_edges comprehension.
-}

Austin Seipp's avatar
Austin Seipp committed
131 132 133
{-
************************************************************************
*                                                                      *
134
\subsection[OccurAnal-main]{Counting occurrences: main function}
Austin Seipp's avatar
Austin Seipp committed
135 136
*                                                                      *
************************************************************************
137 138 139

Bindings
~~~~~~~~
Austin Seipp's avatar
Austin Seipp committed
140
-}
141

142 143 144 145 146
type ImpRuleEdges = IdEnv IdSet     -- Mapping from FVs of imported RULE LHSs to RHS FVs

noImpRuleEdges :: ImpRuleEdges
noImpRuleEdges = emptyVarEnv

147
occAnalBind :: OccEnv           -- The incoming OccEnv
148
            -> ImpRuleEdges
Ian Lynagh's avatar
Ian Lynagh committed
149 150 151 152
            -> CoreBind
            -> UsageDetails             -- Usage details of scope
            -> (UsageDetails,           -- Of the whole let(rec)
                [CoreBind])
153

154 155 156 157
occAnalBind env top_env (NonRec binder rhs) body_usage
  = occAnalNonRecBind env top_env binder rhs body_usage
occAnalBind env top_env (Rec pairs) body_usage
  = occAnalRecBind env top_env pairs body_usage
158 159

-----------------
160
occAnalNonRecBind :: OccEnv -> ImpRuleEdges -> Var -> CoreExpr
161
                  -> UsageDetails -> (UsageDetails, [CoreBind])
162
occAnalNonRecBind env imp_rule_edges binder rhs body_usage
163
  | isTyVar binder      -- A type let; we don't gather usage info
164 165
  = (body_usage, [NonRec binder rhs])

166
  | not (binder `usedIn` body_usage)    -- It's not mentioned
167 168
  = (body_usage, [])

Ian Lynagh's avatar
Ian Lynagh committed
169
  | otherwise                   -- It's mentioned in the body
170
  = (body_usage' +++ rhs_usage4, [NonRec tagged_binder rhs'])
171
  where
172
    (body_usage', tagged_binder) = tagBinder body_usage binder
173
    (rhs_usage1, rhs')           = occAnalNonRecRhs env tagged_binder rhs
174
    rhs_usage2 = addIdOccs rhs_usage1 (idUnfoldingVars binder)
175

176 177
    rhs_usage3 = addIdOccs rhs_usage2 (idRuleVars binder)
       -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
178 179 180

    rhs_usage4 = maybe rhs_usage3 (addIdOccs rhs_usage3) $
                 lookupVarEnv imp_rule_edges binder
181
       -- See Note [Preventing loops due to imported functions rules]
182

183
-----------------
184
occAnalRecBind :: OccEnv -> ImpRuleEdges -> [(Var,CoreExpr)]
185
               -> UsageDetails -> (UsageDetails, [CoreBind])
186
occAnalRecBind env imp_rule_edges pairs body_usage
187
  = foldr occAnalRec (body_usage, []) sccs
188 189 190 191
        -- For a recursive group, we
        --      * occ-analyse all the RHSs
        --      * compute strongly-connected components
        --      * feed those components to occAnalRec
192 193 194 195 196 197 198
  where
    bndr_set = mkVarSet (map fst pairs)

    sccs :: [SCC (Node Details)]
    sccs = {-# SCC "occAnalBind.scc" #-} stronglyConnCompFromEdgedVerticesR nodes

    nodes :: [Node Details]
199
    nodes = {-# SCC "occAnalBind.assoc" #-} map (makeNode env imp_rule_edges bndr_set) pairs
200

Austin Seipp's avatar
Austin Seipp committed
201
{-
202 203
Note [Dead code]
~~~~~~~~~~~~~~~~
204 205
Dropping dead code for a cyclic Strongly Connected Component is done
in a very simple way:
206

207 208
        the entire SCC is dropped if none of its binders are mentioned
        in the body; otherwise the whole thing is kept.
209

210 211 212
The key observation is that dead code elimination happens after
dependency analysis: so 'occAnalBind' processes SCCs instead of the
original term's binding groups.
213

214
Thus 'occAnalBind' does indeed drop 'f' in an example like
215

Ian Lynagh's avatar
Ian Lynagh committed
216
        letrec f = ...g...
217
               g = ...(...g...)...
Ian Lynagh's avatar
Ian Lynagh committed
218
        in
219
           ...g...
220

221 222 223
when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
'AcyclicSCC f', where 'body_usage' won't contain 'f'.
224

225 226 227 228 229 230 231
------------------------------------------------------------
Note [Forming Rec groups]
~~~~~~~~~~~~~~~~~~~~~~~~~
We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
and "g uses f", no matter how indirectly.  We do a SCC analysis
with an edge f -> g if "f uses g".

Krzysztof Gogolewski's avatar
Typos  
Krzysztof Gogolewski committed
232
More precisely, "f uses g" iff g should be in scope wherever f is.
233 234 235 236
That is, g is free in:
  a) the rhs 'ef'
  b) or the RHS of a rule for f (Note [Rules are extra RHSs])
  c) or the LHS or a rule for f (Note [Rule dependency info])
237

238 239 240 241
These conditions apply regardless of the activation of the RULE (eg it might be
inactive in this phase but become active later).  Once a Rec is broken up
it can never be put back together, so we must be conservative.

Gabor Greif's avatar
Gabor Greif committed
242
The principle is that, regardless of rule firings, every variable is
243
always in scope.
244 245 246 247 248 249 250 251 252 253

  * Note [Rules are extra RHSs]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
    keeps the specialised "children" alive.  If the parent dies
    (because it isn't referenced any more), then the children will die
    too (unless they are already referenced directly).

    To that end, we build a Rec group for each cyclic strongly
    connected component,
Ian Lynagh's avatar
Ian Lynagh committed
254
        *treating f's rules as extra RHSs for 'f'*.
255 256 257 258 259 260 261 262 263 264 265
    More concretely, the SCC analysis runs on a graph with an edge
    from f -> g iff g is mentioned in
        (a) f's rhs
        (b) f's RULES
    These are rec_edges.

    Under (b) we include variables free in *either* LHS *or* RHS of
    the rule.  The former might seems silly, but see Note [Rule
    dependency info].  So in Example [eftInt], eftInt and eftIntFB
    will be put in the same Rec, even though their 'main' RHSs are
    both non-recursive.
266

267 268
  * Note [Rule dependency info]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
269
    The VarSet in a RuleInfo is used for dependency analysis in the
270
    occurrence analyser.  We must track free vars in *both* lhs and rhs.
271 272 273 274 275 276 277 278
    Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
    Why both? Consider
        x = y
        RULE f x = v+4
    Then if we substitute y for x, we'd better do so in the
    rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
    as well as 'v'

279 280 281
  * Note [Rules are visible in their own rec group]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    We want the rules for 'f' to be visible in f's right-hand side.
282
    And we'd like them to be visible in other functions in f's Rec
283
    group.  E.g. in Note [Specialisation rules] we want f' rule
284 285 286 287 288 289
    to be visible in both f's RHS, and fs's RHS.

    This means that we must simplify the RULEs first, before looking
    at any of the definitions.  This is done by Simplify.simplRecBind,
    when it calls addLetIdInfo.

290 291 292 293 294 295
------------------------------------------------------------
Note [Choosing loop breakers]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Loop breaking is surprisingly subtle.  First read the section 4 of
"Secrets of the GHC inliner".  This describes our basic plan.
We avoid infinite inlinings by choosing loop breakers, and
296
ensuring that a loop breaker cuts each loop.
297 298

Fundamentally, we do SCC analysis on a graph.  For each recursive
299
group we choose a loop breaker, delete all edges to that node,
300 301 302 303 304 305 306
re-analyse the SCC, and iterate.

But what is the graph?  NOT the same graph as was used for Note
[Forming Rec groups]!  In particular, a RULE is like an equation for
'f' that is *always* inlined if it is applicable.  We do *not* disable
rules for loop-breakers.  It's up to whoever makes the rules to make
sure that the rules themselves always terminate.  See Note [Rules for
307
recursive functions] in Simplify.hs
308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327

Hence, if
    f's RHS (or its INLINE template if it has one) mentions g, and
    g has a RULE that mentions h, and
    h has a RULE that mentions f

then we *must* choose f to be a loop breaker.  Example: see Note
[Specialisation rules].

In general, take the free variables of f's RHS, and augment it with
all the variables reachable by RULES from those starting points.  That
is the whole reason for computing rule_fv_env in occAnalBind.  (Of
course we only consider free vars that are also binders in this Rec
group.)  See also Note [Finding rule RHS free vars]

Note that when we compute this rule_fv_env, we only consider variables
free in the *RHS* of the rule, in contrast to the way we build the
Rec group in the first place (Note [Rule dependency info])

Note that if 'g' has RHS that mentions 'w', we should add w to
328
g's loop-breaker edges.  More concretely there is an edge from f -> g
329
iff
330 331 332
        (a) g is mentioned in f's RHS `xor` f's INLINE rhs
            (see Note [Inline rules])
        (b) or h is mentioned in f's RHS, and
333 334
            g appears in the RHS of an active RULE of h
            or a transitive sequence of active rules starting with h
335

336 337 338 339 340 341 342 343 344
Why "active rules"?  See Note [Finding rule RHS free vars]

Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
chosen as a loop breaker, because their RHSs don't mention each other.
And indeed both can be inlined safely.

Note again that the edges of the graph we use for computing loop breakers
are not the same as the edges we use for computing the Rec blocks.
That's why we compute
345 346 347

- rec_edges          for the Rec block analysis
- loop_breaker_edges for the loop breaker analysis
348

349 350 351
  * Note [Finding rule RHS free vars]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Consider this real example from Data Parallel Haskell
352 353 354
         tagZero :: Array Int -> Array Tag
         {-# INLINE [1] tagZeroes #-}
         tagZero xs = pmap (\x -> fromBool (x==0)) xs
355

356 357
         {-# RULES "tagZero" [~1] forall xs n.
             pmap fromBool <blah blah> = tagZero xs #-}
358 359 360 361 362
    So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
    However, tagZero can only be inlined in phase 1 and later, while
    the RULE is only active *before* phase 1.  So there's no problem.

    To make this work, we look for the RHS free vars only for
363
    *active* rules. That's the reason for the occ_rule_act field
364
    of the OccEnv.
365

366 367 368 369
  * Note [Weak loop breakers]
    ~~~~~~~~~~~~~~~~~~~~~~~~~
    There is a last nasty wrinkle.  Suppose we have

Ian Lynagh's avatar
Ian Lynagh committed
370
        Rec { f = f_rhs
371
              RULE f [] = g
Ian Lynagh's avatar
Ian Lynagh committed
372 373 374 375

              h = h_rhs
              g = h
              ...more...
376 377
        }

378
    Remember that we simplify the RULES before any RHS (see Note
379 380
    [Rules are visible in their own rec group] above).

381
    So we must *not* postInlineUnconditionally 'g', even though
382
    its RHS turns out to be trivial.  (I'm assuming that 'g' is
383 384 385
    not choosen as a loop breaker.)  Why not?  Because then we
    drop the binding for 'g', which leaves it out of scope in the
    RULE!
386

387 388 389 390 391 392
    Here's a somewhat different example of the same thing
        Rec { g = h
            ; h = ...f...
            ; f = f_rhs
              RULE f [] = g }
    Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
393
    g, because the RULE for f is active throughout.  So the RHS of h
394
    might rewrite to     h = ...g...
395
    So g must remain in scope in the output program!
396

397 398 399
    We "solve" this by:

        Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
400 401 402 403 404 405 406
        iff g is a "missing free variable" of the Rec group

    A "missing free variable" x is one that is mentioned in an RHS or
    INLINE or RULE of a binding in the Rec group, but where the
    dependency on x may not show up in the loop_breaker_edges (see
    note [Choosing loop breakers} above).

407
    A normal "strong" loop breaker has IAmLoopBreaker False.  So
408

409 410 411 412
                                    Inline  postInlineUnconditionally
   strong   IAmLoopBreaker False    no      no
   weak     IAmLoopBreaker True     yes     no
            other                   yes     yes
413 414

    The **sole** reason for this kind of loop breaker is so that
415 416 417
    postInlineUnconditionally does not fire.  Ugh.  (Typically it'll
    inline via the usual callSiteInline stuff, so it'll be dead in the
    next pass, so the main Ugh is the tiresome complication.)
418

419 420 421 422 423
Note [Rules for imported functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider this
   f = /\a. B.g a
   RULE B.g Int = 1 + f Int
424 425
Note that
  * The RULE is for an imported function.
426 427 428
  * f is non-recursive
Now we
can get
429
   f Int --> B.g Int      Inlining f
430
         --> 1 + f Int    Firing RULE
431
and so the simplifier goes into an infinite loop. This
432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
would not happen if the RULE was for a local function,
because we keep track of dependencies through rules.  But
that is pretty much impossible to do for imported Ids.  Suppose
f's definition had been
   f = /\a. C.h a
where (by some long and devious process), C.h eventually inlines to
B.g.  We could only spot such loops by exhaustively following
unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
f.

Note that RULES for imported functions are important in practice; they
occur a lot in the libraries.

We regard this potential infinite loop as a *programmer* error.
It's up the programmer not to write silly rules like
     RULE f x = f x
448
and the example above is just a more complicated version.
449

450 451 452 453
Note [Preventing loops due to imported functions rules]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider:
  import GHC.Base (foldr)
454

455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470
  {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
  filter p xs = build (\c n -> foldr (filterFB c p) n xs)
  filterFB c p = ...

  f = filter p xs

Note that filter is not a loop-breaker, so what happens is:
  f =          filter p xs
    = {inline} build (\c n -> foldr (filterFB c p) n xs)
    = {inline} foldr (filterFB (:) p) [] xs
    = {RULE}   filter p xs

We are in an infinite loop.

A more elaborate example (that I actually saw in practice when I went to
mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
471
  {-# LANGUAGE RankNTypes #-}
472
  module GHCList where
473

474 475
  import Prelude hiding (filter)
  import GHC.Base (build)
476

477 478 479 480
  {-# INLINABLE filter #-}
  filter :: (a -> Bool) -> [a] -> [a]
  filter p [] = []
  filter p (x:xs) = if p x then x : filter p xs else filter p xs
481

482 483 484 485
  {-# NOINLINE [0] filterFB #-}
  filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
  filterFB c p x r | p x       = x `c` r
                   | otherwise = r
486

487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
  {-# RULES
  "filter"     [~1] forall p xs.  filter p xs = build (\c n -> foldr
  (filterFB c p) n xs)
  "filterList" [1]  forall p.     foldr (filterFB (:) p) [] = filter p
   #-}

Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
are not), the unfolding given to "filter" in the interface file will be:
  filter p []     = []
  filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
                           else     build (\c n -> foldr (filterFB c p) n xs

Note that because this unfolding does not mention "filter", filter is not
marked as a strong loop breaker. Therefore at a use site in another module:
  filter p xs
    = {inline}
      case xs of []     -> []
                 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
                                  else     build (\c n -> foldr (filterFB c p) n xs)

  build (\c n -> foldr (filterFB c p) n xs)
    = {inline} foldr (filterFB (:) p) [] xs
    = {RULE}   filter p xs

And we are in an infinite loop again, except that this time the loop is producing an
infinitely large *term* (an unrolling of filter) and so the simplifier finally
dies with "ticks exhausted"

Because of this problem, we make a small change in the occurrence analyser
designed to mark functions like "filter" as strong loop breakers on the basis that:
  1. The RHS of filter mentions the local function "filterFB"
  2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS

520 521 522 523 524
So for each RULE for an *imported* function we are going to add
dependency edges between the *local* FVS of the rule LHS and the
*local* FVS of the rule RHS. We don't do anything special for RULES on
local functions because the standard occurrence analysis stuff is
pretty good at getting loop-breakerness correct there.
525 526 527 528 529 530

It is important to note that even with this extra hack we aren't always going to get
things right. For example, it might be that the rule LHS mentions an imported Id,
and another module has a RULE that can rewrite that imported Id to one of our local
Ids.

531 532 533 534 535 536 537
Note [Specialising imported functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
BUT for *automatically-generated* rules, the programmer can't be
responsible for the "programmer error" in Note [Rules for imported
functions].  In paricular, consider specialising a recursive function
defined in another module.  If we specialise a recursive function B.g,
we get
538 539
         g_spec = .....(B.g Int).....
         RULE B.g Int = g_spec
540 541 542 543
Here, g_spec doesn't look recursive, but when the rule fires, it
becomes so.  And if B.g was mutually recursive, the loop might
not be as obvious as it is here.

544 545
To avoid this,
 * When specialising a function that is a loop breaker,
546 547 548 549 550
   give a NOINLINE pragma to the specialised function

Note [Glomming]
~~~~~~~~~~~~~~~
RULES for imported Ids can make something at the top refer to something at the bottom:
551 552 553 554
        f = \x -> B.g (q x)
        h = \y -> 3

        RULE:  B.g (q x) = h x
555 556 557 558 559 560 561 562 563 564 565 566 567

Applying this rule makes f refer to h, although f doesn't appear to
depend on h.  (And, as in Note [Rules for imported functions], the
dependency might be more indirect. For example, f might mention C.t
rather than B.g, where C.t eventually inlines to B.g.)

NOTICE that this cannot happen for rules whose head is a
locally-defined function, because we accurately track dependencies
through RULES.  It only happens for rules whose head is an imported
function (B.g in the example above).

Solution:
  - When simplifying, bring all top level identifiers into
568
    scope at the start, ignoring the Rec/NonRec structure, so
569 570 571 572 573 574 575 576
    that when 'h' pops up in f's rhs, we find it in the in-scope set
    (as the simplifier generally expects). This happens in simplTopBinds.

  - In the occurrence analyser, if there are any out-of-scope
    occurrences that pop out of the top, which will happen after
    firing the rule:      f = \x -> h x
                          h = \y -> 3
    then just glom all the bindings into a single Rec, so that
577
    the *next* iteration of the occurrence analyser will sort
578 579 580 581 582 583 584 585 586 587
    them all out.   This part happens in occurAnalysePgm.

------------------------------------------------------------
Note [Inline rules]
~~~~~~~~~~~~~~~~~~~
None of the above stuff about RULES applies to Inline Rules,
stored in a CoreUnfolding.  The unfolding, if any, is simplified
at the same time as the regular RHS of the function (ie *not* like
Note [Rules are visible in their own rec group]), so it should be
treated *exactly* like an extra RHS.
588

589 590 591 592 593
Or, rather, when computing loop-breaker edges,
  * If f has an INLINE pragma, and it is active, we treat the
    INLINE rhs as f's rhs
  * If it's inactive, we treat f as having no rhs
  * If it has no INLINE pragma, we look at f's actual rhs
594

595

596 597 598 599 600
There is a danger that we'll be sub-optimal if we see this
     f = ...f...
     [INLINE f = ..no f...]
where f is recursive, but the INLINE is not. This can just about
happen with a sufficiently odd set of rules; eg
601

602 603 604
        foo :: Int -> Int
        {-# INLINE [1] foo #-}
        foo x = x+1
605

606 607 608
        bar :: Int -> Int
        {-# INLINE [1] bar #-}
        bar x = foo x + 1
609

610
        {-# RULES "foo" [~1] forall x. foo x = bar x #-}
611

612 613 614 615 616 617 618 619 620 621 622
Here the RULE makes bar recursive; but it's INLINE pragma remains
non-recursive. It's tempting to then say that 'bar' should not be
a loop breaker, but an attempt to do so goes wrong in two ways:
   a) We may get
         $df = ...$cfoo...
         $cfoo = ...$df....
         [INLINE $cfoo = ...no-$df...]
      But we want $cfoo to depend on $df explicitly so that we
      put the bindings in the right order to inline $df in $cfoo
      and perhaps break the loop altogether.  (Maybe this
   b)
623

624

625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640
Example [eftInt]
~~~~~~~~~~~~~~~
Example (from GHC.Enum):

  eftInt :: Int# -> Int# -> [Int]
  eftInt x y = ...(non-recursive)...

  {-# INLINE [0] eftIntFB #-}
  eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
  eftIntFB c n x y = ...(non-recursive)...

  {-# RULES
  "eftInt"  [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
  "eftIntList"  [1] eftIntFB  (:) [] = eftInt
   #-}

641 642
Note [Specialisation rules]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
643 644 645 646 647 648 649 650 651
Consider this group, which is typical of what SpecConstr builds:

   fs a = ....f (C a)....
   f  x = ....f (C a)....
   {-# RULE f (C a) = fs a #-}

So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).

But watch out!  If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
652 653 654
  - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
  - fs is inlined (say it's small)
  - now there's another opportunity to apply the RULE
655 656

This showed up when compiling Control.Concurrent.Chan.getChanContents.
Austin Seipp's avatar
Austin Seipp committed
657
-}
658

659 660
type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique,
                                                -- which is gotten from the Id.
661 662 663
data Details
  = ND { nd_bndr :: Id          -- Binder
       , nd_rhs  :: CoreExpr    -- RHS, already occ-analysed
664

665
       , nd_uds  :: UsageDetails  -- Usage from RHS, and RULES, and stable unfoldings
666 667
                                  -- ignoring phase (ie assuming all are active)
                                  -- See Note [Forming Rec groups]
668

669
       , nd_inl  :: IdSet       -- Free variables of
670 671
                                --   the stable unfolding (if present and active)
                                --   or the RHS (if not)
672 673 674
                                -- but excluding any RULES
                                -- This is the IdSet that may be used if the Id is inlined

675
       , nd_weak :: IdSet       -- Binders of this Rec that are mentioned in nd_uds
676 677 678 679
                                -- but are *not* in nd_inl.  These are the ones whose
                                -- dependencies might not be respected by loop_breaker_edges
                                -- See Note [Weak loop breakers]

680
       , nd_active_rule_fvs :: IdSet   -- Free variables of the RHS of active RULES
681 682
  }

683
instance Outputable Details where
684 685 686 687 688 689
   ppr nd = text "ND" <> braces
             (sep [ text "bndr =" <+> ppr (nd_bndr nd)
                  , text "uds =" <+> ppr (nd_uds nd)
                  , text "inl =" <+> ppr (nd_inl nd)
                  , text "weak =" <+> ppr (nd_weak nd)
                  , text "rule =" <+> ppr (nd_active_rule_fvs nd)
690
             ])
691

692 693
makeNode :: OccEnv -> ImpRuleEdges -> VarSet -> (Var, CoreExpr) -> Node Details
makeNode env imp_rule_edges bndr_set (bndr, rhs)
694
  = (details, varUnique bndr, keysUFM node_fvs)
695 696 697
  where
    details = ND { nd_bndr = bndr
                 , nd_rhs  = rhs'
698
                 , nd_uds  = rhs_usage3
699
                 , nd_weak = node_fvs `minusVarSet` inl_fvs
700
                 , nd_inl  = inl_fvs
701
                 , nd_active_rule_fvs = active_rule_fvs }
702 703 704

    -- Constructing the edges for the main Rec computation
    -- See Note [Forming Rec groups]
705
    (rhs_usage1, rhs') = occAnalRecRhs env rhs
706 707 708 709 710
    rhs_usage2 = addIdOccs rhs_usage1 all_rule_fvs   -- Note [Rules are extra RHSs]
                                                     -- Note [Rule dependency info]
    rhs_usage3 = case mb_unf_fvs of
                   Just unf_fvs -> addIdOccs rhs_usage2 unf_fvs
                   Nothing      -> rhs_usage2
711
    node_fvs = udFreeVars bndr_set rhs_usage3
712 713 714 715 716

    -- Finding the free variables of the rules
    is_active = occ_rule_act env :: Activation -> Bool
    rules = filterOut isBuiltinRule (idCoreRules bndr)
    rules_w_fvs :: [(Activation, VarSet)]    -- Find the RHS fvs
717
    rules_w_fvs = maybe id (\ids -> ((AlwaysActive, ids):)) (lookupVarEnv imp_rule_edges bndr)
718 719
                   -- See Note [Preventing loops due to imported functions rules]
                  [ (ru_act rule, fvs)
720 721
                  | rule <- rules
                  , let fvs = exprFreeVars (ru_rhs rule)
722
                              `delVarSetList` ru_bndrs rule
723
                  , not (isEmptyVarSet fvs) ]
724 725 726 727
    all_rule_fvs = rule_lhs_fvs `unionVarSet` rule_rhs_fvs
    rule_rhs_fvs = mapUnionVarSet snd rules_w_fvs
    rule_lhs_fvs = mapUnionVarSet (\ru -> exprsFreeVars (ru_args ru)
                                          `delVarSetList` ru_bndrs ru) rules
728 729 730 731
    active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_fvs, is_active a]

    -- Finding the free variables of the INLINE pragma (if any)
    unf        = realIdUnfolding bndr     -- Ignore any current loop-breaker flag
732
    mb_unf_fvs = stableUnfoldingVars unf
733 734 735

    -- Find the "nd_inl" free vars; for the loop-breaker phase
    inl_fvs = case mb_unf_fvs of
736 737
                Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
                Just unf_fvs -> unf_fvs
738
                      -- We could check for an *active* INLINE (returning
739 740 741
                      -- emptyVarSet for an inactive one), but is_active
                      -- isn't the right thing (it tells about
                      -- RULE activation), so we'd need more plumbing
742

743
-----------------------------
744
occAnalRec :: SCC (Node Details)
745
           -> (UsageDetails, [CoreBind])
746
           -> (UsageDetails, [CoreBind])
747

748
        -- The NonRec case is just like a Let (NonRec ...) above
749 750
occAnalRec (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs, nd_uds = rhs_uds}, _, _))
           (body_uds, binds)
751
  | not (bndr `usedIn` body_uds)
Simon Peyton Jones's avatar
Simon Peyton Jones committed
752
  = (body_uds, binds)           -- See Note [Dead code]
753

754 755
  | otherwise                   -- It's mentioned in the body
  = (body_uds' +++ rhs_uds,
756 757
     NonRec tagged_bndr rhs : binds)
  where
758
    (body_uds', tagged_bndr) = tagBinder body_uds bndr
759

760 761
        -- The Rec case is the interesting one
        -- See Note [Loop breaking]
762
occAnalRec (CyclicSCC nodes) (body_uds, binds)
763
  | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
Simon Peyton Jones's avatar
Simon Peyton Jones committed
764
  = (body_uds, binds)                   -- See Note [Dead code]
765

766
  | otherwise   -- At this point we always build a single Rec
767
  = -- pprTrace "occAnalRec" (vcat
768 769
    --   [ text "tagged nodes" <+> ppr tagged_nodes
    --   , text "lb edges" <+> ppr loop_breaker_edges])
770
    (final_uds, Rec pairs : binds)
771 772

  where
773
    bndrs    = [b | (ND { nd_bndr = b }, _, _) <- nodes]
774
    bndr_set = mkVarSet bndrs
Ian Lynagh's avatar
Ian Lynagh committed
775

776 777
        ----------------------------
        -- Tag the binders with their occurrence info
778 779 780 781 782 783 784
    tagged_nodes = map tag_node nodes
    total_uds = foldl add_uds body_uds nodes
    final_uds = total_uds `minusVarEnv` bndr_set
    add_uds usage_so_far (nd, _, _) = usage_so_far +++ nd_uds nd

    tag_node :: Node Details -> Node Details
    tag_node (details@ND { nd_bndr = bndr }, k, ks)
785 786
      | let bndr1 = setBinderOcc total_uds bndr
      = (details { nd_bndr = bndr1 }, k, ks)
787 788 789 790

    ---------------------------
    -- Now reconstruct the cycle
    pairs :: [(Id,CoreExpr)]
791 792
    pairs | isEmptyVarSet weak_fvs = reOrderNodes   0 bndr_set weak_fvs tagged_nodes       []
          | otherwise              = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_edges []
793 794 795
          -- If weak_fvs is empty, the loop_breaker_edges will include all
          -- the edges in tagged_nodes, so there isn't any point in doing
          -- a fresh SCC computation that will yield a single CyclicSCC result.
796 797

    weak_fvs :: VarSet
798
    weak_fvs = mapUnionVarSet (nd_weak . fstOf3) nodes
799

800
        -- See Note [Choosing loop breakers] for loop_breaker_edges
801
    loop_breaker_edges = map mk_node tagged_nodes
802
    mk_node (details@(ND { nd_inl = inl_fvs }), k, _)
803
      = (details, k, keysUFM (extendFvs_ rule_fv_env inl_fvs))
Ian Lynagh's avatar
Ian Lynagh committed
804

805
    ------------------------------------
806 807
    rule_fv_env :: IdEnv IdSet
        -- Maps a variable f to the variables from this group
808 809
        --      mentioned in RHS of active rules for f
        -- Domain is *subset* of bound vars (others have no rule fvs)
810 811 812
    rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
    init_rule_fvs   -- See Note [Finding rule RHS free vars]
      = [ (b, trimmed_rule_fvs)
813
        | (ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs },_,_) <- nodes
814 815
        , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
        , not (isEmptyVarSet trimmed_rule_fvs)]
sof's avatar
sof committed
816

Austin Seipp's avatar
Austin Seipp committed
817
{-
818
@loopBreakSCC@ is applied to the list of (binder,rhs) pairs for a cyclic
sof's avatar
sof committed
819
strongly connected component (there's guaranteed to be a cycle).  It returns the
Ian Lynagh's avatar
Ian Lynagh committed
820 821 822
same pairs, but
        a) in a better order,
        b) with some of the Ids having a IAmALoopBreaker pragma
sof's avatar
sof committed
823

824
The "loop-breaker" Ids are sufficient to break all cycles in the SCC.  This means
sof's avatar
sof committed
825 826 827 828 829 830 831
that the simplifier can guarantee not to loop provided it never records an inlining
for these no-inline guys.

Furthermore, the order of the binds is such that if we neglect dependencies
on the no-inline Ids then the binds are topologically sorted.  This means
that the simplifier will generally do a good job if it works from top bottom,
recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
Austin Seipp's avatar
Austin Seipp committed
832
-}
sof's avatar
sof committed
833

834 835 836
type Binding = (Id,CoreExpr)

mk_loop_breaker :: Node Details -> Binding
837
mk_loop_breaker (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
838 839 840 841
  = (setIdOccInfo bndr strongLoopBreaker, rhs)

mk_non_loop_breaker :: VarSet -> Node Details -> Binding
-- See Note [Weak loop breakers]
842
mk_non_loop_breaker used_in_rules (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
843 844
  | bndr `elemVarSet` used_in_rules = (setIdOccInfo bndr weakLoopBreaker, rhs)
  | otherwise                       = (bndr, rhs)
845 846 847 848 849

udFreeVars :: VarSet -> UsageDetails -> VarSet
-- Find the subset of bndrs that are mentioned in uds
udFreeVars bndrs uds = intersectUFM_C (\b _ -> b) bndrs uds

850 851 852 853
loopBreakNodes :: Int
               -> VarSet        -- All binders
               -> VarSet        -- Binders whose dependencies may be "missing"
                                -- See Note [Weak loop breakers]
854
               -> [Node Details]
855
               -> [Binding]             -- Append these to the end
856
               -> [Binding]
857
-- Return the bindings sorted into a plausible order, and marked with loop breakers.
858
loopBreakNodes depth bndr_set weak_fvs nodes binds
859
  = go (stronglyConnCompFromEdgedVerticesR nodes) binds
sof's avatar
sof committed
860
  where
861 862
    go []         binds = binds
    go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
863

864
    loop_break_scc scc binds
865
      = case scc of
866
          AcyclicSCC node  -> mk_non_loop_breaker weak_fvs node : binds
867
          CyclicSCC [node] -> mk_loop_breaker node : binds
868
          CyclicSCC nodes  -> reOrderNodes depth bndr_set weak_fvs nodes binds
869 870 871 872 873

reOrderNodes :: Int -> VarSet -> VarSet -> [Node Details] -> [Binding] -> [Binding]
    -- Choose a loop breaker, mark it no-inline,
    -- do SCC analysis on the rest, and recursively sort them out
reOrderNodes _ _ _ [] _  = panic "reOrderNodes"
874
reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
875
  = -- pprTrace "reOrderNodes" (text "unchosen" <+> ppr unchosen $$
876
    --                           text "chosen" <+> ppr chosen_nodes) $
877
    loopBreakNodes new_depth bndr_set weak_fvs unchosen $
878 879 880
    (map mk_loop_breaker chosen_nodes ++ binds)
  where
    (chosen_nodes, unchosen) = choose_loop_breaker (score node) [node] [] nodes
881 882 883

    approximate_loop_breaker = depth >= 2
    new_depth | approximate_loop_breaker = 0
884 885 886 887 888 889 890 891
              | otherwise                = depth+1
        -- After two iterations (d=0, d=1) give up
        -- and approximate, returning to d=0

    choose_loop_breaker :: Int                  -- Best score so far
                        -> [Node Details]       -- Nodes with this score
                        -> [Node Details]       -- Nodes with higher scores
                        -> [Node Details]       -- Unprocessed nodes
892
                        -> ([Node Details], [Node Details])
Ian Lynagh's avatar
Ian Lynagh committed
893 894
        -- This loop looks for the bind with the lowest score
        -- to pick as the loop  breaker.  The rest accumulate in
895 896
    choose_loop_breaker _ loop_nodes acc []
        = (loop_nodes, acc)        -- Done
897

898 899 900
        -- If approximate_loop_breaker is True, we pick *all*
        -- nodes with lowest score, else just one
        -- See Note [Complexity of loop breaking]
901
    choose_loop_breaker loop_sc loop_nodes acc (node : nodes)
Ian Lynagh's avatar
Ian Lynagh committed
902
        | sc < loop_sc  -- Lower score so pick this new one
903
        = choose_loop_breaker sc [node] (loop_nodes ++ acc) nodes
Ian Lynagh's avatar
Ian Lynagh committed
904

905 906 907
        | approximate_loop_breaker && sc == loop_sc
        = choose_loop_breaker loop_sc (node : loop_nodes) acc nodes

908
        | otherwise     -- Higher score so don't pick it
909
        = choose_loop_breaker loop_sc loop_nodes (node : acc) nodes
Ian Lynagh's avatar
Ian Lynagh committed
910
        where
911
          sc = score node
Ian Lynagh's avatar
Ian Lynagh committed
912 913

    score :: Node Details -> Int        -- Higher score => less likely to be picked as loop breaker
914
    score (ND { nd_bndr = bndr, nd_rhs = rhs }, _, _)
915
        | not (isId bndr) = 100     -- A type or cercion variable is never a loop breaker
916

917
        | isDFunId bndr = 9   -- Never choose a DFun as a loop breaker
918
                              -- Note [DFuns should not be loop breakers]
Ian Lynagh's avatar
Ian Lynagh committed
919

920 921 922
        | Just be_very_keen <- hasStableCoreUnfolding_maybe (idUnfolding bndr)
        = if be_very_keen then 6    -- Note [Loop breakers and INLINE/INLINEABLE pragmas]
                          else 3
923 924
               -- Data structures are more important than INLINE pragmas
               -- so that dictionary/method recursion unravels
925 926 927
               -- Note that this case hits all stable unfoldings, so we
               -- never look at 'rhs' for stable unfoldings. That's right, because
               -- 'rhs' is irrelevant for inlining things with a stable unfolding
928

simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
929
        | is_con_app rhs = 5  -- Data types help with cases: Note [Constructor applications]
930

931 932 933 934 935 936
        | exprIsTrivial rhs = 10  -- Practically certain to be inlined
                -- Used to have also: && not (isExportedId bndr)
                -- But I found this sometimes cost an extra iteration when we have
                --      rec { d = (a,b); a = ...df...; b = ...df...; df = d }
                -- where df is the exported dictionary. Then df makes a really
                -- bad choice for loop breaker
937

938

simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
939 940 941 942
-- If an Id is marked "never inline" then it makes a great loop breaker
-- The only reason for not checking that here is that it is rare
-- and I've never seen a situation where it makes a difference,
-- so it probably isn't worth the time to test on every binder
943
--      | isNeverActive (idInlinePragma bndr) = -10
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
944

945
        | isOneOcc (idOccInfo bndr) = 2  -- Likely to be inlined
946

947 948
        | canUnfold (realIdUnfolding bndr) = 1
                -- The Id has some kind of unfolding
949
                -- Ignore loop-breaker-ness here because that is what we are setting!
950

Ian Lynagh's avatar
Ian Lynagh committed
951
        | otherwise = 0
952