OccurAnal.lhs 36.4 KB
Newer Older
1
%
2
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3 4 5 6 7 8 9
%
%************************************************************************
%*									*
\section[OccurAnal]{Occurrence analysis pass}
%*									*
%************************************************************************

10 11
The occurrence analyser re-typechecks a core expression, returning a new
core expression with (hopefully) improved usage information.
12 13 14

\begin{code}
module OccurAnal (
15
	occurAnalysePgm, occurAnalyseExpr
16 17
    ) where

twanvl's avatar
twanvl committed
18 19
-- XXX This define is a bit of a hack, and should be done more nicely
#define FAST_STRING_NOT_NEEDED 1
20
#include "HsVersions.h"
21 22

import CoreSyn
23
import CoreFVs
24
import CoreUtils	( exprIsTrivial, isDefaultAlt )
25 26
import Id
import IdInfo
twanvl's avatar
twanvl committed
27
import BasicTypes
28 29 30 31

import VarSet
import VarEnv

32
import Maybes		( orElse )
33
import Digraph		( stronglyConnCompR, SCC(..) )
34
import PrelNames	( buildIdKey, foldrIdKey, runSTRepIdKey, augmentIdKey )
35
import Unique		( Unique )
36
import LazyUniqFM		( keysUFM, intersectUFM_C, foldUFM_Directly )
37
import Util		( mapAndUnzip )
38
import Outputable
39 40

import Data.List
41 42 43 44 45 46 47 48 49 50 51 52
\end{code}


%************************************************************************
%*									*
\subsection[OccurAnal-main]{Counting occurrences: main function}
%*									*
%************************************************************************

Here's the externally-callable interface:

\begin{code}
53 54
occurAnalysePgm :: [CoreBind] -> [CoreBind]
occurAnalysePgm binds
55
  = snd (go initOccEnv binds)
56 57
  where
    go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
twanvl's avatar
twanvl committed
58
    go _ []
59 60 61 62
	= (emptyDetails, [])
    go env (bind:binds) 
	= (final_usage, bind' ++ binds')
	where
63
	   (bs_usage, binds')   = go env binds
64 65
	   (final_usage, bind') = occAnalBind env bind bs_usage

66 67 68
occurAnalyseExpr :: CoreExpr -> CoreExpr
	-- Do occurrence analysis, and discard occurence info returned
occurAnalyseExpr expr = snd (occAnal initOccEnv expr)
69 70
\end{code}

71

72 73 74 75 76 77 78 79 80 81 82
%************************************************************************
%*									*
\subsection[OccurAnal-main]{Counting occurrences: main function}
%*									*
%************************************************************************

Bindings
~~~~~~~~

\begin{code}
occAnalBind :: OccEnv
83
	    -> CoreBind
84 85
	    -> UsageDetails		-- Usage details of scope
	    -> (UsageDetails,		-- Of the whole let(rec)
86
		[CoreBind])
87

88
occAnalBind env (NonRec binder rhs) body_usage
89
  | not (binder `usedIn` body_usage)		-- It's not mentioned
90 91 92
  = (body_usage, [])

  | otherwise			-- It's mentioned in the body
93
  = (body_usage' +++ addRuleUsage rhs_usage binder,	-- Note [Rules are extra RHSs]
94
     [NonRec tagged_binder rhs'])
95
  where
96 97
    (body_usage', tagged_binder) = tagBinder body_usage binder
    (rhs_usage, rhs')		 = occAnalRhs env tagged_binder rhs
98 99
\end{code}

100 101
Note [Dead code]
~~~~~~~~~~~~~~~~
102 103 104 105 106 107
Dropping dead code for recursive bindings is done in a very simple way:

	the entire set of bindings is dropped if none of its binders are
	mentioned in its body; otherwise none are.

This seems to miss an obvious improvement.
108

109 110 111
	letrec  f = ...g...
		g = ...f...
	in
112 113 114 115 116 117 118
	...g...
===>
	letrec f = ...g...
	       g = ...(...g...)...
	in
	...g...

119 120 121
Now 'f' is unused! But it's OK!  Dependency analysis will sort this
out into a letrec for 'g' and a 'let' for 'f', and then 'f' will get
dropped.  It isn't easy to do a perfect job in one blow.  Consider
122 123 124 125 126 127 128 129

	letrec f = ...g...
	       g = ...h...
	       h = ...k...
	       k = ...m...
	       m = ...m...
	in
	...m...
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148


Note [Loop breaking and RULES]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Loop breaking is surprisingly subtle.  First read the section 4 of 
"Secrets of the GHC inliner".  This describes our basic plan.

However things are made quite a bit more complicated by RULES.  Remember

  * Note [Rules are extra RHSs]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
    keeps the specialised "children" alive.  If the parent dies
    (because it isn't referenced any more), then the children will die
    too (unless they are already referenced directly).

    To that end, we build a Rec group for each cyclic strongly
    connected component,
	*treating f's rules as extra RHSs for 'f'*.
149 150 151 152

    When we make the Rec groups we include variables free in *either*
    LHS *or* RHS of the rule.  The former might seems silly, but see
    Note [Rule dependency info].
153 154 155 156 157 158 159
 
    So in Example [eftInt], eftInt and eftIntFB will be put in the
    same Rec, even though their 'main' RHSs are both non-recursive.

  * Note [Rules are visible in their own rec group]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    We want the rules for 'f' to be visible in f's right-hand side.
160
    And we'd like them to be visible in other functions in f's Rec
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
    group.  E.g. in Example [Specialisation rules] we want f' rule
    to be visible in both f's RHS, and fs's RHS.

    This means that we must simplify the RULEs first, before looking
    at any of the definitions.  This is done by Simplify.simplRecBind,
    when it calls addLetIdInfo.

  * Note [Choosing loop breakers]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    We avoid infinite inlinings by choosing loop breakers, and
    ensuring that a loop breaker cuts each loop.  But what is a
    "loop"?  In particular, a RULES is like an equation for 'f' that
    is *always* inlined if it are applicable.  We do *not* disable
    rules for loop-breakers.  It's up to whoever makes the rules to
    make sure that the rules themselves alwasys terminate.  See Note
    [Rules for recursive functions] in Simplify.lhs

    Hence, if 
	f's RHS mentions g, and
	g has a RULE that mentions h, and
	h has a RULE that mentions f

    then we *must* choose f to be a loop breaker.  In general, take the
    free variables of f's RHS, and augment it with all the variables
    reachable by RULES from those starting points.  That is the whole
    reason for computing rule_fv_env in occAnalBind.  (Of course we
    only consider free vars that are also binders in this Rec group.)

189 190 191 192
    Note that when we compute this rule_fv_env, we only consider variables
    free in the *RHS* of the rule, in contrast to the way we build the 
    Rec group in the first place (Note [Rule dependency info])

193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
    Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
    chosen as a loop breaker, because their RHSs don't mention each other.
    And indeed both can be inlined safely.

    Note that the edges of the graph we use for computing loop breakers
    are not the same as the edges we use for computing the Rec blocks.
    That's why we compute 
	rec_edges	   for the Rec block analysis
	loop_breaker_edges for the loop breaker analysis


  * Note [Weak loop breakers]
    ~~~~~~~~~~~~~~~~~~~~~~~~~
    There is a last nasty wrinkle.  Suppose we have

	Rec { f = f_rhs
              RULE f [] = g
            
	      h = h_rhs
              g = h 
	      ...more...
        }

    Remmber that we simplify the RULES before any RHS (see Note
    [Rules are visible in their own rec group] above).

219
    So we must *not* postInlineUnconditionally 'g', even though
220 221 222 223 224 225 226 227 228 229 230 231 232
    its RHS turns out to be trivial.  (I'm assuming that 'g' is
    not choosen as a loop breaker.)

    We "solve" this by making g a "weak" or "rules-only" loop breaker,
    with OccInfo = IAmLoopBreaker True.  A normal "strong" loop breaker
    has IAmLoopBreaker False.  So

				Inline	postInlineUnconditinoally
	IAmLoopBreaker False	no	no
	IAmLoopBreaker True	yes	no
	other			yes 	yes

    The **sole** reason for this kind of loop breaker is so that
233
    postInlineUnconditionally does not fire.  Ugh.
234

235 236 237 238 239 240 241 242 243 244
  * Note [Rule dependency info]
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    The VarSet in a SpecInfo is used for dependency analysis in the 
    occurrence analyser.  We must track free vars in *both* lhs and rhs.  Why both?  
    Consider
    	x = y
     	RULE f x = 4
    Then if we substitute y for x, we'd better do so in the
    rule's LHS too, so we'd better ensure the dependency is respected

245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277

Example [eftInt]
~~~~~~~~~~~~~~~
Example (from GHC.Enum):

  eftInt :: Int# -> Int# -> [Int]
  eftInt x y = ...(non-recursive)...

  {-# INLINE [0] eftIntFB #-}
  eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
  eftIntFB c n x y = ...(non-recursive)...

  {-# RULES
  "eftInt"  [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
  "eftIntList"  [1] eftIntFB  (:) [] = eftInt
   #-}

Example [Specialisation rules]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider this group, which is typical of what SpecConstr builds:

   fs a = ....f (C a)....
   f  x = ....f (C a)....
   {-# RULE f (C a) = fs a #-}

So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).

But watch out!  If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
	- the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
	- fs is inlined (say it's small)
	- now there's another opportunity to apply the RULE

This showed up when compiling Control.Concurrent.Chan.getChanContents.
278 279 280


\begin{code}
281
occAnalBind env (Rec pairs) body_usage
282 283 284 285
  | not (any (`usedIn` body_usage) bndrs)	-- NB: look at body_usage, not total_usage
  = (body_usage, [])				-- Dead code
  | otherwise
  = (final_usage, map ({-# SCC "occAnalBind.dofinal" #-} do_final_bind) sccs)
286
  where
287 288
    bndrs    = map fst pairs
    bndr_set = mkVarSet bndrs
289

290 291 292 293 294 295 296 297 298
	---------------------------------------
	-- See Note [Loop breaking]
	---------------------------------------

    -------------Dependency analysis ------------------------------
    occ_anald :: [(Id, (UsageDetails, CoreExpr))]
	-- The UsageDetails here are strictly those arising from the RHS
	-- *not* from any rules in the Id
    occ_anald = [(bndr, occAnalRhs env bndr rhs) | (bndr,rhs) <- pairs]
299

300 301 302 303 304
    total_usage        = foldl add_usage body_usage occ_anald
    add_usage body_usage (bndr, (rhs_usage, _))
	= body_usage +++ addRuleUsage rhs_usage bndr

    (final_usage, tagged_bndrs) = tagBinders total_usage bndrs
305
    final_bndrs | isEmptyVarSet all_rule_fvs = tagged_bndrs
306
		| otherwise = map tag_rule_var tagged_bndrs
307
		
308 309
    tag_rule_var bndr | bndr `elemVarSet` all_rule_fvs = makeLoopBreaker True bndr
	    	      | otherwise		       = bndr
310 311 312
    all_rule_fvs = bndr_set `intersectVarSet` foldr (unionVarSet . idRuleVars) emptyVarSet bndrs
	-- Mark the binder with OccInfo saying "no preInlineUnconditionally" if
	-- it is used in any rule (lhs or rhs) of the recursive group
313 314

    ---- stuff for dependency analysis of binds -------------------------------
315 316 317 318 319 320 321 322 323 324 325
    sccs :: [SCC (Node Details)]
    sccs = {-# SCC "occAnalBind.scc" #-} stronglyConnCompR rec_edges

    rec_edges :: [Node Details]	-- The binders are tagged with correct occ-info
    rec_edges = {-# SCC "occAnalBind.assoc" #-} zipWith make_node final_bndrs occ_anald
    make_node tagged_bndr (_bndr, (rhs_usage, rhs))
	= ((tagged_bndr, rhs, rhs_fvs), idUnique tagged_bndr, out_edges)
	where
	  rhs_fvs = intersectUFM_C (\b _ -> b) bndr_set rhs_usage
	  out_edges = keysUFM (rhs_fvs `unionVarSet` idRuleVars tagged_bndr)
	
sof's avatar
sof committed
326 327 328 329 330 331 332

	-- (a -> b) means a mentions b
	-- Given the usage details (a UFM that gives occ info for each free var of
	-- the RHS) we can get the list of free vars -- or rather their Int keys --
	-- by just extracting the keys from the finite map.  Grimy, but fast.
	-- Previously we had this:
	-- 	[ bndr | bndr <- bndrs,
333
	--		 maybeToBool (lookupVarEnv rhs_usage bndr)]
sof's avatar
sof committed
334 335
	-- which has n**2 cost, and this meant that edges_from alone 
	-- consumed 10% of total runtime!
336

337
    ---- Stuff to "re-constitute" bindings from dependency-analysis info ------
338 339 340 341
    do_final_bind (AcyclicSCC ((bndr, rhs, _), _, _)) = NonRec bndr rhs
    do_final_bind (CyclicSCC cycle)
	| no_rules  = Rec (reOrderCycle cycle)
	| otherwise = Rec (concatMap reOrderRec (stronglyConnCompR loop_breaker_edges))
342
	where	-- See Note [Choosing loop breakers] for looop_breker_edges
343
	  loop_breaker_edges = map mk_node cycle
twanvl's avatar
twanvl committed
344
	  mk_node (details@(_bndr, _rhs, rhs_fvs), k, _) = (details, k, new_ks)
345 346
		where
		  new_ks = keysUFM (extendFvs rule_fv_env rhs_fvs rhs_fvs)
347

348 349 350 351 352 353 354 355 356
	
    ------------------------------------
    rule_fv_env :: IdEnv IdSet	-- Variables from this group mentioned in RHS of rules
				-- Domain is *subset* of bound vars (others have no rule fvs)
    rule_fv_env = rule_loop init_rule_fvs

    no_rules      = null init_rule_fvs
    init_rule_fvs = [(b, rule_fvs)
		    | b <- bndrs 
357
		    , let rule_fvs = idRuleRhsVars b `intersectVarSet` bndr_set
358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
		    , not (isEmptyVarSet rule_fvs)]

    rule_loop :: [(Id,IdSet)] -> IdEnv IdSet	-- Finds fixpoint
    rule_loop fv_list 
	| no_change = env
	| otherwise = rule_loop new_fv_list
	where
	  env = mkVarEnv init_rule_fvs
	  (no_change, new_fv_list) = mapAccumL bump True fv_list
	  bump no_change (b,fvs) 
		| new_fvs `subVarSet` fvs = (no_change, (b,fvs))
		| otherwise		  = (False,     (b,new_fvs `unionVarSet` fvs))
		where
		  new_fvs = extendFvs env emptyVarSet fvs

373 374 375 376 377
idRuleRhsVars :: Id -> VarSet
-- Just the variables free on the *rhs* of a rule
-- See Note [Choosing loop breakers]
idRuleRhsVars id = foldr (unionVarSet . ruleRhsFreeVars) emptyVarSet (idCoreRules id)

378 379 380 381 382 383 384 385 386
extendFvs :: IdEnv IdSet -> IdSet -> IdSet -> IdSet
-- (extendFVs env fvs s) returns (fvs `union` env(s))
extendFvs env fvs id_set
  = foldUFM_Directly add fvs id_set
  where
    add uniq _ fvs 
	= case lookupVarEnv_Directly env uniq  of
	    Just fvs' -> fvs' `unionVarSet` fvs
	    Nothing   -> fvs
sof's avatar
sof committed
387 388 389 390 391 392
\end{code}

@reOrderRec@ is applied to the list of (binder,rhs) pairs for a cyclic
strongly connected component (there's guaranteed to be a cycle).  It returns the
same pairs, but 
	a) in a better order,
393
	b) with some of the Ids having a IAmALoopBreaker pragma
sof's avatar
sof committed
394

395
The "loop-breaker" Ids are sufficient to break all cycles in the SCC.  This means
sof's avatar
sof committed
396 397 398 399 400 401 402 403
that the simplifier can guarantee not to loop provided it never records an inlining
for these no-inline guys.

Furthermore, the order of the binds is such that if we neglect dependencies
on the no-inline Ids then the binds are topologically sorted.  This means
that the simplifier will generally do a good job if it works from top bottom,
recording inlinings for any Ids which aren't marked as "no-inline" as it goes.

404 405 406 407
==============
[June 98: I don't understand the following paragraphs, and I've 
	  changed the a=b case again so that it isn't a special case any more.]

sof's avatar
sof committed
408
Here's a case that bit me:
409

sof's avatar
sof committed
410 411 412 413 414 415 416
	letrec
		a = b
		b = \x. BIG
	in
	...a...a...a....

Re-ordering doesn't change the order of bindings, but there was no loop-breaker.
sof's avatar
sof committed
417

sof's avatar
sof committed
418 419
My solution was to make a=b bindings record b as Many, rather like INLINE bindings.
Perhaps something cleverer would suffice.
420
===============
sof's avatar
sof committed
421

sof's avatar
sof committed
422

sof's avatar
sof committed
423
\begin{code}
424 425
type Node details = (details, Unique, [Unique])	-- The Ints are gotten from the Unique,
						-- which is gotten from the Id.
426 427 428
type Details = (Id, 		-- Binder
		CoreExpr,	-- RHS
		IdSet)		-- RHS free vars (*not* include rules)
429

430
reOrderRec :: SCC (Node Details)
431
	   -> [(Id,CoreExpr)]
432 433
-- Sorted into a plausible order.  Enough of the Ids have
--	IAmALoopBreaker pragmas that there are no loops left.
434 435
reOrderRec (AcyclicSCC ((bndr, rhs, _), _, _)) = [(bndr, rhs)]
reOrderRec (CyclicSCC cycle)  		       = reOrderCycle cycle
436

437 438
reOrderCycle :: [Node Details] -> [(Id,CoreExpr)]
reOrderCycle []
439
  = panic "reOrderCycle"
440 441
reOrderCycle [bind]	-- Common case of simple self-recursion
  = [(makeLoopBreaker False bndr, rhs)]
sof's avatar
sof committed
442
  where
443
    ((bndr, rhs, _), _, _) = bind
sof's avatar
sof committed
444

445
reOrderCycle (bind : binds)
sof's avatar
sof committed
446 447
  = 	-- Choose a loop breaker, mark it no-inline,
	-- do SCC analysis on the rest, and recursively sort them out
448 449
    concatMap reOrderRec (stronglyConnCompR unchosen) ++
    [(makeLoopBreaker False bndr, rhs)]
sof's avatar
sof committed
450 451

  where
452
    (chosen_bind, unchosen) = choose_loop_breaker bind (score bind) [] binds
453
    (bndr, rhs, _)  = chosen_bind
454 455 456

	-- This loop looks for the bind with the lowest score
	-- to pick as the loop  breaker.  The rest accumulate in 
twanvl's avatar
twanvl committed
457
    choose_loop_breaker (details,_,_) _loop_sc acc []
458 459 460 461 462 463 464 465
	= (details, acc)	-- Done

    choose_loop_breaker loop_bind loop_sc acc (bind : binds)
	| sc < loop_sc	-- Lower score so pick this new one
	= choose_loop_breaker bind sc (loop_bind : acc) binds

	| otherwise	-- No lower so don't pick it
	= choose_loop_breaker loop_bind loop_sc (bind : acc) binds
sof's avatar
sof committed
466
	where
467 468
	  sc = score bind
	  
469
    score :: Node Details -> Int	-- Higher score => less likely to be picked as loop breaker
470
    score ((bndr, rhs, _), _, _)
471 472 473
        | workerExists (idWorkerInfo bndr)      = 10
                -- Note [Worker inline loop]

474 475 476 477 478 479 480
	| exprIsTrivial rhs 	   = 4	-- Practically certain to be inlined
		-- Used to have also: && not (isExportedId bndr)
		-- But I found this sometimes cost an extra iteration when we have
		--	rec { d = (a,b); a = ...df...; b = ...df...; df = d }
		-- where df is the exported dictionary. Then df makes a really
		-- bad choice for loop breaker
	  
481 482
	| is_con_app rhs = 2	-- Data types help with cases
                -- Note [conapp]
483

484 485
	| inlineCandidate bndr rhs = 1	-- Likely to be inlined
		-- Note [Inline candidates]
486

487 488
	| otherwise = 0

489
    inlineCandidate :: Id -> CoreExpr -> Bool
twanvl's avatar
twanvl committed
490 491
    inlineCandidate _  (Note InlineMe _) = True
    inlineCandidate id _                 = isOneOcc (idOccInfo id)
492

493 494 495 496 497
        -- Note [conapp]
        --
        -- It's really really important to inline dictionaries.  Real
        -- example (the Enum Ordering instance from GHC.Base):
        --
sof's avatar
sof committed
498 499 500 501 502 503 504
	--	rec	f = \ x -> case d of (p,q,r) -> p x
	--		g = \ x -> case d of (p,q,r) -> q x
	--		d = (v, f, g)
	--
	-- Here, f and g occur just once; but we can't inline them into d.
	-- On the other hand we *could* simplify those case expressions if
	-- we didn't stupidly choose d as the loop breaker.
505
	-- But we won't because constructor args are marked "Many".
506 507
        -- Inlining dictionaries is really essential to unravelling
        -- the loops in static numeric dictionaries, see GHC.Float.
sof's avatar
sof committed
508

509
	-- Cheap and cheerful; the simplifer moves casts out of the way
510 511 512 513
	-- The lambda case is important to spot x = /\a. C (f a)
	-- which comes up when C is a dictionary constructor and
	-- f is a default method.  
	-- Example: the instance for Show (ST s a) in GHC.ST
514 515 516
	--
	-- However we *also* treat (\x. C p q) as a con-app-like thing, 
	-- 	Note [Closure conversion]
517 518
    is_con_app (Var v)    = isDataConWorkId v
    is_con_app (App f _)  = is_con_app f
twanvl's avatar
twanvl committed
519
    is_con_app (Lam _ e)  = is_con_app e
520
    is_con_app (Note _ e) = is_con_app e
twanvl's avatar
twanvl committed
521
    is_con_app _          = False
522

523 524 525 526
makeLoopBreaker :: Bool -> Id -> Id
-- Set the loop-breaker flag
-- See Note [Weak loop breakers]
makeLoopBreaker weak bndr = setIdOccInfo bndr (IAmALoopBreaker weak)
527 528
\end{code}

529
Note [Worker inline loop]
530
~~~~~~~~~~~~~~~~~~~~~~~~
531 532 533
Never choose a wrapper as the loop breaker!  Because
wrappers get auto-generated inlinings when importing, and
that can lead to an infinite inlining loop.  For example:
534 535 536 537 538
  rec {
	$wfoo x = ....foo x....
	
	{-loop brk-} foo x = ...$wfoo x...
  }
539 540 541 542 543 544 545 546

The interface file sees the unfolding for $wfoo, and sees that foo is
strict (and hence it gets an auto-generated wrapper).  Result: an
infinite inlining in the importing scope.  So be a bit careful if you
change this.  A good example is Tree.repTree in
nofib/spectral/minimax. If the repTree wrapper is chosen as the loop
breaker then compiling Game.hs goes into an infinite loop (this
happened when we gave is_con_app a lower score than inline candidates).
547

548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570
Note [Closure conversion]
~~~~~~~~~~~~~~~~~~~~~~~~~
We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
The immediate motivation came from the result of a closure-conversion transformation
which generated code like this:

    data Clo a b = forall c. Clo (c -> a -> b) c

    ($:) :: Clo a b -> a -> b
    Clo f env $: x = f env x

    rec { plus = Clo plus1 ()

        ; plus1 _ n = Clo plus2 n

	; plus2 Zero     n = n
	; plus2 (Succ m) n = Succ (plus $: m $: n) }

If we inline 'plus' and 'plus1', everything unravels nicely.  But if
we choose 'plus1' as the loop breaker (which is entirely possible
otherwise), the loop does not unravel nicely.


571 572 573 574 575 576 577
@occAnalRhs@ deals with the question of bindings where the Id is marked
by an INLINE pragma.  For these we record that anything which occurs
in its RHS occurs many times.  This pessimistically assumes that ths
inlined binder also occurs many times in its scope, but if it doesn't
we'll catch it next time round.  At worst this costs an extra simplifier pass.
ToDo: try using the occurrence info for the inline'd binder.

sof's avatar
sof committed
578
[March 97] We do the same for atomic RHSs.  Reason: see notes with reOrderRec.
579
[June 98, SLPJ]  I've undone this change; I don't understand it.  See notes with reOrderRec.
sof's avatar
sof committed
580

581

582 583
\begin{code}
occAnalRhs :: OccEnv
sof's avatar
sof committed
584
	   -> Id -> CoreExpr	-- Binder and rhs
585 586
				-- For non-recs the binder is alrady tagged
				-- with occurrence info
587
	   -> (UsageDetails, CoreExpr)
588 589

occAnalRhs env id rhs
590
  = occAnal ctxt rhs
591
  where
592
    ctxt | certainly_inline id = env
593
	 | otherwise	       = rhsCtxt
594 595
	-- Note that we generally use an rhsCtxt.  This tells the occ anal n
	-- that it's looking at an RHS, which has an effect in occAnalApp
596 597 598 599 600
	--
	-- But there's a problem.  Consider
	--	x1 = a0 : []
	--	x2 = a1 : x1
	--	x3 = a2 : x2
601
	--	g  = f x3
602 603 604 605 606
	-- First time round, it looks as if x1 and x2 occur as an arg of a 
	-- let-bound constructor ==> give them a many-occurrence.
	-- But then x3 is inlined (unconditionally as it happens) and
	-- next time round, x2 will be, and the next time round x1 will be
	-- Result: multiple simplifier iterations.  Sigh.  
607 608 609
	-- Crude solution: use rhsCtxt for things that occur just once...

    certainly_inline id = case idOccInfo id of
610
			    OneOcc in_lam one_br _ -> not in_lam && one_br
twanvl's avatar
twanvl committed
611
			    _                      -> False
612 613 614 615 616
\end{code}



\begin{code}
617 618 619 620 621
addRuleUsage :: UsageDetails -> Id -> UsageDetails
-- Add the usage from RULES in Id to the usage
addRuleUsage usage id
  = foldVarSet add usage (idRuleVars id)
  where
622
    add v u = addOneOcc u v NoOccInfo		-- Give a non-committal binder info
623 624
						-- (i.e manyOcc) because many copies
						-- of the specialised thing can appear
625 626 627 628 629 630
\end{code}

Expressions
~~~~~~~~~~~
\begin{code}
occAnal :: OccEnv
631
	-> CoreExpr
632
	-> (UsageDetails,	-- Gives info only about the "interesting" Ids
633
	    CoreExpr)
634

twanvl's avatar
twanvl committed
635
occAnal _   (Type t)  = (emptyDetails, Type t)
636
occAnal env (Var v)   = (mkOneOcc env v False, Var v)
637 638
    -- At one stage, I gathered the idRuleVars for v here too,
    -- which in a way is the right thing to do.
639
    -- Btu that went wrong right after specialisation, when
640 641 642
    -- the *occurrences* of the overloaded function didn't have any
    -- rules in them, so the *specialised* versions looked as if they
    -- weren't used at all.
643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660
\end{code}

We regard variables that occur as constructor arguments as "dangerousToDup":

\begin{verbatim}
module A where
f x = let y = expensive x in 
      let z = (True,y) in 
      (case z of {(p,q)->q}, case z of {(p,q)->q})
\end{verbatim}

We feel free to duplicate the WHNF (True,y), but that means
that y may be duplicated thereby.

If we aren't careful we duplicate the (expensive x) call!
Constructors are rather like lambdas in this way.

\begin{code}
twanvl's avatar
twanvl committed
661
occAnal _   expr@(Lit _) = (emptyDetails, expr)
662
\end{code}
663

664
\begin{code}
665 666 667 668 669
occAnal env (Note InlineMe body)
  = case occAnal env body of { (usage, body') -> 
    (mapVarEnv markMany usage, Note InlineMe body')
    }

twanvl's avatar
twanvl committed
670
occAnal env (Note note@(SCC _) body)
671 672 673
  = case occAnal env body of { (usage, body') ->
    (mapVarEnv markInsideSCC usage, Note note body')
    }
674

675
occAnal env (Note note body)
676 677 678
  = case occAnal env body of { (usage, body') ->
    (usage, Note note body')
    }
679 680 681

occAnal env (Cast expr co)
  = case occAnal env expr of { (usage, expr') ->
682 683 684 685
    (markRhsUds env True usage, Cast expr' co)
	-- If we see let x = y `cast` co
	-- then mark y as 'Many' so that we don't
	-- immediately inline y again. 
686
    }
687
\end{code}
688

689
\begin{code}
twanvl's avatar
twanvl committed
690 691
occAnal env app@(App _ _)
  = occAnalApp env (collectArgs app)
692

693 694 695 696
-- Ignore type variables altogether
--   (a) occurrences inside type lambdas only not marked as InsideLam
--   (b) type variables not in environment

twanvl's avatar
twanvl committed
697
occAnal env (Lam x body) | isTyVar x
698 699 700
  = case occAnal env body of { (body_usage, body') ->
    (body_usage, Lam x body')
    }
701

702 703 704 705 706
-- For value lambdas we do a special hack.  Consider
-- 	(\x. \y. ...x...)
-- If we did nothing, x is used inside the \y, so would be marked
-- as dangerous to dup.  But in the common case where the abstraction
-- is applied to two arguments this is over-pessimistic.
707 708 709 710 711
-- So instead, we just mark each binder with its occurrence
-- info in the *body* of the multiple lambda.
-- Then, the simplifier is careful when partially applying lambdas.

occAnal env expr@(Lam _ _)
712
  = case occAnal env_body body of { (body_usage, body') ->
713 714
    let
        (final_usage, tagged_binders) = tagBinders body_usage binders
715 716 717 718
	--	URGH!  Sept 99: we don't seem to be able to use binders' here, because
	--	we get linear-typed things in the resulting program that we can't handle yet.
	--	(e.g. PrelShow)  TODO 

719 720 721 722
	really_final_usage = if linear then
				final_usage
			     else
				mapVarEnv markInsideLam final_usage
723
    in
724
    (really_final_usage,
725
     mkLams tagged_binders body') }
726
  where
727 728 729 730 731
    env_body	    = vanillaCtxt			-- Body is (no longer) an RhsContext
    (binders, body) = collectBinders expr
    binders' 	    = oneShotGroup env binders
    linear	    = all is_one_shot binders'
    is_one_shot b   = isId b && isOneShotBndr b
732

733
occAnal env (Case scrut bndr ty alts)
734 735
  = case occ_anal_scrut scrut alts		    of { (scrut_usage, scrut') ->
    case mapAndUnzip (occAnalAlt alt_env bndr) alts of { (alts_usage_s, alts')   -> 
736 737
    let
	alts_usage  = foldr1 combineAltsUsageDetails alts_usage_s
738 739
	alts_usage' = addCaseBndrUsage alts_usage
	(alts_usage1, tagged_bndr) = tagBinder alts_usage' bndr
740
        total_usage = scrut_usage +++ alts_usage1
741
    in
742
    total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
743
  where
744 745 746 747 748 749 750 751 752 753 754
	-- The case binder gets a usage of either "many" or "dead", never "one".
	-- Reason: we like to inline single occurrences, to eliminate a binding,
	-- but inlining a case binder *doesn't* eliminate a binding.
	-- We *don't* want to transform
	--	case x of w { (p,q) -> f w }
	-- into
	--	case x of w { (p,q) -> f (p,q) }
    addCaseBndrUsage usage = case lookupVarEnv usage bndr of
				Nothing  -> usage
				Just occ -> extendVarEnv usage bndr (markMany occ)

755 756 757 758
    alt_env = setVanillaCtxt env
	-- Consider 	x = case v of { True -> (p,q); ... }
	-- Then it's fine to inline p and q

759 760 761
    occ_anal_scrut (Var v) (alt1 : other_alts)
				| not (null other_alts) || not (isDefaultAlt alt1)
			        = (mkOneOcc env v True, Var v)
twanvl's avatar
twanvl committed
762
    occ_anal_scrut scrut _alts  = occAnal vanillaCtxt scrut
763 764
					-- No need for rhsCtxt

765
occAnal env (Let bind body)
766
  = case occAnal env body     	         of { (body_usage, body') ->
767
    case occAnalBind env bind body_usage of { (final_usage, new_binds) ->
768
       (final_usage, mkLets new_binds body') }}
769

twanvl's avatar
twanvl committed
770 771
occAnalArgs :: OccEnv -> [CoreExpr] -> (UsageDetails, [CoreExpr])
occAnalArgs _env args
772
  = case mapAndUnzip (occAnal arg_env) args of	{ (arg_uds_s, args') ->
773
    (foldr (+++) emptyDetails arg_uds_s, args')}
774
  where
775
    arg_env = vanillaCtxt
776 777
\end{code}

778 779 780 781
Applications are dealt with specially because we want
the "build hack" to work.

\begin{code}
twanvl's avatar
twanvl committed
782 783 784 785
occAnalApp :: OccEnv
           -> (Expr CoreBndr, [Arg CoreBndr])
           -> (UsageDetails, Expr CoreBndr)
occAnalApp env (Var fun, args)
786 787
  = case args_stuff of { (args_uds, args') ->
    let
788
        final_args_uds = markRhsUds env is_pap args_uds
789
    in
790
    (fun_uds +++ final_args_uds, mkApps (Var fun) args') }
791 792
  where
    fun_uniq = idUnique fun
793
    fun_uds  = mkOneOcc env fun (valArgCount args > 0)
794
    is_pap = isDataConWorkId fun || valArgCount args < idArity fun
795 796

		-- Hack for build, fold, runST
797 798 799
    args_stuff	| fun_uniq == buildIdKey    = appSpecial env 2 [True,True]  args
		| fun_uniq == augmentIdKey  = appSpecial env 2 [True,True]  args
		| fun_uniq == foldrIdKey    = appSpecial env 3 [False,True] args
800 801 802 803 804 805 806 807 808 809
		| fun_uniq == runSTRepIdKey = appSpecial env 2 [True]	    args
			-- (foldr k z xs) may call k many times, but it never
			-- shares a partial application of k; hence [False,True]
			-- This means we can optimise
			--	foldr (\x -> let v = ...x... in \y -> ...v...) z xs
			-- by floating in the v

		| otherwise = occAnalArgs env args


twanvl's avatar
twanvl committed
810
occAnalApp env (fun, args)
811 812 813 814 815 816 817 818 819
  = case occAnal (addAppCtxt env args) fun of	{ (fun_uds, fun') ->
	-- The addAppCtxt is a bit cunning.  One iteration of the simplifier
	-- often leaves behind beta redexs like
	--	(\x y -> e) a1 a2
	-- Here we would like to mark x,y as one-shot, and treat the whole
	-- thing much like a let.  We do this by pushing some True items
	-- onto the context stack.

    case occAnalArgs env args of	{ (args_uds, args') ->
820
    let
821
	final_uds = fun_uds +++ args_uds
822 823 824
    in
    (final_uds, mkApps fun' args') }}
    
825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841

markRhsUds :: OccEnv		-- Check if this is a RhsEnv
	   -> Bool		-- and this is true
	   -> UsageDetails	-- The do markMany on this
	   -> UsageDetails
-- We mark the free vars of the argument of a constructor or PAP 
-- as "many", if it is the RHS of a let(rec).
-- This means that nothing gets inlined into a constructor argument
-- position, which is what we want.  Typically those constructor
-- arguments are just variables, or trivial expressions.
--
-- This is the *whole point* of the isRhsEnv predicate
markRhsUds env is_pap arg_uds
  | isRhsEnv env && is_pap = mapVarEnv markMany arg_uds
  | otherwise 		   = arg_uds


842 843 844 845
appSpecial :: OccEnv 
	   -> Int -> CtxtTy	-- Argument number, and context to use for it
	   -> [CoreExpr]
	   -> (UsageDetails, [CoreExpr])
846 847 848
appSpecial env n ctxt args
  = go n args
  where
849
    arg_env = vanillaCtxt
850

twanvl's avatar
twanvl committed
851
    go _ [] = (emptyDetails, [])	-- Too few args
852 853

    go 1 (arg:args)			-- The magic arg
854 855
      = case occAnal (setCtxt arg_env ctxt) arg of	{ (arg_uds, arg') ->
	case occAnalArgs env args of			{ (args_uds, args') ->
856
	(arg_uds +++ args_uds, arg':args') }}
857 858
    
    go n (arg:args)
859
      = case occAnal arg_env arg of	{ (arg_uds, arg') ->
860
	case go (n-1) args of		{ (args_uds, args') ->
861
	(arg_uds +++ args_uds, arg':args') }}
862 863 864
\end{code}

    
865 866
Case alternatives
~~~~~~~~~~~~~~~~~
867 868 869 870 871 872 873 874
If the case binder occurs at all, the other binders effectively do too.  
For example
	case e of x { (a,b) -> rhs }
is rather like
	let x = (a,b) in rhs
If e turns out to be (e1,e2) we indeed get something like
	let a = e1; b = e2; x = (a,b) in rhs

875 876 877 878
Note [Aug 06]: I don't think this is necessary any more, and it helpe
	       to know when binders are unused.  See esp the call to
	       isDeadBinder in Simplify.mkDupableAlt

879
\begin{code}
twanvl's avatar
twanvl committed
880 881 882 883 884
occAnalAlt :: OccEnv
           -> CoreBndr
           -> CoreAlt
           -> (UsageDetails, Alt IdWithOccInfo)
occAnalAlt env _case_bndr (con, bndrs, rhs)
885
  = case occAnal env rhs of { (rhs_usage, rhs') ->
886 887
    let
        (final_usage, tagged_bndrs) = tagBinders rhs_usage bndrs
888 889
	final_bndrs = tagged_bndrs	-- See Note [Aug06] above
{-
890 891 892 893
	final_bndrs | case_bndr `elemVarEnv` final_usage = bndrs
		    | otherwise				= tagged_bndrs
		-- Leave the binders untagged if the case 
		-- binder occurs at all; see note above
894
-}
895
    in
896
    (final_usage, (con, final_bndrs, rhs')) }
897
\end{code}
898 899 900 901


%************************************************************************
%*									*
902
\subsection[OccurAnal-types]{OccEnv}
903 904 905 906
%*									*
%************************************************************************

\begin{code}
907
data OccEnv
908
  = OccEnv OccEncl	-- Enclosing context information
909 910 911 912 913 914 915 916 917 918 919 920 921 922 923
	   CtxtTy	-- Tells about linearity

-- OccEncl is used to control whether to inline into constructor arguments
-- For example:
--	x = (p,q)		-- Don't inline p or q
--	y = /\a -> (p a, q a)	-- Still don't inline p or q
--	z = f (p,q)		-- Do inline p,q; it may make a rule fire
-- So OccEncl tells enought about the context to know what to do when
-- we encounter a contructor application or PAP.

data OccEncl
  = OccRhs 		-- RHS of let(rec), albeit perhaps inside a type lambda
			-- Don't inline into constructor args here
  | OccVanilla		-- Argument of function, body of lambda, scruintee of case etc.
			-- Do inline into constructor args here
924

925 926 927 928 929 930 931 932 933
type CtxtTy = [Bool]
	-- []	 	No info
	--
	-- True:ctxt  	Analysing a function-valued expression that will be
	--			applied just once
	--
	-- False:ctxt	Analysing a function-valued expression that may
	--			be applied many times; but when it is, 
	--			the CtxtTy inside applies
934

935 936
initOccEnv :: OccEnv
initOccEnv = OccEnv OccRhs []
937

twanvl's avatar
twanvl committed
938
vanillaCtxt :: OccEnv
939
vanillaCtxt = OccEnv OccVanilla []
twanvl's avatar
twanvl committed
940 941

rhsCtxt :: OccEnv
942
rhsCtxt     = OccEnv OccRhs     []
943

twanvl's avatar
twanvl committed
944
isRhsEnv :: OccEnv -> Bool
945 946
isRhsEnv (OccEnv OccRhs     _) = True
isRhsEnv (OccEnv OccVanilla _) = False
947

948 949 950 951
setVanillaCtxt :: OccEnv -> OccEnv
setVanillaCtxt (OccEnv OccRhs ctxt_ty) = OccEnv OccVanilla ctxt_ty
setVanillaCtxt other_env	       = other_env

952
setCtxt :: OccEnv -> CtxtTy -> OccEnv
953
setCtxt (OccEnv encl _) ctxt = OccEnv encl ctxt
954

955
oneShotGroup :: OccEnv -> [CoreBndr] -> [CoreBndr]
956 957 958 959 960
	-- The result binders have one-shot-ness set that they might not have had originally.
	-- This happens in (build (\cn -> e)).  Here the occurrence analyser
	-- linearity context knows that c,n are one-shot, and it records that fact in
	-- the binder. This is useful to guide subsequent float-in/float-out tranformations

twanvl's avatar
twanvl committed
961
oneShotGroup (OccEnv _encl ctxt) bndrs
962
  = go ctxt bndrs []
963
  where
twanvl's avatar
twanvl committed
964
    go _ [] rev_bndrs = reverse rev_bndrs
965 966 967 968 969 970 971 972 973

    go (lin_ctxt:ctxt) (bndr:bndrs) rev_bndrs
	| isId bndr = go ctxt bndrs (bndr':rev_bndrs)
	where
	  bndr' | lin_ctxt  = setOneShotLambda bndr
		| otherwise = bndr

    go ctxt (bndr:bndrs) rev_bndrs = go ctxt bndrs (bndr:rev_bndrs)

twanvl's avatar
twanvl committed
974
addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
975 976
addAppCtxt (OccEnv encl ctxt) args 
  = OccEnv encl (replicate (valArgCount args) True ++ ctxt)
977 978 979 980 981 982 983
\end{code}

%************************************************************************
%*									*
\subsection[OccurAnal-types]{OccEnv}
%*									*
%************************************************************************
984

985
\begin{code}
986
type UsageDetails = IdEnv OccInfo	-- A finite map from ids to their usage
987

988
(+++), combineAltsUsageDetails
989 990
	:: UsageDetails -> UsageDetails -> UsageDetails

991
(+++) usage1 usage2
992
  = plusVarEnv_C addOccInfo usage1 usage2
993 994

combineAltsUsageDetails usage1 usage2
995
  = plusVarEnv_C orOccInfo usage1 usage2
996

997
addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
998
addOneOcc usage id info
999
  = plusVarEnv_C addOccInfo usage (unitVarEnv id info)
1000 1001
	-- ToDo: make this more efficient

twanvl's avatar
twanvl committed
1002
emptyDetails :: UsageDetails
1003
emptyDetails = (emptyVarEnv :: UsageDetails)
1004

1005
usedIn :: Id -> UsageDetails -> Bool
1006
v `usedIn` details =  isExportedId v || v `elemVarEnv` details
1007

1008 1009
type IdWithOccInfo = Id

1010 1011 1012
tagBinders :: UsageDetails	    -- Of scope
	   -> [Id]		    -- Binders
	   -> (UsageDetails, 	    -- Details with binders removed
1013 1014 1015 1016 1017
	      [IdWithOccInfo])    -- Tagged binders

tagBinders usage binders
 = let
     usage' = usage `delVarEnvList` binders
1018
     uss    = map (setBinderOcc usage) binders
1019 1020 1021
   in
   usage' `seq` (usage', uss)

1022 1023 1024
tagBinder :: UsageDetails	    -- Of scope
	  -> Id			    -- Binders
	  -> (UsageDetails, 	    -- Details with binders removed
1025 1026 1027 1028 1029
	      IdWithOccInfo)	    -- Tagged binders

tagBinder usage binder
 = let
     usage'  = usage `delVarEnv` binder
1030
     binder' = setBinderOcc usage binder
1031 1032 1033
   in
   usage' `seq` (usage', binder')

1034 1035 1036
setBinderOcc :: UsageDetails -> CoreBndr -> CoreBndr
setBinderOcc usage bndr
  | isTyVar bndr      = bndr
1037 1038
  | isExportedId bndr = case idOccInfo bndr of
			  NoOccInfo -> bndr
twanvl's avatar
twanvl committed
1039
			  _         -> setIdOccInfo bndr NoOccInfo
1040 1041 1042
  	    -- Don't use local usage info for visible-elsewhere things
	    -- BUT *do* erase any IAmALoopBreaker annotation, because we're
	    -- about to re-generate it and it shouldn't be "sticky"
1043
			  
1044
  | otherwise = setIdOccInfo bndr occ_info
1045
  where
1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056
    occ_info = lookupVarEnv usage bndr `orElse` IAmDead
\end{code}


%************************************************************************
%*									*
\subsection{Operations over OccInfo}
%*									*
%************************************************************************

\begin{code}
1057
mkOneOcc :: OccEnv -> Id -> InterestingCxt -> UsageDetails
twanvl's avatar
twanvl committed
1058
mkOneOcc _env id int_cxt
1059 1060
  | isLocalId id = unitVarEnv id (OneOcc False True int_cxt)
  | otherwise    = emptyDetails
1061 1062 1063 1064

markMany, markInsideLam, markInsideSCC :: OccInfo -> OccInfo

markMany IAmDead = IAmDead
twanvl's avatar
twanvl committed
1065
markMany _       = NoOccInfo
1066 1067 1068

markInsideSCC occ = markMany occ

1069 1070
markInsideLam (OneOcc _ one_br int_cxt) = OneOcc True one_br int_cxt
markInsideLam occ		 	= occ
1071 1072 1073

addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo

1074 1075
addOccInfo IAmDead info2       = info2
addOccInfo info1 IAmDead       = info1
twanvl's avatar
twanvl committed
1076
addOccInfo _     _             = NoOccInfo
1077 1078 1079 1080 1081 1082

-- (orOccInfo orig new) is used
-- when combining occurrence info from branches of a case

orOccInfo IAmDead info2 = info2
orOccInfo info1 IAmDead = info1
twanvl's avatar
twanvl committed
1083 1084
orOccInfo (OneOcc in_lam1 _ int_cxt1)
	  (OneOcc in_lam2 _ int_cxt2)
1085 1086
  = OneOcc (in_lam1 || in_lam2)
	   False	-- False, because it occurs in both branches
1087
	   (int_cxt1 && int_cxt2)
twanvl's avatar
twanvl committed
1088
orOccInfo _     _       = NoOccInfo
1089
\end{code}