SpecConstr.hs 84.4 KB
Newer Older
Austin Seipp's avatar
Austin Seipp committed
1
{-
2
ToDo [Oct 2013]
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
3
~~~~~~~~~~~~~~~
4
1. Nuke ForceSpecConstr for good (it is subsumed by GHC.Types.SPEC in ghc-prim)
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
5 6
2. Nuke NoSpecConstr

Austin Seipp's avatar
Austin Seipp committed
7 8 9

(c) The GRASP/AQUA Project, Glasgow University, 1992-1998

10
\section[SpecConstr]{Specialise over constructors}
Austin Seipp's avatar
Austin Seipp committed
11
-}
12

13 14
{-# LANGUAGE CPP #-}

15
module SpecConstr(
16
        specConstrProgram
17 18 19
#ifdef GHCI
        , SpecConstrAnnotation(..)
#endif
20 21 22 23 24
    ) where

#include "HsVersions.h"

import CoreSyn
25 26
import CoreSubst
import CoreUtils
27
import CoreUnfold       ( couldBeSmallEnoughToInline )
28
import CoreFVs          ( exprsFreeVarsList )
29
import CoreMonad
30
import Literal          ( litIsLifted )
31
import HscTypes         ( ModGuts(..) )
32
import WwLib            ( mkWorkerArgs )
33
import DataCon
34
import Coercion         hiding( substCo )
35
import Rules
36
import Type             hiding ( substTy )
37
import TyCon            ( isRecursiveTyCon, tyConName )
38
import Id
39
import PprCore          ( pprParendExpr )
40
import MkCore           ( mkImpossibleExpr )
41
import Var
42 43
import VarEnv
import VarSet
44
import Name
45
import BasicTypes
46 47 48
import DynFlags         ( DynFlags(..) )
import StaticFlags      ( opt_PprStyle_Debug )
import Maybes           ( orElse, catMaybes, isJust, isNothing )
49
import Demand
50
import GHC.Serialized   ( deserializeWithData )
51
import Util
52
import Pair
53 54
import UniqSupply
import Outputable
55
import FastString
56
import UniqFM
Ian Lynagh's avatar
Ian Lynagh committed
57
import MonadUtils
58
import Control.Monad    ( zipWithM )
Ian Lynagh's avatar
Ian Lynagh committed
59
import Data.List
60
import PrelNames        ( specTyConName )
61
import Module
62

63
-- See Note [Forcing specialisation]
64 65 66
#ifndef GHCI
type SpecConstrAnnotation = ()
#else
Austin Seipp's avatar
Austin Seipp committed
67
import TyCon ( TyCon )
68 69
import GHC.Exts( SpecConstrAnnotation(..) )
#endif
70

Austin Seipp's avatar
Austin Seipp committed
71
{-
72
-----------------------------------------------------
73
                        Game plan
74 75 76
-----------------------------------------------------

Consider
77 78 79
        drop n []     = []
        drop 0 xs     = []
        drop n (x:xs) = drop (n-1) xs
80 81 82 83

After the first time round, we could pass n unboxed.  This happens in
numerical code too.  Here's what it looks like in Core:

84 85 86 87 88 89
        drop n xs = case xs of
                      []     -> []
                      (y:ys) -> case n of
                                  I# n# -> case n# of
                                             0 -> []
                                             _ -> drop (I# (n# -# 1#)) xs
90 91 92 93

Notice that the recursive call has an explicit constructor as argument.
Noticing this, we can make a specialised version of drop

94 95 96
        RULE: drop (I# n#) xs ==> drop' n# xs

        drop' n# xs = let n = I# n# in ...orig RHS...
97 98 99

Now the simplifier will apply the specialisation in the rhs of drop', giving

100 101 102 103
        drop' n# xs = case xs of
                      []     -> []
                      (y:ys) -> case n# of
                                  0 -> []
104
                                  _ -> drop' (n# -# 1#) xs
105

106
Much better!
107 108 109 110

We'd also like to catch cases where a parameter is carried along unchanged,
but evaluated each time round the loop:

111
        f i n = if i>0 || i>n then i else f (i*2) n
112 113 114 115

Here f isn't strict in n, but we'd like to avoid evaluating it each iteration.
In Core, by the time we've w/wd (f is strict in i) we get

116 117
        f i# n = case i# ># 0 of
                   False -> I# i#
118
                   True  -> case n of { I# n# ->
119 120
                            case i# ># n# of
                                False -> I# i#
121
                                True  -> f (i# *# 2#) n
122

123
At the call to f, we see that the argument, n is known to be (I# n#),
124
and n is evaluated elsewhere in the body of f, so we can play the same
125
trick as above.
126 127 128 129 130


Note [Reboxing]
~~~~~~~~~~~~~~~
We must be careful not to allocate the same constructor twice.  Consider
131 132
        f p = (...(case p of (a,b) -> e)...p...,
               ...let t = (r,s) in ...t...(f t)...)
133 134
At the recursive call to f, we can see that t is a pair.  But we do NOT want
to make a specialised copy:
135
        f' a b = let p = (a,b) in (..., ...)
136 137 138 139
because now t is allocated by the caller, then r and s are passed to the
recursive call, which allocates the (r,s) pair again.

This happens if
140
  (a) the argument p is used in other than a case-scrutinisation way.
141
  (b) the argument to the call is not a 'fresh' tuple; you have to
142
        look into its unfolding to see that it's a tuple
143 144 145

Hence the "OR" part of Note [Good arguments] below.

simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
146
ALTERNATIVE 2: pass both boxed and unboxed versions.  This no longer saves
147 148 149 150 151 152 153 154
allocation, but does perhaps save evals. In the RULE we'd have
something like

  f (I# x#) = f' (I# x#) x#

If at the call site the (I# x) was an unfolding, then we'd have to
rely on CSE to eliminate the duplicate allocation.... This alternative
doesn't look attractive enough to pursue.
155

156
ALTERNATIVE 3: ignore the reboxing problem.  The trouble is that
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
157 158
the conservative reboxing story prevents many useful functions from being
specialised.  Example:
159 160 161
        foo :: Maybe Int -> Int -> Int
        foo   (Just m) 0 = 0
        foo x@(Just m) n = foo x (n-m)
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
162 163 164
Here the use of 'x' will clearly not require boxing in the specialised function.

The strictness analyser has the same problem, in fact.  Example:
165
        f p@(a,b) = ...
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
166 167 168 169 170 171 172 173 174
If we pass just 'a' and 'b' to the worker, it might need to rebox the
pair to create (a,b).  A more sophisticated analysis might figure out
precisely the cases in which this could happen, but the strictness
analyser does no such analysis; it just passes 'a' and 'b', and hopes
for the best.

So my current choice is to make SpecConstr similarly aggressive, and
ignore the bad potential of reboxing.

175

176 177
Note [Good arguments]
~~~~~~~~~~~~~~~~~~~~~
178 179
So we look for

180
* A self-recursive function.  Ignore mutual recursion for now,
181 182 183 184
  because it's less common, and the code is simpler for self-recursion.

* EITHER

185
   a) At a recursive call, one or more parameters is an explicit
186
      constructor application
187 188
        AND
      That same parameter is scrutinised by a case somewhere in
189 190 191 192 193 194
      the RHS of the function

  OR

    b) At a recursive call, one or more parameters has an unfolding
       that is an explicit constructor application
195 196
        AND
      That same parameter is scrutinised by a case somewhere in
197
      the RHS of the function
198
        AND
199
      Those are the only uses of the parameter (see Note [Reboxing])
200 201


202 203
What to abstract over
~~~~~~~~~~~~~~~~~~~~~
204 205 206
There's a bit of a complication with type arguments.  If the call
site looks like

207
        f p = ...f ((:) [a] x xs)...
208 209 210

then our specialised function look like

211
        f_spec x xs = let p = (:) [a] x xs in ....as before....
212 213 214 215 216 217 218 219 220

This only makes sense if either
  a) the type variable 'a' is in scope at the top of f, or
  b) the type variable 'a' is an argument to f (and hence fs)

Actually, (a) may hold for value arguments too, in which case
we may not want to pass them.  Supose 'x' is in scope at f's
defn, but xs is not.  Then we'd like

221
        f_spec xs = let p = (:) [a] x xs in ....as before....
222 223 224 225 226 227 228

Similarly (b) may hold too.  If x is already an argument at the
call, no need to pass it again.

Finally, if 'a' is not in scope at the call site, we could abstract
it as we do the term variables:

229
        f_spec a x xs = let p = (:) [a] x xs in ...as before...
230 231 232

So the grand plan is:

233 234
        * abstract the call site to a constructor-only pattern
          e.g.  C x (D (f p) (g q))  ==>  C s1 (D s2 s3)
235

236
        * Find the free variables of the abstracted pattern
237

238 239
        * Pass these variables, less any that are in scope at
          the fn defn.  But see Note [Shadowing] below.
240 241 242 243 244 245 246


NOTICE that we only abstract over variables that are not in scope,
so we're in no danger of shadowing variables used in "higher up"
in f_spec's RHS.


247 248 249 250 251 252 253
Note [Shadowing]
~~~~~~~~~~~~~~~~
In this pass we gather up usage information that may mention variables
that are bound between the usage site and the definition site; or (more
seriously) may be bound to something different at the definition site.
For example:

254 255
        f x = letrec g y v = let x = ...
                             in ...(g (a,b) x)...
256

257
Since 'x' is in scope at the call site, we may make a rewrite rule that
258
looks like
259 260
        RULE forall a,b. g (a,b) x = ...
But this rule will never match, because it's really a different 'x' at
261 262 263 264 265 266 267 268 269
the call site -- and that difference will be manifest by the time the
simplifier gets to it.  [A worry: the simplifier doesn't *guarantee*
no-shadowing, so perhaps it may not be distinct?]

Anyway, the rule isn't actually wrong, it's just not useful.  One possibility
is to run deShadowBinds before running SpecConstr, but instead we run the
simplifier.  That gives the simplest possible program for SpecConstr to
chew on; and it virtually guarantees no shadowing.

270 271
Note [Specialising for constant parameters]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
This one is about specialising on a *constant* (but not necessarily
constructor) argument

    foo :: Int -> (Int -> Int) -> Int
    foo 0 f = 0
    foo m f = foo (f m) (+1)

It produces

    lvl_rmV :: GHC.Base.Int -> GHC.Base.Int
    lvl_rmV =
      \ (ds_dlk :: GHC.Base.Int) ->
        case ds_dlk of wild_alH { GHC.Base.I# x_alG ->
        GHC.Base.I# (GHC.Prim.+# x_alG 1)

    T.$wfoo :: GHC.Prim.Int# -> (GHC.Base.Int -> GHC.Base.Int) ->
    GHC.Prim.Int#
    T.$wfoo =
      \ (ww_sme :: GHC.Prim.Int#) (w_smg :: GHC.Base.Int -> GHC.Base.Int) ->
        case ww_sme of ds_Xlw {
          __DEFAULT ->
293 294 295
        case w_smg (GHC.Base.I# ds_Xlw) of w1_Xmo { GHC.Base.I# ww1_Xmz ->
        T.$wfoo ww1_Xmz lvl_rmV
        };
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
          0 -> 0
        }

The recursive call has lvl_rmV as its argument, so we could create a specialised copy
with that argument baked in; that is, not passed at all.   Now it can perhaps be inlined.

When is this worth it?  Call the constant 'lvl'
- If 'lvl' has an unfolding that is a constructor, see if the corresponding
  parameter is scrutinised anywhere in the body.

- If 'lvl' has an unfolding that is a inlinable function, see if the corresponding
  parameter is applied (...to enough arguments...?)

  Also do this is if the function has RULES?

311
Also
312

313 314
Note [Specialising for lambda parameters]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
315 316 317 318 319 320 321 322 323 324 325 326 327
    foo :: Int -> (Int -> Int) -> Int
    foo 0 f = 0
    foo m f = foo (f m) (\n -> n-m)

This is subtly different from the previous one in that we get an
explicit lambda as the argument:

    T.$wfoo :: GHC.Prim.Int# -> (GHC.Base.Int -> GHC.Base.Int) ->
    GHC.Prim.Int#
    T.$wfoo =
      \ (ww_sm8 :: GHC.Prim.Int#) (w_sma :: GHC.Base.Int -> GHC.Base.Int) ->
        case ww_sm8 of ds_Xlr {
          __DEFAULT ->
328 329 330 331 332 333 334 335
        case w_sma (GHC.Base.I# ds_Xlr) of w1_Xmf { GHC.Base.I# ww1_Xmq ->
        T.$wfoo
          ww1_Xmq
          (\ (n_ad3 :: GHC.Base.Int) ->
             case n_ad3 of wild_alB { GHC.Base.I# x_alA ->
             GHC.Base.I# (GHC.Prim.-# x_alA ds_Xlr)
             })
        };
336 337 338 339 340 341 342
          0 -> 0
        }

I wonder if SpecConstr couldn't be extended to handle this? After all,
lambda is a sort of constructor for functions and perhaps it already
has most of the necessary machinery?

Gabor Greif's avatar
Gabor Greif committed
343
Furthermore, there's an immediate win, because you don't need to allocate the lambda
344 345 346 347 348
at the call site; and if perchance it's called in the recursive call, then you
may avoid allocating it altogether.  Just like for constructors.

Looks cool, but probably rare...but it might be easy to implement.

349 350 351

Note [SpecConstr for casts]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
352
Consider
353 354 355 356 357 358 359 360
    data family T a :: *
    data instance T Int = T Int

    foo n = ...
       where
         go (T 0) = 0
         go (T n) = go (T (n-1))

361 362
The recursive call ends up looking like
        go (T (I# ...) `cast` g)
363
So we want to spot the constructor application inside the cast.
364 365
That's why we have the Cast case in argToPat

366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
Note [Local recursive groups]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For a *local* recursive group, we can see all the calls to the
function, so we seed the specialisation loop from the calls in the
body, not from the calls in the RHS.  Consider:

  bar m n = foo n (n,n) (n,n) (n,n) (n,n)
   where
     foo n p q r s
       | n == 0    = m
       | n > 3000  = case p of { (p1,p2) -> foo (n-1) (p2,p1) q r s }
       | n > 2000  = case q of { (q1,q2) -> foo (n-1) p (q2,q1) r s }
       | n > 1000  = case r of { (r1,r2) -> foo (n-1) p q (r2,r1) s }
       | otherwise = case s of { (s1,s2) -> foo (n-1) p q r (s2,s1) }

If we start with the RHSs of 'foo', we get lots and lots of specialisations,
most of which are not needed.  But if we start with the (single) call
in the rhs of 'bar' we get exactly one fully-specialised copy, and all
the recursive calls go to this fully-specialised copy. Indeed, the original
385
function is later collected as dead code.  This is very important in
386 387 388
specialising the loops arising from stream fusion, for example in NDP where
we were getting literally hundreds of (mostly unused) specialisations of
a local function.
389

390 391 392 393 394 395 396 397
In a case like the above we end up never calling the original un-specialised
function.  (Although we still leave its code around just in case.)

However, if we find any boring calls in the body, including *unsaturated*
ones, such as
      letrec foo x y = ....foo...
      in map foo xs
then we will end up calling the un-specialised function, so then we *should*
398 399
use the calls in the un-specialised RHS as seeds.  We call these
"boring call patterns", and callsToPats reports if it finds any of these.
400

401
Note [Seeding top-level recursive groups]
402
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
403 404 405 406 407 408 409 410 411 412 413 414 415 416
This seeding is done in the binding for seed_calls in specRec.

1. If all the bindings in a top-level recursive group are local (not
   exported), then all the calls are in the rest of the top-level
   bindings.  This means we can specialise with those call patterns
   ONLY, and NOT with the RHSs of the recursive group (exactly like
   Note [Local recursive groups])

2. But if any of the bindings are exported, the function may be called
   with any old arguments, so (for lack of anything better) we specialise
   based on
     (a) the call patterns in the RHS
     (b) the call patterns in the rest of the top-level bindings
   NB: before Apr 15 we used (a) only, but Dimitrios had an example
Simon Peyton Jones's avatar
Simon Peyton Jones committed
417 418 419 420
       where (b) was crucial, so I added that.
       Adding (b) also improved nofib allocation results:
                  multiplier: 4%   better
                  minimax:    2.8% better
421 422 423 424 425 426

Actually in case (2), instead of using the calls from the RHS, it
would be better to specialise in the importing module.  We'd need to
add an INLINEABLE pragma to the function, and then it can be
specialised in the importing scope, just as is done for type classes
in Specialise.specImports. This remains to be done (#10346).
427

428 429 430 431 432 433 434 435 436 437 438
Note [Top-level recursive groups]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To get the call usage information from "the rest of the top level
bindings" (c.f. Note [Seeding top-level recursive groups]), we work
backwards through the top-level bindings so we see the usage before we
get to the binding of the function.  Before we can collect the usage
though, we go through all the bindings and add them to the
environment. This is necessary because usage is only tracked for
functions in the environment.  These two passes are called
   'go' and 'goEnv'
in specConstrProgram.  (Looks a bit revolting to me.)
439

440 441 442 443 444 445 446 447
Note [Do not specialise diverging functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Specialising a function that just diverges is a waste of code.
Furthermore, it broke GHC (simpl014) thus:
   {-# STR Sb #-}
   f = \x. case x of (a,b) -> f x
If we specialise f we get
   f = \x. case x of (a,b) -> fspec a b
448
But fspec doesn't have decent strictness info.  As it happened,
449 450 451 452
(f x) :: IO t, so the state hack applied and we eta expanded fspec,
and hence f.  But now f's strictness is less than its arity, which
breaks an invariant.

453

454 455
Note [Forcing specialisation]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477

With stream fusion and in other similar cases, we want to fully
specialise some (but not necessarily all!) loops regardless of their
size and the number of specialisations.

We allow a library to do this, in one of two ways (one which is
deprecated):

  1) Add a parameter of type GHC.Types.SPEC (from ghc-prim) to the loop body.

  2) (Deprecated) Annotate a type with ForceSpecConstr from GHC.Exts,
     and then add *that* type as a parameter to the loop body

The reason #2 is deprecated is because it requires GHCi, which isn't
available for things like a cross compiler using stage1.

Here's a (simplified) example from the `vector` package. You may bring
the special 'force specialization' type into scope by saying:

  import GHC.Types (SPEC(..))

or by defining your own type (again, deprecated):
478 479 480 481

  data SPEC = SPEC | SPEC2
  {-# ANN type SPEC ForceSpecConstr #-}

482 483 484 485 486
(Note this is the exact same definition of GHC.Types.SPEC, just
without the annotation.)

After that, you say:

487 488 489 490
  foldl :: (a -> b -> a) -> a -> Stream b -> a
  {-# INLINE foldl #-}
  foldl f z (Stream step s _) = foldl_loop SPEC z s
    where
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
491 492 493
      foldl_loop !sPEC z s = case step s of
                              Yield x s' -> foldl_loop sPEC (f z x) s'
                              Skip       -> foldl_loop sPEC z s'
494 495 496
                              Done       -> z

SpecConstr will spot the SPEC parameter and always fully specialise
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
497 498 499 500 501 502 503 504 505
foldl_loop. Note that

  * We have to prevent the SPEC argument from being removed by
    w/w which is why (a) SPEC is a sum type, and (b) we have to seq on
    the SPEC argument.

  * And lastly, the SPEC argument is ultimately eliminated by
    SpecConstr itself so there is no runtime overhead.

simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
506
This is all quite ugly; we ought to come up with a better design.
507 508

ForceSpecConstr arguments are spotted in scExpr' and scTopBinds which then set
509
sc_force to True when calling specLoop. This flag does four things:
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
510 511 512 513 514 515
  * Ignore specConstrThreshold, to specialise functions of arbitrary size
        (see scTopBind)
  * Ignore specConstrCount, to make arbitrary numbers of specialisations
        (see specialise)
  * Specialise even for arguments that are not scrutinised in the loop
        (see argToPat; Trac #4488)
516
  * Only specialise on recursive types a finite number of times
517
        (see is_too_recursive; Trac #5550; Note [Limit recursive specialisation])
518

519 520 521 522
This flag is inherited for nested non-recursive bindings (which are likely to
be join points and hence should be fully specialised) but reset for nested
recursive bindings.

simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
523
What alternatives did I consider? Annotating the loop itself doesn't
524 525
work because (a) it is local and (b) it will be w/w'ed and having
w/w propagating annotations somehow doesn't seem like a good idea. The
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
526 527 528
types of the loop arguments really seem to be the most persistent
thing.

simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
529
Annotating the types that make up the loop state doesn't work,
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
530 531 532 533 534 535
either, because (a) it would prevent us from using types like Either
or tuples here, (b) we don't want to restrict the set of types that
can be used in Stream states and (c) some types are fixed by the user
(e.g., the accumulator here) but we still want to specialise as much
as possible.

simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
536 537 538 539 540 541 542 543 544 545 546
Alternatives to ForceSpecConstr
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Instead of giving the loop an extra argument of type SPEC, we
also considered *wrapping* arguments in SPEC, thus
  data SPEC a = SPEC a | SPEC2

  loop = \arg -> case arg of
                     SPEC state ->
                        case state of (x,y) -> ... loop (SPEC (x',y')) ...
                        S2 -> error ...
The idea is that a SPEC argument says "specialise this argument
547
regardless of whether the function case-analyses it".  But this
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
548 549 550 551 552 553 554
doesn't work well:
  * SPEC must still be a sum type, else the strictness analyser
    eliminates it
  * But that means that 'loop' won't be strict in its real payload
This loss of strictness in turn screws up specialisation, because
we may end up with calls like
   loop (SPEC (case z of (p,q) -> (q,p)))
555 556
Without the SPEC, if 'loop' were strict, the case would move out
and we'd see loop applied to a pair. But if 'loop' isn't strict
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
557 558
this doesn't look like a specialisable call.

559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591
Note [Limit recursive specialisation]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It is possible for ForceSpecConstr to cause an infinite loop of specialisation.
Because there is no limit on the number of specialisations, a recursive call with
a recursive constructor as an argument (for example, list cons) will generate
a specialisation for that constructor. If the resulting specialisation also
contains a recursive call with the constructor, this could proceed indefinitely.

For example, if ForceSpecConstr is on:
  loop :: [Int] -> [Int] -> [Int]
  loop z []         = z
  loop z (x:xs)     = loop (x:z) xs
this example will create a specialisation for the pattern
  loop (a:b) c      = loop' a b c

  loop' a b []      = (a:b)
  loop' a b (x:xs)  = loop (x:(a:b)) xs
and a new pattern is found:
  loop (a:(b:c)) d  = loop'' a b c d
which can continue indefinitely.

Roman's suggestion to fix this was to stop after a couple of times on recursive types,
but still specialising on non-recursive types as much as possible.

To implement this, we count the number of recursive constructors in each
function argument. If the maximum is greater than the specConstrRecursive limit,
do not specialise on that pattern.

This is only necessary when ForceSpecConstr is on: otherwise the specConstrCount
will force termination anyway.

See Trac #5550.

simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
592 593
Note [NoSpecConstr]
~~~~~~~~~~~~~~~~~~~
594
The ignoreDataCon stuff allows you to say
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
595
    {-# ANN type T NoSpecConstr #-}
596
to mean "don't specialise on arguments of this type".  It was added
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
597 598 599 600 601
before we had ForceSpecConstr.  Lacking ForceSpecConstr we specialised
regardless of size; and then we needed a way to turn that *off*.  Now
that we have ForceSpecConstr, this NoSpecConstr is probably redundant.
(Used only for PArray.)

602
-----------------------------------------------------
603
                Stuff not yet handled
604 605 606 607
-----------------------------------------------------

Here are notes arising from Roman's work that I don't want to lose.

608 609 610 611 612 613 614 615 616 617 618 619 620 621 622
Example 1
~~~~~~~~~
    data T a = T !a

    foo :: Int -> T Int -> Int
    foo 0 t = 0
    foo x t | even x    = case t of { T n -> foo (x-n) t }
            | otherwise = foo (x-1) t

SpecConstr does no specialisation, because the second recursive call
looks like a boxed use of the argument.  A pity.

    $wfoo_sFw :: GHC.Prim.Int# -> T.T GHC.Base.Int -> GHC.Prim.Int#
    $wfoo_sFw =
      \ (ww_sFo [Just L] :: GHC.Prim.Int#) (w_sFq [Just L] :: T.T GHC.Base.Int) ->
623 624 625 626 627 628 629 630 631 632
         case ww_sFo of ds_Xw6 [Just L] {
           __DEFAULT ->
                case GHC.Prim.remInt# ds_Xw6 2 of wild1_aEF [Dead Just A] {
                  __DEFAULT -> $wfoo_sFw (GHC.Prim.-# ds_Xw6 1) w_sFq;
                  0 ->
                    case w_sFq of wild_Xy [Just L] { T.T n_ad5 [Just U(L)] ->
                    case n_ad5 of wild1_aET [Just A] { GHC.Base.I# y_aES [Just L] ->
                    $wfoo_sFw (GHC.Prim.-# ds_Xw6 y_aES) wild_Xy
                    } } };
           0 -> 0
633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652

Example 2
~~~~~~~~~
    data a :*: b = !a :*: !b
    data T a = T !a

    foo :: (Int :*: T Int) -> Int
    foo (0 :*: t) = 0
    foo (x :*: t) | even x    = case t of { T n -> foo ((x-n) :*: t) }
                  | otherwise = foo ((x-1) :*: t)

Very similar to the previous one, except that the parameters are now in
a strict tuple. Before SpecConstr, we have

    $wfoo_sG3 :: GHC.Prim.Int# -> T.T GHC.Base.Int -> GHC.Prim.Int#
    $wfoo_sG3 =
      \ (ww_sFU [Just L] :: GHC.Prim.Int#) (ww_sFW [Just L] :: T.T
    GHC.Base.Int) ->
        case ww_sFU of ds_Xws [Just L] {
          __DEFAULT ->
653 654 655 656 657 658 659 660 661 662
        case GHC.Prim.remInt# ds_Xws 2 of wild1_aEZ [Dead Just A] {
          __DEFAULT ->
            case ww_sFW of tpl_B2 [Just L] { T.T a_sFo [Just A] ->
            $wfoo_sG3 (GHC.Prim.-# ds_Xws 1) tpl_B2             -- $wfoo1
            };
          0 ->
            case ww_sFW of wild_XB [Just A] { T.T n_ad7 [Just S(L)] ->
            case n_ad7 of wild1_aFd [Just L] { GHC.Base.I# y_aFc [Just L] ->
            $wfoo_sG3 (GHC.Prim.-# ds_Xws y_aFc) wild_XB        -- $wfoo2
            } } };
663 664 665 666
          0 -> 0 }

We get two specialisations:
"SC:$wfoo1" [0] __forall {a_sFB :: GHC.Base.Int sc_sGC :: GHC.Prim.Int#}
667 668
                  Foo.$wfoo sc_sGC (Foo.T @ GHC.Base.Int a_sFB)
                  = Foo.$s$wfoo1 a_sFB sc_sGC ;
669
"SC:$wfoo2" [0] __forall {y_aFp :: GHC.Prim.Int# sc_sGC :: GHC.Prim.Int#}
670 671
                  Foo.$wfoo sc_sGC (Foo.T @ GHC.Base.Int (GHC.Base.I# y_aFp))
                  = Foo.$s$wfoo y_aFp sc_sGC ;
672 673

But perhaps the first one isn't good.  After all, we know that tpl_B2 is
674
a T (I# x) really, because T is strict and Int has one constructor.  (We can't
Gabor Greif's avatar
typos  
Gabor Greif committed
675
unbox the strict fields, because T is polymorphic!)
676

Austin Seipp's avatar
Austin Seipp committed
677 678
************************************************************************
*                                                                      *
679
\subsection{Top level wrapper stuff}
Austin Seipp's avatar
Austin Seipp committed
680 681 682
*                                                                      *
************************************************************************
-}
683

684 685 686 687 688
specConstrProgram :: ModGuts -> CoreM ModGuts
specConstrProgram guts
  = do
      dflags <- getDynFlags
      us     <- getUniqueSupplyM
689
      annos  <- getFirstAnnotations deserializeWithData guts
690
      this_mod <- getModule
691 692
      let binds' = reverse $ fst $ initUs us $ do
                    -- Note [Top-level recursive groups]
693 694
                    (env, binds) <- goEnv (initScEnv dflags this_mod annos)
                                          (mg_binds guts)
695 696 697 698
                        -- binds is identical to (mg_binds guts), except that the
                        -- binders on the LHS have been replaced by extendBndr
                        --   (SPJ this seems like overkill; I don't think the binders
                        --    will change at all; and we don't substitute in the RHSs anyway!!)
699 700
                    go env nullUsage (reverse binds)

701
      return (guts { mg_binds = binds' })
702
  where
703
    -- See Note [Top-level recursive groups]
704 705 706 707 708
    goEnv env []            = return (env, [])
    goEnv env (bind:binds)  = do (env', bind')   <- scTopBindEnv env bind
                                 (env'', binds') <- goEnv env' binds
                                 return (env'', bind' : binds')

709
    -- Arg list of bindings is in reverse order
710 711 712 713
    go _   _   []           = return []
    go env usg (bind:binds) = do (usg', bind') <- scTopBind env usg bind
                                 binds' <- go env usg' binds
                                 return (bind' : binds')
714

Austin Seipp's avatar
Austin Seipp committed
715 716 717
{-
************************************************************************
*                                                                      *
718
\subsection{Environment: goes downwards}
Austin Seipp's avatar
Austin Seipp committed
719 720
*                                                                      *
************************************************************************
721

722 723
Note [Work-free values only in environment]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Austin Seipp's avatar
Austin Seipp committed
724
The sc_vals field keeps track of in-scope value bindings, so
725 726 727 728 729 730 731 732 733
that if we come across (case x of Just y ->...) we can reduce the
case from knowing that x is bound to a pair.

But only *work-free* values are ok here. For example if the envt had
    x -> Just (expensive v)
then we do NOT want to expand to
     let y = expensive v in ...
because the x-binding still exists and we've now duplicated (expensive v).

Austin Seipp's avatar
Austin Seipp committed
734
This seldom happens because let-bound constructor applications are
735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751
ANF-ised, but it can happen as a result of on-the-fly transformations in
SpecConstr itself.  Here is Trac #7865:

        let {
          a'_shr =
            case xs_af8 of _ {
              [] -> acc_af6;
              : ds_dgt [Dmd=<L,A>] ds_dgu [Dmd=<L,A>] ->
                (expensive x_af7, x_af7
            } } in
        let {
          ds_sht =
            case a'_shr of _ { (p'_afd, q'_afe) ->
            TSpecConstr_DoubleInline.recursive
              (GHC.Types.: @ GHC.Types.Int x_af7 wild_X6) (q'_afe, p'_afd)
            } } in

Austin Seipp's avatar
Austin Seipp committed
752
When processed knowing that xs_af8 was bound to a cons, we simplify to
753 754 755 756 757 758 759 760
   a'_shr = (expensive x_af7, x_af7)
and we do NOT want to inline that at the occurrence of a'_shr in ds_sht.
(There are other occurrences of a'_shr.)  No no no.

It would be possible to do some on-the-fly ANF-ising, so that a'_shr turned
into a work-free value again, thus
   a1 = expensive x_af7
   a'_shr = (a1, x_af7)
Austin Seipp's avatar
Austin Seipp committed
761
but that's more work, so until its shown to be important I'm going to
762
leave it for now.
Austin Seipp's avatar
Austin Seipp committed
763
-}
764

765
data ScEnv = SCE { sc_dflags    :: DynFlags,
766
                   sc_module    :: !Module,
767 768
                   sc_size      :: Maybe Int,   -- Size threshold
                   sc_count     :: Maybe Int,   -- Max # of specialisations for any one fn
769
                                                -- See Note [Avoiding exponential blowup]
770 771 772 773 774

                   sc_recursive :: Int,         -- Max # of specialisations over recursive type.
                                                -- Stops ForceSpecConstr from diverging.

                   sc_force     :: Bool,        -- Force specialisation?
775
                                                -- See Note [Forcing specialisation]
776

777
                   sc_subst     :: Subst,       -- Current substitution
778
                                                -- Maps InIds to OutExprs
779

780 781 782
                   sc_how_bound :: HowBoundEnv,
                        -- Binds interesting non-top-level variables
                        -- Domain is OutVars (*after* applying the substitution)
783

784
                   sc_vals      :: ValueEnv,
785 786
                        -- Domain is OutIds (*after* applying the substitution)
                        -- Used even for top-level bindings (but not imported ones)
787 788 789 790
                        -- The range of the ValueEnv is *work-free* values
                        -- such as (\x. blah), or (Just v)
                        -- but NOT (Just (expensive v))
                        -- See Note [Work-free values only in environment]
791

792
                   sc_annotations :: UniqFM SpecConstrAnnotation
793
             }
794

795 796
---------------------
-- As we go, we apply a substitution (sc_subst) to the current term
797
type InExpr = CoreExpr          -- _Before_ applying the subst
798
type InVar  = Var
799

800
type OutExpr = CoreExpr         -- _After_ applying the subst
801 802 803 804
type OutId   = Id
type OutVar  = Var

---------------------
805
type HowBoundEnv = VarEnv HowBound      -- Domain is OutVars
806

807
---------------------
808 809 810 811
type ValueEnv = IdEnv Value             -- Domain is OutIds
data Value    = ConVal AltCon [CoreArg] -- _Saturated_ constructors
                                        --   The AltCon is never DEFAULT
              | LambdaVal               -- Inlinable lambdas or PAPs
812

813 814
instance Outputable Value where
   ppr (ConVal con args) = ppr con <+> interpp'SP args
815
   ppr LambdaVal         = text "<Lambda>"
816

817
---------------------
818 819
initScEnv :: DynFlags -> Module -> UniqFM SpecConstrAnnotation -> ScEnv
initScEnv dflags this_mod anns
820
  = SCE { sc_dflags      = dflags,
821
          sc_module      = this_mod,
822 823 824 825 826 827 828
          sc_size        = specConstrThreshold dflags,
          sc_count       = specConstrCount     dflags,
          sc_recursive   = specConstrRecursive dflags,
          sc_force       = False,
          sc_subst       = emptySubst,
          sc_how_bound   = emptyVarEnv,
          sc_vals        = emptyVarEnv,
829
          sc_annotations = anns }
830

831 832
data HowBound = RecFun  -- These are the recursive functions for which
                        -- we seek interesting call patterns
833

834 835
              | RecArg  -- These are those functions' arguments, or their sub-components;
                        -- we gather occurrence information for these
836

837 838 839 840
instance Outputable HowBound where
  ppr RecFun = text "RecFun"
  ppr RecArg = text "RecArg"

841 842 843
scForce :: ScEnv -> Bool -> ScEnv
scForce env b = env { sc_force = b }

844 845 846 847
lookupHowBound :: ScEnv -> Id -> Maybe HowBound
lookupHowBound env id = lookupVarEnv (sc_how_bound env) id

scSubstId :: ScEnv -> Id -> CoreExpr
848
scSubstId env v = lookupIdSubst (text "scSubstId") (sc_subst env) v
849 850 851 852

scSubstTy :: ScEnv -> Type -> Type
scSubstTy env ty = substTy (sc_subst env) ty

853 854 855
scSubstCo :: ScEnv -> Coercion -> Coercion
scSubstCo env co = substCo (sc_subst env) co

856 857
zapScSubst :: ScEnv -> ScEnv
zapScSubst env = env { sc_subst = zapSubstEnv (sc_subst env) }
858

859
extendScInScope :: ScEnv -> [Var] -> ScEnv
860
        -- Bring the quantified variables into scope
861
extendScInScope env qvars = env { sc_subst = extendInScopeList (sc_subst env) qvars }
862

863
        -- Extend the substitution
864 865 866 867 868
extendScSubst :: ScEnv -> Var -> OutExpr -> ScEnv
extendScSubst env var expr = env { sc_subst = extendSubst (sc_subst env) var expr }

extendScSubstList :: ScEnv -> [(Var,OutExpr)] -> ScEnv
extendScSubstList env prs = env { sc_subst = extendSubstList (sc_subst env) prs }
869 870 871 872

extendHowBound :: ScEnv -> [Var] -> HowBound -> ScEnv
extendHowBound env bndrs how_bound
  = env { sc_how_bound = extendVarEnvList (sc_how_bound env)
873
                            [(bndr,how_bound) | bndr <- bndrs] }
874 875

extendBndrsWith :: HowBound -> ScEnv -> [Var] -> (ScEnv, [Var])
876
extendBndrsWith how_bound env bndrs
877
  = (env { sc_subst = subst', sc_how_bound = hb_env' }, bndrs')
878
  where
879
    (subst', bndrs') = substBndrs (sc_subst env) bndrs
880 881
    hb_env' = sc_how_bound env `extendVarEnvList`
                    [(bndr,how_bound) | bndr <- bndrs']
882 883

extendBndrWith :: HowBound -> ScEnv -> Var -> (ScEnv, Var)
884
extendBndrWith how_bound env bndr
885
  = (env { sc_subst = subst', sc_how_bound = hb_env' }, bndr')
886
  where
887 888 889 890 891
    (subst', bndr') = substBndr (sc_subst env) bndr
    hb_env' = extendVarEnv (sc_how_bound env) bndr' how_bound

extendRecBndrs :: ScEnv -> [Var] -> (ScEnv, [Var])
extendRecBndrs env bndrs  = (env { sc_subst = subst' }, bndrs')
892 893
                      where
                        (subst', bndrs') = substRecBndrs (sc_subst env) bndrs
894 895 896

extendBndr :: ScEnv -> Var -> (ScEnv, Var)
extendBndr  env bndr  = (env { sc_subst = subst' }, bndr')
897 898
                      where
                        (subst', bndr') = substBndr (sc_subst env) bndr
899

900
extendValEnv :: ScEnv -> Id -> Maybe Value -> ScEnv
901
extendValEnv env _  Nothing   = env
Austin Seipp's avatar
Austin Seipp committed
902
extendValEnv env id (Just cv)
903 904 905
 | valueIsWorkFree cv      -- Don't duplicate work!!  Trac #7865
 = env { sc_vals = extendVarEnv (sc_vals env) id cv }
extendValEnv env _ _ = env
906

907
extendCaseBndrs :: ScEnv -> OutExpr -> OutId -> AltCon -> [Var] -> (ScEnv, [Var])
908
-- When we encounter
909 910
--      case scrut of b
--          C x y -> ...
911 912 913 914
-- we want to bind b, to (C x y)
-- NB1: Extends only the sc_vals part of the envt
-- NB2: Kill the dead-ness info on the pattern binders x,y, since
--      they are potentially made alive by the [b -> C x y] binding
915 916
extendCaseBndrs env scrut case_bndr con alt_bndrs
   = (env2, alt_bndrs')
917
 where
918
   live_case_bndr = not (isDeadBinder case_bndr)
Peter Wortmann's avatar
Peter Wortmann committed
919 920
   env1 | Var v <- stripTicksTopE (const True) scrut
                         = extendValEnv env v cval
921
        | otherwise      = env  -- See Note [Add scrutinee to ValueEnv too]
922
   env2 | live_case_bndr = extendValEnv env1 case_bndr cval
923 924 925 926 927 928 929
        | otherwise      = env1

   alt_bndrs' | case scrut of { Var {} -> True; _ -> live_case_bndr }
              = map zap alt_bndrs
              | otherwise
              = alt_bndrs

930
   cval = case con of
931 932 933 934 935 936 937 938
                DEFAULT    -> Nothing
                LitAlt {}  -> Just (ConVal con [])
                DataAlt {} -> Just (ConVal con vanilla_args)
                      where
                        vanilla_args = map Type (tyConAppArgs (idType case_bndr)) ++
                                       varsToCoreExprs alt_bndrs

   zap v | isTyVar v = v                -- See NB2 above
939 940
         | otherwise = zapIdOccInfo v

941

942 943
decreaseSpecCount :: ScEnv -> Int -> ScEnv
-- See Note [Avoiding exponential blowup]
944
decreaseSpecCount env n_specs
945 946 947
  = env { sc_count = case sc_count env of
                       Nothing -> Nothing
                       Just n  -> Just (n `div` (n_specs + 1)) }
948 949
        -- The "+1" takes account of the original function;
        -- See Note [Avoiding exponential blowup]
950 951

---------------------------------------------------
952
-- See Note [Forcing specialisation]
953
ignoreType    :: ScEnv -> Type   -> Bool
954
ignoreDataCon  :: ScEnv -> DataCon -> Bool
955
forceSpecBndr :: ScEnv -> Var    -> Bool
956

957
#ifndef GHCI
958
ignoreType    _ _  = False
959
ignoreDataCon  _ _ = False
960 961
#else /* GHCI */

962
ignoreDataCon env dc = ignoreTyCon env (dataConTyCon dc)
963

964
ignoreType env ty
965 966 967
  = case tyConAppTyCon_maybe ty of
      Just tycon -> ignoreTyCon env tycon
      _          -> False
968

969 970 971
ignoreTyCon :: ScEnv -> TyCon -> Bool
ignoreTyCon env tycon
  = lookupUFM (sc_annotations env) tycon == Just NoSpecConstr
972
#endif /* GHCI */
973

974
forceSpecBndr env var = forceSpecFunTy env . snd . splitForAllTys . varType $ var
975 976 977 978 979 980 981 982 983 984 985

forceSpecFunTy :: ScEnv -> Type -> Bool
forceSpecFunTy env = any (forceSpecArgTy env) . fst . splitFunTys

forceSpecArgTy :: ScEnv -> Type -> Bool
forceSpecArgTy env ty
  | Just ty' <- coreView ty = forceSpecArgTy env ty'

forceSpecArgTy env ty
  | Just (tycon, tys) <- splitTyConApp_maybe ty
  , tycon /= funTyCon
986 987 988 989
      = tyConName tycon == specTyConName
#ifdef GHCI
        || lookupUFM (sc_annotations env) tycon == Just ForceSpecConstr
#endif
990 991 992
        || any (forceSpecArgTy env) tys

forceSpecArgTy _ _ = False
993

Austin Seipp's avatar
Austin Seipp committed
994
{-
995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006
Note [Add scrutinee to ValueEnv too]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider this:
   case x of y
     (a,b) -> case b of c
                I# v -> ...(f y)...
By the time we get to the call (f y), the ValueEnv
will have a binding for y, and for c
    y -> (a,b)
    c -> I# v
BUT that's not enough!  Looking at the call (f y) we
see that y is pair (a,b), but we also need to know what 'b' is.
1007
So in extendCaseBndrs we must *also* add the binding
1008 1009 1010 1011 1012 1013
   b -> I# v
else we lose a useful specialisation for f.  This is necessary even
though the simplifier has systematically replaced uses of 'x' with 'y'
and 'b' with 'c' in the code.  The use of 'b' in the ValueEnv came
from outside the case.  See Trac #4908 for the live example.

1014 1015 1016 1017
Note [Avoiding exponential blowup]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The sc_count field of the ScEnv says how many times we are prepared to
duplicate a single function.  But we must take care with recursive
1018
specialisations.  Consider
1019

1020 1021
        let $j1 = let $j2 = let $j3 = ...
                            in
1022
                            ...$j3...
1023
                  in
1024
                  ...$j2...
1025
        in
1026 1027 1028 1029
        ...$j1...

If we specialise $j1 then in each specialisation (as well as the original)
we can specialise $j2, and similarly $j3.  Even if we make just *one*
Gabor Greif's avatar
typos  
Gabor Greif committed
1030
specialisation of each, because we also have the original we'll get 2^n
1031 1032 1033 1034 1035
copies of $j3, which is not good.

So when recursively specialising we divide the sc_count by the number of
copies we are making at this level, including the original.

1036

Austin Seipp's avatar
Austin Seipp committed
1037 1038
************************************************************************
*                                                                      *
1039
\subsection{Usage information: flows upwards}
Austin Seipp's avatar
Austin Seipp committed
1040 1041 1042
*                                                                      *
************************************************************************
-}
1043 1044 1045

data ScUsage
   = SCU {
1046 1047 1048
        scu_calls :: CallEnv,           -- Calls
                                        -- The functions are a subset of the
                                        --      RecFuns in the ScEnv
1049

1050 1051
        scu_occs :: !(IdEnv ArgOcc)     -- Information on argument occurrences
     }                                  -- The domain is OutIds
1052

1053
type CallEnv = IdEnv [Call]
1054
data Call = Call Id [CoreArg] ValueEnv
1055 1056
        -- The arguments of the call, together with the
        -- env giving the constructor bindings at the call site
1057 1058
        -- We keep the function mainly for debug output

1059 1060
instance Outputable ScUsage where
  ppr (SCU { scu_calls = calls, scu_occs = occs })
1061 1062
    = text "SCU" <+> braces (sep [ ptext (sLit "calls =") <+> ppr calls
                                         , text "occs =" <+> ppr occs ])
1063

1064 1065
instance Outputable Call where
  ppr (Call fn args _) = ppr fn <+> fsep (map pprParendExpr args)
1066

1067 1068
nullUsage :: ScUsage
nullUsage = SCU { scu_calls = emptyVarEnv, scu_occs = emptyVarEnv }
1069

1070 1071
combineCalls :: CallEnv -> CallEnv -> CallEnv
combineCalls = plusVarEnv_C (++)
1072 1073
  where
--    plus cs ds | length res > 1