SimplCore.hs 44.8 KB
Newer Older
Austin Seipp's avatar
Austin Seipp committed
1
2
3
{-
(c) The GRASP/AQUA Project, Glasgow University, 1992-1998

4
\section[SimplCore]{Driver for simplifying @Core@ programs}
Austin Seipp's avatar
Austin Seipp committed
5
-}
6

7
8
{-# LANGUAGE CPP #-}

9
module SimplCore ( core2core, simplifyExpr ) where
10
11
12

#include "HsVersions.h"

13
14
import GhcPrelude

15
import DynFlags
16
import CoreSyn
17
import HscTypes
18
import CSE              ( cseProgram )
19
import Rules            ( mkRuleBase, unionRuleBase,
20
21
                          extendRuleBaseList, ruleCheckProgram, addRuleInfo,
                          getRules )
22
23
import PprCore          ( pprCoreBindings, pprCoreExpr )
import OccurAnal        ( occurAnalysePgm, occurAnalyseExpr )
24
import IdInfo
25
26
import CoreStats        ( coreBindsSize, coreBindsStats, exprSize )
import CoreUtils        ( mkTicks, stripTicksTop )
27
import CoreLint         ( endPass, lintPassResult, dumpPassResult,
Peter Wortmann's avatar
Peter Wortmann committed
28
                          lintAnnots )
29
import Simplify         ( simplTopBinds, simplExpr, simplRules )
30
import SimplUtils       ( simplEnvForGHCi, activeRule, activeUnfolding )
31
import SimplEnv
32
import SimplMonad
33
import CoreMonad
34
35
36
import qualified ErrUtils as Err
import FloatIn          ( floatInwards )
import FloatOut         ( floatOutwards )
37
import FamInstEnv
38
import Id
39
import ErrUtils         ( withTiming )
40
import BasicTypes       ( CompilerPhase(..), isDefaultInlinePragma )
41
import VarSet
42
import VarEnv
43
44
45
46
import LiberateCase     ( liberateCase )
import SAT              ( doStaticArgs )
import Specialise       ( specProgram)
import SpecConstr       ( specConstrProgram)
47
import DmdAnal          ( dmdAnalProgram )
48
import CallArity        ( callArityAnalProgram )
49
import Exitify          ( exitifyProgram )
50
import WorkWrap         ( wwTopBinds )
51
import Vectorise        ( vectorise )
52
import SrcLoc
Ian Lynagh's avatar
Ian Lynagh committed
53
import Util
54
import Module
Matthew Pickering's avatar
Matthew Pickering committed
55
56
import Plugins          ( withPlugins,installCoreToDos )
import DynamicLoading  -- ( initializePlugins )
57

58
import Maybes
59
import UniqSupply       ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
60
import UniqFM
61
import Outputable
Ian Lynagh's avatar
Ian Lynagh committed
62
import Control.Monad
63
import qualified GHC.LanguageExtensions as LangExt
Austin Seipp's avatar
Austin Seipp committed
64
65
66
{-
************************************************************************
*                                                                      *
67
\subsection{The driver for the simplifier}
Austin Seipp's avatar
Austin Seipp committed
68
69
70
*                                                                      *
************************************************************************
-}
71

72
core2core :: HscEnv -> ModGuts -> IO ModGuts
73
74
75
76
core2core hsc_env guts@(ModGuts { mg_module  = mod
                                , mg_loc     = loc
                                , mg_deps    = deps
                                , mg_rdr_env = rdr_env })
77
  = do { us <- mkSplitUniqSupply 's'
78
       -- make sure all plugins are loaded
79
80

       ; let builtin_passes = getCoreToDo dflags
81
             orph_mods = mkModuleSet (mod : dep_orphs deps)
82
       ;
83
       ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base us mod
84
                                    orph_mods print_unqual loc $
Matthew Pickering's avatar
Matthew Pickering committed
85
86
87
88
89
                           do { hsc_env' <- getHscEnv
                              ; dflags' <- liftIO $ initializePlugins hsc_env'
                                                      (hsc_dflags hsc_env')
                              ; all_passes <- withPlugins dflags'
                                                installCoreToDos builtin_passes
90
                              ; runCorePasses all_passes guts }
91

92
93
94
       ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
             "Grand total simplifier statistics"
             (pprSimplCount stats)
95

96
97
98
       ; return guts2 }
  where
    dflags         = hsc_dflags hsc_env
99
    home_pkg_rules = hptRules hsc_env (dep_mods deps)
100
    hpt_rule_base  = mkRuleBase home_pkg_rules
101
    print_unqual   = mkPrintUnqualified dflags rdr_env
102
    -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
103
104
105
106
    -- This is very convienent for the users of the monad (e.g. plugins do not have to
    -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
    -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
    -- would mean our cached value would go out of date.
107

Austin Seipp's avatar
Austin Seipp committed
108
109
110
{-
************************************************************************
*                                                                      *
111
           Generating the main optimisation pipeline
Austin Seipp's avatar
Austin Seipp committed
112
113
114
*                                                                      *
************************************************************************
-}
115
116
117

getCoreToDo :: DynFlags -> [CoreToDo]
getCoreToDo dflags
118
  = flatten_todos core_todo
119
120
121
122
123
  where
    opt_level     = optLevel           dflags
    phases        = simplPhases        dflags
    max_iter      = maxSimplIterations dflags
    rule_check    = ruleCheck          dflags
124
    call_arity    = gopt Opt_CallArity                    dflags
125
    exitification = gopt Opt_Exitification                dflags
ian@well-typed.com's avatar
ian@well-typed.com committed
126
127
128
129
130
131
132
    strictness    = gopt Opt_Strictness                   dflags
    full_laziness = gopt Opt_FullLaziness                 dflags
    do_specialise = gopt Opt_Specialise                   dflags
    do_float_in   = gopt Opt_FloatIn                      dflags
    cse           = gopt Opt_CSE                          dflags
    spec_constr   = gopt Opt_SpecConstr                   dflags
    liberate_case = gopt Opt_LiberateCase                 dflags
133
    late_dmd_anal = gopt Opt_LateDmdAnal                  dflags
134
    late_specialise = gopt Opt_LateSpecialise             dflags
ian@well-typed.com's avatar
ian@well-typed.com committed
135
136
137
    static_args   = gopt Opt_StaticArgumentTransformation dflags
    rules_on      = gopt Opt_EnableRewriteRules           dflags
    eta_expand_on = gopt Opt_DoLambdaEtaExpansion         dflags
138
    ww_on         = gopt Opt_WorkerWrapper                dflags
Simon Peyton Jones's avatar
Simon Peyton Jones committed
139
    vectorise_on  = gopt Opt_Vectorise                    dflags
140
    static_ptrs   = xopt LangExt.StaticPointers           dflags
141
142
143
144
145
146
147
148

    maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)

    maybe_strictness_before phase
      = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness

    base_mode = SimplMode { sm_phase      = panic "base_mode"
                          , sm_names      = []
149
                          , sm_dflags     = dflags
150
151
152
153
154
155
156
157
158
159
160
161
162
163
                          , sm_rules      = rules_on
                          , sm_eta_expand = eta_expand_on
                          , sm_inline     = True
                          , sm_case_case  = True }

    simpl_phase phase names iter
      = CoreDoPasses
      $   [ maybe_strictness_before phase
          , CoreDoSimplify iter
                (base_mode { sm_phase = Phase phase
                           , sm_names = names })

          , maybe_rule_check (Phase phase) ]

164
          -- Vectorisation can introduce a fair few common sub expressions involving
165
166
          --  DPH primitives. For example, see the Reverse test from dph-examples.
          --  We need to eliminate these common sub expressions before their definitions
167
          --  are inlined in phase 2. The CSE introduces lots of  v1 = v2 bindings,
168
          --  so we also run simpl_gently to inline them.
Simon Peyton Jones's avatar
Simon Peyton Jones committed
169
      ++  (if vectorise_on && phase == 3
170
171
            then [CoreCSE, simpl_gently]
            else [])
172
173

    vectorisation
Simon Peyton Jones's avatar
Simon Peyton Jones committed
174
      = runWhen vectorise_on $
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
          CoreDoPasses [ simpl_gently, CoreDoVectorisation ]

                -- By default, we have 2 phases before phase 0.

                -- Want to run with inline phase 2 after the specialiser to give
                -- maximum chance for fusion to work before we inline build/augment
                -- in phase 1.  This made a difference in 'ansi' where an
                -- overloaded function wasn't inlined till too late.

                -- Need phase 1 so that build/augment get
                -- inlined.  I found that spectral/hartel/genfft lost some useful
                -- strictness in the function sumcode' if augment is not inlined
                -- before strictness analysis runs
    simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
                                | phase <- [phases, phases-1 .. 1] ]


        -- initial simplify: mk specialiser happy: minimum effort please
    simpl_gently = CoreDoSimplify max_iter
                       (base_mode { sm_phase = InitialPhase
                                  , sm_names = ["Gentle"]
                                  , sm_rules = rules_on   -- Note [RULEs enabled in SimplGently]
Simon Peyton Jones's avatar
Simon Peyton Jones committed
197
198
                                  , sm_inline = not vectorise_on
                                              -- See Note [Inline in InitialPhase]
199
200
201
202
                                  , sm_case_case = False })
                          -- Don't do case-of-case transformations.
                          -- This makes full laziness work better

203
204
205
206
207
    strictness_pass = if ww_on
                       then [CoreDoStrictness,CoreDoWorkerWrapper]
                       else [CoreDoStrictness]


208
    -- New demand analyser
209
210
211
212
    demand_analyser = (CoreDoPasses (
                           strictness_pass ++
                           [simpl_phase 0 ["post-worker-wrapper"] max_iter]
                           ))
213

214
    -- Static forms are moved to the top level with the FloatOut pass.
215
    -- See Note [Grand plan for static forms] in StaticPtrTable.
216
    static_ptrs_float_outwards =
lukemaurer's avatar
lukemaurer committed
217
218
219
220
221
222
223
224
225
226
      runWhen static_ptrs $ CoreDoPasses
        [ simpl_gently -- Float Out can't handle type lets (sometimes created
                       -- by simpleOptPgm via mkParallelBindings)
        , CoreDoFloatOutwards FloatOutSwitches
          { floatOutLambdas   = Just 0
          , floatOutConstants = True
          , floatOutOverSatApps = False
          , floatToTopLevelOnly = True
          }
        ]
227

228
229
    core_todo =
     if opt_level == 0 then
230
       [ vectorisation,
231
         static_ptrs_float_outwards,
232
         CoreDoSimplify max_iter
Simon Peyton Jones's avatar
Simon Peyton Jones committed
233
             (base_mode { sm_phase = Phase 0
234
                        , sm_names = ["Non-opt simplification"] })
Simon Peyton Jones's avatar
Simon Peyton Jones committed
235
236
       ]

237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
     else {- opt_level >= 1 -} [

    -- We want to do the static argument transform before full laziness as it
    -- may expose extra opportunities to float things outwards. However, to fix
    -- up the output of the transformation we need at do at least one simplify
    -- after this before anything else
        runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),

        -- We run vectorisation here for now, but we might also try to run
        -- it later
        vectorisation,

        -- initial simplify: mk specialiser happy: minimum effort please
        simpl_gently,

        -- Specialisation is best done before full laziness
        -- so that overloaded functions have all their dictionary lambdas manifest
        runWhen do_specialise CoreDoSpecialising,

256
        if full_laziness then
257
258
259
           CoreDoFloatOutwards FloatOutSwitches {
                                 floatOutLambdas   = Just 0,
                                 floatOutConstants = True,
260
                                 floatOutOverSatApps = False,
261
                                 floatToTopLevelOnly = False }
262
                -- Was: gentleFloatOutSwitches
263
                --
264
265
                -- I have no idea why, but not floating constants to
                -- top level is very bad in some cases.
266
                --
267
268
269
270
271
                -- Notably: p_ident in spectral/rewrite
                --          Changing from "gentle" to "constantsOnly"
                --          improved rewrite's allocation by 19%, and
                --          made 0.0% difference to any other nofib
                --          benchmark
272
                --
273
                -- Not doing floatOutOverSatApps yet, we'll do
274
275
276
277
278
                -- that later on when we've had a chance to get more
                -- accurate arity information.  In fact it makes no
                -- difference at all to performance if we do it here,
                -- but maybe we save some unnecessary to-and-fro in
                -- the simplifier.
279
280
        else
           -- Even with full laziness turned off, we still need to float static
281
282
           -- forms to the top level. See Note [Grand plan for static forms] in
           -- StaticPtrTable.
283
           static_ptrs_float_outwards,
284

285
        simpl_phases,
286

287
288
289
290
291
                -- Phase 0: allow all Ids to be inlined now
                -- This gets foldr inlined before strictness analysis

                -- At least 3 iterations because otherwise we land up with
                -- huge dead expressions because of an infelicity in the
Gabor Greif's avatar
Gabor Greif committed
292
                -- simplifier.
293
294
295
296
297
298
                --      let k = BIG in foldr k z xs
                -- ==>  let k = BIG in letrec go = \xs -> ...(k x).... in go xs
                -- ==>  let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
                -- Don't stop now!
        simpl_phase 0 ["main"] (max max_iter 3),

299
300
301
302
303
304
305
        runWhen do_float_in CoreDoFloatInwards,
            -- Run float-inwards immediately before the strictness analyser
            -- Doing so pushes bindings nearer their use site and hence makes
            -- them more likely to be strict. These bindings might only show
            -- up after the inlining from simplification.  Example in fulsom,
            -- Csg.calc, where an arg of timesDouble thereby becomes strict.

306
307
308
309
310
        runWhen call_arity $ CoreDoPasses
            [ CoreDoCallArity
            , simpl_phase 0 ["post-call-arity"] max_iter
            ],

311
        runWhen strictness demand_analyser,
312

313
314
315
        runWhen exitification CoreDoExitify,
            -- See note [Placement of the exitification pass]

316
317
        runWhen full_laziness $
           CoreDoFloatOutwards FloatOutSwitches {
318
319
                                 floatOutLambdas     = floatLamArgs dflags,
                                 floatOutConstants   = True,
320
321
                                 floatOutOverSatApps = True,
                                 floatToTopLevelOnly = False },
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
                -- nofib/spectral/hartel/wang doubles in speed if you
                -- do full laziness late in the day.  It only happens
                -- after fusion and other stuff, so the early pass doesn't
                -- catch it.  For the record, the redex is
                --        f_el22 (f_el21 r_midblock)


        runWhen cse CoreCSE,
                -- We want CSE to follow the final full-laziness pass, because it may
                -- succeed in commoning up things floated out by full laziness.
                -- CSE used to rely on the no-shadowing invariant, but it doesn't any more

        runWhen do_float_in CoreDoFloatInwards,

        maybe_rule_check (Phase 0),

                -- Case-liberation for -O2.  This should be after
                -- strictness analysis and the simplification which follows it.
        runWhen liberate_case (CoreDoPasses [
            CoreLiberateCase,
            simpl_phase 0 ["post-liberate-case"] max_iter
            ]),         -- Run the simplifier after LiberateCase to vastly
Gabor Greif's avatar
Gabor Greif committed
344
                        -- reduce the possibility of shadowing
345
                        -- Reason: see Note [Shadowing] in SpecConstr.hs
346
347
348
349
350

        runWhen spec_constr CoreDoSpecConstr,

        maybe_rule_check (Phase 0),

351
352
353
354
        runWhen late_specialise
          (CoreDoPasses [ CoreDoSpecialising
                        , simpl_phase 0 ["post-late-spec"] max_iter]),

355
        -- Final clean-up simplification:
356
357
        simpl_phase 0 ["final"] max_iter,

358
359
360
361
        runWhen late_dmd_anal $ CoreDoPasses (
            strictness_pass ++
            [simpl_phase 0 ["post-late-ww"] max_iter]
          ),
362

363
364
365
        -- Final run of the demand_analyser, ensures that one-shot thunks are
        -- really really one-shot thunks. Only needed if the demand analyser
        -- has run at all. See Note [Final Demand Analyser run] in DmdAnal
Simon Peyton Jones's avatar
Simon Peyton Jones committed
366
367
        -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution
        -- can become /exponentially/ more expensive. See Trac #11731, #12996.
368
369
        runWhen (strictness || late_dmd_anal) CoreDoStrictness,

370
        maybe_rule_check (Phase 0)
371
372
     ]

373
374
375
376
377
378
379
    -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
    flatten_todos [] = []
    flatten_todos (CoreDoNothing : rest) = flatten_todos rest
    flatten_todos (CoreDoPasses passes : rest) =
      flatten_todos passes ++ flatten_todos rest
    flatten_todos (todo : rest) = todo : flatten_todos rest

Simon Peyton Jones's avatar
Simon Peyton Jones committed
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
{- Note [Inline in InitialPhase]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In GHC 8 and earlier we did not inline anything in the InitialPhase. But that is
confusing for users because when they say INLINE they expect the function to inline
right away.

So now we do inlining immediately, even in the InitialPhase, assuming that the
Id's Activation allows it.

This is a surprisingly big deal. Compiler performance improved a lot
when I made this change:

   perf/compiler/T5837.run            T5837 [stat too good] (normal)
   perf/compiler/parsing001.run       parsing001 [stat too good] (normal)
   perf/compiler/T12234.run           T12234 [stat too good] (optasm)
   perf/compiler/T9020.run            T9020 [stat too good] (optasm)
   perf/compiler/T3064.run            T3064 [stat too good] (normal)
   perf/compiler/T9961.run            T9961 [stat too good] (normal)
   perf/compiler/T13056.run           T13056 [stat too good] (optasm)
   perf/compiler/T9872d.run           T9872d [stat too good] (normal)
   perf/compiler/T783.run             T783 [stat too good] (normal)
   perf/compiler/T12227.run           T12227 [stat too good] (normal)
   perf/should_run/lazy-bs-alloc.run  lazy-bs-alloc [stat too good] (normal)
   perf/compiler/T1969.run            T1969 [stat too good] (normal)
   perf/compiler/T9872a.run           T9872a [stat too good] (normal)
   perf/compiler/T9872c.run           T9872c [stat too good] (normal)
   perf/compiler/T9872b.run           T9872b [stat too good] (normal)
   perf/compiler/T9872d.run           T9872d [stat too good] (normal)

409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
Note [RULEs enabled in SimplGently]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RULES are enabled when doing "gentle" simplification.  Two reasons:

  * We really want the class-op cancellation to happen:
        op (df d1 d2) --> $cop3 d1 d2
    because this breaks the mutual recursion between 'op' and 'df'

  * I wanted the RULE
        lift String ===> ...
    to work in Template Haskell when simplifying
    splices, so we get simpler code for literal strings

But watch out: list fusion can prevent floating.  So use phase control
to switch off those rules until after floating.

Austin Seipp's avatar
Austin Seipp committed
425
426
************************************************************************
*                                                                      *
427
                  The CoreToDo interpreter
Austin Seipp's avatar
Austin Seipp committed
428
429
430
*                                                                      *
************************************************************************
-}
431
432

runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
433
runCorePasses passes guts
434
435
436
  = foldM do_pass guts passes
  where
    do_pass guts CoreDoNothing = return guts
437
    do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
438
    do_pass guts pass
439
440
441
442
       = withTiming getDynFlags
                    (ppr pass <+> brackets (ppr mod))
                    (const ()) $ do
            { guts' <- lintAnnots (ppr pass) (doCorePass pass) guts
443
            ; endPass pass (mg_binds guts') (mg_rules guts')
444
            ; return guts' }
445

446
447
    mod = mg_module guts

Joachim Breitner's avatar
Joachim Breitner committed
448
449
450
doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
doCorePass pass@(CoreDoSimplify {})  = {-# SCC "Simplify" #-}
                                       simplifyPgm pass
451

Joachim Breitner's avatar
Joachim Breitner committed
452
453
doCorePass CoreCSE                   = {-# SCC "CommonSubExpr" #-}
                                       doPass cseProgram
454

Joachim Breitner's avatar
Joachim Breitner committed
455
456
doCorePass CoreLiberateCase          = {-# SCC "LiberateCase" #-}
                                       doPassD liberateCase
457

Joachim Breitner's avatar
Joachim Breitner committed
458
doCorePass CoreDoFloatInwards        = {-# SCC "FloatInwards" #-}
459
                                       floatInwards
460

Joachim Breitner's avatar
Joachim Breitner committed
461
462
doCorePass (CoreDoFloatOutwards f)   = {-# SCC "FloatOutwards" #-}
                                       doPassDUM (floatOutwards f)
463

Joachim Breitner's avatar
Joachim Breitner committed
464
465
doCorePass CoreDoStaticArgs          = {-# SCC "StaticArgs" #-}
                                       doPassU doStaticArgs
466

467
468
469
doCorePass CoreDoCallArity           = {-# SCC "CallArity" #-}
                                       doPassD callArityAnalProgram

470
471
472
doCorePass CoreDoExitify             = {-# SCC "Exitify" #-}
                                       doPass exitifyProgram

Joachim Breitner's avatar
Joachim Breitner committed
473
474
doCorePass CoreDoStrictness          = {-# SCC "NewStranal" #-}
                                       doPassDFM dmdAnalProgram
475

Joachim Breitner's avatar
Joachim Breitner committed
476
477
doCorePass CoreDoWorkerWrapper       = {-# SCC "WorkWrap" #-}
                                       doPassDFU wwTopBinds
478

Joachim Breitner's avatar
Joachim Breitner committed
479
480
doCorePass CoreDoSpecialising        = {-# SCC "Specialise" #-}
                                       specProgram
481

Joachim Breitner's avatar
Joachim Breitner committed
482
483
doCorePass CoreDoSpecConstr          = {-# SCC "SpecConstr" #-}
                                       specConstrProgram
484

Joachim Breitner's avatar
Joachim Breitner committed
485
486
doCorePass CoreDoVectorisation       = {-# SCC "Vectorise" #-}
                                       vectorise
487

Joachim Breitner's avatar
Joachim Breitner committed
488
489
490
491
doCorePass CoreDoPrintCore              = observe   printCore
doCorePass (CoreDoRuleCheck phase pat)  = ruleCheckPass phase pat
doCorePass CoreDoNothing                = return
doCorePass (CoreDoPasses passes)        = runCorePasses passes
492

Ben Gamari's avatar
Ben Gamari committed
493
#if defined(GHCI)
Joachim Breitner's avatar
Joachim Breitner committed
494
doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass
Simon Jakobi's avatar
Simon Jakobi committed
495
496
#else
doCorePass pass@CoreDoPluginPass {}  = pprPanic "doCorePass" (ppr pass)
497
498
#endif

Simon Jakobi's avatar
Simon Jakobi committed
499
500
501
502
503
doCorePass pass@CoreDesugar          = pprPanic "doCorePass" (ppr pass)
doCorePass pass@CoreDesugarOpt       = pprPanic "doCorePass" (ppr pass)
doCorePass pass@CoreTidy             = pprPanic "doCorePass" (ppr pass)
doCorePass pass@CorePrep             = pprPanic "doCorePass" (ppr pass)
doCorePass pass@CoreOccurAnal        = pprPanic "doCorePass" (ppr pass)
504

Austin Seipp's avatar
Austin Seipp committed
505
506
507
{-
************************************************************************
*                                                                      *
508
\subsection{Core pass combinators}
Austin Seipp's avatar
Austin Seipp committed
509
510
511
*                                                                      *
************************************************************************
-}
512

513
514
515
printCore :: DynFlags -> CoreProgram -> IO ()
printCore dflags binds
    = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds)
516

517
ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
518
519
520
521
522
523
524
ruleCheckPass current_phase pat guts =
    withTiming getDynFlags
               (text "RuleCheck"<+>brackets (ppr $ mg_module guts))
               (const ()) $ do
    { rb <- getRuleBase
    ; dflags <- getDynFlags
    ; vis_orphs <- getVisibleOrphanMods
525
526
    ; let rule_fn fn = getRules (RuleEnv rb vis_orphs) fn
                        ++ (mg_rules guts)
Ben Gamari's avatar
Ben Gamari committed
527
    ; liftIO $ putLogMsg dflags NoReason Err.SevDump noSrcSpan
Sylvain Henry's avatar
Sylvain Henry committed
528
                   (defaultDumpStyle dflags)
529
                   (ruleCheckProgram current_phase pat
530
                      rule_fn (mg_binds guts))
531
    ; return guts }
532

533
doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
534
535
536
537
538
doPassDUM do_pass = doPassM $ \binds -> do
    dflags <- getDynFlags
    us     <- getUniqueSupplyM
    liftIO $ do_pass dflags us binds

539
doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
540
541
doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags))

542
doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
543
544
doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags)

545
doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
546
547
doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us)

548
doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
549
550
doPassU do_pass = doPassDU (const do_pass)

551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
doPassDFM do_pass guts = do
    dflags <- getDynFlags
    p_fam_env <- getPackageFamInstEnv
    let fam_envs = (p_fam_env, mg_fam_inst_env guts)
    doPassM (liftIO . do_pass dflags fam_envs) guts

doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
doPassDFU do_pass guts = do
    dflags <- getDynFlags
    us     <- getUniqueSupplyM
    p_fam_env <- getPackageFamInstEnv
    let fam_envs = (p_fam_env, mg_fam_inst_env guts)
    doPass (do_pass dflags fam_envs us) guts

566
567
-- Most passes return no stats and don't change rules: these combinators
-- let us lift them to the full blown ModGuts+CoreM world
568
doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts
569
570
571
572
doPassM bind_f guts = do
    binds' <- bind_f (mg_binds guts)
    return (guts { mg_binds = binds' })

573
doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
574
doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) }
575
576

-- Observer passes just peek; don't modify the bindings at all
577
observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts
578
579
observe do_pass = doPassM $ \binds -> do
    dflags <- getDynFlags
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
580
    _ <- liftIO $ do_pass dflags binds
581
    return binds
582

Austin Seipp's avatar
Austin Seipp committed
583
584
585
{-
************************************************************************
*                                                                      *
586
        Gentle simplification
Austin Seipp's avatar
Austin Seipp committed
587
588
589
*                                                                      *
************************************************************************
-}
590

591
simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do
592
593
             -> CoreExpr
             -> IO CoreExpr
594
595
596
597
598
-- simplifyExpr is called by the driver to simplify an
-- expression typed in at the interactive prompt
--
-- Also used by Template Haskell
simplifyExpr dflags expr
599
600
  = withTiming (pure dflags) (text "Simplify [expr]") (const ()) $
    do  {
601
        ; us <-  mkSplitUniqSupply 's'
602

603
        ; let sz = exprSize expr
pcapriotti's avatar
pcapriotti committed
604

605
606
607
        ; (expr', counts) <- initSmpl dflags emptyRuleEnv
                               emptyFamInstEnvs us sz
                               (simplExprGently (simplEnvForGHCi dflags) expr)
608

609
        ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags)
610
                  "Simplifier statistics" (pprSimplCount counts)
611

612
613
        ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression"
                        (pprCoreExpr expr')
614

615
616
        ; return expr'
        }
617

618
simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
619
620
621
622
-- Simplifies an expression
--      does occurrence analysis, then simplification
--      and repeats (twice currently) because one pass
--      alone leaves tons of crud.
623
-- Used (a) for user expressions typed in at the interactive prompt
624
625
--      (b) the LHS and RHS of a RULE
--      (c) Template Haskell splices
626
--
627
-- The name 'Gently' suggests that the SimplMode is SimplGently,
628
629
630
-- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
-- enforce that; it just simplifies the expression twice

631
632
633
634
635
-- It's important that simplExprGently does eta reduction; see
-- Note [Simplifying the left-hand side of a RULE] above.  The
-- simplifier does indeed do eta reduction (it's in Simplify.completeLam)
-- but only if -O is on.

636
637
simplExprGently env expr = do
    expr1 <- simplExpr env (occurAnalyseExpr expr)
638
    simplExpr env (occurAnalyseExpr expr1)
639

Austin Seipp's avatar
Austin Seipp committed
640
641
642
{-
************************************************************************
*                                                                      *
643
\subsection{The driver for the simplifier}
Austin Seipp's avatar
Austin Seipp committed
644
645
646
*                                                                      *
************************************************************************
-}
647

648
649
650
simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts
simplifyPgm pass guts
  = do { hsc_env <- getHscEnv
651
652
       ; us <- getUniqueSupplyM
       ; rb <- getRuleBase
653
654
       ; liftIOWithCount $
         simplifyPgmIO pass hsc_env us rb guts }
655

656
simplifyPgmIO :: CoreToDo
657
658
659
660
661
              -> HscEnv
              -> UniqSupply
              -> RuleBase
              -> ModGuts
              -> IO (SimplCount, ModGuts)  -- New bindings
662

663
simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
664
              hsc_env us hpt_rule_base
665
              guts@(ModGuts { mg_module = this_mod
666
                            , mg_rdr_env = rdr_env
667
                            , mg_deps = deps
668
                            , mg_binds = binds, mg_rules = rules
669
                            , mg_fam_inst_env = fam_inst_env })
670
671
  = do { (termination_msg, it_count, counts_out, guts')
           <- do_iteration us 1 [] binds rules
672

Jan Stolarek's avatar
Jan Stolarek committed
673
674
        ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags &&
                                dopt Opt_D_dump_simpl_stats  dflags)
675
                  "Simplifier statistics for following pass"
Jan Stolarek's avatar
Jan Stolarek committed
676
677
                  (vcat [text termination_msg <+> text "after" <+> ppr it_count
                                              <+> text "iterations",
678
679
                         blankLine,
                         pprSimplCount counts_out])
680

681
        ; return (counts_out, guts')
682
683
    }
  where
684
685
686
    dflags       = hsc_dflags hsc_env
    print_unqual = mkPrintUnqualified dflags rdr_env
    simpl_env    = mkSimplEnv mode
687
688
    active_rule  = activeRule mode
    active_unf   = activeUnfolding mode
689

690
    do_iteration :: UniqSupply
691
692
693
694
695
                 -> Int          -- Counts iterations
                 -> [SimplCount] -- Counts from earlier iterations, reversed
                 -> CoreProgram  -- Bindings in
                 -> [CoreRule]   -- and orphan rules
                 -> IO (String, Int, SimplCount, ModGuts)
696

697
    do_iteration us iteration_no counts_so_far binds rules
698
699
700
        -- iteration_no is the number of the iteration we are
        -- about to begin, with '1' for the first
      | iteration_no > max_iterations   -- Stop if we've run out of iterations
701
      = WARN( debugIsOn && (max_iterations > 2)
702
703
            , hang (text "Simplifier bailing out after" <+> int max_iterations
                    <+> text "iterations"
704
705
                    <+> (brackets $ hsep $ punctuate comma $
                         map (int . simplCountN) (reverse counts_so_far)))
706
                 2 (text "Size =" <+> ppr (coreBindsStats binds)))
707

708
709
710
                -- Subtract 1 from iteration_no to get the
                -- number of iterations we actually completed
        return ( "Simplifier baled out", iteration_no - 1
711
712
               , totalise counts_so_far
               , guts { mg_binds = binds, mg_rules = rules } )
713

714
715
      -- Try and force thunks off the binds; significantly reduces
      -- space usage, especially with -O.  JRS, 000620.
716
      | let sz = coreBindsSize binds
717
      , () <- sz `seq` ()     -- Force it
718
      = do {
719
                -- Occurrence analysis
Gabor Greif's avatar
Gabor Greif committed
720
           let {   -- Note [Vectorisation declarations and occurrences]
721
722
                   -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                   -- During the 'InitialPhase' (i.e., before vectorisation), we need to make sure
723
                   -- that the right-hand sides of vectorisation declarations are taken into
724
725
726
727
                   -- account during occurrence analysis. After the 'InitialPhase', we need to ensure
                   -- that the binders representing variable vectorisation declarations are kept alive.
                   -- (In contrast to automatically vectorised variables, their unvectorised versions
                   -- don't depend on them.)
728
                 vectVars = mkVarSet $
niteria's avatar
niteria committed
729
                              catMaybes [ fmap snd $ lookupDVarEnv (vectInfoVar (mg_vect_info guts)) bndr
730
                                        | Vect bndr _ <- mg_vect_decls guts]
731
                              ++
niteria's avatar
niteria committed
732
                              catMaybes [ fmap snd $ lookupDVarEnv (vectInfoVar (mg_vect_info guts)) bndr
733
734
735
736
                                        | bndr <- bindersOfBinds binds]
                                        -- FIXME: This second comprehensions is only needed as long as we
                                        --        have vectorised bindings where we get "Could NOT call
                                        --        vectorised from original version".
737
              ;  (maybeVects, maybeVectVars)
738
739
740
                   = case sm_phase mode of
                       InitialPhase -> (mg_vect_decls guts, vectVars)
                       _            -> ([], vectVars)
741
               ; tagged_binds = {-# SCC "OccAnal" #-}
742
                     occurAnalysePgm this_mod active_unf active_rule rules
743
                                     maybeVects maybeVectVars binds
744
745
746
               } ;
           Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
                     (pprCoreBindings tagged_binds);
747

748
                -- Get any new rules, and extend the rule base
749
                -- See Note [Overall plumbing for rules] in Rules.hs
750
751
752
753
754
755
756
                -- We need to do this regularly, because simplification can
                -- poke on IdInfo thunks, which in turn brings in new rules
                -- behind the scenes.  Otherwise there's a danger we'll simply
                -- miss the rules for Ids hidden inside imported inlinings
           eps <- hscEPS hsc_env ;
           let  { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps)
                ; rule_base2 = extendRuleBaseList rule_base1 rules
757
758
                ; fam_envs = (eps_fam_inst_env eps, fam_inst_env)
                ; vis_orphs = this_mod : dep_orphs deps } ;
759
760

                -- Simplify the program
761
762
           ((binds1, rules1), counts1) <-
             initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $
763
764
               do { (floats, env1) <- {-# SCC "SimplTopBinds" #-}
                                      simplTopBinds simpl_env tagged_binds
765
766
767
768
769

                      -- Apply the substitution to rules defined in this module
                      -- for imported Ids.  Eg  RULE map my_f = blah
                      -- If we have a substitution my_f :-> other_f, we'd better
                      -- apply it to the rule to, or it'll never match
770
                  ; rules1 <- simplRules env1 Nothing rules
771

772
                  ; return (getTopFloatBinds floats, rules1) } ;
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790

                -- Stop if nothing happened; don't dump output
           if isZeroSimplCount counts1 then
                return ( "Simplifier reached fixed point", iteration_no
                       , totalise (counts1 : counts_so_far)  -- Include "free" ticks
                       , guts { mg_binds = binds1, mg_rules = rules1 } )
           else do {
                -- Short out indirections
                -- We do this *after* at least one run of the simplifier
                -- because indirection-shorting uses the export flag on *occurrences*
                -- and that isn't guaranteed to be ok until after the first run propagates
                -- stuff from the binding site to its occurrences
                --
                -- ToDo: alas, this means that indirection-shorting does not happen at all
                --       if the simplifier does nothing (not common, I know, but unsavoury)
           let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;

                -- Dump the result of this iteration
791
           dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ;
792
           lintPassResult hsc_env pass binds2 ;
793
794
795

                -- Loop
           do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
pcapriotti's avatar
pcapriotti committed
796
           } }
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
797
      | otherwise = panic "do_iteration"
798
      where
799
        (us1, us2) = splitUniqSupply us
800

801
        -- Remember the counts_so_far are reversed
802
        totalise :: [SimplCount] -> SimplCount
803
804
        totalise = foldr (\c acc -> acc `plusSimplCount` c)
                         (zeroSimplCount dflags)
805

simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
806
807
simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO"

808
-------------------
809
810
811
812
dump_end_iteration :: DynFlags -> PrintUnqualified -> Int
                   -> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
dump_end_iteration dflags print_unqual iteration_no counts binds rules
  = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules
813
  where
Jan Stolarek's avatar
Jan Stolarek committed
814
    mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations
815
816
            | otherwise                               = Nothing
            -- Show details if Opt_D_dump_simpl_iterations is on
817

818
819
    hdr = text "Simplifier iteration=" <> int iteration_no
    pp_counts = vcat [ text "---- Simplifier counts for" <+> hdr
820
                     , pprSimplCount counts
821
                     , text "---- End of simplifier counts for" <+> hdr ]
822

Austin Seipp's avatar
Austin Seipp committed
823
824
825
{-
************************************************************************
*                                                                      *
826
                Shorting out indirections
Austin Seipp's avatar
Austin Seipp committed
827
828
*                                                                      *
************************************************************************
829

830
If we have this:
831

832
833
834
        x_local = <expression>
        ...bindings...
        x_exported = x_local
835
836
837

where x_exported is exported, and x_local is not, then we replace it with this:

838
839
840
        x_exported = <expression>
        x_local = x_exported
        ...bindings...
841
842
843
844
845
846

Without this we never get rid of the x_exported = x_local thing.  This
save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
makes strictness information propagate better.  This used to happen in
the final phase, but it's tidier to do it here.

847
848
849
850
Note [Transferring IdInfo]
~~~~~~~~~~~~~~~~~~~~~~~~~~
We want to propagage any useful IdInfo on x_local to x_exported.

851
852
853
854
855
856
STRICTNESS: if we have done strictness analysis, we want the strictness info on
x_local to transfer to x_exported.  Hence the copyIdInfo call.

RULES: we want to *add* any RULES for x_local to x_exported.


simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
857
Note [Messing up the exported Id's RULES]
858
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
859
860
861
We must be careful about discarding (obviously) or even merging the
RULES on the exported Id. The example that went bad on me at one stage
was this one:
862

863
    iterate :: (a -> a) -> a -> [a]
864
865
866
        [Exported]
    iterate = iterateList

867
868
    iterateFB c f x = x `c` iterateFB c f (f x)
    iterateList f x =  x : iterateList f (f x)
869
870
        [Not exported]

871
    {-# RULES
872
873
    "iterate"   forall f x.     iterate f x = build (\c _n -> iterateFB c f x)
    "iterateFB"                 iterateFB (:) = iterateList
874
875
876
877
878
879
     #-}

This got shorted out to:

    iterateList :: (a -> a) -> a -> [a]
    iterateList = iterate
880

881
882
    iterateFB c f x = x `c` iterateFB c f (f x)
    iterate f x =  x : iterate f (f x)
883

884
    {-# RULES
885
886
    "iterate"   forall f x.     iterate f x = build (\c _n -> iterateFB c f x)
    "iterateFB"                 iterateFB (:) = iterate
887
888
     #-}

889
890
891
892
And now we get an infinite loop in the rule system
        iterate f x -> build (\cn -> iterateFB c f x)
                    -> iterateFB (:) f x
                    -> iterate f x
893

894
895
896
Old "solution":
        use rule switching-off pragmas to get rid
        of iterateList in the first place
897

898
899
900
901
902
903
904
905
906
907
908
909
910
911
But in principle the user *might* want rules that only apply to the Id
he says.  And inline pragmas are similar
   {-# NOINLINE f #-}
   f = local
   local = <stuff>
Then we do not want to get rid of the NOINLINE.

Hence hasShortableIdinfo.


Note [Rules and indirection-zapping]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Problem: what if x_exported has a RULE that mentions something in ...bindings...?
Then the things mentioned can be out of scope!  Solution
912
913
914
 a) Make sure that in this pass the usage-info from x_exported is
        available for ...bindings...
 b) If there are any such RULES, rec-ify the entire top-level.
915
    It'll get sorted out next time round
916
917
918
919
920
921

Other remarks
~~~~~~~~~~~~~
If more than one exported thing is equal to a local thing (i.e., the
local thing really is shared), then we do one only:
\begin{verbatim}
922
923
924
        x_local = ....
        x_exported1 = x_local
        x_exported2 = x_local
925
==>
926
        x_exported1 = ....
927

928
        x_exported2 = x_exported1
929
930
931
932
\end{verbatim}

We rely on prior eta reduction to simplify things like
\begin{verbatim}
933
        x_exported = /\ tyvars -> x_local tyvars
934
==>
935
        x_exported = x_local
936
937
938
\end{verbatim}
Hence,there's a possibility of leaving unchanged something like this:
\begin{verbatim}
939
940
        x_local = ....
        x_exported1 = x_local Int
941
\end{verbatim}
942
By the time we've thrown away the types in STG land this
943
could be eliminated.  But I don't think it's very common
944
and it's dangerous to do this fiddling in STG land
945
946
because we might elminate a binding that's mentioned in the
unfolding for something.
Peter Wortmann's avatar
Peter Wortmann committed
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965

Note [Indirection zapping and ticks]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Unfortunately this is another place where we need a special case for
ticks. The following happens quite regularly:

        x_local = <expression>
        x_exported = tick<x> x_local

Which we want to become:

        x_exported =  tick<x> <expression>

As it makes no sense to keep the tick and the expression on separate
bindings. Note however that that this might increase the ticks scoping
over the execution of x_local, so we can only do this for floatable
ticks. More often than not, other references will be unfoldings of
x_exported, and therefore carry the tick anyway.
Austin Seipp's avatar
Austin Seipp committed
966
-}
967

Peter Wortmann's avatar
Peter Wortmann committed
968
type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks
969

970
shortOutIndirections :: CoreProgram -> CoreProgram
971
972
shortOutIndirections binds
  | isEmptyVarEnv ind_env = binds
973
974
  | no_need_to_flatten    = binds'                      -- See Note [Rules and indirect-zapping]
  | otherwise             = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff
975
  where
976
    ind_env            = makeIndEnv binds
Peter Wortmann's avatar
Peter Wortmann committed
977
    -- These exported Ids are the subjects  of the indirection-elimination
978
979
980
981
    exp_ids            = map fst $ nonDetEltsUFM ind_env
      -- It's OK to use nonDetEltsUFM here because we forget the ordering
      -- by immediately converting to a set or check if all the elements
      -- satisfy a predicate.
Peter Wortmann's avatar
Peter Wortmann committed
982
    exp_id_set         = mkVarSet exp_ids
983
    no_need_to_flatten = all (null . ruleInfoRules . idSpecialisation) exp_ids
984
    binds'             = concatMap zap binds
985
986

    zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
987
    zap (Rec pairs)       = [Rec (concatMap zapPair pairs)]
988
989

    zapPair (bndr, rhs)
Peter Wortmann's avatar
Peter Wortmann committed
990
991
992
993
994
995
        | bndr `elemVarSet` exp_id_set = []
        | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr
                                       = [(transferIdInfo exp_id bndr,
                                           mkTicks ticks rhs),
                                          (bndr, Var exp_id)]
        | otherwise                    = [(bndr,rhs)]
996

997
998
999
1000
1001
1002
makeIndEnv :: [CoreBind] -> IndEnv
makeIndEnv binds
  = foldr add_bind emptyVarEnv binds
  where
    add_bind :: CoreBind -> IndEnv -> IndEnv
    add_bind (NonRec exported_id rhs) env = add_pair (exported_id, rhs) env
1003
    add_bind (Rec pairs)              env = foldr add_pair env pairs
1004
1005

    add_pair :: (Id,CoreExpr) -> IndEnv -> IndEnv