CoreSyn.hs 92 KB
Newer Older
Austin Seipp's avatar
Austin Seipp committed
1 2 3 4
{-
(c) The University of Glasgow 2006
(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
-}
Simon Marlow's avatar
Simon Marlow committed
5

lukemaurer's avatar
lukemaurer committed
6
{-# LANGUAGE CPP, DeriveDataTypeable, FlexibleContexts #-}
7
{-# LANGUAGE NamedFieldPuns #-}
8
{-# LANGUAGE BangPatterns #-}
Ian Lynagh's avatar
Ian Lynagh committed
9

10
-- | CoreSyn holds all the main data types for use by for the Glasgow Haskell Compiler midsection
11
module CoreSyn (
12
        -- * Main data types
Peter Wortmann's avatar
Peter Wortmann committed
13 14
        Expr(..), Alt, Bind(..), AltCon(..), Arg,
        Tickish(..), TickishScoping(..), TickishPlacement(..),
15
        CoreProgram, CoreExpr, CoreAlt, CoreBind, CoreArg, CoreBndr,
16
        TaggedExpr, TaggedAlt, TaggedBind, TaggedArg, TaggedBndr(..), deTagExpr,
17

18 19 20 21
        -- * In/Out type synonyms
        InId, InBind, InExpr, InAlt, InArg, InType, InKind,
               InBndr, InVar, InCoercion, InTyVar, InCoVar,
        OutId, OutBind, OutExpr, OutAlt, OutArg, OutType, OutKind,
Ningning Xie's avatar
Ningning Xie committed
22
               OutBndr, OutVar, OutCoercion, OutTyVar, OutCoVar, MOutCoercion,
23

24
        -- ** 'Expr' construction
25
        mkLet, mkLets, mkLetNonRec, mkLetRec, mkLams,
lukemaurer's avatar
lukemaurer committed
26
        mkApps, mkTyApps, mkCoApps, mkVarApps, mkTyArg,
27 28 29 30 31 32 33 34 35 36

        mkIntLit, mkIntLitInt,
        mkWordLit, mkWordLitWord,
        mkWord64LitWord64, mkInt64LitInt64,
        mkCharLit, mkStringLit,
        mkFloatLit, mkFloatLitFloat,
        mkDoubleLit, mkDoubleLitDouble,

        mkConApp, mkConApp2, mkTyBind, mkCoBind,
        varToCoreExpr, varsToCoreExprs,
37

38
        isId, cmpAltCon, cmpAlt, ltAlt,
39 40 41

        -- ** Simple 'Expr' access functions and predicates
        bindersOf, bindersOfBinds, rhssOfBind, rhssOfAlts,
42
        collectBinders, collectTyBinders, collectTyAndValBinders,
lukemaurer's avatar
lukemaurer committed
43
        collectNBinders,
44
        collectArgs, stripNArgs, collectArgsTicks, flattenBinds,
45

46 47 48
        exprToType, exprToCoercion_maybe,
        applyTypeToArg,

49
        isValArg, isTypeArg, isCoArg, isTyCoArg, valArgCount, valBndrCount,
50
        isRuntimeArg, isRuntimeVar,
51

52
        -- * Tick-related functions
Peter Wortmann's avatar
Peter Wortmann committed
53 54 55 56
        tickishCounts, tickishScoped, tickishScopesLike, tickishFloatable,
        tickishCanSplit, mkNoCount, mkNoScope,
        tickishIsCode, tickishPlace,
        tickishContains,
57 58

        -- * Unfolding data types
59 60
        Unfolding(..),  UnfoldingGuidance(..), UnfoldingSource(..),

61
        -- ** Constructing 'Unfolding's
62
        noUnfolding, bootUnfolding, evaldUnfolding, mkOtherCon,
63
        unSaturatedOk, needSaturated, boringCxtOk, boringCxtNotOk,
64 65 66 67 68

        -- ** Predicates and deconstruction on 'Unfolding'
        unfoldingTemplate, expandUnfolding_maybe,
        maybeUnfoldingTemplate, otherCons,
        isValueUnfolding, isEvaldUnfolding, isCheapUnfolding,
69
        isExpandableUnfolding, isConLikeUnfolding, isCompulsoryUnfolding,
70
        isStableUnfolding, isFragileUnfolding, hasSomeUnfolding,
71
        isBootUnfolding,
72 73 74 75
        canUnfold, neverUnfoldGuidance, isStableSource,

        -- * Annotated expression data types
        AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt,
76

77
        -- ** Operations on annotated expressions
Peter Wortmann's avatar
Peter Wortmann committed
78
        collectAnnArgs, collectAnnArgsTicks,
79

80
        -- ** Operations on annotations
81
        deAnnotate, deAnnotate', deAnnAlt, deAnnBind,
lukemaurer's avatar
lukemaurer committed
82
        collectAnnBndrs, collectNAnnBndrs,
83

84
        -- * Orphanhood
85
        IsOrphan(..), isOrphan, notOrphan, chooseOrphanAnchor,
86

87
        -- * Core rule data types
88
        CoreRule(..), RuleBase,
89
        RuleName, RuleFun, IdUnfoldingFun, InScopeEnv,
90
        RuleEnv(..), mkRuleEnv, emptyRuleEnv,
91

92
        -- ** Operations on 'CoreRule's
93
        ruleArity, ruleName, ruleIdName, ruleActivation,
94
        setRuleIdName, ruleModule,
95
        isBuiltinRule, isLocalRule, isAutoRule,
96 97
    ) where

98
#include "HsVersions.h"
99

100 101
import GhcPrelude

Simon Marlow's avatar
Simon Marlow committed
102
import CostCentre
103
import VarEnv( InScopeSet )
Simon Marlow's avatar
Simon Marlow committed
104 105 106 107
import Var
import Type
import Coercion
import Name
108
import NameSet
109
import NameEnv( NameEnv, emptyNameEnv )
Simon Marlow's avatar
Simon Marlow committed
110 111
import Literal
import DataCon
112
import Module
Simon Marlow's avatar
Simon Marlow committed
113
import BasicTypes
114
import DynFlags
115
import Outputable
twanvl's avatar
twanvl committed
116
import Util
David Feuer's avatar
David Feuer committed
117
import UniqSet
Peter Wortmann's avatar
Peter Wortmann committed
118
import SrcLoc     ( RealSrcSpan, containsSpan )
119
import Binary
120

121
import Data.Data hiding (TyCon)
122
import Data.Int
123 124
import Data.Word

125
infixl 4 `mkApps`, `mkTyApps`, `mkVarApps`, `App`, `mkCoApps`
126
-- Left associative, so that we can say (f `mkTyApps` xs `mkVarApps` ys)
127

Austin Seipp's avatar
Austin Seipp committed
128 129 130
{-
************************************************************************
*                                                                      *
131
\subsection{The main data types}
Austin Seipp's avatar
Austin Seipp committed
132 133
*                                                                      *
************************************************************************
134

135
These data types are the heart of the compiler
Austin Seipp's avatar
Austin Seipp committed
136
-}
137

138
-- | This is the data type that represents GHCs core intermediate language. Currently
139
-- GHC uses System FC <https://www.microsoft.com/en-us/research/publication/system-f-with-type-equality-coercions/> for this purpose,
140 141 142 143 144
-- which is closely related to the simpler and better known System F <http://en.wikipedia.org/wiki/System_F>.
--
-- We get from Haskell source to this Core language in a number of stages:
--
-- 1. The source code is parsed into an abstract syntax tree, which is represented
145
--    by the data type 'GHC.Hs.Expr.HsExpr' with the names being 'RdrName.RdrNames'
146 147
--
-- 2. This syntax tree is /renamed/, which attaches a 'Unique.Unique' to every 'RdrName.RdrName'
148
--    (yielding a 'Name.Name') to disambiguate identifiers which are lexically identical.
149 150 151 152 153 154 155 156 157 158 159 160 161
--    For example, this program:
--
-- @
--      f x = let f x = x + 1
--            in f (x - 2)
-- @
--
--    Would be renamed by having 'Unique's attached so it looked something like this:
--
-- @
--      f_1 x_2 = let f_3 x_4 = x_4 + 1
--                in f_3 (x_2 - 2)
-- @
162
--    But see Note [Shadowing] below.
163 164
--
-- 3. The resulting syntax tree undergoes type checking (which also deals with instantiating
165
--    type class arguments) to yield a 'GHC.Hs.Expr.HsExpr' type that has 'Id.Id' as it's names.
166
--
167
-- 4. Finally the syntax tree is /desugared/ from the expressive 'GHC.Hs.Expr.HsExpr' type into
168 169 170 171
--    this 'Expr' type, which has far fewer constructors and hence is easier to perform
--    optimization, analysis and code generation on.
--
-- The type parameter @b@ is for the type of binders in the expression tree.
172 173 174 175
--
-- The language consists of the following elements:
--
-- *  Variables
176
--    See Note [Variable occurrences in Core]
177 178 179 180
--
-- *  Primitive literals
--
-- *  Applications: note that the argument may be a 'Type'.
181 182
--    See Note [CoreSyn let/app invariant]
--    See Note [Levity polymorphism invariants]
183 184
--
-- *  Lambda abstraction
185
--    See Note [Levity polymorphism invariants]
186 187 188 189
--
-- *  Recursive and non recursive @let@s. Operationally
--    this corresponds to allocating a thunk for the things
--    bound and then executing the sub-expression.
190
--
191
--    See Note [CoreSyn letrec invariant]
192
--    See Note [CoreSyn let/app invariant]
193
--    See Note [Levity polymorphism invariants]
194
--    See Note [CoreSyn type and coercion invariant]
195
--
196
-- *  Case expression. Operationally this corresponds to evaluating
197 198 199
--    the scrutinee (expression examined) to weak head normal form
--    and then examining at most one level of resulting constructor (i.e. you
--    cannot do nested pattern matching directly with this).
200
--
201 202
--    The binder gets bound to the value of the scrutinee,
--    and the 'Type' must be that of all the case alternatives
203
--
204
--    IMPORTANT: see Note [Case expression invariants]
Ben Gamari's avatar
Ben Gamari committed
205
--
206 207
-- *  Cast an expression to a particular type.
--    This is used to implement @newtype@s (a @newtype@ constructor or
208 209 210 211 212 213 214 215
--    destructor just becomes a 'Cast' in Core) and GADTs.
--
-- *  Notes. These allow general information to be added to expressions
--    in the syntax tree
--
-- *  A type: this should only show up at the top level of an Arg
--
-- *  A coercion
216

217 218 219 220 221 222 223 224 225 226 227
{- Note [Why does Case have a 'Type' field?]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The obvious alternative is
   exprType (Case scrut bndr alts)
     | (_,_,rhs1):_ <- alts
     = exprType rhs1

But caching the type in the Case constructor
  exprType (Case scrut bndr ty alts) = ty
is better for at least three reasons:

228
* It works when there are no alternatives (see case invariant 1 above)
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251

* It might be faster in deeply-nested situations.

* It might not be quite the same as (exprType rhs) for one
  of the RHSs in alts. Consider a phantom type synonym
       type S a = Int
   and we want to form the case expression
        case x of { K (a::*) -> (e :: S a) }
   Then exprType of the RHS is (S a), but we cannot make that be
   the 'ty' in the Case constructor because 'a' is simply not in
   scope there. Instead we must expand the synonym to Int before
   putting it in the Case constructor.  See CoreUtils.mkSingleAltCase.

   So we'd have to do synonym expansion in exprType which would
   be inefficient.

* The type stored in the case is checked with lintInTy. This checks
  (among other things) that it does not mention any variables that are
  not in scope. If we did not have the type there, it would be a bit
  harder for Core Lint to reject case blah of Ex x -> x where
      data Ex = forall a. Ex a.
-}

252
-- If you edit this type, you may need to update the GHC formalism
253
-- See Note [GHC Formalism] in coreSyn/CoreLint.hs
254
data Expr b
255
  = Var   Id
256 257 258 259
  | Lit   Literal
  | App   (Expr b) (Arg b)
  | Lam   b (Expr b)
  | Let   (Bind b) (Expr b)
260 261
  | Case  (Expr b) b Type [Alt b]   -- See Note [Case expression invariants]
                                    -- and Note [Why does Case have a 'Type' field?]
262
  | Cast  (Expr b) Coercion
263
  | Tick  (Tickish Id) (Expr b)
264 265
  | Type  Type
  | Coercion Coercion
266
  deriving Data
267 268 269 270 271 272 273 274

-- | Type synonym for expressions that occur in function argument positions.
-- Only 'Arg' should contain a 'Type' at top level, general 'Expr' should not
type Arg b = Expr b

-- | A case split alternative. Consists of the constructor leading to the alternative,
-- the variables bound from the constructor, and the expression to be executed given that binding.
-- The default alternative is @(DEFAULT, [], rhs)@
275 276

-- If you edit this type, you may need to update the GHC formalism
277
-- See Note [GHC Formalism] in coreSyn/CoreLint.hs
278 279 280
type Alt b = (AltCon, [b], Expr b)

-- | A case alternative constructor (i.e. pattern match)
281 282

-- If you edit this type, you may need to update the GHC formalism
283
-- See Note [GHC Formalism] in coreSyn/CoreLint.hs
284
data AltCon
285 286 287 288 289
  = DataAlt DataCon   --  ^ A plain data constructor: @case e of { Foo x -> ... }@.
                      -- Invariant: the 'DataCon' is always from a @data@ type, and never from a @newtype@

  | LitAlt  Literal   -- ^ A literal: @case e of { 1 -> ... }@
                      -- Invariant: always an *unlifted* literal
290 291
                      -- See Note [Literal alternatives]

292
  | DEFAULT           -- ^ Trivial alternative: @case e of { _ -> ... }@
niteria's avatar
niteria committed
293
   deriving (Eq, Data)
294

295 296 297
-- This instance is a bit shady. It can only be used to compare AltCons for
-- a single type constructor. Fortunately, it seems quite unlikely that we'll
-- ever need to compare AltCons for different type constructors.
298
-- The instance adheres to the order described in [CoreSyn case invariants]
299 300 301 302
instance Ord AltCon where
  compare (DataAlt con1) (DataAlt con2) =
    ASSERT( dataConTyCon con1 == dataConTyCon con2 )
    compare (dataConTag con1) (dataConTag con2)
303 304
  compare (DataAlt _) _ = GT
  compare _ (DataAlt _) = LT
305
  compare (LitAlt l1) (LitAlt l2) = compare l1 l2
306
  compare (LitAlt _) DEFAULT = GT
307
  compare DEFAULT DEFAULT = EQ
308
  compare DEFAULT _ = LT
309

310
-- | Binding, used for top level bindings in a module and local bindings in a @let@.
311 312

-- If you edit this type, you may need to update the GHC formalism
313
-- See Note [GHC Formalism] in coreSyn/CoreLint.hs
314
data Bind b = NonRec b (Expr b)
315
            | Rec [(b, (Expr b))]
316
  deriving Data
317

Austin Seipp's avatar
Austin Seipp committed
318
{-
319 320 321 322 323 324 325 326 327 328 329 330 331
Note [Shadowing]
~~~~~~~~~~~~~~~~
While various passes attempt to rename on-the-fly in a manner that
avoids "shadowing" (thereby simplifying downstream optimizations),
neither the simplifier nor any other pass GUARANTEES that shadowing is
avoided. Thus, all passes SHOULD work fine even in the presence of
arbitrary shadowing in their inputs.

In particular, scrutinee variables `x` in expressions of the form
`Case e x t` are often renamed to variables with a prefix
"wild_". These "wild" variables may appear in the body of the
case-expression, and further, may be shadowed within the body.

Gabor Greif's avatar
Gabor Greif committed
332
So the Unique in a Var is not really unique at all.  Still, it's very
333 334 335 336 337 338
useful to give a constant-time equality/ordering for Vars, and to give
a key that can be used to make sets of Vars (VarSet), or mappings from
Vars to other things (VarEnv).   Moreover, if you do want to eliminate
shadowing, you can give a new Unique to an Id without changing its
printable name, which makes debugging easier.

339 340 341 342 343
Note [Literal alternatives]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Literal alternatives (LitAlt lit) are always for *un-lifted* literals.
We have one literal, a literal Integer, that is lifted, and we don't
allow in a LitAlt, because LitAlt cases don't do any evaluation. Also
344
(see #5603) if you say
345 346 347 348 349 350 351 352
    case 3 of
      S# x -> ...
      J# _ _ -> ...
(where S#, J# are the constructors for Integer) we don't want the
simplifier calling findAlt with argument (LitAlt 3).  No no.  Integer
literals are an opaque encoding of an algebraic data type, not of
an unlifted literal, like all the others.

Ben Gamari's avatar
Ben Gamari committed
353
Also, we do not permit case analysis with literal patterns on floating-point
354
types. See #9238 and Note [Rules for floating-point comparisons] in
Ben Gamari's avatar
Ben Gamari committed
355
PrelRules for the rationale for this restriction.
356

357 358
-------------------------- CoreSyn INVARIANTS ---------------------------

359
Note [Variable occurrences in Core]
360
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
361 362 363 364 365 366 367
Variable /occurrences/ are never CoVars, though /bindings/ can be.
All CoVars appear in Coercions.

For example
  \(c :: Age~#Int) (d::Int). d |> (sym c)
Here 'c' is a CoVar, which is lambda-bound, but it /occurs/ in
a Coercion, (sym c).
368 369 370

Note [CoreSyn letrec invariant]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
371 372 373 374 375 376 377
The right hand sides of all top-level and recursive @let@s
/must/ be of lifted type (see "Type#type_classification" for
the meaning of /lifted/ vs. /unlifted/).

There is one exception to this rule, top-level @let@s are
allowed to bind primitive string literals: see
Note [CoreSyn top-level string literals].
378

379 380 381 382 383 384 385 386 387 388 389 390 391
Note [CoreSyn top-level string literals]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
As an exception to the usual rule that top-level binders must be lifted,
we allow binding primitive string literals (of type Addr#) of type Addr# at the
top level. This allows us to share string literals earlier in the pipeline and
crucially allows other optimizations in the Core2Core pipeline to fire.
Consider,

  f n = let a::Addr# = "foo"#
        in \x -> blah

In order to be able to inline `f`, we would like to float `a` to the top.
Another option would be to inline `a`, but that would lead to duplicating string
392
literals, which we want to avoid. See #8472.
393 394 395 396 397

The solution is simply to allow top-level unlifted binders. We can't allow
arbitrary unlifted expression at the top-level though, unlifted binders cannot
be thunks, so we just allow string literals.

398 399 400
We allow the top-level primitive string literals to be wrapped in Ticks
in the same way they can be wrapped when nested in an expression.
CoreToSTG currently discards Ticks around top-level primitive string literals.
401
See #14779.
402

403 404 405 406 407 408 409 410
Also see Note [Compilation plan for top-level string literals].

Note [Compilation plan for top-level string literals]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Here is a summary on how top-level string literals are handled by various
parts of the compilation pipeline.

* In the source language, there is no way to bind a primitive string literal
411
  at the top level.
412 413 414 415 416 417 418 419 420 421 422 423

* In Core, we have a special rule that permits top-level Addr# bindings. See
  Note [CoreSyn top-level string literals]. Core-to-core passes may introduce
  new top-level string literals.

* In STG, top-level string literals are explicitly represented in the syntax
  tree.

* A top-level string literal may end up exported from a module. In this case,
  in the object file, the content of the exported literal is given a label with
  the _bytes suffix.

424 425
Note [CoreSyn let/app invariant]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
426
The let/app invariant
Javran Cheng's avatar
Javran Cheng committed
427
     the right hand side of a non-recursive 'Let', and
428 429
     the argument of an 'App',
    /may/ be of unlifted type, but only if
lukemaurer's avatar
lukemaurer committed
430 431
    the expression is ok-for-speculation
    or the 'Let' is for a join point.
432 433 434 435 436 437 438 439 440 441 442 443 444 445 446

This means that the let can be floated around
without difficulty. For example, this is OK:

   y::Int# = x +# 1#

But this is not, as it may affect termination if the
expression is floated out:

   y::Int# = fac 4#

In this situation you should use @case@ rather than a @let@. The function
'CoreUtils.needsCaseBinding' can help you determine which to generate, or
alternatively use 'MkCore.mkCoreLet' rather than this constructor directly,
which will generate a @case@ if necessary
447

448 449
The let/app invariant is initially enforced by mkCoreLet and mkCoreApp in
coreSyn/MkCore.
450

451 452 453
For discussion of some implications of the let/app invariant primops see
Note [Checking versus non-checking primops] in PrimOp.

454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
Note [Case expression invariants]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Case expressions are one of the more complicated elements of the Core
language, and come with a number of invariants.  All of them should be
checked by Core Lint.

1. The list of alternatives may be empty;
   See Note [Empty case alternatives]

2. The 'DEFAULT' case alternative must be first in the list,
   if it occurs at all.  Checked in CoreLint.checkCaseAlts.

3. The remaining cases are in order of (strictly) increasing
     tag  (for 'DataAlts') or
     lit  (for 'LitAlts').
   This makes finding the relevant constructor easy, and makes
   comparison easier too.   Checked in CoreLint.checkCaseAlts.

4. The list of alternatives must be exhaustive. An /exhaustive/ case
   does not necessarily mention all constructors:

   @
        data Foo = Red | Green | Blue
        ... case x of
              Red   -> True
              other -> f (case x of
                              Green -> ...
                              Blue  -> ... ) ...
   @

   The inner case does not need a @Red@ alternative, because @x@
   can't be @Red@ at that program point.

   This is not checked by Core Lint -- it's very hard to do so.
   E.g. suppose that inner case was floated out, thus:
         let a = case x of
                   Green -> ...
                   Blue  -> ... )
         case x of
           Red   -> True
           other -> f a
   Now it's really hard to see that the Green/Blue case is
   exhaustive.  But it is.

   If you have a case-expression that really /isn't/ exhaustive,
   we may generate seg-faults.  Consider the Green/Blue case
   above.  Since there are only two branches we may generate
   code that tests for Green, and if not Green simply /assumes/
   Blue (since, if the case is exhaustive, that's all that
   remains).  Of course, if it's not Blue and we start fetching
   fields that should be in a Blue constructor, we may die
   horribly. See also Note [Core Lint guarantee] in CoreLint.

5. Floating-point values must not be scrutinised against literals.
   See #9238 and Note [Rules for floating-point comparisons]
   in PrelRules for rationale.  Checked in lintCaseExpr;
   see the call to isFloatingTy.

6. The 'ty' field of (Case scrut bndr ty alts) is the type of the
   /entire/ case expression.  Checked in lintAltExpr.
   See also Note [Why does Case have a 'Type' field?].

7. The type of the scrutinee must be the same as the type
   of the case binder, obviously.  Checked in lintCaseExpr.

519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539
Note [CoreSyn type and coercion invariant]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We allow a /non-recursive/, /non-top-level/ let to bind type and
coercion variables.  These can be very convenient for postponing type
substitutions until the next run of the simplifier.

* A type variable binding must have a RHS of (Type ty)

* A coercion variable binding must have a RHS of (Coercion co)

  It is possible to have terms that return a coercion, but we use
  case-binding for those; e.g.
     case (eq_sel d) of (co :: a ~# b) -> blah
  where eq_sel :: (a~b) -> (a~#b)

  Or even even
      case (df @Int) of (co :: a ~# b) -> blah
  Which is very exotic, and I think never encountered; but see
  Note [Equality superclasses in quantified constraints]
  in TcCanonical

540 541
Note [CoreSyn case invariants]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
542
See #case_invariants#
543

544 545
Note [Levity polymorphism invariants]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
546 547
The levity-polymorphism invariants are these (as per "Levity Polymorphism",
PLDI '17):
548

549 550 551 552
* The type of a term-binder must not be levity-polymorphic,
  unless it is a let(rec)-bound join point
     (see Note [Invariants on join points])

553 554 555 556 557
* The type of the argument of an App must not be levity-polymorphic.

A type (t::TYPE r) is "levity polymorphic" if 'r' has any free variables.

For example
558
  \(r::RuntimeRep). \(a::TYPE r). \(x::a). e
559 560
is illegal because x's type has kind (TYPE r), which has 'r' free.

561 562 563
See Note [Levity polymorphism checking] in DsMonad to see where these
invariants are established for user-written code.

564
Note [CoreSyn let goal]
565
~~~~~~~~~~~~~~~~~~~~~~~
566 567 568 569
* The simplifier tries to ensure that if the RHS of a let is a constructor
  application, its arguments are trivial, so that the constructor can be
  inlined vigorously.

570 571
Note [Type let]
~~~~~~~~~~~~~~~
572
See #type_let#
573

574 575
Note [Empty case alternatives]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Simon Peyton Jones's avatar
Simon Peyton Jones committed
576 577 578 579 580 581 582
The alternatives of a case expression should be exhaustive.  But
this exhaustive list can be empty!

* A case expression can have empty alternatives if (and only if) the
  scrutinee is bound to raise an exception or diverge. When do we know
  this?  See Note [Bottoming expressions] in CoreUtils.

Gabor Greif's avatar
Gabor Greif committed
583
* The possibility of empty alternatives is one reason we need a type on
Simon Peyton Jones's avatar
Simon Peyton Jones committed
584 585 586 587 588 589 590 591
  the case expression: if the alternatives are empty we can't get the
  type from the alternatives!

* In the case of empty types (see Note [Bottoming expressions]), say
    data T
  we do NOT want to replace
    case (x::T) of Bool {}   -->   error Bool "Inaccessible case"
  because x might raise an exception, and *that*'s what we want to see!
592
  (#6067 is an example.) To preserve semantics we'd have to say
Simon Peyton Jones's avatar
Simon Peyton Jones committed
593 594 595 596 597 598 599 600 601
     x `seq` error Bool "Inaccessible case"
  but the 'seq' is just a case, so we are back to square 1.  Or I suppose
  we could say
     x |> UnsafeCoerce T Bool
  but that loses all trace of the fact that this originated with an empty
  set of alternatives.

* We can use the empty-alternative construct to coerce error values from
  one type to another.  For example
602 603 604

    f :: Int -> Int
    f n = error "urk"
605

606 607 608
    g :: Int -> (# Char, Bool #)
    g x = case f x of { 0 -> ..., n -> ... }

Simon Peyton Jones's avatar
Simon Peyton Jones committed
609
  Then if we inline f in g's RHS we get
610
    case (error Int "urk") of (# Char, Bool #) { ... }
Simon Peyton Jones's avatar
Simon Peyton Jones committed
611
  and we can discard the alternatives since the scrutinee is bottom to give
612 613
    case (error Int "urk") of (# Char, Bool #) {}

Simon Peyton Jones's avatar
Simon Peyton Jones committed
614 615 616 617 618 619 620 621 622 623 624
  This is nicer than using an unsafe coerce between Int ~ (# Char,Bool #),
  if for no other reason that we don't need to instantiate the (~) at an
  unboxed type.

* We treat a case expression with empty alternatives as trivial iff
  its scrutinee is (see CoreUtils.exprIsTrivial).  This is actually
  important; see Note [Empty case is trivial] in CoreUtils

* An empty case is replaced by its scrutinee during the CoreToStg
  conversion; remember STG is un-typed, so there is no need for
  the empty case to do the type conversion.
625

lukemaurer's avatar
lukemaurer committed
626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677
Note [Join points]
~~~~~~~~~~~~~~~~~~
In Core, a *join point* is a specially tagged function whose only occurrences
are saturated tail calls. A tail call can appear in these places:

  1. In the branches (not the scrutinee) of a case
  2. Underneath a let (value or join point)
  3. Inside another join point

We write a join-point declaration as
  join j @a @b x y = e1 in e2,
like a let binding but with "join" instead (or "join rec" for "let rec"). Note
that we put the parameters before the = rather than using lambdas; this is
because it's relevant how many parameters the join point takes *as a join
point.* This number is called the *join arity,* distinct from arity because it
counts types as well as values. Note that a join point may return a lambda! So
  join j x = x + 1
is different from
  join j = \x -> x + 1
The former has join arity 1, while the latter has join arity 0.

The identifier for a join point is called a join id or a *label.* An invocation
is called a *jump.* We write a jump using the jump keyword:

  jump j 3

The words *label* and *jump* are evocative of assembly code (or Cmm) for a
reason: join points are indeed compiled as labeled blocks, and jumps become
actual jumps (plus argument passing and stack adjustment). There is no closure
allocated and only a fraction of the function-call overhead. Hence we would
like as many functions as possible to become join points (see OccurAnal) and
the type rules for join points ensure we preserve the properties that make them
efficient.

In the actual AST, a join point is indicated by the IdDetails of the binder: a
local value binding gets 'VanillaId' but a join point gets a 'JoinId' with its
join arity.

For more details, see the paper:

  Luke Maurer, Paul Downen, Zena Ariola, and Simon Peyton Jones. "Compiling
  without continuations." Submitted to PLDI'17.

  https://www.microsoft.com/en-us/research/publication/compiling-without-continuations/

Note [Invariants on join points]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Join points must follow these invariants:

  1. All occurrences must be tail calls. Each of these tail calls must pass the
     same number of arguments, counting both types and values; we call this the
     "join arity" (to distinguish from regular arity, which only counts values).
678

679 680
     See Note [Join points are less general than the paper]

lukemaurer's avatar
lukemaurer committed
681
  2. For join arity n, the right-hand side must begin with at least n lambdas.
682 683
     No ticks, no casts, just lambdas!  C.f. CoreUtils.joinRhsArity.

684 685 686 687 688 689 690 691 692 693 694 695 696 697 698
     2a. Moreover, this same constraint applies to any unfolding of
         the binder.  Reason: if we want to push a continuation into
         the RHS we must push it into the unfolding as well.

     2b. The Arity (in the IdInfo) of a join point is the number of value
         binders in the top n lambdas, where n is the join arity.

         So arity <= join arity; the former counts only value binders
         while the latter counts all binders.
         e.g. Suppose $j has join arity 1
               let j = \x y. e in case x of { A -> j 1; B -> j 2 }
         Then its ordinary arity is also 1, not 2.

         The arity of a join point isn't very important; but short of setting
         it to zero, it is helpful to have an invariant.  E.g. #17294.
699

lukemaurer's avatar
lukemaurer committed
700 701
  3. If the binding is recursive, then all other bindings in the recursive group
     must also be join points.
702

lukemaurer's avatar
lukemaurer committed
703 704 705
  4. The binding's type must not be polymorphic in its return type (as defined
     in Note [The polymorphism rule of join points]).

706 707 708 709 710 711 712 713
However, join points have simpler invariants in other ways

  5. A join point can have an unboxed type without the RHS being
     ok-for-speculation (i.e. drop the let/app invariant)
     e.g.  let j :: Int# = factorial x in ...

  6. A join point can have a levity-polymorphic RHS
     e.g.  let j :: r :: TYPE l = fail void# in ...
714
     This happened in an intermediate program #13394
715

lukemaurer's avatar
lukemaurer committed
716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737
Examples:

  join j1  x = 1 + x in jump j (jump j x)  -- Fails 1: non-tail call
  join j1' x = 1 + x in if even a
                          then jump j1 a
                          else jump j1 a b -- Fails 1: inconsistent calls
  join j2  x = flip (+) x in j2 1 2        -- Fails 2: not enough lambdas
  join j2' x = \y -> x + y in j3 1         -- Passes: extra lams ok
  join j @a (x :: a) = x                   -- Fails 4: polymorphic in ret type

Invariant 1 applies to left-hand sides of rewrite rules, so a rule for a join
point must have an exact call as its LHS.

Strictly speaking, invariant 3 is redundant, since a call from inside a lazy
binding isn't a tail call. Since a let-bound value can't invoke a free join
point, then, they can't be mutually recursive. (A Core binding group *can*
include spurious extra bindings if the occurrence analyser hasn't run, so
invariant 3 does still need to be checked.) For the rigorous definition of
"tail call", see Section 3 of the paper (Note [Join points]).

Invariant 4 is subtle; see Note [The polymorphism rule of join points].

738 739 740 741 742 743 744 745 746 747
Invariant 6 is to enable code like this:

  f = \(r :: RuntimeRep) (a :: TYPE r) (x :: T).
      join j :: a
           j = error @r @a "bloop"
      in case x of
           A -> j
           B -> j
           C -> error @r @a "blurp"

lukemaurer's avatar
lukemaurer committed
748 749 750 751
Core Lint will check these invariants, anticipating that any binder whose
OccInfo is marked AlwaysTailCalled will become a join point as soon as the
simplifier (or simpleOptPgm) runs.

752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771
Note [Join points are less general than the paper]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In the paper "Compiling without continuations", this expression is
perfectly valid:

    join { j = \_ -> e }
    in (case blah of       )
       (  True  -> j void# ) arg
       (  False -> blah    )

assuming 'j' has arity 1.   Here the call to 'j' does not look like a
tail call, but actually everything is fine. See Section 3, "Managing \Delta"
in the paper.

In GHC, however, we adopt a slightly more restrictive subset, in which
join point calls must be tail calls.  I think we /could/ loosen it up, but
in fact the simplifier ensures that we always get tail calls, and it makes
the back end a bit easier I think.  Generally, just less to think about;
nothing deeper than that.

lukemaurer's avatar
lukemaurer committed
772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836
Note [The type of a join point]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A join point has the same type it would have as a function. That is, if it takes
an Int and a Bool and its body produces a String, its type is `Int -> Bool ->
String`. Natural as this may seem, it can be awkward. A join point shouldn't be
thought to "return" in the same sense a function does---a jump is one-way. This
is crucial for understanding how case-of-case interacts with join points:

  case (join
          j :: Int -> Bool -> String
          j x y = ...
        in
          jump j z w) of
    "" -> True
    _  -> False

The simplifier will pull the case into the join point (see Note [Case-of-case
and join points] in Simplify):

  join
    j :: Int -> Bool -> Bool -- changed!
    j x y = case ... of "" -> True
                        _  -> False
  in
    jump j z w

The body of the join point now returns a Bool, so the label `j` has to have its
type updated accordingly. Inconvenient though this may be, it has the advantage
that 'CoreUtils.exprType' can still return a type for any expression, including
a jump.

This differs from the paper (see Note [Invariants on join points]). In the
paper, we instead give j the type `Int -> Bool -> forall a. a`. Then each jump
carries the "return type" as a parameter, exactly the way other non-returning
functions like `error` work:

  case (join
          j :: Int -> Bool -> forall a. a
          j x y = ...
        in
          jump j z w @String) of
    "" -> True
    _  -> False

Now we can move the case inward and we only have to change the jump:

  join
    j :: Int -> Bool -> forall a. a
    j x y = case ... of "" -> True
                        _  -> False
  in
    jump j z w @Bool

(Core Lint would still check that the body of the join point has the right type;
that type would simply not be reflected in the join id.)

Note [The polymorphism rule of join points]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Invariant 4 of Note [Invariants on join points] forbids a join point to be
polymorphic in its return type. That is, if its type is

  forall a1 ... ak. t1 -> ... -> tn -> r

where its join arity is k+n, none of the type parameters ai may occur free in r.

837
In some way, this falls out of the fact that given
lukemaurer's avatar
lukemaurer committed
838

839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874
  join
     j @a1 ... @ak x1 ... xn = e1
  in e2

then all calls to `j` are in tail-call positions of `e`, and expressions in
tail-call positions in `e` have the same type as `e`.
Therefore the type of `e1` -- the return type of the join point -- must be the
same as the type of e2.
Since the type variables aren't bound in `e2`, its type can't include them, and
thus neither can the type of `e1`.

This unfortunately prevents the `go` in the following code from being a
join-point:

  iter :: forall a. Int -> (a -> a) -> a -> a
  iter @a n f x = go @a n f x
    where
      go :: forall a. Int -> (a -> a) -> a -> a
      go @a 0 _ x = x
      go @a n f x = go @a (n-1) f (f x)

In this case, a static argument transformation would fix that (see
ticket #14620):

  iter :: forall a. Int -> (a -> a) -> a -> a
  iter @a n f x = go' @a n f x
    where
      go' :: Int -> (a -> a) -> a -> a
      go' 0 _ x = x
      go' n f x = go' (n-1) f (f x)

In general, loopification could be employed to do that (see #14068.)

Can we simply drop the requirement, and allow `go` to be a join-point? We
could, and it would work. But we could not longer apply the case-of-join-point
transformation universally. This transformation would do:
lukemaurer's avatar
lukemaurer committed
875

876 877 878 879
  case (join go @a n f x = case n of 0 -> x
                                     n -> go @a (n-1) f (f x)
        in go @Bool n neg True) of
    True -> e1; False -> e2
lukemaurer's avatar
lukemaurer committed
880

881
 ===>
lukemaurer's avatar
lukemaurer committed
882

883 884 885
  join go @a n f x = case n of 0 -> case x of True -> e1; False -> e2
                          n -> go @a (n-1) f (f x)
  in go @Bool n neg True
lukemaurer's avatar
lukemaurer committed
886

887
but that is ill-typed, as `x` is type `a`, not `Bool`.
lukemaurer's avatar
lukemaurer committed
888 889


890
This also justifies why we do not consider the `e` in `e |> co` to be in
891 892 893
tail position: A cast changes the type, but the type must be the same. But
operationally, casts are vacuous, so this is a bit unfortunate! See #14610 for
ideas how to fix this.
894

Austin Seipp's avatar
Austin Seipp committed
895
************************************************************************
896 897 898 899 900 901
*                                                                      *
            In/Out type synonyms
*                                                                      *
********************************************************************* -}

{- Many passes apply a substitution, and it's very handy to have type
Gabor Greif's avatar
Gabor Greif committed
902
   synonyms to remind us whether or not the substitution has been applied -}
903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922

-- Pre-cloning or substitution
type InBndr     = CoreBndr
type InType     = Type
type InKind     = Kind
type InBind     = CoreBind
type InExpr     = CoreExpr
type InAlt      = CoreAlt
type InArg      = CoreArg
type InCoercion = Coercion

-- Post-cloning or substitution
type OutBndr     = CoreBndr
type OutType     = Type
type OutKind     = Kind
type OutCoercion = Coercion
type OutBind     = CoreBind
type OutExpr     = CoreExpr
type OutAlt      = CoreAlt
type OutArg      = CoreArg
Ningning Xie's avatar
Ningning Xie committed
923
type MOutCoercion = MCoercion
924 925 926


{- *********************************************************************
Austin Seipp's avatar
Austin Seipp committed
927
*                                                                      *
Simon Peyton Jones's avatar
Simon Peyton Jones committed
928
              Ticks
Austin Seipp's avatar
Austin Seipp committed
929 930 931
*                                                                      *
************************************************************************
-}
Simon Peyton Jones's avatar
Simon Peyton Jones committed
932

933
-- | Allows attaching extra information to points in expressions
934 935

-- If you edit this type, you may need to update the GHC formalism
936
-- See Note [GHC Formalism] in coreSyn/CoreLint.hs
937 938 939 940 941 942 943 944 945 946
data Tickish id =
    -- | An @{-# SCC #-}@ profiling annotation, either automatically
    -- added by the desugarer as a result of -auto-all, or added by
    -- the user.
    ProfNote {
      profNoteCC    :: CostCentre, -- ^ the cost centre
      profNoteCount :: !Bool,      -- ^ bump the entry count?
      profNoteScope :: !Bool       -- ^ scopes over the enclosed expression
                                   -- (i.e. not just a tick)
    }
947

948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970
  -- | A "tick" used by HPC to track the execution of each
  -- subexpression in the original source code.
  | HpcTick {
      tickModule :: Module,
      tickId     :: !Int
    }

  -- | A breakpoint for the GHCi debugger.  This behaves like an HPC
  -- tick, but has a list of free variables which will be available
  -- for inspection in GHCi when the program stops at the breakpoint.
  --
  -- NB. we must take account of these Ids when (a) counting free variables,
  -- and (b) substituting (don't substitute for them)
  | Breakpoint
    { breakpointId     :: !Int
    , breakpointFVs    :: [id]  -- ^ the order of this list is important:
                                -- it matches the order of the lists in the
                                -- appropriate entry in HscTypes.ModBreaks.
                                --
                                -- Careful about substitution!  See
                                -- Note [substTickish] in CoreSubst.
    }

Peter Wortmann's avatar
Peter Wortmann committed
971 972 973 974 975 976 977 978 979 980 981 982 983 984 985
  -- | A source note.
  --
  -- Source notes are pure annotations: Their presence should neither
  -- influence compilation nor execution. The semantics are given by
  -- causality: The presence of a source note means that a local
  -- change in the referenced source code span will possibly provoke
  -- the generated code to change. On the flip-side, the functionality
  -- of annotated code *must* be invariant against changes to all
  -- source code *except* the spans referenced in the source notes
  -- (see "Causality of optimized Haskell" paper for details).
  --
  -- Therefore extending the scope of any given source note is always
  -- valid. Note that it is still undesirable though, as this reduces
  -- their usefulness for debugging and profiling. Therefore we will
  -- generally try only to make use of this property where it is
986
  -- necessary to enable optimizations.
Peter Wortmann's avatar
Peter Wortmann committed
987 988 989 990 991 992
  | SourceNote
    { sourceSpan :: RealSrcSpan -- ^ Source covered
    , sourceName :: String      -- ^ Name for source location
                                --   (uses same names as CCs)
    }

993
  deriving (Eq, Ord, Data)
994

Simon Marlow's avatar
Simon Marlow committed
995 996 997 998
-- | A "counting tick" (where tickishCounts is True) is one that
-- counts evaluations in some way.  We cannot discard a counting tick,
-- and the compiler should preserve the number of counting ticks as
-- far as possible.
999
--
Gabor Greif's avatar
Gabor Greif committed
1000
-- However, we still allow the simplifier to increase or decrease
1001 1002 1003 1004 1005 1006
-- sharing, so in practice the actual number of ticks may vary, except
-- that we never change the value from zero to non-zero or vice versa.
tickishCounts :: Tickish id -> Bool
tickishCounts n@ProfNote{} = profNoteCount n
tickishCounts HpcTick{}    = True
tickishCounts Breakpoint{} = True
Peter Wortmann's avatar
Peter Wortmann committed
1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020
tickishCounts _            = False


-- | Specifies the scoping behaviour of ticks. This governs the
-- behaviour of ticks that care about the covered code and the cost
-- associated with it. Important for ticks relating to profiling.
data TickishScoping =
    -- | No scoping: The tick does not care about what code it
    -- covers. Transformations can freely move code inside as well as
    -- outside without any additional annotation obligations
    NoScope

    -- | Soft scoping: We want all code that is covered to stay
    -- covered.  Note that this scope type does not forbid
1021
    -- transformations from happening, as long as all results of
Peter Wortmann's avatar
Peter Wortmann committed
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044
    -- the transformations are still covered by this tick or a copy of
    -- it. For example
    --
    --   let x = tick<...> (let y = foo in bar) in baz
    --     ===>
    --   let x = tick<...> bar; y = tick<...> foo in baz
    --
    -- Is a valid transformation as far as "bar" and "foo" is
    -- concerned, because both still are scoped over by the tick.
    --
    -- Note though that one might object to the "let" not being
    -- covered by the tick any more. However, we are generally lax
    -- with this - constant costs don't matter too much, and given
    -- that the "let" was effectively merged we can view it as having
    -- lost its identity anyway.
    --
    -- Also note that this scoping behaviour allows floating a tick
    -- "upwards" in pretty much any situation. For example:
    --
    --   case foo of x -> tick<...> bar
    --     ==>
    --   tick<...> case foo of x -> bar
    --
1045
    -- While this is always legal, we want to make a best effort to
Peter Wortmann's avatar
Peter Wortmann committed
1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077
    -- only make us of this where it exposes transformation
    -- opportunities.
  | SoftScope

    -- | Cost centre scoping: We don't want any costs to move to other
    -- cost-centre stacks. This means we not only want no code or cost
    -- to get moved out of their cost centres, but we also object to
    -- code getting associated with new cost-centre ticks - or
    -- changing the order in which they get applied.
    --
    -- A rule of thumb is that we don't want any code to gain new
    -- annotations. However, there are notable exceptions, for
    -- example:
    --
    --   let f = \y -> foo in tick<...> ... (f x) ...
    --     ==>
    --   tick<...> ... foo[x/y] ...
    --
    -- In-lining lambdas like this is always legal, because inlining a
    -- function does not change the cost-centre stack when the
    -- function is called.
  | CostCentreScope

  deriving (Eq)

-- | Returns the intended scoping rule for a Tickish
tickishScoped :: Tickish id -> TickishScoping
tickishScoped n@ProfNote{}
  | profNoteScope n        = CostCentreScope
  | otherwise              = NoScope
tickishScoped HpcTick{}    = NoScope
tickishScoped Breakpoint{} = CostCentreScope
1078 1079 1080
   -- Breakpoints are scoped: eventually we're going to do call
   -- stacks, but also this helps prevent the simplifier from moving
   -- breakpoints around and changing their result type (see #1531).
Peter Wortmann's avatar
Peter Wortmann committed
1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113
tickishScoped SourceNote{} = SoftScope

-- | Returns whether the tick scoping rule is at least as permissive
-- as the given scoping rule.
tickishScopesLike :: Tickish id -> TickishScoping -> Bool
tickishScopesLike t scope = tickishScoped t `like` scope
  where NoScope         `like` _               = True
        _               `like` NoScope         = False
        SoftScope       `like` _               = True
        _               `like` SoftScope       = False
        CostCentreScope `like` _               = True

-- | Returns @True@ for ticks that can be floated upwards easily even
-- where it might change execution counts, such as:
--
--   Just (tick<...> foo)
--     ==>
--   tick<...> (Just foo)
--
-- This is a combination of @tickishSoftScope@ and
-- @tickishCounts@. Note that in principle splittable ticks can become
-- floatable using @mkNoTick@ -- even though there's currently no
-- tickish for which that is the case.
tickishFloatable :: Tickish id -> Bool
tickishFloatable t = t `tickishScopesLike` SoftScope && not (tickishCounts t)

-- | Returns @True@ for a tick that is both counting /and/ scoping and
-- can be split into its (tick, scope) parts using 'mkNoScope' and
-- 'mkNoTick' respectively.
tickishCanSplit :: Tickish id -> Bool
tickishCanSplit ProfNote{profNoteScope = True, profNoteCount = True}
                   = True
tickishCanSplit _  = False
1114

Simon Marlow's avatar
Simon Marlow committed
1115
mkNoCount :: Tickish id -> Tickish id
Peter Wortmann's avatar
Peter Wortmann committed
1116 1117 1118 1119
mkNoCount n | not (tickishCounts n)   = n
            | not (tickishCanSplit n) = panic "mkNoCount: Cannot split!"
mkNoCount n@ProfNote{}                = n {profNoteCount = False}
mkNoCount _                           = panic "mkNoCount: Undefined split!"
1120 1121

mkNoScope :: Tickish id -> Tickish id
Peter Wortmann's avatar
Peter Wortmann committed
1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141
mkNoScope n | tickishScoped n == NoScope  = n
            | not (tickishCanSplit n)     = panic "mkNoScope: Cannot split!"
mkNoScope n@ProfNote{}                    = n {profNoteScope = False}
mkNoScope _                               = panic "mkNoScope: Undefined split!"

-- | Return @True@ if this source annotation compiles to some backend
-- code. Without this flag, the tickish is seen as a simple annotation
-- that does not have any associated evaluation code.
--
-- What this means that we are allowed to disregard the tick if doing
-- so means that we can skip generating any code in the first place. A
-- typical example is top-level bindings:
--
--   foo = tick<...> \y -> ...
--     ==>
--   foo = \y -> tick<...> ...
--
-- Here there is just no operational difference between the first and
-- the second version. Therefore code generation should simply
-- translate the code as if it found the latter.
1142
tickishIsCode :: Tickish id -> Bool
Peter Wortmann's avatar
Peter Wortmann committed
1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193
tickishIsCode SourceNote{} = False
tickishIsCode _tickish     = True  -- all the rest for now


-- | Governs the kind of expression that the tick gets placed on when
-- annotating for example using @mkTick@. If we find that we want to
-- put a tickish on an expression ruled out here, we try to float it
-- inwards until we find a suitable expression.
data TickishPlacement =

    -- | Place ticks exactly on run-time expressions. We can still
    -- move the tick through pure compile-time constructs such as
    -- other ticks, casts or type lambdas. This is the most
    -- restrictive placement rule for ticks, as all tickishs have in
    -- common that they want to track runtime processes. The only
    -- legal placement rule for counting ticks.
    PlaceRuntime

    -- | As @PlaceRuntime@, but we float the tick through all
    -- lambdas. This makes sense where there is little difference
    -- between annotating the lambda and annotating the lambda's code.
  | PlaceNonLam

    -- | In addition to floating through lambdas, cost-centre style
    -- tickishs can also be moved from constructors, non-function
    -- variables and literals. For example:
    --
    --   let x = scc<...> C (scc<...> y) (scc<...> 3) in ...
    --
    -- Neither the constructor application, the variable or the
    -- literal are likely to have any cost worth mentioning. And even
    -- if y names a thunk, the call would not care about the
    -- evaluation context. Therefore removing all annotations in the
    -- above example is safe.
  | PlaceCostCentre

  deriving (Eq)

-- | Placement behaviour we want for the ticks
tickishPlace :: Tickish id -> TickishPlacement
tickishPlace n@ProfNote{}
  | profNoteCount n        = PlaceRuntime
  | otherwise              = PlaceCostCentre
tickishPlace HpcTick{}     = PlaceRuntime
tickishPlace Breakpoint{}  = PlaceRuntime
tickishPlace SourceNote{}  = PlaceNonLam

-- | Returns whether one tick "contains" the other one, therefore
-- making the second tick redundant.
tickishContains :: Eq b => Tickish b -> Tickish b -> Bool
tickishContains (SourceNote sp1 n1) (SourceNote sp2 n2)
niteria's avatar
niteria committed
1194 1195
  = containsSpan sp1 sp2 && n1 == n2
    -- compare the String last
Peter Wortmann's avatar
Peter Wortmann committed
1196 1197
tickishContains t1 t2
  = t1 == t2
1198

1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214
{-
************************************************************************
*                                                                      *
                Orphans
*                                                                      *
************************************************************************
-}

-- | Is this instance an orphan?  If it is not an orphan, contains an 'OccName'
-- witnessing the instance's non-orphanhood.
-- See Note [Orphans]
data IsOrphan
  = IsOrphan
  | NotOrphan OccName -- The OccName 'n' witnesses the instance's non-orphanhood
                      -- In that case, the instance is fingerprinted as part
                      -- of the definition of 'n's definition
1215
    deriving Data
1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226

-- | Returns true if 'IsOrphan' is orphan.
isOrphan :: IsOrphan -> Bool
isOrphan IsOrphan = True
isOrphan _ = False

-- | Returns true if 'IsOrphan' is not an orphan.
notOrphan :: IsOrphan -> Bool
notOrphan NotOrphan{} = True
notOrphan _ = False

1227
chooseOrphanAnchor :: NameSet -> IsOrphan
1228 1229
-- Something (rule, instance) is relate to all the Names in this
-- list. Choose one of them to be an "anchor" for the orphan.  We make
1230
-- the choice deterministic to avoid gratuitous changes in the ABI
1231
-- hash (#4012).  Specifically, use lexicographic comparison of
1232 1233 1234 1235 1236
-- OccName rather than comparing Uniques
--
-- NB: 'minimum' use Ord, and (Ord OccName) works lexicographically
--
chooseOrphanAnchor local_names
1237 1238
  | isEmptyNameSet local_names = IsOrphan
  | otherwise                  = NotOrphan (minimum occs)
1239
  where
David Feuer's avatar
David Feuer committed
1240
    occs = map nameOccName $ nonDetEltsUniqSet local_names
1241
    -- It's OK to use nonDetEltsUFM here, see comments above
1242

1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264
instance Binary IsOrphan where
    put_ bh IsOrphan = putByte bh 0
    put_ bh (NotOrphan n) = do
        putByte bh 1
        put_ bh n
    get bh = do
        h <- getByte bh
        case h of
            0 -> return IsOrphan
            _ -> do
                n <- get bh
                return $ NotOrphan n

{-
Note [Orphans]
~~~~~~~~~~~~~~
Class instances, rules, and family instances are divided into orphans
and non-orphans.  Roughly speaking, an instance/rule is an orphan if
its left hand side mentions nothing defined in this module.  Orphan-hood
has two major consequences

 * A module that contains orphans is called an "orphan module".  If
1265 1266
   the module being compiled depends (transitively) on an orphan
   module M, then M.hi is read in regardless of whether M is otherwise
1267 1268 1269 1270 1271 1272 1273 1274 1275
   needed. This is to ensure that we don't miss any instance decls in
   M.  But it's painful, because it means we need to keep track of all
   the orphan modules below us.

 * A non-orphan is not finger-printed separately.  Instead, for
   fingerprinting purposes it is treated as part of the entity it
   mentions on the LHS.  For example
      data T = T1 | T2
      instance Eq T where ....
1276
   The instance (Eq T) is incorporated as part of T's fingerprint.
1277

1278
   In contrast, orphans are all fingerprinted together in the
1279 1280
   mi_orph_hash field of the ModIface.

1281
   See GHC.Iface.Utils.addFingerprints.
1282 1283 1284 1285 1286 1287 1288

Orphan-hood is computed
  * For class instances:
      when we make a ClsInst
    (because it is needed during instance lookup)

  * For rules and family instances:
1289 1290
       when we generate an IfaceRule (GHC.Iface.Utils.coreRuleToIfaceRule)
                     or IfaceFamInst (GHC.Iface.Utils.instanceToIfaceInst)
1291 1292
-}

Austin Seipp's avatar
Austin Seipp committed
1293 1294 1295
{-
************************************************************************
*                                                                      *
1296
\subsection{Transformation rules}
Austin Seipp's avatar
Austin Seipp committed
1297 1298
*                                                                      *
************************************************************************
1299 1300 1301

The CoreRule type and its friends are dealt with mainly in CoreRules,
but CoreFVs, Subst, PprCore, CoreTidy also inspect the representation.
Austin Seipp's avatar
Austin Seipp committed
1302
-}
1303

1304 1305
-- | Gathers a collection of 'CoreRule's. Maps (the name of) an 'Id' to its rules
type RuleBase = NameEnv [CoreRule]
Gabor Greif's avatar
Gabor Greif committed
1306
        -- The rules are unordered;
1307 1308
        -- we sort out any overlaps on lookup

1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322
-- | A full rule environment which we can apply rules from.  Like a 'RuleBase',
-- but it also includes the set of visible orphans we use to filter out orphan
-- rules which are not visible (even though we can see them...)
data RuleEnv
    = RuleEnv { re_base          :: RuleBase
              , re_visible_orphs :: ModuleSet
              }

mkRuleEnv :: RuleBase -> [Module] -> RuleEnv
mkRuleEnv rules vis_orphs = RuleEnv rules (mkModuleSet vis_orphs)

emptyRuleEnv :: RuleEnv
emptyRuleEnv = RuleEnv emptyNameEnv emptyModuleSet

1323 1324 1325 1326 1327 1328 1329
-- | A 'CoreRule' is:
--
-- * \"Local\" if the function it is a rule for is defined in the
--   same module as the rule itself.
--
-- * \"Orphan\" if nothing on the LHS is defined in the same module
--   as the rule itself
1330
data CoreRule
1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350
  = Rule {
        ru_name :: RuleName,            -- ^ Name of the rule, for communication with the user
        ru_act  :: Activation,          -- ^ When the rule is active

        -- Rough-matching stuff
        -- see comments with InstEnv.ClsInst( is_cls, is_rough )
        ru_fn    :: Name,               -- ^ Name of the 'Id.Id' at the head of this rule
        ru_rough :: [Maybe Name],       -- ^ Name at the head of each argument to the left hand side

        -- Proper-matching stuff
        -- see comments with InstEnv.ClsInst( is_tvs, is_tys )
        ru_bndrs :: [CoreBndr],         -- ^ Variables quantified over
        ru_args  :: [CoreExpr],         -- ^ Left hand side arguments

        -- And the right-hand side
        ru_rhs   :: CoreExpr,           -- ^ Right hand side of the rule
                                        -- Occurrence info is guaranteed correct
                                        -- See Note [OccInfo in unfoldings and rules]

        -- Locality
Simon Peyton Jones's avatar
Simon Peyton Jones committed
1351 1352
        ru_auto :: Bool,   -- ^ @True@  <=> this rule is auto-generated
                           --               (notably by Specialise or SpecConstr)
Gabor Greif's avatar
Gabor Greif committed
1353
                           --   @False@ <=> generated at the user's behest
1354
                           -- See Note [Trimming auto-rules] in GHC.Iface.Tidy
Simon Peyton Jones's avatar
Simon Peyton Jones committed
1355
                           -- for the sole purpose of this field.
1356

Simon Peyton Jones's avatar
Simon Peyton Jones committed
1357
        ru_origin :: !Module,   -- ^ 'Module' the rule was defined in, used
1358 1359
                                -- to test if we should see an orphan rule.

Simon Peyton Jones's avatar
Simon Peyton Jones committed
1360
        ru_orphan :: !IsOrphan, -- ^ Whether or not the rule is an orphan.
1361

1362 1363 1364
        ru_local :: Bool        -- ^ @True@ iff the fn at the head of the rule is
                                -- defined in the same module as the rule
                                -- and is not an implicit 'Id' (like a record selector,
1365 1366 1367 1368 1369
                                -- class operation, or data constructor).  This
                                -- is different from 'ru_orphan', where a rule
                                -- can avoid being an orphan if *any* Name in
                                -- LHS of the rule was defined in the same
                                -- module as the rule.
1370
    }
1371

1372 1373
  -- | Built-in rules are used for constant folding
  -- and suchlike.  They have no free variables.
1374 1375
  -- A built-in rule is always visible (there is no such thing as
  -- an orphan built-in rule.)
1376 1377 1378 1379 1380 1381 1382 1383 1384
  | BuiltinRule {
        ru_name  :: RuleName,   -- ^ As above
        ru_fn    :: Name,       -- ^ As above
        ru_nargs :: Int,        -- ^ Number of arguments that 'ru_try' consumes,
                                -- if it fires, including type arguments
        ru_try   :: RuleFun
                -- ^ This function does the rewrite.  It given too many
                -- arguments, it simply discards them; the returned 'CoreExpr'
                -- is just the rewrite of 'ru_fn' applied to the first 'ru_nargs' args
1385
    }
1386
                -- See Note [Extra args in rule matching] in Rules.hs
1387

1388 1389 1390
type RuleFun = DynFlags -> InScopeEnv -> Id -> [CoreExpr] -> Maybe CoreExpr
type InScopeEnv = (InScopeSet, IdUnfoldingFun)

1391 1392 1393 1394 1395
type IdUnfoldingFun = Id -> Unfolding
-- A function that embodies how to unfold an Id if you need
-- to do that in the Rule.  The reason we need to pass this info in
-- is that whether an Id is unfoldable depends on the simplifier phase

twanvl's avatar
twanvl committed
1396
isBuiltinRule :: CoreRule -> Bool
1397
isBuiltinRule (BuiltinRule {}) = True
1398
isBuiltinRule _                = False
1399

1400 1401 1402 1403
isAutoRule :: CoreRule -> Bool
isAutoRule (BuiltinRule {}) = False
isAutoRule (Rule { ru_auto = is_auto }) = is_auto

1404
-- | The number of arguments the 'ru_fn' must be applied
1405
-- to before the rule can match on it
1406 1407 1408 1409
ruleArity :: CoreRule -> Int
ruleArity (BuiltinRule {ru_nargs = n}) = n
ruleArity (Rule {ru_args = args})      = length args

1410 1411
ruleName :: CoreRule -> RuleName
ruleName = ru_name
1412

1413 1414 1415 1416
ruleModule :: CoreRule -> Maybe Module
ruleModule Rule { ru_origin } = Just ru_origin
ruleModule BuiltinRule {} = Nothing

1417 1418 1419
ruleActivation :: CoreRule -> Activation
ruleActivation (BuiltinRule { })       = AlwaysActive
ruleActivation (Rule { ru_act = act }) = act
1420 1421

-- | The 'Name' of the 'Id.Id' at the head of the rule left hand side
1422 1423
ruleIdName :: CoreRule -> Name
ruleIdName = ru_fn
1424

1425 1426
isLocalRule :: CoreRule -> Bool
isLocalRule = ru_local
1427

1428
-- | Set the 'Name' of the 'Id.Id' at the head of the rule left hand side
1429 1430
setRuleIdName :: Name -> CoreRule -> CoreRule
setRuleIdName nm ru = ru { ru_fn = nm }
1431

Austin Seipp's avatar
Austin Seipp committed
1432 1433 1434
{-
************************************************************************
*                                                                      *
1435
                Unfoldings
Austin Seipp's avatar
Austin Seipp committed
1436 1437
*                                                                      *
************************************************************************
1438

1439
The @Unfolding@ type is declared here to avoid numerous loops
Austin Seipp's avatar
Austin Seipp committed
1440
-}
1441

1442 1443 1444
-- | Records the /unfolding/ of an identifier, which is approximately the form the
-- identifier would have if we substituted its definition in for the identifier.
-- This type should be treated as abstract everywhere except in "CoreUnfold"
1445
data Unfolding
1446 1447 1448 1449
  = NoUnfolding        -- ^ We have no information about the unfolding.

  | BootUnfolding      -- ^ We have no information about the unfolding, because
                       -- this 'Id' came from an @hi-boot@ file.
1450
                       -- See Note [Inlining and hs-boot files] in GHC.CoreToIface
1451
                       -- for what this is used for.
1452 1453

  | OtherCon [AltCon]  -- ^ It ain't one of these constructors.
1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466
                       -- @OtherCon xs@ also indicates that something has been evaluated
                       -- and hence there's no point in re-evaluating it.
                       -- @OtherCon []@ is used even for non-data-type values
                       -- to indicated evaluated-ness.  Notably:
                       --
                       -- > data C = C !(Int -> Int)
                       -- > case x of { C f -> ... }
                       --
                       -- Here, @f@ gets an @OtherCon []@ unfolding.

  | DFunUnfolding {     -- The Unfolding of a DFunId
                        -- See Note [DFun unfoldings]
                        --     df = /\a1..am. \d1..dn. MkD t1 .. tk
1467
                        --                                 (op1 a1..am d1..dn)
1468
                        --                                 (op2 a1..am d1..dn)
1469 1470 1471 1472
        df_bndrs :: [Var],      -- The bound variables [a1..m],[d1..dn]
        df_con   :: DataCon,    -- The dictionary data constructor (never a newtype datacon)
        df_args  :: [CoreExpr]  -- Args of the data con: types, superclasses and methods,
    }                           -- in positional order
1473

1474
  | CoreUnfolding {             -- An unfolding for an Id with no pragma,
1475
                                -- or perhaps a NOINLINE pragma
1476
                                -- (For NOINLINE, the phase, if any, is in the