DataCon.lhs 39.8 KB
Newer Older
1
%
Simon Marlow's avatar
Simon Marlow committed
2
% (c) The University of Glasgow 2006
3 4
% (c) The GRASP/AQUA Project, Glasgow University, 1998
%
5
\section[DataCon]{@DataCon@: Data Constructors}
6 7

\begin{code}
Ian Lynagh's avatar
Ian Lynagh committed
8 9 10 11 12 13 14
{-# OPTIONS -fno-warn-tabs #-}
-- The above warning suppression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and
-- detab the module (please do the detabbing in a separate patch). See
--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
-- for details

15
module DataCon (
batterseapower's avatar
batterseapower committed
16
        -- * Main data types
17
	DataCon, DataConRep(..), HsBang(..), StrictnessMark(..),
batterseapower's avatar
batterseapower committed
18 19 20 21
	ConTag,
	
	-- ** Type construction
	mkDataCon, fIRST_TAG,
22
        buildAlgTyCon, 
batterseapower's avatar
batterseapower committed
23 24
	
	-- ** Type deconstruction
25
	dataConRepType, dataConSig, dataConFullSig,
26 27
	dataConName, dataConIdentity, dataConTag, dataConTyCon, 
        dataConOrigTyCon, dataConUserType,
28
	dataConUnivTyVars, dataConExTyVars, dataConAllTyVars, 
29
	dataConEqSpec, eqSpecPreds, dataConTheta,
30
	dataConStupidTheta,  
31
	dataConInstArgTys, dataConOrigArgTys, dataConOrigResTy,
32
	dataConInstOrigArgTys, dataConRepArgTys, 
33
	dataConFieldLabels, dataConFieldType,
34
	dataConStrictMarks, 
35
	dataConSourceArity, dataConRepArity, dataConRepRepArity,
36
	dataConIsInfix,
37
	dataConWorkId, dataConWrapId, dataConWrapId_maybe, dataConImplicitIds,
38
	dataConRepStrictness, dataConRepBangs, dataConBoxer,
batterseapower's avatar
batterseapower committed
39 40
	
	-- ** Predicates on DataCons
41
	isNullarySrcDataCon, isNullaryRepDataCon, isTupleDataCon, isUnboxedTupleCon,
42
	isVanillaDataCon, classDataCon, dataConCannotMatch,
43
        isBanged, isMarkedStrict, eqHsBang,
44

45
        -- ** Promotion related functions
46
        promoteKind, promoteDataCon, promoteDataCon_maybe
47 48 49 50
    ) where

#include "HsVersions.h"

51
import {-# SOURCE #-} MkId( DataConBoxer )
Simon Marlow's avatar
Simon Marlow committed
52
import Type
53
import TypeRep( Type(..) )  -- Used in promoteType
54
import PrelNames( liftedTypeKindTyConKey )
55
import ForeignCall( CType )
56
import Coercion
57
import Kind
58
import Unify
Simon Marlow's avatar
Simon Marlow committed
59 60 61 62
import TyCon
import Class
import Name
import Var
63
import Outputable
Simon Marlow's avatar
Simon Marlow committed
64 65 66
import Unique
import ListSetOps
import Util
batterseapower's avatar
batterseapower committed
67
import BasicTypes
68
import FastString
69
import Module
70
import VarEnv
71

72
import qualified Data.Data as Data
73
import qualified Data.Typeable
74
import Data.Maybe
75 76
import Data.Char
import Data.Word
77 78 79
\end{code}


80 81 82
Data constructor representation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider the following Haskell data type declaration
83

84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
	data T = T !Int ![Int]

Using the strictness annotations, GHC will represent this as

	data T = T Int# [Int]

That is, the Int has been unboxed.  Furthermore, the Haskell source construction

	T e1 e2

is translated to

	case e1 of { I# x -> 
	case e2 of { r ->
	T x r }}

That is, the first argument is unboxed, and the second is evaluated.  Finally,
pattern matching is translated too:

	case e of { T a b -> ... }

becomes

	case e of { T a' b -> let a = I# a' in ... }

To keep ourselves sane, we name the different versions of the data constructor
differently, as follows.


Note [Data Constructor Naming]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
115
Each data constructor C has two, and possibly up to four, Names associated with it:
116

117 118 119 120 121 122
		   OccName   Name space	  Name of   Notes
 ---------------------------------------------------------------------------
 The "data con itself" 	 C     DataName	  DataCon   In dom( GlobalRdrEnv )
 The "worker data con"	 C     VarName	  Id        The worker
 The "wrapper data con"	 $WC   VarName	  Id        The wrapper
 The "newtype coercion"  :CoT  TcClsName  TyCon
123 124 125 126 127 128
 
EVERY data constructor (incl for newtypes) has the former two (the
data con itself, and its worker).  But only some data constructors have a
wrapper (see Note [The need for a wrapper]).

Each of these three has a distinct Unique.  The "data con itself" name
129 130 131 132 133 134
appears in the output of the renamer, and names the Haskell-source
data constructor.  The type checker translates it into either the wrapper Id
(if it exists) or worker Id (otherwise).

The data con has one or two Ids associated with it:

135 136
The "worker Id", is the actual data constructor.
* Every data constructor (newtype or data type) has a worker
137

138
* The worker is very like a primop, in that it has no binding.
139

140 141
* For a *data* type, the worker *is* the data constructor;
  it has no unfolding
142

143 144 145 146
* For a *newtype*, the worker has a compulsory unfolding which 
  does a cast, e.g.
	newtype T = MkT Int
	The worker for MkT has unfolding
batterseapower's avatar
batterseapower committed
147
		\\(x:Int). x `cast` sym CoT
148 149
  Here CoT is the type constructor, witnessing the FC axiom
	axiom CoT : T = Int
150

batterseapower's avatar
batterseapower committed
151
The "wrapper Id", \$WC, goes as follows
152 153 154 155 156 157 158 159

* Its type is exactly what it looks like in the source program. 

* It is an ordinary function, and it gets a top-level binding 
  like any other function.

* The wrapper Id isn't generated for a data type if there is
  nothing for the wrapper to do.  That is, if its defn would be
batterseapower's avatar
batterseapower committed
160
	\$wC = C
161

162 163
Note [The need for a wrapper]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
164 165 166 167
Why might the wrapper have anything to do?  Two reasons:

* Unboxing strict fields (with -funbox-strict-fields)
	data T = MkT !(Int,Int)
batterseapower's avatar
batterseapower committed
168 169
	\$wMkT :: (Int,Int) -> T
	\$wMkT (x,y) = MkT x y
170 171 172 173 174 175 176 177 178 179 180 181
  Notice that the worker has two fields where the wrapper has 
  just one.  That is, the worker has type
		MkT :: Int -> Int -> T

* Equality constraints for GADTs
	data T a where { MkT :: a -> T [a] }

  The worker gets a type with explicit equality
  constraints, thus:
	MkT :: forall a b. (a=[b]) => b -> T a

  The wrapper has the programmer-specified type:
batterseapower's avatar
batterseapower committed
182 183
	\$wMkT :: a -> T [a]
	\$wMkT a x = MkT [a] a [a] x
184
  The third argument is a coercion
185
	[a] :: [a]~[a]
186

187 188
INVARIANT: the dictionary constructor for a class
	   never has a wrapper.
189 190


191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
A note about the stupid context
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Data types can have a context:
	
	data (Eq a, Ord b) => T a b = T1 a b | T2 a

and that makes the constructors have a context too
(notice that T2's context is "thinned"):

	T1 :: (Eq a, Ord b) => a -> b -> T a b
	T2 :: (Eq a) => a -> T a b

Furthermore, this context pops up when pattern matching
(though GHC hasn't implemented this, but it is in H98, and
I've fixed GHC so that it now does):

	f (T2 x) = x
gets inferred type
	f :: Eq a => T a b -> a

I say the context is "stupid" because the dictionaries passed
are immediately discarded -- they do nothing and have no benefit.
It's a flaw in the language.

215 216 217 218 219 220 221 222 223
	Up to now [March 2002] I have put this stupid context into the
	type of the "wrapper" constructors functions, T1 and T2, but
	that turned out to be jolly inconvenient for generics, and
	record update, and other functions that build values of type T
	(because they don't have suitable dictionaries available).

	So now I've taken the stupid context out.  I simply deal with
	it separately in the type checker on occurrences of a
	constructor, either in an expression or in a pattern.
224

225 226 227 228 229 230
	[May 2003: actually I think this decision could easily be
	reversed now, and probably should be.  Generics could be
	disabled for types with a stupid context; record updates now
	(H98) needs the context too; etc.  It's an unforced change, so
	I'm leaving it for now --- but it does seem odd that the
	wrapper doesn't include the stupid context.]
231

232 233 234 235
[July 04] With the advent of generalised data types, it's less obvious
what the "stupid context" is.  Consider
	C :: forall a. Ord a => a -> a -> T (Foo a)
Does the C constructor in Core contain the Ord dictionary?  Yes, it must:
236

237 238 239 240
	f :: T b -> Ordering
	f = /\b. \x:T b. 
	    case x of
		C a (d:Ord a) (p:a) (q:a) -> compare d p q
241

242
Note that (Foo a) might not be an instance of Ord.
243

244 245 246 247 248 249 250
%************************************************************************
%*									*
\subsection{Data constructors}
%*									*
%************************************************************************

\begin{code}
batterseapower's avatar
batterseapower committed
251
-- | A data constructor
252
data DataCon
253
  = MkData {
254 255
	dcName    :: Name,	-- This is the name of the *source data con*
				-- (see "Note [Data Constructor Naming]" above)
256
	dcUnique :: Unique, 	-- Cached from Name
batterseapower's avatar
batterseapower committed
257
	dcTag    :: ConTag,     -- ^ Tag, used for ordering 'DataCon's
258 259 260

	-- Running example:
	--
261 262
	-- 	*** As declared by the user
	--  data T a where
263
	--    MkT :: forall x y. (x~y,Ord x) => x -> y -> T (x,y)
264

265 266
	-- 	*** As represented internally
	--  data T a where
267
	--    MkT :: forall a. forall x y. (a~(x,y),x~y,Ord x) => x -> y -> T a
268
	-- 
269 270 271
	-- The next six fields express the type of the constructor, in pieces
	-- e.g.
	--
272 273
	--	dcUnivTyVars  = [a]
	--	dcExTyVars    = [x,y]
274
	--	dcEqSpec      = [a~(x,y)]
275
	--	dcOtherTheta  = [x~y, Ord x]	
Simon Peyton Jones's avatar
Simon Peyton Jones committed
276
	--	dcOrigArgTys  = [x,y]
277
	--	dcRepTyCon       = T
278 279 280 281

	dcVanilla :: Bool,	-- True <=> This is a vanilla Haskell 98 data constructor
				--	    Its type is of form
				--	        forall a1..an . t1 -> ... tm -> T a1..an
282
				-- 	    No existentials, no coercions, nothing.
283
				-- That is: dcExTyVars = dcEqSpec = dcOtherTheta = []
284 285 286 287 288
		-- NB 1: newtypes always have a vanilla data con
		-- NB 2: a vanilla constructor can still be declared in GADT-style 
		--	 syntax, provided its type looks like the above.
		--       The declaration format is held in the TyCon (algTcGadtSyntax)

simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
289
	dcUnivTyVars :: [TyVar],	-- Universally-quantified type vars [a,b,c]
290
					-- INVARIANT: length matches arity of the dcRepTyCon
simonpj@microsoft.com's avatar
simonpj@microsoft.com committed
291
					---           result type of (rep) data con is exactly (T a b c)
292

293 294
	dcExTyVars   :: [TyVar],	-- Existentially-quantified type vars 
		-- In general, the dcUnivTyVars are NOT NECESSARILY THE SAME AS THE TYVARS
295 296 297 298
		-- FOR THE PARENT TyCon. With GADTs the data con might not even have 
		-- the same number of type variables.
		-- [This is a change (Oct05): previously, vanilla datacons guaranteed to
		--  have the same type variables as their parent TyCon, but that seems ugly.]
299

300 301 302
	-- INVARIANT: the UnivTyVars and ExTyVars all have distinct OccNames
	-- Reason: less confusing, and easier to generate IfaceSyn

303
	dcEqSpec :: [(TyVar,Type)],	-- Equalities derived from the result type, 
Ian Lynagh's avatar
Ian Lynagh committed
304
					-- _as written by the programmer_
305 306
		-- This field allows us to move conveniently between the two ways
		-- of representing a GADT constructor's type:
307
		--	MkT :: forall a b. (a ~ [b]) => b -> T a
308
		--	MkT :: forall b. b -> T [b]
309
		-- Each equality is of the form (a ~ ty), where 'a' is one of 
310 311
		-- the universally quantified type variables
					
312 313 314
		-- The next two fields give the type context of the data constructor
		-- 	(aside from the GADT constraints, 
		--	 which are given by the dcEqSpec)
315 316
		-- In GADT form, this is *exactly* what the programmer writes, even if
		-- the context constrains only universally quantified variables
317
		--	MkT :: forall a b. (a ~ b, Ord b) => a -> T a b
318
	dcOtherTheta :: ThetaType,  -- The other constraints in the data con's type
319
		                    -- other than those in the dcEqSpec
320 321 322 323

	dcStupidTheta :: ThetaType,	-- The context of the data type declaration 
					--	data Eq a => T a = ...
					-- or, rather, a "thinned" version thereof
324 325 326 327
		-- "Thinned", because the Report says
		-- to eliminate any constraints that don't mention
		-- tyvars free in the arg types for this constructor
		--
328 329
		-- INVARIANT: the free tyvars of dcStupidTheta are a subset of dcUnivTyVars
		-- Reason: dcStupidTheta is gotten by thinning the stupid theta from the tycon
330
		-- 
331 332 333 334
		-- "Stupid", because the dictionaries aren't used for anything.  
		-- Indeed, [as of March 02] they are no longer in the type of 
		-- the wrapper Id, because that makes it harder to use the wrap-id 
		-- to rebuild values after record selection or in generics.
335

336
	dcOrigArgTys :: [Type],		-- Original argument types
337
					-- (before unboxing and flattening of strict fields)
338
	dcOrigResTy :: Type,		-- Original result type, as seen by the user
339 340 341 342
		-- NB: for a data instance, the original user result type may 
		-- differ from the DataCon's representation TyCon.  Example
		--	data instance T [a] where MkT :: a -> T [a]
		-- The OrigResTy is T [a], but the dcRepTyCon might be :T123
343

344
	-- Now the strictness annotations and field labels of the constructor
345
        -- See Note [Bangs on data constructor arguments]
346
	dcArgBangs :: [HsBang],
347
		-- Strictness annotations as decided by the compiler.  
348 349
		-- Matches 1-1 with dcOrigArgTys
		-- Hence length = dataConSourceArity dataCon
350 351 352

	dcFields  :: [FieldLabel],
		-- Field labels for this constructor, in the
353
		-- same order as the dcOrigArgTys; 
354
		-- length = 0 (if not a record) or dataConSourceArity.
355

356 357 358 359 360
	-- The curried worker function that corresponds to the constructor:
	-- It doesn't have an unfolding; the code generator saturates these Ids
	-- and allocates a real constructor when it finds one.
	dcWorkId :: Id,

361
	-- Constructor representation
362
        dcRep      :: DataConRep,
363

364 365 366
        -- Cached
        dcRepArity    :: Arity,  -- == length dataConRepArgTys
        dcSourceArity :: Arity,  -- == length dcOrigArgTys
367

368 369 370
	-- Result type of constructor is T t1..tn
	dcRepTyCon  :: TyCon,		-- Result tycon, T

371
	dcRepType   :: Type,	-- Type of the constructor
372
				-- 	forall a x y. (a~(x,y), x~y, Ord x) =>
373
                                --        x -> y -> T a
374
				-- (this is *not* of the constructor wrapper Id:
375
				--  see Note [Data con representation] below)
376 377
	-- Notice that the existential type parameters come *second*.  
	-- Reason: in a case expression we may find:
378 379
	--	case (e :: T t) of
        --        MkT x y co1 co2 (d:Ord x) (v:r) (w:F s) -> ...
380
	-- It's convenient to apply the rep-type of MkT to 't', to get
381
	--	forall x y. (t~(x,y), x~y, Ord x) => x -> y -> T t
382
	-- and use that to check the pattern.  Mind you, this is really only
383
	-- used in CoreLint.
384 385


386
	dcInfix :: Bool,	-- True <=> declared infix
387 388
				-- Used for Template Haskell and 'deriving' only
				-- The actual fixity is stored elsewhere
389 390

        dcPromoted :: Maybe TyCon    -- The promoted TyCon if this DataCon is promotable
391
                                     -- See Note [Promoted data constructors] in TyCon
392
  }
393
  deriving Data.Typeable.Typeable
394

395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411
data DataConRep 
  = NoDataConRep              -- No wrapper

  | DCR { dcr_wrap_id :: Id   -- Takes src args, unboxes/flattens, 
                              -- and constructs the representation

        , dcr_boxer   :: DataConBoxer

        , dcr_arg_tys :: [Type]  -- Final, representation argument types, 
                                 -- after unboxing and flattening,
                                 -- and *including* all evidence args

        , dcr_stricts :: [StrictnessMark]  -- 1-1 with dcr_arg_tys
		-- See also Note [Data-con worker strictness] in MkId.lhs

        , dcr_bangs :: [HsBang]  -- The actual decisions made (including failures)
                                 -- 1-1 with orig_arg_tys
412 413
                                 -- See Note [Bangs on data constructor arguments]

414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
    }
-- Algebraic data types always have a worker, and
-- may or may not have a wrapper, depending on whether
-- the wrapper does anything.  
--
-- Data types have a worker with no unfolding
-- Newtypes just have a worker, which has a compulsory unfolding (just a cast)

-- _Neither_ the worker _nor_ the wrapper take the dcStupidTheta dicts as arguments

-- The wrapper (if it exists) takes dcOrigArgTys as its arguments
-- The worker takes dataConRepArgTys as its arguments
-- If the worker is absent, dataConRepArgTys is the same as dcOrigArgTys

-- The 'NoDataConRep' case is important
-- Not only is this efficient,
-- but it also ensures that the wrapper is replaced
-- by the worker (because it *is* the worker)
-- even when there are no args. E.g. in
-- 		f (:) x
-- the (:) *is* the worker.
-- This is really important in rule matching,
-- (We could match on the wrappers,
-- but that makes it less likely that rules will match
-- when we bring bits of unfoldings together.)

440 441 442 443
-------------------------
-- HsBang describes what the *programmer* wrote
-- This info is retained in the DataCon.dcArgBangs field
data HsBang 
444 445 446 447 448
  = HsUserBang   -- The user's source-code request
       (Maybe Bool)       -- Just True    {-# UNPACK #-}
                          -- Just False   {-# NOUNPACK #-}
                          -- Nothing      no pragma
       Bool               -- True <=> '!' specified
449

450 451
  | HsNoBang	          -- Lazy field
                          -- HsUserBang Nothing False means the same as HsNoBang
452 453 454 455

  | HsUnpack              -- Definite commitment: this field is strict and unboxed
       (Maybe Coercion)   --    co :: arg-ty ~ product-ty

456
  | HsStrict              -- Definite commitment: this field is strict but not unboxed
457 458 459 460 461 462
  deriving (Data.Data, Data.Typeable)

-------------------------
-- | 'StrictnessMark' is internal only, used to indicate strictness
-- of the 'DataCon' *worker* fields (it is the element type of the
-- @dcr_stricts@ field of 'DataConRep'; test it with 'isMarkedStrict')
data StrictnessMark = MarkedStrict | NotMarkedStrict	
463 464
\end{code}

465 466
Note [Data con representation]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
467
The dcRepType field contains the type of the representation of a constructor
468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
This may differ from the type of the constructor *Id* (built
by MkId.mkDataConId) for two reasons:
	a) the constructor Id may be overloaded, but the dictionary isn't stored
	   e.g.    data Eq a => T a = MkT a a

	b) the constructor may store an unboxed version of a strict field.

Here's an example illustrating both:
	data Ord a => T a = MkT Int! a
Here
	T :: Ord a => Int -> a -> T a
but the rep type is
	Trep :: Int# -> a -> T a
Actually, the unboxed part isn't implemented yet!

483 484 485 486 487
Note [Bangs on data constructor arguments]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider
  data T = MkT !Int {-# UNPACK #-} !Int Bool
Its dcArgBangs field records the *users* specifications, in this case
488 489 490
    [ HsUserBang Nothing True
    , HsUserBang (Just True) True
    , HsNoBang]
491 492 493 494 495 496 497 498 499 500 501 502 503
See the declaration of HsBang above (it is defined in this module)

The dcr_bangs field of the dcRep field records the *actual, decided*
representation of the data constructor.  Without -O this might be
    [HsStrict, HsStrict, HsNoBang]
With -O it might be
    [HsStrict, HsUnpack, HsNoBang]
With -funbox-small-strict-fields it might be
    [HsUnpack, HsUnpack, HsNoBang]

For imported data types, the dcArgBangs field is just the same as the
dcr_bangs field; we don't know what the user originally said.

504

505 506 507 508 509 510
%************************************************************************
%*									*
\subsection{Instances}
%*									*
%************************************************************************

511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
\begin{code}
-- | Two 'DataCon's are equal exactly when their cached 'Unique's are equal
instance Eq DataCon where
    dc1 == dc2 = dcUnique dc1 == dcUnique dc2
    dc1 /= dc2 = dcUnique dc1 /= dcUnique dc2

-- | 'DataCon's are ordered by their cached 'Unique's
instance Ord DataCon where
    compare dc1 dc2 = compare (dcUnique dc1) (dcUnique dc2)
    dc1 <  dc2 = dcUnique dc1 <  dcUnique dc2
    dc1 <= dc2 = dcUnique dc1 <= dcUnique dc2
    dc1 >  dc2 = dcUnique dc1 >  dcUnique dc2
    dc1 >= dc2 = dcUnique dc1 >= dcUnique dc2

-- | The 'Unique' of a 'DataCon' is the one cached in its @dcUnique@ field
instance Uniquable DataCon where
    getUnique dc = dcUnique dc

-- | The 'Name' of a 'DataCon' is its @dcName@ field
instance NamedThing DataCon where
    getName dc = dcName dc

-- | A 'DataCon' is printed by printing its 'Name'
instance Outputable DataCon where
    ppr = ppr . dataConName

532 533 534 535
-- | Prefix and infix occurrences are printed via the constructor's 'Name'
instance OutputableBndr DataCon where
    pprPrefixOcc = pprPrefixName . dataConName
    pprInfixOcc  = pprInfixName  . dataConName

536 537 538 539 540
instance Data.Data DataCon where
    -- don't traverse?
    -- A 'DataCon' is presented to generic code under the name "DataCon"
    -- with no representation; unfolding one is an error.
    dataTypeOf _ = mkNoRepType "DataCon"
    toConstr _   = abstractConstr "DataCon"
    gunfold _ _  = error "gunfold"
541 542

-- | Pretty-print an 'HsBang'.
--
--   * 'HsNoBang' prints as nothing
--   * 'HsUserBang' prints its pragma (via 'pp_unpk') followed by a @!@
--     when the bang flag is set
--   * 'HsUnpack' prints as @Unpk@, followed by the coercion in
--     parentheses when there is one
--   * 'HsStrict' prints as @StrictNotUnpacked@
instance Outputable HsBang where
    ppr HsNoBang               = empty
    ppr (HsUserBang prag bang) = pp_unpk prag <+> ppWhen bang (char '!')
    ppr (HsUnpack Nothing)     = ptext (sLit "Unpk")
    ppr (HsUnpack (Just co))   = ptext (sLit "Unpk") <> parens (ppr co)
    -- The printed text used to be misspelled "SrictNotUnpacked"
    ppr HsStrict               = ptext (sLit "StrictNotUnpacked")

-- | Print the unpack pragma carried by an 'HsUserBang':
-- nothing when there is no pragma, otherwise the UNPACK (for 'True')
-- or NOUNPACK (for 'False') pragma text
pp_unpk :: Maybe Bool -> SDoc
pp_unpk mb_prag
  = case mb_prag of
      Nothing    -> empty
      Just True  -> ptext (sLit "{-# UNPACK #-}")
      Just False -> ptext (sLit "{-# NOUNPACK #-}")
553 554 555 556 557 558 559

-- | A strict mark prints as @!@; a non-strict one prints as nothing
instance Outputable StrictnessMark where
  ppr mark = case mark of
    MarkedStrict    -> ptext (sLit "!")
    NotMarkedStrict -> empty


-- | Equality on 'HsBang's.  Two bangs are equal when they are built with
-- the same constructor and their payloads agree; the coercions carried by
-- two 'HsUnpack's are compared by comparing their 'coercionType's with
-- 'eqType'.
eqHsBang :: HsBang -> HsBang -> Bool
eqHsBang bang1 bang2
  = case (bang1, bang2) of
      (HsNoBang,         HsNoBang)         -> True
      (HsStrict,         HsStrict)         -> True
      (HsUserBang u1 b1, HsUserBang u2 b2) -> u1 == u2 && b1 == b2
      (HsUnpack mb_co1,  HsUnpack mb_co2)  -> eq_mb_co mb_co1 mb_co2
      _                                    -> False
  where
    -- An unpack with a coercion never equals one without
    eq_mb_co Nothing    Nothing    = True
    eq_mb_co (Just co1) (Just co2) = eqType (coercionType co1) (coercionType co2)
    eq_mb_co _          _          = False

-- | Is this 'HsBang' a banged one?  'HsNoBang' is not; an 'HsUserBang'
-- without a pragma is banged exactly when its bang flag is set; every
-- other form counts as banged.
isBanged :: HsBang -> Bool
isBanged bang
  = case bang of
      HsNoBang                  -> False
      HsUserBang Nothing banged -> banged
      _                         -> True
571 572 573 574

-- | 'True' for 'MarkedStrict', 'False' for 'NotMarkedStrict'
isMarkedStrict :: StrictnessMark -> Bool
isMarkedStrict mark
  = case mark of
      NotMarkedStrict -> False
      MarkedStrict    -> True
575 576
\end{code}

577 578 579

%************************************************************************
%*									*
580
\subsection{Construction}
581 582 583
%*									*
%************************************************************************

584
\begin{code}
batterseapower's avatar
batterseapower committed
585
-- | Build a new data constructor
586
mkDataCon :: Name 
batterseapower's avatar
batterseapower committed
587
	  -> Bool	        -- ^ Is the constructor declared infix?
588
	  -> [HsBang]           -- ^ Strictness annotations written in the source file
589 590
	  -> [FieldLabel]       -- ^ Field labels for the constructor, if it is a record, 
				--   otherwise empty
batterseapower's avatar
batterseapower committed
591 592 593 594
	  -> [TyVar]            -- ^ Universally quantified type variables
	  -> [TyVar]            -- ^ Existentially quantified type variables
	  -> [(TyVar,Type)]     -- ^ GADT equalities
	  -> ThetaType          -- ^ Theta-type occuring before the arguments proper
595 596 597 598 599
	  -> [Type]             -- ^ Original argument types
	  -> Type		-- ^ Original result type
	  -> TyCon              -- ^ Representation type constructor
	  -> ThetaType          -- ^ The "stupid theta", context of the data declaration 
				--   e.g. @data Eq a => T a ...@
600 601
          -> Id                 -- ^ Worker Id
	  -> DataConRep         -- ^ Representation
602 603 604
	  -> DataCon
  -- Can get the tag from the TyCon

605
mkDataCon name declared_infix
606
	  arg_stricts	-- Must match orig_arg_tys 1-1
607
	  fields
608 609
	  univ_tvs ex_tvs 
	  eq_spec theta
610
	  orig_arg_tys orig_res_ty rep_tycon
611
	  stupid_theta work_id rep
612 613 614 615 616 617 618 619
-- Warning: mkDataCon is not a good place to check invariants. 
-- If the programmer writes the wrong result type in the decl, thus:
--	data T a where { MkT :: S }
-- then it's possible that the univ_tvs may hit an assertion failure
-- if you pull on univ_tvs.  This case is checked by checkValidDataCon,
-- so the error is detected properly... it's just that assertions here
-- are a little dodgy.

620
  = con
621
  where
622
    is_vanilla = null ex_tvs && null eq_spec && null theta
623
    con = MkData {dcName = name, dcUnique = nameUnique name, 
624 625 626
		  dcVanilla = is_vanilla, dcInfix = declared_infix,
	  	  dcUnivTyVars = univ_tvs, dcExTyVars = ex_tvs, 
		  dcEqSpec = eq_spec, 
627
		  dcOtherTheta = theta,
628
		  dcStupidTheta = stupid_theta, 
629
		  dcOrigArgTys = orig_arg_tys, dcOrigResTy = orig_res_ty,
630
		  dcRepTyCon = rep_tycon, 
631 632 633 634 635 636
		  dcArgBangs = arg_stricts, 
		  dcFields = fields, dcTag = tag, dcRepType = rep_ty,
		  dcWorkId = work_id,
                  dcRep = rep, 
                  dcSourceArity = length orig_arg_tys,
                  dcRepArity = length rep_arg_tys,
637
                  dcPromoted = mb_promoted }
638

639 640 641
	-- The 'arg_stricts' passed to mkDataCon are simply those for the
	-- source-language arguments.  We add extra ones for the
	-- dictionary arguments right here.
642

643
    tag = assoc "mkDataCon" (tyConDataCons rep_tycon `zip` [fIRST_TAG..]) con
644 645 646 647
    rep_arg_tys = dataConRepArgTys con
    rep_ty = mkForAllTys univ_tvs $ mkForAllTys ex_tvs $ 
	     mkFunTys rep_arg_tys $
	     mkTyConApp rep_tycon (mkTyVarTys univ_tvs)
648

649
    mb_promoted   -- See Note [Promoted data constructors] in TyCon
650 651 652
      | isJust (promotableTyCon_maybe rep_tycon)
          -- The TyCon is promotable only if all its datacons
          -- are, so the promoteType for prom_kind should succeed
653
      = Just (mkPromotedDataCon con name (getUnique name) prom_kind roles)
654 655 656
      | otherwise 
      = Nothing          
    prom_kind = promoteType (dataConUserType con)
657 658
    roles = map (const Nominal)          (univ_tvs ++ ex_tvs) ++
            map (const Representational) orig_arg_tys
659

660
-- | Turn each @(tyvar, type)@ pair of an equality spec into the
-- corresponding equality predicate, built with 'mkEqPred'
eqSpecPreds :: [(TyVar,Type)] -> ThetaType
eqSpecPreds = map to_pred
  where
    to_pred (tv, ty) = mkEqPred (mkTyVarTy tv) ty
662 663
\end{code}

Simon Peyton Jones's avatar
Simon Peyton Jones committed
664 665 666 667 668 669 670 671 672
Note [Unpack equality predicates]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If we have a GADT with a constructor C :: (a~[b]) => b -> T a
we definitely want that equality predicate *unboxed* so that it
takes no space at all.  This is easily done: just give it
an UNPACK pragma. The rest of the unpack/repack code does the
heavy lifting.  This one line makes every GADT take a word less
space for each equality predicate, so it's pretty important!

673
\begin{code}
batterseapower's avatar
batterseapower committed
674
-- | The 'Name' of the 'DataCon' (its @dcName@ field), i.e. the name of
-- the source data constructor
dataConName :: DataCon -> Name
dataConName dc = dcName dc

batterseapower's avatar
batterseapower committed
678
-- | The 'ConTag' of the 'DataCon', used for ordering 'DataCon's
dataConTag :: DataCon -> ConTag
dataConTag dc = dcTag dc

batterseapower's avatar
batterseapower committed
682
-- | The representation 'TyCon' of the constructor's result type
-- (the @dcRepTyCon@ field)
dataConTyCon :: DataCon -> TyCon
dataConTyCon dc = dcRepTyCon dc
685

686 687 688 689 690 691 692 693
-- | The type constructor originally used in the definition of this data
-- constructor.  When the representation 'TyCon' is a family instance
-- (according to 'tyConFamInst_maybe') this is the family 'TyCon';
-- otherwise it is the representation 'TyCon' itself.
dataConOrigTyCon :: DataCon -> TyCon
dataConOrigTyCon dc
  = case tyConFamInst_maybe rep_tc of
      Just (fam_tc, _) -> fam_tc
      Nothing          -> rep_tc
  where
    rep_tc = dcRepTyCon dc

batterseapower's avatar
batterseapower committed
694 695
-- | The type of the representation of the data constructor (the
-- @dcRepType@ field); see Note [Data con representation]
dataConRepType :: DataCon -> Type
dataConRepType dc = dcRepType dc
698

batterseapower's avatar
batterseapower committed
699
-- | Was the 'DataCon' declared infix? (the @dcInfix@ field)
dataConIsInfix :: DataCon -> Bool
dataConIsInfix dc = dcInfix dc

batterseapower's avatar
batterseapower committed
703
-- | The universally-quantified type variables of the constructor
dataConUnivTyVars :: DataCon -> [TyVar]
dataConUnivTyVars dc = dcUnivTyVars dc

batterseapower's avatar
batterseapower committed
707
-- | The existentially-quantified type variables of the constructor
dataConExTyVars :: DataCon -> [TyVar]
dataConExTyVars dc = dcExTyVars dc

batterseapower's avatar
batterseapower committed
711
-- | Both the universal and existential type variables of the constructor,
-- universals first
dataConAllTyVars :: DataCon -> [TyVar]
dataConAllTyVars dc = dcUnivTyVars dc ++ dcExTyVars dc

batterseapower's avatar
batterseapower committed
716 717
-- | The equalities derived from the result type of the data constructor,
-- as written by the programmer in any GADT declaration (the @dcEqSpec@
-- field)
dataConEqSpec :: DataCon -> [(TyVar,Type)]
dataConEqSpec dc = dcEqSpec dc

721 722 723 724
-- | The *full* constraints on the constructor type: the equality
-- predicates generated from @dcEqSpec@ (via 'eqSpecPreds'), followed by
-- the other constraints in @dcOtherTheta@
dataConTheta :: DataCon -> ThetaType
dataConTheta dc = eqSpecPreds (dcEqSpec dc) ++ dcOtherTheta dc
725

batterseapower's avatar
batterseapower committed
726 727 728 729
-- | Get the 'Id' of the 'DataCon' worker: a function that is the "actual"
-- constructor and has no top level binding in the program. The type may
-- be different from the obvious one written in the source program.
-- This simply reads the @dcWorkId@ field of the 'DataCon'.
dataConWorkId :: DataCon -> Id
dataConWorkId = dcWorkId
732

batterseapower's avatar
batterseapower committed
733 734 735 736
-- | Get the 'Id' of the 'DataCon' wrapper: a function that wraps the "actual"
-- constructor so it has the type visible in the source program: c.f. 'dataConWorkId'.
-- Returns 'Nothing' when the constructor's representation is 'NoDataConRep',
-- i.e. when there is no wrapper.
dataConWrapId_maybe :: DataCon -> Maybe Id
dataConWrapId_maybe dc
  | DCR { dcr_wrap_id = wrap_id } <- dcRep dc = Just wrap_id
  | otherwise                                 = Nothing
741

batterseapower's avatar
batterseapower committed
742 743 744
-- | Returns an 'Id' which looks like the Haskell-source constructor: the
-- wrapper if there is one (see 'dataConWrapId_maybe'), and otherwise the
-- worker (see 'dataConWorkId'), which then doubles as the wrapper
dataConWrapId :: DataCon -> Id
dataConWrapId dc = fromMaybe (dcWorkId dc) (dataConWrapId_maybe dc)
749

batterseapower's avatar
batterseapower committed
750 751
-- | All the 'Id's implicitly brought into scope by the data constructor:
-- the worker alone when there is no separate representation, otherwise the
-- wrapper followed by the worker
dataConImplicitIds :: DataCon -> [Id]
dataConImplicitIds dc = ids_for (dcRep dc)
  where
    worker = dcWorkId dc

    ids_for NoDataConRep                    = [worker]
    ids_for (DCR { dcr_wrap_id = wrapper }) = [wrapper, worker]
757

batterseapower's avatar
batterseapower committed
758
-- | The field labels of this particular 'DataCon'
dataConFieldLabels :: DataCon -> [FieldLabel]
dataConFieldLabels dc = dcFields dc

batterseapower's avatar
batterseapower committed
762
-- | The type of the given labelled field of the 'DataCon', found by pairing
-- the field labels with the original argument types positionally.
-- Panics (via 'pprPanic') if the label is not one of the constructor's fields.
dataConFieldType :: DataCon -> FieldLabel -> Type
dataConFieldType con label = pick (lookup label labelled_tys)
  where
    labelled_tys = zip (dcFields con) (dcOrigArgTys con)

    pick (Just ty) = ty
    pick Nothing   = pprPanic "dataConFieldType" (ppr con <+> ppr label)
768

batterseapower's avatar
batterseapower committed
769 770
-- | The strictness markings recorded for the constructor's arguments
-- (its @dcArgBangs@ field). Per the original note: these do not include
-- markings for existential dictionaries, and the list is in one-to-one
-- correspondence with the arity of the 'DataCon'
dataConStrictMarks :: DataCon -> [HsBang]
dataConStrictMarks dc = dcArgBangs dc
773

batterseapower's avatar
batterseapower committed
774
-- | The source-level arity of the data constructor
dataConSourceArity :: DataCon -> Arity
dataConSourceArity = dcSourceArity
777

batterseapower's avatar
batterseapower committed
778 779 780
-- | The number of actual fields in the /representation/ of the data
-- constructor. Per the original note, this may exceed the number appearing
-- in the source code; the extra ones are the existentially quantified
-- dictionaries
dataConRepArity :: DataCon -> Arity
dataConRepArity = dcRepArity

784

785 786 787 788 789
-- | The number of fields in the /representation/ of the constructor
-- AFTER taking into account the unpacking of any unboxed tuple fields.
-- Computed by 'typeRepArity' from the representation arity and type.
dataConRepRepArity :: DataCon -> RepArity
dataConRepRepArity dc = typeRepArity rep_arity rep_ty
  where
    rep_arity = dataConRepArity dc
    rep_ty    = dataConRepType dc

batterseapower's avatar
batterseapower committed
790 791
-- | 'True' exactly when this 'DataCon's original source type has /no/
-- argument types
isNullarySrcDataCon :: DataCon -> Bool
isNullarySrcDataCon = null . dcOrigArgTys
batterseapower's avatar
batterseapower committed
793 794 795

-- | 'True' exactly when this 'DataCon's runtime representation has /no/
-- arguments, i.e. its representation arity is zero
isNullaryRepDataCon :: DataCon -> Bool
isNullaryRepDataCon = (== 0) . dataConRepArity
797

798
dataConRepStrictness :: DataCon -> [StrictnessMark]
-- ^ Give the demands on the arguments of a
-- Core constructor application (Con dc args).
-- With no separate representation every representation argument is
-- 'NotMarkedStrict'; otherwise the marks stored in the representation are used.
dataConRepStrictness dc = stricts_of (dcRep dc)
  where
    stricts_of (DCR { dcr_stricts = strs }) = strs
    stricts_of NoDataConRep = map (const NotMarkedStrict) (dataConRepArgTys dc)

-- | The 'HsBang's of the constructor's representation: those stored in the
-- 'DCR' representation when there is one, otherwise the source-level
-- argument bangs (@dcArgBangs@)
dataConRepBangs :: DataCon -> [HsBang]
dataConRepBangs dc = bangs_of (dcRep dc)
  where
    bangs_of (DCR { dcr_bangs = bangs }) = bangs
    bangs_of NoDataConRep                = dcArgBangs dc

-- | The boxer stored in the constructor's 'DCR' representation, or 'Nothing'
-- when the constructor has any other representation
dataConBoxer :: DataCon -> Maybe DataConBoxer
dataConBoxer dc = case dcRep dc of
                    DCR { dcr_boxer = boxer } -> Just boxer
                    _                         -> Nothing
813

batterseapower's avatar
batterseapower committed
814 815 816 817 818 819 820 821 822 823
-- | The \"signature\" of the 'DataCon' returns, in order:
--
-- 1) All type variables: universal then existential (as 'dataConAllTyVars')
--
-- 2) The full 'ThetaType' of the 'DataCon': the equality-spec predicates
--    followed by the other constraints (as 'dataConTheta')
--
-- 3) The original (source-level) argument types of the constructor
--
-- 4) The /original/ result type of the 'DataCon'
dataConSig :: DataCon -> ([TyVar], ThetaType, [Type], Type)
dataConSig (MkData { dcUnivTyVars = univs, dcExTyVars = exs
                   , dcEqSpec = eqs, dcOtherTheta = other_preds
                   , dcOrigArgTys = args, dcOrigResTy = res })
  = (all_tvs, full_theta, args, res)
  where
    all_tvs    = univs ++ exs
    full_theta = eqSpecPreds eqs ++ other_preds
829

batterseapower's avatar
batterseapower committed
830 831 832 833 834 835 836 837 838 839
-- | The \"full signature\" of the 'DataCon' returns, in order:
--
-- 1) The result of 'dataConUnivTyVars'
--
-- 2) The result of 'dataConExTyVars'
--
-- 3) The result of 'dataConEqSpec'
--
-- 4) The constructor's other constraints (its @dcOtherTheta@ field), i.e.
--    the part of 'dataConTheta' that does not come from the equality spec
--
-- 5) The original argument types to the 'DataCon' (i.e. before
--    any change of the representation of the type)
--
-- 6) The original result type of the 'DataCon'
dataConFullSig :: DataCon
               -> ([TyVar], [TyVar], [(TyVar,Type)], ThetaType, [Type], Type)
dataConFullSig (MkData { dcUnivTyVars = univs, dcExTyVars = exs
                       , dcEqSpec = eqs, dcOtherTheta = other_preds
                       , dcOrigArgTys = args, dcOrigResTy = res })
  = (univs, exs, eqs, other_preds, args, res)
850 851 852

-- | The original result type of the 'DataCon' (its @dcOrigResTy@ field)
dataConOrigResTy :: DataCon -> Type
dataConOrigResTy = dcOrigResTy
853

batterseapower's avatar
batterseapower committed
854 855 856
-- | The \"stupid theta\" of the 'DataCon', such as @Eq a@ in:
--
-- > data Eq a => T a = ...
dataConStupidTheta :: DataCon -> ThetaType
dataConStupidTheta = dcStupidTheta

860
dataConUserType :: DataCon -> Type
-- ^ The user-declared type of the data constructor
-- in the nice-to-read form:
--
-- > T :: forall a b. a -> b -> T [a]
--
-- rather than:
--
-- > T :: forall a c. forall b. (c~[a]) => a -> b -> T c
--
-- The universal type variables bound by the equality spec are left out of
-- the quantified variables, and only the non-equality constraints appear.
--
-- NB: If the constructor is part of a data instance, the result type
-- mentions the family tycon, not the internal one.
dataConUserType (MkData { dcUnivTyVars = univs, dcExTyVars = exs
                        , dcEqSpec = eqs, dcOtherTheta = other_preds
                        , dcOrigArgTys = args, dcOrigResTy = res })
  = mkForAllTys quantified (mkFunTys other_preds (mkFunTys args res))
  where
    -- Universals fixed by a GADT equality are not shown to the user
    quantified = (univs `minusList` map fst eqs) ++ exs
880

batterseapower's avatar
batterseapower committed
881 882 883 884 885
-- | Finds the instantiated types of the arguments required to construct a 'DataCon' representation
-- NB: these INCLUDE any dictionary args
--     but EXCLUDE the data-declaration context, which is discarded
-- It's all post-flattening etc; this is a representation type
--
-- In a debug build, asserts that there is exactly one instantiating type per
-- universal type variable, and that the constructor has no existential type
-- variables and no equality spec.
dataConInstArgTys :: DataCon    -- ^ A datacon with no existentials or equality constraints
                                -- However, it can have a dcTheta (notably it can be a
                                -- class dictionary, with superclasses)
                  -> [Type]     -- ^ Instantiated at these types
                  -> [Type]
dataConInstArgTys dc@(MkData { dcUnivTyVars = univs, dcExTyVars = exs
                             , dcEqSpec = eqs }) inst_tys
 = ASSERT2( length univs == length inst_tys
          , ptext (sLit "dataConInstArgTys") <+> ppr dc $$ ppr univs $$ ppr inst_tys)
   ASSERT2( null exs && null eqs, ppr dc )
   [ instantiate rep_ty | rep_ty <- dataConRepArgTys dc ]
 where
   -- Substitute the instantiating types for the universal type variables
   instantiate = substTyWith univs inst_tys
896

897 898
-- | Returns just the instantiated /value/ argument types of a 'DataCon',
-- (excluding dictionary args)
dataConInstOrigArgTys
        :: DataCon      -- Works for any DataCon
        -> [Type]       -- Includes existential tyvar args, but NOT
                        -- equality constraints or dicts
        -> [Type]
-- For vanilla datacons, it's all quite straightforward
-- But for the call in MatchCon, we really do want just the value args
--
-- In a debug build, asserts one instantiating type per universal-plus-
-- existential type variable.
dataConInstOrigArgTys dc@(MkData { dcOrigArgTys = orig_args
                                 , dcUnivTyVars = univs
                                 , dcExTyVars = exs }) inst_tys
  = ASSERT2( length all_tvs == length inst_tys
          , ptext (sLit "dataConInstOrigArgTys") <+> ppr dc $$ ppr all_tvs $$ ppr inst_tys )
    [ instantiate arg_ty | arg_ty <- orig_args ]
  where
    all_tvs     = univs ++ exs
    instantiate = substTyWith all_tvs inst_tys
914 915
\end{code}

916
\begin{code}
batterseapower's avatar
batterseapower committed
917 918
-- | The argument types of the wrapper, excluding all dictionary arguments
-- and without substituting for any type variables
dataConOrigArgTys :: DataCon -> [Type]
dataConOrigArgTys = dcOrigArgTys

922
-- | Returns the arg types of the worker, including *all* evidence, after any
-- flattening has been done and without substituting for any type variables.
--
-- With a 'DCR' representation these are the argument types it stores.
-- With 'NoDataConRep' they are the other constraints followed by the
-- original argument types (a debug build asserts the equality spec is empty).
dataConRepArgTys :: DataCon -> [Type]
dataConRepArgTys dc = rep_arg_tys (dcRep dc)
  where
    rep_arg_tys (DCR { dcr_arg_tys = arg_tys }) = arg_tys
    rep_arg_tys NoDataConRep
      = ASSERT( null (dcEqSpec dc) ) dcOtherTheta dc ++ dcOrigArgTys dc
932 933
\end{code}

934
\begin{code}
batterseapower's avatar
batterseapower committed
935 936
-- | The string @package:module.name@ identifying a constructor, which is attached
-- to its info table and used by the GHCi debugger and the heap profiler
dataConIdentity :: DataCon -> [Word8]
-- We want this string to be UTF-8, so we get the bytes directly from the FastStrings.
dataConIdentity dc = pkg_bytes ++ (colon : mod_bytes) ++ (dot : occ_bytes)
  where
    con_name  = dataConName dc
    -- A debug build asserts that the constructor's name is external
    this_mod  = ASSERT( isExternalName con_name ) nameModule con_name

    pkg_bytes = bytesFS (packageIdFS (modulePackageId this_mod))
    mod_bytes = bytesFS (moduleNameFS (moduleName this_mod))
    occ_bytes = bytesFS (occNameFS (nameOccName con_name))

    -- The two separator bytes of @package:module.name@
    colon     = fromIntegral (ord ':')
    dot       = fromIntegral (ord '.')
944 945
\end{code}

946
\begin{code}
947 948
-- | Is the constructor's representation 'TyCon' a tuple 'TyCon'
-- (as judged by 'isTupleTyCon')?
isTupleDataCon :: DataCon -> Bool
isTupleDataCon = isTupleTyCon . dcRepTyCon
949 950
	
-- | Is the constructor's representation 'TyCon' an unboxed tuple 'TyCon'
-- (as judged by 'isUnboxedTupleTyCon')?
isUnboxedTupleCon :: DataCon -> Bool
isUnboxedTupleCon = isUnboxedTupleTyCon . dcRepTyCon
952

batterseapower's avatar
batterseapower committed
953
-- | Is this a vanilla 'DataCon', i.e. a nice boring Haskell 98 constructor?
isVanillaDataCon :: DataCon -> Bool
isVanillaDataCon = dcVanilla
956
\end{code}
957

958 959 960 961
\begin{code}
-- | The dictionary data constructor of a class: the first data constructor
-- of the class's 'TyCon'. Panics if that 'TyCon' has no data constructors;
-- a debug build also asserts that it has exactly one.
classDataCon :: Class -> DataCon
classDataCon clas = only_con (tyConDataCons (classTyCon clas))
  where
    only_con []                   = panic "classDataCon"
    only_con (dict_constr:extras) = ASSERT( null extras ) dict_constr