ByteCodeGen.lhs 49.1 KB
Newer Older
1
%
2
% (c) The University of Glasgow 2002
3 4 5 6
%
\section[ByteCodeGen]{Generate bytecode from Core}

\begin{code}
7
module ByteCodeGen ( UnlinkedBCO, byteCodeGen, coreExprToBCOs ) where
8 9 10

#include "HsVersions.h"

11 12 13
import ByteCodeInstr
import ByteCodeFFI	( mkMarshalCode, moan64 )
import ByteCodeAsm	( CompiledByteCode(..), UnlinkedBCO, 
14 15 16
			  assembleBCO, assembleBCOs, iNTERP_STACK_CHECK_THRESH )
import ByteCodeLink	( lookupStaticPtr )

17
import Outputable
18
import Name		( Name, getName, mkSystemVarName )
19 20
import Id
import FiniteMap
21
import ForeignCall	( ForeignCall(..), CCallTarget(..), CCallSpec(..) )
22
import HscTypes		( TypeEnv, typeEnvTyCons, typeEnvClasses )
23
import CoreUtils	( exprType )
24
import CoreSyn
25
import PprCore		( pprCoreExpr )
26
import Literal		( Literal(..), literalType )
27
import PrimOp		( PrimOp(..) )
28
import CoreFVs		( freeVars )
29
import Type		( isUnLiftedType, splitTyConApp_maybe )
30
import DataCon		( DataCon, dataConTag, fIRST_TAG, dataConTyCon, 
31
                          isUnboxedTupleCon, isNullaryRepDataCon, dataConWorkId,
32
			  dataConRepArity )
33 34
import TyCon		( TyCon, tyConFamilySize, isDataTyCon, 
			  tyConDataCons, isUnboxedTupleTyCon )
35
import Class		( Class, classTyCon )
36
import Type		( Type, repType, splitFunTys, dropForAlls, pprType )
37
import Util
38
import DataCon		( dataConRepArity )
39
import Var		( isTyVar )
40
import VarSet		( VarSet, varSetElems )
41
import TysPrim		( arrayPrimTyCon, mutableArrayPrimTyCon,
42 43
			  byteArrayPrimTyCon, mutableByteArrayPrimTyCon
			)
44
import DynFlags	( DynFlags, DynFlag(..) )
45
import ErrUtils		( showPass, dumpIfSet_dyn )
46
import Unique		( mkPseudoUniqueE )
47
import FastString	( FastString(..), unpackFS )
48
import Panic		( GhcException(..) )
49 50
import SMRep		( typeCgRep, arrWordsHdrSize, arrPtrsHdrSize, StgWord, 
			  CgRep(..), cgRepSizeW, isFollowableArg, idCgRep )
51
import Bitmap		( intsToReverseBitmap, mkBitmap )
52
import OrdList
53
import Constants	( wORD_SIZE )
54

55
import Data.List	( intersperse, sortBy, zip4, zip6, partition )
56 57
import Foreign		( Ptr, castPtr, mallocBytes, pokeByteOff, Word8,
			  withForeignPtr )
58
import Foreign.C	( CInt )
59
import Control.Exception	( throwDyn )
60

61
import GHC.Exts		( Int(..), ByteArray# )
62

63
import Control.Monad	( when )
64
import Data.Char	( ord, chr )
65

66 67
-- -----------------------------------------------------------------------------
-- Generating byte code for a complete module 
68

69
byteCodeGen :: DynFlags
70
            -> [CoreBind]
71
	    -> [TyCon]
72
            -> IO CompiledByteCode
73
byteCodeGen dflags binds tycs
74 75
   = do showPass dflags "ByteCodeGen"

76 77
        let flatBinds = [ (bndr, freeVars rhs) 
			| (bndr, rhs) <- flattenBinds binds]
78

79
        (BcM_State final_ctr mallocd, proto_bcos)
80
           <- runBc (mapM schemeTopBind flatBinds)
81

sof's avatar
sof committed
82
        when (notNull mallocd)
83 84
             (panic "ByteCodeGen.byteCodeGen: missing final emitBc?")

85
        dumpIfSet_dyn dflags Opt_D_dump_BCOs
86
           "Proto-BCOs" (vcat (intersperse (char ' ') (map ppr proto_bcos)))
87

88
        assembleBCOs proto_bcos tycs
89
        
90 91
-- -----------------------------------------------------------------------------
-- Generating byte code for an expression
92

93 94 95 96
-- Returns: (the root BCO for this expression, 
--           a list of auxilary BCOs resulting from compiling closures)
coreExprToBCOs :: DynFlags
	       -> CoreExpr
97
               -> IO UnlinkedBCO
98 99
coreExprToBCOs dflags expr
 = do showPass dflags "ByteCodeGen"
100 101 102

      -- create a totally bogus name for the top-level BCO; this
      -- should be harmless, since it's never used for anything
103
      let invented_name  = mkSystemVarName (mkPseudoUniqueE 0) FSLIT("ExprTopLevel")
104 105
          invented_id    = mkLocalId invented_name (panic "invented_id's type")
	  
106
      (BcM_State final_ctr mallocd, proto_bco) 
107
         <- runBc (schemeTopBind (invented_id, freeVars expr))
108

sof's avatar
sof committed
109
      when (notNull mallocd)
110 111
           (panic "ByteCodeGen.coreExprToBCOs: missing final emitBc?")

112
      dumpIfSet_dyn dflags Opt_D_dump_BCOs "Proto-BCOs" (ppr proto_bco)
113

114
      assembleBCO proto_bco
115 116


117 118
-- -----------------------------------------------------------------------------
-- Compilation schema for the bytecode generator
119 120 121

type BCInstrList = OrdList BCInstr

122 123 124 125 126 127
type Sequel = Int	-- back off to this depth before ENTER

-- Maps Ids to the offset from the stack _base_ so we don't have
-- to mess with it after each push/pop.
type BCEnv = FiniteMap Id Int	-- To find vars on the stack

128 129 130 131 132 133
ppBCEnv :: BCEnv -> SDoc
ppBCEnv p
   = text "begin-env"
     $$ nest 4 (vcat (map pp_one (sortBy cmp_snd (fmToList p))))
     $$ text "end-env"
     where
134
        pp_one (var, offset) = int offset <> colon <+> ppr var <+> ppr (idCgRep var)
135
        cmp_snd x y = compare (snd x) (snd y)
136

137 138
-- Create a BCO and do a spot of peephole optimisation on the insns
-- at the same time.
139 140 141 142 143 144 145
mkProtoBCO
   :: name
   -> BCInstrList
   -> Either  [AnnAlt Id VarSet] (AnnExpr Id VarSet)
   -> Int
   -> Int
   -> [StgWord]
146
   -> Bool   	-- True <=> is a return point, rather than a function
147 148
   -> [Ptr ()]
   -> ProtoBCO name
149 150
mkProtoBCO nm instrs_ordlist origin arity bitmap_size bitmap
  is_ret mallocd_blocks
151 152 153 154 155 156 157 158 159
   = ProtoBCO {
	protoBCOName = nm,
	protoBCOInstrs = maybe_with_stack_check,
	protoBCOBitmap = bitmap,
	protoBCOBitmapSize = bitmap_size,
	protoBCOArity = arity,
	protoBCOExpr = origin,
	protoBCOPtrs = mallocd_blocks
      }
160
     where
161 162 163 164 165 166 167 168
        -- Overestimate the stack usage (in words) of this BCO,
        -- and if >= iNTERP_STACK_CHECK_THRESH, add an explicit
        -- stack check.  (The interpreter always does a stack check
        -- for iNTERP_STACK_CHECK_THRESH words at the start of each
        -- BCO anyway, so we only need to add an explicit on in the
        -- (hopefully rare) cases when the (overestimated) stack use
        -- exceeds iNTERP_STACK_CHECK_THRESH.
        maybe_with_stack_check
169 170 171 172
	   | is_ret = peep_d
		-- don't do stack checks at return points;
		-- everything is aggregated up to the top BCO
		-- (which must be a function)
173 174 175 176
           | stack_overest >= 65535
           = pprPanic "mkProtoBCO: stack use won't fit in 16 bits" 
                      (int stack_overest)
           | stack_overest >= iNTERP_STACK_CHECK_THRESH
177
           = STKCHECK stack_overest : peep_d
178 179 180 181 182 183 184 185
           | otherwise
           = peep_d	-- the supposedly common case
             
        stack_overest = sum (map bciStackUse peep_d)

        -- Merge local pushes
        peep_d = peep (fromOL instrs_ordlist)

186 187 188
        peep (PUSH_L off1 : PUSH_L off2 : PUSH_L off3 : rest)
           = PUSH_LLL off1 (off2-1) (off3-2) : peep rest
        peep (PUSH_L off1 : PUSH_L off2 : rest)
189
           = PUSH_LL off1 (off2-1) : peep rest
190 191 192 193 194
        peep (i:rest)
           = i : peep rest
        peep []
           = []

195
argBits :: [CgRep] -> [Bool]
196 197
argBits [] = []
argBits (rep : args)
198 199
  | isFollowableArg rep = False : argBits args
  | otherwise = take (cgRepSizeW rep) (repeat True) ++ argBits args
200

201 202 203 204 205 206 207 208 209
-- -----------------------------------------------------------------------------
-- schemeTopBind

-- Compile code for the right-hand side of a top-level binding

schemeTopBind :: (Id, AnnExpr Id VarSet) -> BcM (ProtoBCO Name)


schemeTopBind (id, rhs)
210
  | Just data_con <- isDataConWorkId_maybe id,
211
    isNullaryRepDataCon data_con
212
  = 	-- Special case for the worker of a nullary data con.
213
	-- It'll look like this:	Nil = /\a -> Nil a
214
	-- If we feed it into schemeR, we'll get 
215
	--	Nil = Nil
216 217
	-- because mkConAppCode treats nullary constructor applications
	-- by just re-using the single top-level definition.  So
218
	-- for the worker itself, we must allocate it directly.
219
    emitBc (mkProtoBCO (getName id) (toOL [PACK data_con 0, ENTER])
220
                       (Right rhs) 0 0 [{-no bitmap-}] False{-not alts-})
221 222 223 224

  | otherwise
  = schemeR [{- No free variables -}] (id, rhs)

225 226
-- -----------------------------------------------------------------------------
-- schemeR
227

228 229 230 231 232
-- Compile code for a right-hand side, to give a BCO that,
-- when executed with the free variables and arguments on top of the stack,
-- will return with a pointer to the result on top of the stack, after
-- removing the free variables and arguments.
--
233 234
-- Park the resulting BCO in the monad.  Also requires the
-- variable to which this value was bound, so as to give the
235 236 237 238 239 240 241 242
-- resulting BCO a name. 

schemeR :: [Id] 		-- Free vars of the RHS, ordered as they
				-- will appear in the thunk.  Empty for
				-- top-level things, which have no free vars.
	-> (Id, AnnExpr Id VarSet)
	-> BcM (ProtoBCO Name)
schemeR fvs (nm, rhs) 
243
{-
244 245 246 247 248 249 250 251
   | trace (showSDoc (
              (char ' '
               $$ (ppr.filter (not.isTyVar).varSetElems.fst) rhs
               $$ pprCoreExpr (deAnnotate rhs)
               $$ char ' '
              ))) False
   = undefined
   | otherwise
252 253
-}
   = schemeR_wrk fvs nm rhs (collect [] rhs)
254

255 256 257
collect xs (_, AnnNote note e) = collect xs e
collect xs (_, AnnLam x e)     = collect (if isTyVar x then xs else (x:xs)) e
collect xs (_, not_lambda)     = (reverse xs, not_lambda)
258

259
schemeR_wrk fvs nm original_body (args, body)
260 261 262
   = let 
	 all_args  = reverse args ++ fvs
	 arity     = length all_args
263
	 -- all_args are the args in reverse order.  We're compiling a function
264 265 266 267
	 -- \fv1..fvn x1..xn -> e 
	 -- i.e. the fvs come first

         szsw_args = map idSizeW all_args
268
         szw_args  = sum szsw_args
269
         p_init    = listToFM (zip all_args (mkStackOffsets 0 szsw_args))
270 271

	 -- make the arg bitmap
272
	 bits = argBits (reverse (map idCgRep all_args))
273 274
	 bitmap_size = length bits
	 bitmap = mkBitmap bits
275 276
     in
     schemeE szw_args 0 p_init body 		`thenBc` \ body_code ->
277
     emitBc (mkProtoBCO (getName nm) body_code (Right original_body)
278
		arity bitmap_size bitmap False{-not alts-})
279

280

281 282 283 284 285 286 287 288 289 290 291 292
fvsToEnv :: BCEnv -> VarSet -> [Id]
-- Takes the free variables of a right-hand side, and
-- delivers an ordered list of the local variables that will
-- be captured in the thunk for the RHS
-- The BCEnv argument tells which variables are in the local
-- environment: these are the ones that should be captured
--
-- The code that constructs the thunk, and the code that executes
-- it, have to agree about this layout
fvsToEnv p fvs = [v | v <- varSetElems fvs, 
		      isId v,		-- Could be a type variable
		      v `elemFM` p]
293

294 295
-- -----------------------------------------------------------------------------
-- schemeE
296 297 298

-- Compile code to apply the given expression to the remaining args
-- on the stack, returning a HNF.
299
schemeE :: Int -> Sequel -> BCEnv -> AnnExpr' Id VarSet -> BcM BCInstrList
300 301

-- Delegate tail-calls to schemeT.
302 303
schemeE d s p e@(AnnApp f a) 
   = schemeT d s p e
304

305
schemeE d s p e@(AnnVar v)
306 307
   | not (isUnLiftedType v_type)
   =  -- Lifted-type thing; push it in the normal way
308
     schemeT d s p e
309

310
   | otherwise
311 312
   = -- Returning an unlifted value.  
     -- Heave it on the stack, SLIDE, and RETURN.
313
     pushAtom d p (AnnVar v)	`thenBc` \ (push, szw) ->
314 315
     returnBc (push 			-- value onto stack
               `appOL`  mkSLIDE szw (d-s) 	-- clear to sequel
316
               `snocOL` RETURN_UBX v_rep)	-- go
317
   where
318
      v_type = idType v
319
      v_rep = typeCgRep v_type
320

321
schemeE d s p (AnnLit literal)
322
   = pushAtom d p (AnnLit literal)	`thenBc` \ (push, szw) ->
323
     let l_rep = typeCgRep (literalType literal)
324
     in  returnBc (push 			-- value onto stack
325
                   `appOL`  mkSLIDE szw (d-s) 	-- clear to sequel
326
                   `snocOL` RETURN_UBX l_rep)	-- go
327

328

329 330
schemeE d s p (AnnLet (AnnNonRec x (_,rhs)) (_,body))
   | (AnnVar v, args_r_to_l) <- splitApp rhs,
331
     Just data_con <- isDataConWorkId_maybe v,
332
     dataConRepArity data_con == length args_r_to_l
333
   = 	-- Special case for a non-recursive let whose RHS is a 
334
	-- saturatred constructor application.
335 336 337 338
	-- Just allocate the constructor and carry on
     mkConAppCode d s p data_con args_r_to_l	`thenBc` \ alloc_code ->
     schemeE (d+1) s (addToFM p x d) body	`thenBc` \ body_code ->
     returnBc (alloc_code `appOL` body_code)
339 340 341

-- General case for let.  Generates correct, if inefficient, code in
-- all situations.
342
schemeE d s p (AnnLet binds (_,body))
343 344
   = let (xs,rhss) = case binds of AnnNonRec x rhs  -> ([x],[rhs])
                                   AnnRec xs_n_rhss -> unzip xs_n_rhss
345
         n_binds = length xs
346

347
         fvss  = map (fvsToEnv p' . fst) rhss
348

sof's avatar
sof committed
349 350
         -- Sizes of free vars
         sizes = map (\rhs_fvs -> sum (map idSizeW rhs_fvs)) fvss
351 352 353

	 -- the arity of each rhs
	 arities = map (length . fst . collect []) rhss
354 355 356 357 358

         -- This p', d' defn is safe because all the items being pushed
         -- are ptrs, so all have size 1.  d' and p' reflect the stack
         -- after the closures have been allocated in the heap (but not
         -- filled in), and pointers to them parked on the stack.
359 360
         p'    = addListToFM p (zipE xs (mkStackOffsets d (nOfThem n_binds 1)))
         d'    = d + n_binds
361 362 363
         zipE  = zipEqual "schemeE"

         -- ToDo: don't build thunks for things with no free variables
364 365 366 367 368 369
         build_thunk dd [] size bco off arity
            = returnBc (PUSH_BCO bco `consOL` unitOL (mkap (off+size) size))
	   where 
		mkap | arity == 0 = MKAP
		     | otherwise  = MKPAP
         build_thunk dd (fv:fvs) size bco off arity = do
370
              (push_code, pushed_szw) <- pushAtom dd p' (AnnVar fv) 
371
              more_push_code <- build_thunk (dd+pushed_szw) fvs size bco off arity
372 373
              returnBc (push_code `appOL` more_push_code)

374 375 376
         alloc_code = toOL (zipWith mkAlloc sizes arities)
	   where mkAlloc sz 0     = ALLOC_AP sz
		 mkAlloc sz arity = ALLOC_PAP arity sz
377

378
	 compile_bind d' fvs x rhs size arity off = do
379
		bco <- schemeR fvs (x,rhs)
380
		build_thunk d' fvs size bco off arity
381

382
	 compile_binds = 
383 384 385
	    [ compile_bind d' fvs x rhs size arity n
	    | (fvs, x, rhs, size, arity, n) <- 
		zip6 fvss xs rhss sizes arities [n_binds, n_binds-1 .. 1]
386 387
	    ]
     in do
388
     body_code <- schemeE d' s p' body
389 390
     thunk_codes <- sequence compile_binds
     returnBc (alloc_code `appOL` concatOL thunk_codes `appOL` body_code)
391 392


393

394
schemeE d s p (AnnCase scrut bndr _ [(DataAlt dc, [bind1, bind2], rhs)])
395
   | isUnboxedTupleCon dc, VoidArg <- typeCgRep (idType bind1)
396
	-- Convert 
397
	--	case .... of x { (# VoidArg'd-thing, a #) -> ... }
398 399 400 401 402 403 404
	-- to
	-- 	case .... of a { DEFAULT -> ... }
	-- becuse the return convention for both are identical.
	--
	-- Note that it does not matter losing the void-rep thing from the
	-- envt (it won't be bound now) because we never look such things up.

405
   = --trace "automagic mashing of case alts (# VoidArg, a #)" $
406 407
     doCase d s p scrut bind2 [(DEFAULT, [], rhs)] True{-unboxed tuple-}

408 409
   | isUnboxedTupleCon dc, VoidArg <- typeCgRep (idType bind2)
   = --trace "automagic mashing of case alts (# a, VoidArg #)" $
410
     doCase d s p scrut bind1 [(DEFAULT, [], rhs)] True{-unboxed tuple-}
411

412
schemeE d s p (AnnCase scrut bndr _ [(DataAlt dc, [bind1], rhs)])
413
   | isUnboxedTupleCon dc
414 415 416 417
	-- Similarly, convert
	--	case .... of x { (# a #) -> ... }
	-- to
	--	case .... of a { DEFAULT -> ... }
418 419
   = --trace "automagic mashing of case alts (# a #)"  $
     doCase d s p scrut bind1 [(DEFAULT, [], rhs)] True{-unboxed tuple-}
420

421
schemeE d s p (AnnCase scrut bndr _ alts)
422
   = doCase d s p scrut bndr alts False{-not an unboxed tuple-}
423

424
schemeE d s p (AnnNote note (_, body))
425 426 427 428
   = schemeE d s p body

schemeE d s p other
   = pprPanic "ByteCodeGen.schemeE: unhandled case" 
429
               (pprCoreExpr (deAnnotate' other))
430 431


432 433 434 435 436
-- Compile code to do a tail call.  Specifically, push the fn,
-- slide the on-stack app back down to the sequel depth,
-- and enter.  Four cases:
--
-- 0.  (Nasty hack).
437
--     An application "GHC.Prim.tagToEnum# <type> unboxed-int".
438 439
--     The int will be on the stack.  Generate a code sequence
--     to convert it to the relevant constructor, SLIDE and ENTER.
440
--
441
-- 1.  The fn denotes a ccall.  Defer to generateCCall.
442
--
443
-- 2.  (Another nasty hack).  Spot (# a::VoidArg, b #) and treat
444
--     it simply as  b  -- since the representations are identical
445
--     (the VoidArg takes up zero stack space).  Also, spot
446
--     (# b #) and treat it as  b.
447
--
448
-- 3.  Application of a constructor, by defn saturated.
449 450 451
--     Split the args into ptrs and non-ptrs, and push the nonptrs, 
--     then the ptrs, and then do PACK and RETURN.
--
452
-- 4.  Otherwise, it must be a function call.  Push the args
453
--     right to left, SLIDE and ENTER.
454 455

schemeT :: Int 		-- Stack depth
456 457
        -> Sequel 	-- Sequel depth
        -> BCEnv 	-- stack env
458
        -> AnnExpr' Id VarSet 
459
        -> BcM BCInstrList
460

461
schemeT d s p app
462

463 464 465
--   | trace ("schemeT: env in = \n" ++ showSDocDebug (ppBCEnv p)) False
--   = panic "schemeT ?!?!"

466
--   | trace ("\nschemeT\n" ++ showSDoc (pprCoreExpr (deAnnotate' app)) ++ "\n") False
467 468
--   = error "?!?!" 

469
   -- Case 0
470
   | Just (arg, constr_names) <- maybe_is_tagToEnum_call
471
   = pushAtom d p arg	 		`thenBc` \ (push, arg_words) ->
472 473 474 475 476
     implement_tagToId constr_names	`thenBc` \ tagToId_sequence ->
     returnBc (push `appOL`  tagToId_sequence            
                    `appOL`  mkSLIDE 1 (d+arg_words-s)
                    `snocOL` ENTER)

477
   -- Case 1
478 479 480
   | Just (CCall ccall_spec) <- isFCallId_maybe fn
   = generateCCall d s p ccall_spec fn args_r_to_l

481
   -- Case 2: Constructor application
482
   | Just con <- maybe_saturated_dcon,
483 484
     isUnboxedTupleCon con
   = case args_r_to_l of
485
	[arg1,arg2] | isVoidArgAtom arg1 -> 
486
		  unboxedTupleReturn d s p arg2
487
	[arg1,arg2] | isVoidArgAtom arg2 -> 
488 489 490 491
		  unboxedTupleReturn d s p arg1
	_other -> unboxedTupleException

   -- Case 3: Ordinary data constructor
492
   | Just con <- maybe_saturated_dcon
493 494 495 496 497 498
   = mkConAppCode d s p con args_r_to_l	`thenBc` \ alloc_con ->
     returnBc (alloc_con	 `appOL` 
               mkSLIDE 1 (d - s) `snocOL`
               ENTER)

   -- Case 4: Tail call of function 
499
   | otherwise
500
   = doTailCall d s p fn args_r_to_l
501

502 503 504 505
   where
      -- Detect and extract relevant info for the tagToEnum kludge.
      maybe_is_tagToEnum_call
         = let extract_constr_Names ty
506 507 508 509 510 511 512 513
		 | Just (tyc, []) <- splitTyConApp_maybe (repType ty),
		   isDataTyCon tyc
		   = map (getName . dataConWorkId) (tyConDataCons tyc)
		   -- NOTE: use the worker name, not the source name of
		   -- the DataCon.  See DataCon.lhs for details.
		 | otherwise
		   = panic "maybe_is_tagToEnum_call.extract_constr_Ids"
           in
514
           case app of
515
              (AnnApp (_, AnnApp (_, AnnVar v) (_, AnnType t)) arg)
516
                 -> case isPrimOpId_maybe v of
517 518
                       Just TagToEnumOp -> Just (snd arg, extract_constr_Names t)
		       other		-> Nothing
519 520
              other -> Nothing

521 522 523 524
	-- Extract the args (R->L) and fn
	-- The function will necessarily be a variable, 
	-- because we are compiling a tail call
      (AnnVar fn, args_r_to_l) = splitApp app
525

526
      -- Only consider this to be a constructor application iff it is
527
      -- saturated.  Otherwise, we'll call the constructor wrapper.
528 529
      n_args = length args_r_to_l
      maybe_saturated_dcon  
530
	= case isDataConWorkId_maybe fn of
531 532
		Just con | dataConRepArity con == n_args -> Just con
		_ -> Nothing
533

534
-- -----------------------------------------------------------------------------
535 536 537 538 539 540 541 542 543
-- Generate code to build a constructor application, 
-- leaving it on top of the stack

mkConAppCode :: Int -> Sequel -> BCEnv
	     -> DataCon 		-- The data constructor
	     -> [AnnExpr' Id VarSet] 	-- Args, in *reverse* order
	     -> BcM BCInstrList

mkConAppCode orig_d s p con []	-- Nullary constructor
544
  = ASSERT( isNullaryRepDataCon con )
545
    returnBc (unitOL (PUSH_G (getName (dataConWorkId con))))
546 547 548 549 550 551
	-- Instead of doing a PACK, which would allocate a fresh
	-- copy of this constructor, use the single shared version.

mkConAppCode orig_d s p con args_r_to_l 
  = ASSERT( dataConRepArity con == length args_r_to_l )
    do_pushery orig_d (non_ptr_args ++ ptr_args)
552 553 554
 where
	-- The args are already in reverse order, which is the way PACK
	-- expects them to be.  We must push the non-ptrs after the ptrs.
555
      (ptr_args, non_ptr_args) = partition isPtrAtom args_r_to_l
556 557

      do_pushery d (arg:args)
558
         = pushAtom d p arg			`thenBc` \ (push, arg_words) ->
559 560
           do_pushery (d+arg_words) args	`thenBc` \ more_push_code ->
           returnBc (push `appOL` more_push_code)
561
      do_pushery d []
562 563 564 565
         = returnBc (unitOL (PACK con n_arg_words))
	 where
	   n_arg_words = d - orig_d

566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590

-- -----------------------------------------------------------------------------
-- Returning an unboxed tuple with one non-void component (the only
-- case we can handle).
--
-- Remember, we don't want to *evaluate* the component that is being
-- returned, even if it is a pointed type.  We always just return.

unboxedTupleReturn
	:: Int -> Sequel -> BCEnv
	-> AnnExpr' Id VarSet -> BcM BCInstrList
unboxedTupleReturn d s p arg = do
  (push, sz) <- pushAtom d p arg
  returnBc (push `appOL`
	    mkSLIDE sz (d-s) `snocOL`
	    RETURN_UBX (atomRep arg))

-- -----------------------------------------------------------------------------
-- Generate code for a tail-call

doTailCall
	:: Int -> Sequel -> BCEnv
	-> Id -> [AnnExpr' Id VarSet]
	-> BcM BCInstrList
doTailCall init_d s p fn args
591
  = do_pushes init_d args (map atomRep args)
592 593
  where
  do_pushes d [] reps = do
594
	ASSERT( null reps ) return ()
595
        (push_fn, sz) <- pushAtom d p (AnnVar fn)
596
	ASSERT( sz == 1 ) return ()
597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614
	returnBc (push_fn `appOL` (
		  mkSLIDE ((d-init_d) + 1) (init_d - s) `appOL`
		  unitOL ENTER))
  do_pushes d args reps = do
      let (push_apply, n, rest_of_reps) = findPushSeq reps
	  (these_args, rest_of_args) = splitAt n args
      (next_d, push_code) <- push_seq d these_args
      instrs <- do_pushes (next_d + 1) rest_of_args rest_of_reps 
		--                ^^^ for the PUSH_APPLY_ instruction
      returnBc (push_code `appOL` (push_apply `consOL` instrs))

  push_seq d [] = return (d, nilOL)
  push_seq d (arg:args) = do
    (push_code, sz) <- pushAtom d p arg 
    (final_d, more_push_code) <- push_seq (d+sz) args
    return (final_d, push_code `appOL` more_push_code)

-- v. similar to CgStackery.findMatch, ToDo: merge
615
findPushSeq (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: rest)
616
  = (PUSH_APPLY_PPPPPP, 6, rest)
617
findPushSeq (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: rest)
618
  = (PUSH_APPLY_PPPPP, 5, rest)
619
findPushSeq (PtrArg: PtrArg: PtrArg: PtrArg: rest)
620
  = (PUSH_APPLY_PPPP, 4, rest)
621
findPushSeq (PtrArg: PtrArg: PtrArg: rest)
622
  = (PUSH_APPLY_PPP, 3, rest)
623
findPushSeq (PtrArg: PtrArg: rest)
624
  = (PUSH_APPLY_PP, 2, rest)
625
findPushSeq (PtrArg: rest)
626
  = (PUSH_APPLY_P, 1, rest)
627
findPushSeq (VoidArg: rest)
628
  = (PUSH_APPLY_V, 1, rest)
629
findPushSeq (NonPtrArg: rest)
630
  = (PUSH_APPLY_N, 1, rest)
631
findPushSeq (FloatArg: rest)
632
  = (PUSH_APPLY_F, 1, rest)
633
findPushSeq (DoubleArg: rest)
634
  = (PUSH_APPLY_D, 1, rest)
635
findPushSeq (LongArg: rest)
636 637 638 639 640 641 642 643 644 645 646
  = (PUSH_APPLY_L, 1, rest)
findPushSeq _
  = panic "ByteCodeGen.findPushSeq"

-- -----------------------------------------------------------------------------
-- Case expressions

doCase  :: Int -> Sequel -> BCEnv
	-> AnnExpr Id VarSet -> Id -> [AnnAlt Id VarSet]
	-> Bool  -- True <=> is an unboxed tuple case, don't enter the result
	-> BcM BCInstrList
647 648
doCase d s p (_,scrut)
 bndr alts is_unboxed_tuple
649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
  = let
        -- Top of stack is the return itbl, as usual.
        -- underneath it is the pointer to the alt_code BCO.
        -- When an alt is entered, it assumes the returned value is
        -- on top of the itbl.
        ret_frame_sizeW = 2

	-- An unlifted value gets an extra info table pushed on top
	-- when it is returned.
	unlifted_itbl_sizeW | isAlgCase = 0
	  		    | otherwise = 1

	-- depth of stack after the return value has been pushed
	d_bndr = d + ret_frame_sizeW + idSizeW bndr

	-- depth of stack after the extra info table for an unboxed return
	-- has been pushed, if any.  This is the stack depth at the
	-- continuation.
        d_alts = d_bndr + unlifted_itbl_sizeW

        -- Env in which to compile the alts, not including
        -- any vars bound by the alts themselves
        p_alts = addToFM p bndr (d_bndr - 1)

	bndr_ty = idType bndr
        isAlgCase = not (isUnLiftedType bndr_ty) && not is_unboxed_tuple

        -- given an alt, return a discr and code for it.
677
	codeALt alt@(DEFAULT, _, (_,rhs))
678 679
	   = schemeE d_alts s p_alts rhs	`thenBc` \ rhs_code ->
	     returnBc (NoDiscr, rhs_code)
680
        codeAlt alt@(discr, bndrs, (_,rhs))
681 682 683 684 685 686 687
	   -- primitive or nullary constructor alt: no need to UNPACK
	   | null real_bndrs = do
		rhs_code <- schemeE d_alts s p_alts rhs
                returnBc (my_discr alt, rhs_code)
	   -- algebraic alt with some binders
           | ASSERT(isAlgCase) otherwise =
             let
688
		 (ptrs,nptrs) = partition (isFollowableArg.idCgRep) real_bndrs
689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713
		 ptr_sizes    = map idSizeW ptrs
		 nptrs_sizes  = map idSizeW nptrs
		 bind_sizes   = ptr_sizes ++ nptrs_sizes
		 size         = sum ptr_sizes + sum nptrs_sizes
		 -- the UNPACK instruction unpacks in reverse order...
		 p' = addListToFM p_alts 
			(zip (reverse (ptrs ++ nptrs))
			  (mkStackOffsets d_alts (reverse bind_sizes)))
	     in do
	     rhs_code <- schemeE (d_alts+size) s p' rhs
             return (my_discr alt, unitOL (UNPACK size) `appOL` rhs_code)
	   where
	     real_bndrs = filter (not.isTyVar) bndrs


        my_discr (DEFAULT, binds, rhs) = NoDiscr {-shouldn't really happen-}
        my_discr (DataAlt dc, binds, rhs) 
           | isUnboxedTupleCon dc
           = unboxedTupleException
           | otherwise
           = DiscrP (dataConTag dc - fIRST_TAG)
        my_discr (LitAlt l, binds, rhs)
           = case l of MachInt i     -> DiscrI (fromInteger i)
                       MachFloat r   -> DiscrF (fromRational r)
                       MachDouble r  -> DiscrD (fromRational r)
714
                       MachChar i    -> DiscrI (ord i)
715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730
                       _ -> pprPanic "schemeE(AnnCase).my_discr" (ppr l)

        maybe_ncons 
           | not isAlgCase = Nothing
           | otherwise 
           = case [dc | (DataAlt dc, _, _) <- alts] of
                []     -> Nothing
                (dc:_) -> Just (tyConFamilySize (dataConTyCon dc))

	-- the bitmap is relative to stack depth d, i.e. before the
	-- BCO, info table and return value are pushed on.
	-- This bit of code is v. similar to buildLivenessMask in CgBindery,
	-- except that here we build the bitmap from the known bindings of
	-- things that are pointers, whereas in CgBindery the code builds the
	-- bitmap from the free slots and unboxed bindings.
	-- (ToDo: merge?)
731
	bitmap = intsToReverseBitmap d{-size-} (sortLe (<=) rel_slots)
732 733 734 735
	  where
	  binds = fmToList p
	  rel_slots = concat (map spread binds)
	  spread (id, offset)
736
		| isFollowableArg (idCgRep id) = [ rel_offset ]
737 738 739 740 741 742 743 744 745
		| otherwise = []
		where rel_offset = d - offset - 1

     in do
     alt_stuff <- mapM codeAlt alts
     alt_final <- mkMultiBranch maybe_ncons alt_stuff
     let 
         alt_bco_name = getName bndr
         alt_bco = mkProtoBCO alt_bco_name alt_final (Left alts)
746
			0{-no arity-} d{-bitmap size-} bitmap True{-is alts-}
747 748 749 750 751 752 753
     -- in
--     trace ("case: bndr = " ++ showSDocDebug (ppr bndr) ++ "\ndepth = " ++ show d ++ "\nenv = \n" ++ showSDocDebug (ppBCEnv p) ++
--	     "\n      bitmap = " ++ show bitmap) $ do
     scrut_code <- schemeE (d + ret_frame_sizeW) (d + ret_frame_sizeW) p scrut
     alt_bco' <- emitBc alt_bco
     let push_alts
	    | isAlgCase = PUSH_ALTS alt_bco'
754
	    | otherwise = PUSH_ALTS_UNLIFTED alt_bco' (typeCgRep bndr_ty)
755 756 757 758 759 760 761
     returnBc (push_alts `consOL` scrut_code)


-- -----------------------------------------------------------------------------
-- Deal with a CCall.

-- Taggedly push the args onto the stack R->L,
762 763
-- deferencing ForeignObj#s and adjusting addrs to point to
-- payloads in Ptr/Byte arrays.  Then, generate the marshalling
764 765 766
-- (machine) code for the ccall, and create bytecodes to call that and
-- then return in the right way.  

767
generateCCall :: Int -> Sequel 		-- stack and sequel depths
768
              -> BCEnv
769
              -> CCallSpec		-- where to call
770
              -> Id 			-- of target, for type info
771
              -> [AnnExpr' Id VarSet]	-- args (atoms)
772
              -> BcM BCInstrList
773

774 775 776
generateCCall d0 s p ccall_spec@(CCallSpec target cconv safety) fn args_r_to_l
   = let 
         -- useful constants
777
         addr_sizeW = cgRepSizeW NonPtrArg
778 779 780 781

         -- Get the args on the stack, with tags and suitably
         -- dereferenced for the CCall.  For each arg, return the
         -- depth to the first word of the bits for that arg, and the
782
         -- CgRep of what was actually pushed.
783

784
         pargs d [] = returnBc []
785
         pargs d (a:az) 
786 787 788
            = let arg_ty = repType (exprType (deAnnotate' a))

              in case splitTyConApp_maybe arg_ty of
789 790
                    -- Don't push the FO; instead push the Addr# it
                    -- contains.
791 792
		    Just (t, _)
		     | t == arrayPrimTyCon || t == mutableArrayPrimTyCon
793
                       -> pargs (d + addr_sizeW) az	`thenBc` \ rest ->
794 795
                          parg_ArrayishRep arrPtrsHdrSize d p a
							`thenBc` \ code ->
796
                          returnBc ((code,NonPtrArg):rest)
797

798
		     | t == byteArrayPrimTyCon || t == mutableByteArrayPrimTyCon
799
                       -> pargs (d + addr_sizeW) az	`thenBc` \ rest ->
800 801
                          parg_ArrayishRep arrWordsHdrSize d p a
							`thenBc` \ code ->
802
                          returnBc ((code,NonPtrArg):rest)
803

804 805
                    -- Default case: push taggedly, but otherwise intact.
                    other
806
                       -> pushAtom d p a		`thenBc` \ (code_a, sz_a) ->
807
                          pargs (d+sz_a) az		`thenBc` \ rest ->
808
                          returnBc ((code_a, atomRep a) : rest)
809 810 811 812

         -- Do magic for Ptr/Byte arrays.  Push a ptr to the array on
         -- the stack but then advance it over the headers, so as to
         -- point to the payload.
813
         parg_ArrayishRep hdrSize d p a
814
            = pushAtom d p a `thenBc` \ (push_fo, _) ->
815
              -- The ptr points at the header.  Advance it over the
816
              -- header and then pretend this is an Addr#.
817
              returnBc (push_fo `snocOL` SWIZZLE 0 hdrSize)
818

819
     in
820
         pargs d0 args_r_to_l			`thenBc` \ code_n_reps ->
821 822
     let
         (pushs_arg, a_reps_pushed_r_to_l) = unzip code_n_reps
823 824

         push_args    = concatOL pushs_arg
825
         d_after_args = d0 + sum (map cgRepSizeW a_reps_pushed_r_to_l)
826
         a_reps_pushed_RAW
827
            | null a_reps_pushed_r_to_l || head a_reps_pushed_r_to_l /= VoidArg
828 829 830 831 832 833 834 835 836
            = panic "ByteCodeGen.generateCCall: missing or invalid World token?"
            | otherwise
            = reverse (tail a_reps_pushed_r_to_l)

         -- Now: a_reps_pushed_RAW are the reps which are actually on the stack.
         -- push_args is the code to do that.
         -- d_after_args is the stack depth once the args are on.

         -- Get the result rep.
837
         (returns_void, r_rep)
838
            = case maybe_getCCallReturnRep (idType fn) of
839
                 Nothing -> (True,  VoidArg)
840 841 842 843
                 Just rr -> (False, rr) 
         {-
         Because the Haskell stack grows down, the a_reps refer to 
         lowest to highest addresses in that order.  The args for the call
844
         are on the stack.  Now push an unboxed Addr# indicating
845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865
         the C function to call.  Then push a dummy placeholder for the 
         result.  Finally, emit a CCALL insn with an offset pointing to the 
         Addr# just pushed, and a literal field holding the mallocville
         address of the piece of marshalling code we generate.
         So, just prior to the CCALL insn, the stack looks like this 
         (growing down, as usual):
                 
            <arg_n>
            ...
            <arg_1>
            Addr# address_of_C_fn
            <placeholder-for-result#> (must be an unboxed type)

         The interpreter then calls the marshall code mentioned
         in the CCALL insn, passing it (& <placeholder-for-result#>), 
         that is, the addr of the topmost word in the stack.
         When this returns, the placeholder will have been
         filled in.  The placeholder is slid down to the sequel
         depth, and we RETURN.

         This arrangement makes it simple to do f-i-dynamic since the Addr#
866
         value is the first arg anyway.
867 868 869 870 871 872 873 874 875 876

         The marshalling code is generated specifically for this
         call site, and so knows exactly the (Haskell) stack
         offsets of the args, fn address and placeholder.  It
         copies the args to the C stack, calls the stacked addr,
         and parks the result back in the placeholder.  The interpreter
         calls it as a normal C call, assuming it has a signature
            void marshall_code ( StgWord* ptr_to_top_of_stack )
         -}
         -- resolve static address
877
         get_target_info
878 879
            = case target of
                 DynamicTarget
880
                    -> returnBc (False, panic "ByteCodeGen.generateCCall(dyn)")
881
                 StaticTarget target
882 883
                    -> ioToBc (lookupStaticPtr target) `thenBc` \res ->
                       returnBc (True, res)
884 885 886
     in
         get_target_info	`thenBc` \ (is_static, static_target_addr) ->
     let
887 888

         -- Get the arg reps, zapping the leading Addr# in the dynamic case
889
         a_reps --  | trace (showSDoc (ppr a_reps_pushed_RAW)) False = error "???"
890 891
                | is_static = a_reps_pushed_RAW
                | otherwise = if null a_reps_pushed_RAW 
892
                              then panic "ByteCodeGen.generateCCall: dyn with no args"
893
                              else tail a_reps_pushed_RAW
894 895 896 897

         -- push the Addr#
         (push_Addr, d_after_Addr)
            | is_static
898 899
            = (toOL [PUSH_UBX (Right static_target_addr) addr_sizeW],
               d_after_args + addr_sizeW)
900
            | otherwise	-- is already on the stack
901
            = (nilOL, d_after_args)
902 903

         -- Push the return placeholder.  For a call returning nothing,
904 905
         -- this is a VoidArg (tag).
         r_sizeW   = cgRepSizeW r_rep
906
         d_after_r = d_after_Addr + r_sizeW
907 908 909
         r_lit     = mkDummyLiteral r_rep
         push_r    = (if   returns_void 
                      then nilOL 
910
                      else unitOL (PUSH_UBX (Left r_lit) r_sizeW))
911