ByteCodeGen.lhs 49.3 KB
Newer Older
1
%
2
% (c) The University of Glasgow 2002
3 4 5 6
%
\section[ByteCodeGen]{Generate bytecode from Core}

\begin{code}
7
module ByteCodeGen ( UnlinkedBCO, byteCodeGen, coreExprToBCOs ) where
8 9 10

#include "HsVersions.h"

11 12 13
import ByteCodeInstr
import ByteCodeFFI	( mkMarshalCode, moan64 )
import ByteCodeAsm	( CompiledByteCode(..), UnlinkedBCO, 
14 15 16
			  assembleBCO, assembleBCOs, iNTERP_STACK_CHECK_THRESH )
import ByteCodeLink	( lookupStaticPtr )

17
import Outputable
18
import Name		( Name, getName, mkSystemName )
19 20
import Id
import FiniteMap
21
import ForeignCall	( ForeignCall(..), CCallTarget(..), CCallSpec(..) )
22
import HscTypes		( TypeEnv, typeEnvTyCons, typeEnvClasses )
23
import CoreUtils	( exprType )
24
import CoreSyn
25
import PprCore		( pprCoreExpr )
26
import Literal		( Literal(..), literalType )
27
import PrimOp		( PrimOp(..) )
28
import CoreFVs		( freeVars )
29
import Type		( isUnLiftedType, splitTyConApp_maybe )
30
import DataCon		( DataCon, dataConTag, fIRST_TAG, dataConTyCon, 
31
                          isUnboxedTupleCon, isNullaryDataCon, dataConWorkId,
32
			  dataConRepArity )
33
import TyCon		( tyConFamilySize, isDataTyCon, tyConDataCons,
34
			  isUnboxedTupleTyCon )
35
import Class		( Class, classTyCon )
36
import Type		( Type, repType, splitFunTys, dropForAlls, pprType )
37
import Util
38
import DataCon		( dataConRepArity )
39
import Var		( isTyVar )
40
import VarSet		( VarSet, varSetElems )
41
import TysPrim		( arrayPrimTyCon, mutableArrayPrimTyCon,
42 43
			  byteArrayPrimTyCon, mutableByteArrayPrimTyCon
			)
44 45
import CmdLineOpts	( DynFlags, DynFlag(..) )
import ErrUtils		( showPass, dumpIfSet_dyn )
46
import Unique		( mkPseudoUniqueE )
47
import FastString	( FastString(..), unpackFS )
48
import Panic		( GhcException(..) )
49 50
import SMRep		( typeCgRep, arrWordsHdrSize, arrPtrsHdrSize, StgWord, 
			  CgRep(..), cgRepSizeW, isFollowableArg, idCgRep )
51
import Bitmap		( intsToReverseBitmap, mkBitmap )
52
import OrdList
53
import Constants	( wORD_SIZE )
54

55
import Data.List	( intersperse, sortBy, zip4, zip5, partition )
56
import Foreign		( Ptr, castPtr, mallocBytes, pokeByteOff, Word8 )
57
import Foreign.C	( CInt )
58
import Control.Exception	( throwDyn )
59

60
import GHC.Exts		( Int(..), ByteArray# )
61

62
import Control.Monad	( when )
63
import Data.Char	( ord, chr )
64

65 66
-- -----------------------------------------------------------------------------
-- Generating byte code for a complete module 
67

68
byteCodeGen :: DynFlags
69 70
            -> [CoreBind]
	    -> TypeEnv
71
            -> IO CompiledByteCode
72
byteCodeGen dflags binds type_env
73
   = do showPass dflags "ByteCodeGen"
74 75 76
        let  local_tycons  = typeEnvTyCons  type_env
	     local_classes = typeEnvClasses type_env
	     tycs = local_tycons ++ map classTyCon local_classes
77

78 79
        let flatBinds = [ (bndr, freeVars rhs) 
			| (bndr, rhs) <- flattenBinds binds]
80

81
        (BcM_State final_ctr mallocd, proto_bcos)
82
           <- runBc (mapM schemeTopBind flatBinds)
83

sof's avatar
sof committed
84
        when (notNull mallocd)
85 86
             (panic "ByteCodeGen.byteCodeGen: missing final emitBc?")

87
        dumpIfSet_dyn dflags Opt_D_dump_BCOs
88
           "Proto-BCOs" (vcat (intersperse (char ' ') (map ppr proto_bcos)))
89

90
        assembleBCOs proto_bcos tycs
91
        
92 93
-- -----------------------------------------------------------------------------
-- Generating byte code for an expression
94

95 96 97 98
-- Returns: (the root BCO for this expression, 
--           a list of auxilary BCOs resulting from compiling closures)
coreExprToBCOs :: DynFlags
	       -> CoreExpr
99
               -> IO UnlinkedBCO
100 101
coreExprToBCOs dflags expr
 = do showPass dflags "ByteCodeGen"
102 103 104

      -- create a totally bogus name for the top-level BCO; this
      -- should be harmless, since it's never used for anything
105
      let invented_name  = mkSystemName (mkPseudoUniqueE 0) FSLIT("ExprTopLevel")
106 107
          invented_id    = mkLocalId invented_name (panic "invented_id's type")
	  
108
      (BcM_State final_ctr mallocd, proto_bco) 
109
         <- runBc (schemeTopBind (invented_id, freeVars expr))
110

sof's avatar
sof committed
111
      when (notNull mallocd)
112 113
           (panic "ByteCodeGen.coreExprToBCOs: missing final emitBc?")

114
      dumpIfSet_dyn dflags Opt_D_dump_BCOs "Proto-BCOs" (ppr proto_bco)
115

116
      assembleBCO proto_bco
117 118


119 120
-- -----------------------------------------------------------------------------
-- Compilation schema for the bytecode generator
121 122 123

type BCInstrList = OrdList BCInstr

124 125 126 127 128 129
type Sequel = Int	-- back off to this depth before ENTER

-- Maps Ids to the offset from the stack _base_ so we don't have
-- to mess with it after each push/pop.
type BCEnv = FiniteMap Id Int	-- To find vars on the stack

130 131 132 133 134 135
ppBCEnv :: BCEnv -> SDoc
ppBCEnv p
   = text "begin-env"
     $$ nest 4 (vcat (map pp_one (sortBy cmp_snd (fmToList p))))
     $$ text "end-env"
     where
136
        pp_one (var, offset) = int offset <> colon <+> ppr var <+> ppr (idCgRep var)
137
        cmp_snd x y = compare (snd x) (snd y)
138

139 140
-- Create a BCO and do a spot of peephole optimisation on the insns
-- at the same time.
141 142 143 144 145 146 147
mkProtoBCO
   :: name
   -> BCInstrList
   -> Either  [AnnAlt Id VarSet] (AnnExpr Id VarSet)
   -> Int
   -> Int
   -> [StgWord]
148
   -> Bool   	-- True <=> is a return point, rather than a function
149 150
   -> [Ptr ()]
   -> ProtoBCO name
151 152
mkProtoBCO nm instrs_ordlist origin arity bitmap_size bitmap
  is_ret mallocd_blocks
153 154 155 156 157 158 159 160 161
   = ProtoBCO {
	protoBCOName = nm,
	protoBCOInstrs = maybe_with_stack_check,
	protoBCOBitmap = bitmap,
	protoBCOBitmapSize = bitmap_size,
	protoBCOArity = arity,
	protoBCOExpr = origin,
	protoBCOPtrs = mallocd_blocks
      }
162
     where
163 164 165 166 167 168 169 170
        -- Overestimate the stack usage (in words) of this BCO,
        -- and if >= iNTERP_STACK_CHECK_THRESH, add an explicit
        -- stack check.  (The interpreter always does a stack check
        -- for iNTERP_STACK_CHECK_THRESH words at the start of each
        -- BCO anyway, so we only need to add an explicit on in the
        -- (hopefully rare) cases when the (overestimated) stack use
        -- exceeds iNTERP_STACK_CHECK_THRESH.
        maybe_with_stack_check
171 172 173 174
	   | is_ret = peep_d
		-- don't do stack checks at return points;
		-- everything is aggregated up to the top BCO
		-- (which must be a function)
175 176 177 178
           | stack_overest >= 65535
           = pprPanic "mkProtoBCO: stack use won't fit in 16 bits" 
                      (int stack_overest)
           | stack_overest >= iNTERP_STACK_CHECK_THRESH
179
           = STKCHECK stack_overest : peep_d
180 181 182 183 184 185 186 187
           | otherwise
           = peep_d	-- the supposedly common case
             
        stack_overest = sum (map bciStackUse peep_d)

        -- Merge local pushes
        peep_d = peep (fromOL instrs_ordlist)

188 189 190
        peep (PUSH_L off1 : PUSH_L off2 : PUSH_L off3 : rest)
           = PUSH_LLL off1 (off2-1) (off3-2) : peep rest
        peep (PUSH_L off1 : PUSH_L off2 : rest)
191
           = PUSH_LL off1 (off2-1) : peep rest
192 193 194 195 196
        peep (i:rest)
           = i : peep rest
        peep []
           = []

197
argBits :: [CgRep] -> [Bool]
198 199
argBits [] = []
argBits (rep : args)
200 201
  | isFollowableArg rep = False : argBits args
  | otherwise = take (cgRepSizeW rep) (repeat True) ++ argBits args
202

203 204 205 206 207 208 209 210 211
-- -----------------------------------------------------------------------------
-- schemeTopBind

-- Compile code for the right-hand side of a top-level binding

schemeTopBind :: (Id, AnnExpr Id VarSet) -> BcM (ProtoBCO Name)


schemeTopBind (id, rhs)
212
  | Just data_con <- isDataConWorkId_maybe id,
213
    isNullaryDataCon data_con
214
  = 	-- Special case for the worker of a nullary data con.
215
	-- It'll look like this:	Nil = /\a -> Nil a
216
	-- If we feed it into schemeR, we'll get 
217
	--	Nil = Nil
218 219
	-- because mkConAppCode treats nullary constructor applications
	-- by just re-using the single top-level definition.  So
220
	-- for the worker itself, we must allocate it directly.
221
    emitBc (mkProtoBCO (getName id) (toOL [PACK data_con 0, ENTER])
222
                       (Right rhs) 0 0 [{-no bitmap-}] False{-not alts-})
223 224 225 226

  | otherwise
  = schemeR [{- No free variables -}] (id, rhs)

227 228
-- -----------------------------------------------------------------------------
-- schemeR
229

230 231 232 233 234
-- Compile code for a right-hand side, to give a BCO that,
-- when executed with the free variables and arguments on top of the stack,
-- will return with a pointer to the result on top of the stack, after
-- removing the free variables and arguments.
--
235 236
-- Park the resulting BCO in the monad.  Also requires the
-- variable to which this value was bound, so as to give the
237 238 239 240 241 242 243 244
-- resulting BCO a name. 

schemeR :: [Id] 		-- Free vars of the RHS, ordered as they
				-- will appear in the thunk.  Empty for
				-- top-level things, which have no free vars.
	-> (Id, AnnExpr Id VarSet)
	-> BcM (ProtoBCO Name)
schemeR fvs (nm, rhs) 
245
{-
246 247 248 249 250 251 252 253
   | trace (showSDoc (
              (char ' '
               $$ (ppr.filter (not.isTyVar).varSetElems.fst) rhs
               $$ pprCoreExpr (deAnnotate rhs)
               $$ char ' '
              ))) False
   = undefined
   | otherwise
254 255
-}
   = schemeR_wrk fvs nm rhs (collect [] rhs)
256

257 258 259
collect xs (_, AnnNote note e) = collect xs e
collect xs (_, AnnLam x e)     = collect (if isTyVar x then xs else (x:xs)) e
collect xs (_, not_lambda)     = (reverse xs, not_lambda)
260

261
schemeR_wrk fvs nm original_body (args, body)
262 263 264
   = let 
	 all_args  = reverse args ++ fvs
	 arity     = length all_args
265
	 -- all_args are the args in reverse order.  We're compiling a function
266 267 268 269
	 -- \fv1..fvn x1..xn -> e 
	 -- i.e. the fvs come first

         szsw_args = map idSizeW all_args
270
         szw_args  = sum szsw_args
271
         p_init    = listToFM (zip all_args (mkStackOffsets 0 szsw_args))
272 273

	 -- make the arg bitmap
274
	 bits = argBits (reverse (map idCgRep all_args))
275 276
	 bitmap_size = length bits
	 bitmap = mkBitmap bits
277 278
     in
     schemeE szw_args 0 p_init body 		`thenBc` \ body_code ->
279
     emitBc (mkProtoBCO (getName nm) body_code (Right original_body)
280
		arity bitmap_size bitmap False{-not alts-})
281

282

283 284 285 286 287 288 289 290 291 292 293 294
fvsToEnv :: BCEnv -> VarSet -> [Id]
-- Takes the free variables of a right-hand side, and
-- delivers an ordered list of the local variables that will
-- be captured in the thunk for the RHS
-- The BCEnv argument tells which variables are in the local
-- environment: these are the ones that should be captured
--
-- The code that constructs the thunk, and the code that executes
-- it, have to agree about this layout
fvsToEnv p fvs = [v | v <- varSetElems fvs, 
		      isId v,		-- Could be a type variable
		      v `elemFM` p]
295

296 297
-- -----------------------------------------------------------------------------
-- schemeE
298 299 300

-- Compile code to apply the given expression to the remaining args
-- on the stack, returning a HNF.
301
schemeE :: Int -> Sequel -> BCEnv -> AnnExpr' Id VarSet -> BcM BCInstrList
302 303

-- Delegate tail-calls to schemeT.
304 305
schemeE d s p e@(AnnApp f a) 
   = schemeT d s p e
306

307
schemeE d s p e@(AnnVar v)
308 309
   | not (isUnLiftedType v_type)
   =  -- Lifted-type thing; push it in the normal way
310
     schemeT d s p e
311

312
   | otherwise
313 314
   = -- Returning an unlifted value.  
     -- Heave it on the stack, SLIDE, and RETURN.
315
     pushAtom d p (AnnVar v)	`thenBc` \ (push, szw) ->
316 317
     returnBc (push 			-- value onto stack
               `appOL`  mkSLIDE szw (d-s) 	-- clear to sequel
318
               `snocOL` RETURN_UBX v_rep)	-- go
319
   where
320
      v_type = idType v
321
      v_rep = typeCgRep v_type
322

323
schemeE d s p (AnnLit literal)
324
   = pushAtom d p (AnnLit literal)	`thenBc` \ (push, szw) ->
325
     let l_rep = typeCgRep (literalType literal)
326
     in  returnBc (push 			-- value onto stack
327
                   `appOL`  mkSLIDE szw (d-s) 	-- clear to sequel
328
                   `snocOL` RETURN_UBX l_rep)	-- go
329

330

331 332
schemeE d s p (AnnLet (AnnNonRec x (_,rhs)) (_,body))
   | (AnnVar v, args_r_to_l) <- splitApp rhs,
333
     Just data_con <- isDataConWorkId_maybe v,
334
     dataConRepArity data_con == length args_r_to_l
335
   = 	-- Special case for a non-recursive let whose RHS is a 
336
	-- saturatred constructor application.
337 338 339 340
	-- Just allocate the constructor and carry on
     mkConAppCode d s p data_con args_r_to_l	`thenBc` \ alloc_code ->
     schemeE (d+1) s (addToFM p x d) body	`thenBc` \ body_code ->
     returnBc (alloc_code `appOL` body_code)
341 342 343

-- General case for let.  Generates correct, if inefficient, code in
-- all situations.
344
schemeE d s p (AnnLet binds (_,body))
345 346
   = let (xs,rhss) = case binds of AnnNonRec x rhs  -> ([x],[rhs])
                                   AnnRec xs_n_rhss -> unzip xs_n_rhss
347
         n_binds = length xs
348

349
         fvss  = map (fvsToEnv p' . fst) rhss
350

sof's avatar
sof committed
351 352
         -- Sizes of free vars
         sizes = map (\rhs_fvs -> sum (map idSizeW rhs_fvs)) fvss
353 354 355

	 -- the arity of each rhs
	 arities = map (length . fst . collect []) rhss
356 357 358 359 360

         -- This p', d' defn is safe because all the items being pushed
         -- are ptrs, so all have size 1.  d' and p' reflect the stack
         -- after the closures have been allocated in the heap (but not
         -- filled in), and pointers to them parked on the stack.
361 362
         p'    = addListToFM p (zipE xs (mkStackOffsets d (nOfThem n_binds 1)))
         d'    = d + n_binds
363 364 365
         zipE  = zipEqual "schemeE"

         -- ToDo: don't build thunks for things with no free variables
366 367
         build_thunk dd [] size bco off
            = returnBc (PUSH_BCO bco
sof's avatar
sof committed
368
                        `consOL` unitOL (MKAP (off+size) size))
369 370 371
         build_thunk dd (fv:fvs) size bco off = do
              (push_code, pushed_szw) <- pushAtom dd p' (AnnVar fv) 
              more_push_code <- build_thunk (dd+pushed_szw) fvs size bco off
372 373
              returnBc (push_code `appOL` more_push_code)

374 375 376
         alloc_code = toOL (zipWith mkAlloc sizes arities)
	   where mkAlloc sz 0     = ALLOC_AP sz
		 mkAlloc sz arity = ALLOC_PAP arity sz
377

378
	 compile_bind d' fvs x rhs size off = do
379
		bco <- schemeR fvs (x,rhs)
380
		build_thunk d' fvs size bco off
381

382 383 384 385 386 387
	 compile_binds = 
	    [ compile_bind d' fvs x rhs size n
	    | (fvs, x, rhs, size, n) <- 
		zip5 fvss xs rhss sizes [n_binds, n_binds-1 .. 1]
	    ]
     in do
388
     body_code <- schemeE d' s p' body
389 390
     thunk_codes <- sequence compile_binds
     returnBc (alloc_code `appOL` concatOL thunk_codes `appOL` body_code)
391 392


393

394
schemeE d s p (AnnCase scrut bndr [(DataAlt dc, [bind1, bind2], rhs)])
395
   | isUnboxedTupleCon dc, VoidArg <- typeCgRep (idType bind1)
396
	-- Convert 
397
	--	case .... of x { (# VoidArg'd-thing, a #) -> ... }
398 399 400 401 402 403 404
	-- to
	-- 	case .... of a { DEFAULT -> ... }
	-- becuse the return convention for both are identical.
	--
	-- Note that it does not matter losing the void-rep thing from the
	-- envt (it won't be bound now) because we never look such things up.

405
   = --trace "automagic mashing of case alts (# VoidArg, a #)" $
406 407
     doCase d s p scrut bind2 [(DEFAULT, [], rhs)] True{-unboxed tuple-}

408 409
   | isUnboxedTupleCon dc, VoidArg <- typeCgRep (idType bind2)
   = --trace "automagic mashing of case alts (# a, VoidArg #)" $
410
     doCase d s p scrut bind1 [(DEFAULT, [], rhs)] True{-unboxed tuple-}
411

412
schemeE d s p (AnnCase scrut bndr [(DataAlt dc, [bind1], rhs)])
413
   | isUnboxedTupleCon dc
414 415 416 417
	-- Similarly, convert
	--	case .... of x { (# a #) -> ... }
	-- to
	--	case .... of a { DEFAULT -> ... }
418 419
   = --trace "automagic mashing of case alts (# a #)"  $
     doCase d s p scrut bind1 [(DEFAULT, [], rhs)] True{-unboxed tuple-}
420

421
schemeE d s p (AnnCase scrut bndr alts)
422
   = doCase d s p scrut bndr alts False{-not an unboxed tuple-}
423

424
schemeE d s p (AnnNote note (_, body))
425 426 427 428
   = schemeE d s p body

schemeE d s p other
   = pprPanic "ByteCodeGen.schemeE: unhandled case" 
429
               (pprCoreExpr (deAnnotate' other))
430 431


432 433 434 435 436
-- Compile code to do a tail call.  Specifically, push the fn,
-- slide the on-stack app back down to the sequel depth,
-- and enter.  Four cases:
--
-- 0.  (Nasty hack).
437
--     An application "GHC.Prim.tagToEnum# <type> unboxed-int".
438 439
--     The int will be on the stack.  Generate a code sequence
--     to convert it to the relevant constructor, SLIDE and ENTER.
440
--
441
-- 1.  The fn denotes a ccall.  Defer to generateCCall.
442
--
443
-- 2.  (Another nasty hack).  Spot (# a::VoidArg, b #) and treat
444
--     it simply as  b  -- since the representations are identical
445
--     (the VoidArg takes up zero stack space).  Also, spot
446
--     (# b #) and treat it as  b.
447
--
448
-- 3.  Application of a constructor, by defn saturated.
449 450 451
--     Split the args into ptrs and non-ptrs, and push the nonptrs, 
--     then the ptrs, and then do PACK and RETURN.
--
452
-- 4.  Otherwise, it must be a function call.  Push the args
453
--     right to left, SLIDE and ENTER.
454 455

schemeT :: Int 		-- Stack depth
456 457
        -> Sequel 	-- Sequel depth
        -> BCEnv 	-- stack env
458
        -> AnnExpr' Id VarSet 
459
        -> BcM BCInstrList
460

461
schemeT d s p app
462

463 464 465
--   | trace ("schemeT: env in = \n" ++ showSDocDebug (ppBCEnv p)) False
--   = panic "schemeT ?!?!"

466
--   | trace ("\nschemeT\n" ++ showSDoc (pprCoreExpr (deAnnotate' app)) ++ "\n") False
467 468
--   = error "?!?!" 

469
   -- Case 0
470
   | Just (arg, constr_names) <- maybe_is_tagToEnum_call
471
   = pushAtom d p arg	 		`thenBc` \ (push, arg_words) ->
472 473 474 475 476
     implement_tagToId constr_names	`thenBc` \ tagToId_sequence ->
     returnBc (push `appOL`  tagToId_sequence            
                    `appOL`  mkSLIDE 1 (d+arg_words-s)
                    `snocOL` ENTER)

477
   -- Case 1
478 479 480
   | Just (CCall ccall_spec) <- isFCallId_maybe fn
   = generateCCall d s p ccall_spec fn args_r_to_l

481
   -- Case 2: Constructor application
482
   | Just con <- maybe_saturated_dcon,
483 484
     isUnboxedTupleCon con
   = case args_r_to_l of
485
	[arg1,arg2] | isVoidArgAtom arg1 -> 
486
		  unboxedTupleReturn d s p arg2
487
	[arg1,arg2] | isVoidArgAtom arg2 -> 
488 489 490 491
		  unboxedTupleReturn d s p arg1
	_other -> unboxedTupleException

   -- Case 3: Ordinary data constructor
492
   | Just con <- maybe_saturated_dcon
493 494 495 496 497 498
   = mkConAppCode d s p con args_r_to_l	`thenBc` \ alloc_con ->
     returnBc (alloc_con	 `appOL` 
               mkSLIDE 1 (d - s) `snocOL`
               ENTER)

   -- Case 4: Tail call of function 
499
   | otherwise
500
   = doTailCall d s p fn args_r_to_l
501

502 503 504 505
   where
      -- Detect and extract relevant info for the tagToEnum kludge.
      maybe_is_tagToEnum_call
         = let extract_constr_Names ty
506 507 508 509 510 511 512 513
		 | Just (tyc, []) <- splitTyConApp_maybe (repType ty),
		   isDataTyCon tyc
		   = map (getName . dataConWorkId) (tyConDataCons tyc)
		   -- NOTE: use the worker name, not the source name of
		   -- the DataCon.  See DataCon.lhs for details.
		 | otherwise
		   = panic "maybe_is_tagToEnum_call.extract_constr_Ids"
           in
514
           case app of
515
              (AnnApp (_, AnnApp (_, AnnVar v) (_, AnnType t)) arg)
516
                 -> case isPrimOpId_maybe v of
517 518
                       Just TagToEnumOp -> Just (snd arg, extract_constr_Names t)
		       other		-> Nothing
519 520
              other -> Nothing

521 522 523 524
	-- Extract the args (R->L) and fn
	-- The function will necessarily be a variable, 
	-- because we are compiling a tail call
      (AnnVar fn, args_r_to_l) = splitApp app
525

526
      -- Only consider this to be a constructor application iff it is
527
      -- saturated.  Otherwise, we'll call the constructor wrapper.
528 529
      n_args = length args_r_to_l
      maybe_saturated_dcon  
530
	= case isDataConWorkId_maybe fn of
531 532
		Just con | dataConRepArity con == n_args -> Just con
		_ -> Nothing
533

534
-- -----------------------------------------------------------------------------
535 536 537 538 539 540 541 542 543 544
-- Generate code to build a constructor application, 
-- leaving it on top of the stack

mkConAppCode :: Int -> Sequel -> BCEnv
	     -> DataCon 		-- The data constructor
	     -> [AnnExpr' Id VarSet] 	-- Args, in *reverse* order
	     -> BcM BCInstrList

mkConAppCode orig_d s p con []	-- Nullary constructor
  = ASSERT( isNullaryDataCon con )
545
    returnBc (unitOL (PUSH_G (getName (dataConWorkId con))))
546 547 548 549 550 551
	-- Instead of doing a PACK, which would allocate a fresh
	-- copy of this constructor, use the single shared version.

mkConAppCode orig_d s p con args_r_to_l 
  = ASSERT( dataConRepArity con == length args_r_to_l )
    do_pushery orig_d (non_ptr_args ++ ptr_args)
552 553 554
 where
	-- The args are already in reverse order, which is the way PACK
	-- expects them to be.  We must push the non-ptrs after the ptrs.
555
      (ptr_args, non_ptr_args) = partition isPtrAtom args_r_to_l
556 557

      do_pushery d (arg:args)
558
         = pushAtom d p arg			`thenBc` \ (push, arg_words) ->
559 560
           do_pushery (d+arg_words) args	`thenBc` \ more_push_code ->
           returnBc (push `appOL` more_push_code)
561
      do_pushery d []
562 563 564 565
         = returnBc (unitOL (PACK con n_arg_words))
	 where
	   n_arg_words = d - orig_d

566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590

-- -----------------------------------------------------------------------------
-- Returning an unboxed tuple with one non-void component (the only
-- case we can handle).
--
-- Remember, we don't want to *evaluate* the component that is being
-- returned, even if it is a pointed type.  We always just return.

unboxedTupleReturn
	:: Int -> Sequel -> BCEnv
	-> AnnExpr' Id VarSet -> BcM BCInstrList
unboxedTupleReturn d s p arg = do
  (push, sz) <- pushAtom d p arg
  returnBc (push `appOL`
	    mkSLIDE sz (d-s) `snocOL`
	    RETURN_UBX (atomRep arg))

-- -----------------------------------------------------------------------------
-- Generate code for a tail-call

doTailCall
	:: Int -> Sequel -> BCEnv
	-> Id -> [AnnExpr' Id VarSet]
	-> BcM BCInstrList
doTailCall init_d s p fn args
591
  = do_pushes init_d args (map atomRep args)
592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614
  where
  do_pushes d [] reps = do
	ASSERTM( null reps )
        (push_fn, sz) <- pushAtom d p (AnnVar fn)
	ASSERTM( sz == 1 )
	returnBc (push_fn `appOL` (
		  mkSLIDE ((d-init_d) + 1) (init_d - s) `appOL`
		  unitOL ENTER))
  do_pushes d args reps = do
      let (push_apply, n, rest_of_reps) = findPushSeq reps
	  (these_args, rest_of_args) = splitAt n args
      (next_d, push_code) <- push_seq d these_args
      instrs <- do_pushes (next_d + 1) rest_of_args rest_of_reps 
		--                ^^^ for the PUSH_APPLY_ instruction
      returnBc (push_code `appOL` (push_apply `consOL` instrs))

  push_seq d [] = return (d, nilOL)
  push_seq d (arg:args) = do
    (push_code, sz) <- pushAtom d p arg 
    (final_d, more_push_code) <- push_seq (d+sz) args
    return (final_d, push_code `appOL` more_push_code)

-- v. similar to CgStackery.findMatch, ToDo: merge
615
findPushSeq (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: rest)
616
  = (PUSH_APPLY_PPPPPPP, 7, rest)
617
findPushSeq (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: rest)
618
  = (PUSH_APPLY_PPPPPP, 6, rest)
619
findPushSeq (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: rest)
620
  = (PUSH_APPLY_PPPPP, 5, rest)
621
findPushSeq (PtrArg: PtrArg: PtrArg: PtrArg: rest)
622
  = (PUSH_APPLY_PPPP, 4, rest)
623
findPushSeq (PtrArg: PtrArg: PtrArg: rest)
624
  = (PUSH_APPLY_PPP, 3, rest)
625
findPushSeq (PtrArg: PtrArg: rest)
626
  = (PUSH_APPLY_PP, 2, rest)
627
findPushSeq (PtrArg: rest)
628
  = (PUSH_APPLY_P, 1, rest)
629
findPushSeq (VoidArg: rest)
630
  = (PUSH_APPLY_V, 1, rest)
631
findPushSeq (NonPtrArg: rest)
632
  = (PUSH_APPLY_N, 1, rest)
633
findPushSeq (FloatArg: rest)
634
  = (PUSH_APPLY_F, 1, rest)
635
findPushSeq (DoubleArg: rest)
636
  = (PUSH_APPLY_D, 1, rest)
637
findPushSeq (LongArg: rest)
638 639 640 641 642 643 644 645 646 647 648
  = (PUSH_APPLY_L, 1, rest)
findPushSeq _
  = panic "ByteCodeGen.findPushSeq"

-- -----------------------------------------------------------------------------
-- Case expressions

doCase  :: Int -> Sequel -> BCEnv
	-> AnnExpr Id VarSet -> Id -> [AnnAlt Id VarSet]
	-> Bool  -- True <=> is an unboxed tuple case, don't enter the result
	-> BcM BCInstrList
649 650
doCase d s p (_,scrut)
 bndr alts is_unboxed_tuple
651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678
  = let
        -- Top of stack is the return itbl, as usual.
        -- underneath it is the pointer to the alt_code BCO.
        -- When an alt is entered, it assumes the returned value is
        -- on top of the itbl.
        ret_frame_sizeW = 2

	-- An unlifted value gets an extra info table pushed on top
	-- when it is returned.
	unlifted_itbl_sizeW | isAlgCase = 0
	  		    | otherwise = 1

	-- depth of stack after the return value has been pushed
	d_bndr = d + ret_frame_sizeW + idSizeW bndr

	-- depth of stack after the extra info table for an unboxed return
	-- has been pushed, if any.  This is the stack depth at the
	-- continuation.
        d_alts = d_bndr + unlifted_itbl_sizeW

        -- Env in which to compile the alts, not including
        -- any vars bound by the alts themselves
        p_alts = addToFM p bndr (d_bndr - 1)

	bndr_ty = idType bndr
        isAlgCase = not (isUnLiftedType bndr_ty) && not is_unboxed_tuple

        -- given an alt, return a discr and code for it.
679
	codeALt alt@(DEFAULT, _, (_,rhs))
680 681
	   = schemeE d_alts s p_alts rhs	`thenBc` \ rhs_code ->
	     returnBc (NoDiscr, rhs_code)
682
        codeAlt alt@(discr, bndrs, (_,rhs))
683 684 685 686 687 688 689
	   -- primitive or nullary constructor alt: no need to UNPACK
	   | null real_bndrs = do
		rhs_code <- schemeE d_alts s p_alts rhs
                returnBc (my_discr alt, rhs_code)
	   -- algebraic alt with some binders
           | ASSERT(isAlgCase) otherwise =
             let
690
		 (ptrs,nptrs) = partition (isFollowableArg.idCgRep) real_bndrs
691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715
		 ptr_sizes    = map idSizeW ptrs
		 nptrs_sizes  = map idSizeW nptrs
		 bind_sizes   = ptr_sizes ++ nptrs_sizes
		 size         = sum ptr_sizes + sum nptrs_sizes
		 -- the UNPACK instruction unpacks in reverse order...
		 p' = addListToFM p_alts 
			(zip (reverse (ptrs ++ nptrs))
			  (mkStackOffsets d_alts (reverse bind_sizes)))
	     in do
	     rhs_code <- schemeE (d_alts+size) s p' rhs
             return (my_discr alt, unitOL (UNPACK size) `appOL` rhs_code)
	   where
	     real_bndrs = filter (not.isTyVar) bndrs


        my_discr (DEFAULT, binds, rhs) = NoDiscr {-shouldn't really happen-}
        my_discr (DataAlt dc, binds, rhs) 
           | isUnboxedTupleCon dc
           = unboxedTupleException
           | otherwise
           = DiscrP (dataConTag dc - fIRST_TAG)
        my_discr (LitAlt l, binds, rhs)
           = case l of MachInt i     -> DiscrI (fromInteger i)
                       MachFloat r   -> DiscrF (fromRational r)
                       MachDouble r  -> DiscrD (fromRational r)
716
                       MachChar i    -> DiscrI (ord i)
717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732
                       _ -> pprPanic "schemeE(AnnCase).my_discr" (ppr l)

        maybe_ncons 
           | not isAlgCase = Nothing
           | otherwise 
           = case [dc | (DataAlt dc, _, _) <- alts] of
                []     -> Nothing
                (dc:_) -> Just (tyConFamilySize (dataConTyCon dc))

	-- the bitmap is relative to stack depth d, i.e. before the
	-- BCO, info table and return value are pushed on.
	-- This bit of code is v. similar to buildLivenessMask in CgBindery,
	-- except that here we build the bitmap from the known bindings of
	-- things that are pointers, whereas in CgBindery the code builds the
	-- bitmap from the free slots and unboxed bindings.
	-- (ToDo: merge?)
733
	bitmap = intsToReverseBitmap d{-size-} (sortLt (<) rel_slots)
734 735 736 737
	  where
	  binds = fmToList p
	  rel_slots = concat (map spread binds)
	  spread (id, offset)
738
		| isFollowableArg (idCgRep id) = [ rel_offset ]
739 740 741 742 743 744 745 746 747
		| otherwise = []
		where rel_offset = d - offset - 1

     in do
     alt_stuff <- mapM codeAlt alts
     alt_final <- mkMultiBranch maybe_ncons alt_stuff
     let 
         alt_bco_name = getName bndr
         alt_bco = mkProtoBCO alt_bco_name alt_final (Left alts)
748
			0{-no arity-} d{-bitmap size-} bitmap True{-is alts-}
749 750 751 752 753 754 755
     -- in
--     trace ("case: bndr = " ++ showSDocDebug (ppr bndr) ++ "\ndepth = " ++ show d ++ "\nenv = \n" ++ showSDocDebug (ppBCEnv p) ++
--	     "\n      bitmap = " ++ show bitmap) $ do
     scrut_code <- schemeE (d + ret_frame_sizeW) (d + ret_frame_sizeW) p scrut
     alt_bco' <- emitBc alt_bco
     let push_alts
	    | isAlgCase = PUSH_ALTS alt_bco'
756
	    | otherwise = PUSH_ALTS_UNLIFTED alt_bco' (typeCgRep bndr_ty)
757 758 759 760 761 762 763
     returnBc (push_alts `consOL` scrut_code)


-- -----------------------------------------------------------------------------
-- Deal with a CCall.

-- Taggedly push the args onto the stack R->L,
764 765
-- deferencing ForeignObj#s and adjusting addrs to point to
-- payloads in Ptr/Byte arrays.  Then, generate the marshalling
766 767 768
-- (machine) code for the ccall, and create bytecodes to call that and
-- then return in the right way.  

769
generateCCall :: Int -> Sequel 		-- stack and sequel depths
770
              -> BCEnv
771
              -> CCallSpec		-- where to call
772
              -> Id 			-- of target, for type info
773
              -> [AnnExpr' Id VarSet]	-- args (atoms)
774
              -> BcM BCInstrList
775

776 777 778
generateCCall d0 s p ccall_spec@(CCallSpec target cconv safety) fn args_r_to_l
   = let 
         -- useful constants
779
         addr_sizeW = cgRepSizeW NonPtrArg
780 781 782 783

         -- Get the args on the stack, with tags and suitably
         -- dereferenced for the CCall.  For each arg, return the
         -- depth to the first word of the bits for that arg, and the
784
         -- CgRep of what was actually pushed.
785

786
         pargs d [] = returnBc []
787
         pargs d (a:az) 
788 789 790
            = let arg_ty = repType (exprType (deAnnotate' a))

              in case splitTyConApp_maybe arg_ty of
791 792
                    -- Don't push the FO; instead push the Addr# it
                    -- contains.
793 794
		    Just (t, _)
		     | t == arrayPrimTyCon || t == mutableArrayPrimTyCon
795
                       -> pargs (d + addr_sizeW) az	`thenBc` \ rest ->
796 797
                          parg_ArrayishRep arrPtrsHdrSize d p a
							`thenBc` \ code ->
798
                          returnBc ((code,NonPtrArg):rest)
799

800
		     | t == byteArrayPrimTyCon || t == mutableByteArrayPrimTyCon
801
                       -> pargs (d + addr_sizeW) az	`thenBc` \ rest ->
802 803
                          parg_ArrayishRep arrWordsHdrSize d p a
							`thenBc` \ code ->
804
                          returnBc ((code,NonPtrArg):rest)
805

806 807
                    -- Default case: push taggedly, but otherwise intact.
                    other
808
                       -> pushAtom d p a		`thenBc` \ (code_a, sz_a) ->
809
                          pargs (d+sz_a) az		`thenBc` \ rest ->
810
                          returnBc ((code_a, atomRep a) : rest)
811 812 813 814

         -- Do magic for Ptr/Byte arrays.  Push a ptr to the array on
         -- the stack but then advance it over the headers, so as to
         -- point to the payload.
815
         parg_ArrayishRep hdrSize d p a
816
            = pushAtom d p a `thenBc` \ (push_fo, _) ->
817
              -- The ptr points at the header.  Advance it over the
818
              -- header and then pretend this is an Addr#.
819
              returnBc (push_fo `snocOL` SWIZZLE 0 hdrSize)
820

821
     in
822
         pargs d0 args_r_to_l			`thenBc` \ code_n_reps ->
823 824
     let
         (pushs_arg, a_reps_pushed_r_to_l) = unzip code_n_reps
825 826

         push_args    = concatOL pushs_arg
827
         d_after_args = d0 + sum (map cgRepSizeW a_reps_pushed_r_to_l)
828
         a_reps_pushed_RAW
829
            | null a_reps_pushed_r_to_l || head a_reps_pushed_r_to_l /= VoidArg
830 831 832 833 834 835 836 837 838
            = panic "ByteCodeGen.generateCCall: missing or invalid World token?"
            | otherwise
            = reverse (tail a_reps_pushed_r_to_l)

         -- Now: a_reps_pushed_RAW are the reps which are actually on the stack.
         -- push_args is the code to do that.
         -- d_after_args is the stack depth once the args are on.

         -- Get the result rep.
839
         (returns_void, r_rep)
840
            = case maybe_getCCallReturnRep (idType fn) of
841
                 Nothing -> (True,  VoidArg)
842 843 844 845
                 Just rr -> (False, rr) 
         {-
         Because the Haskell stack grows down, the a_reps refer to 
         lowest to highest addresses in that order.  The args for the call
846
         are on the stack.  Now push an unboxed Addr# indicating
847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867
         the C function to call.  Then push a dummy placeholder for the 
         result.  Finally, emit a CCALL insn with an offset pointing to the 
         Addr# just pushed, and a literal field holding the mallocville
         address of the piece of marshalling code we generate.
         So, just prior to the CCALL insn, the stack looks like this 
         (growing down, as usual):
                 
            <arg_n>
            ...
            <arg_1>
            Addr# address_of_C_fn
            <placeholder-for-result#> (must be an unboxed type)

         The interpreter then calls the marshall code mentioned
         in the CCALL insn, passing it (& <placeholder-for-result#>), 
         that is, the addr of the topmost word in the stack.
         When this returns, the placeholder will have been
         filled in.  The placeholder is slid down to the sequel
         depth, and we RETURN.

         This arrangement makes it simple to do f-i-dynamic since the Addr#
868
         value is the first arg anyway.
869 870 871 872 873 874 875 876 877 878

         The marshalling code is generated specifically for this
         call site, and so knows exactly the (Haskell) stack
         offsets of the args, fn address and placeholder.  It
         copies the args to the C stack, calls the stacked addr,
         and parks the result back in the placeholder.  The interpreter
         calls it as a normal C call, assuming it has a signature
            void marshall_code ( StgWord* ptr_to_top_of_stack )
         -}
         -- resolve static address
879
         get_target_info
880 881
            = case target of
                 DynamicTarget
882
                    -> returnBc (False, panic "ByteCodeGen.generateCCall(dyn)")
883
                 StaticTarget target
884 885
                    -> ioToBc (lookupStaticPtr target) `thenBc` \res ->
                       returnBc (True, res)
886 887 888
     in
         get_target_info	`thenBc` \ (is_static, static_target_addr) ->
     let
889 890

         -- Get the arg reps, zapping the leading Addr# in the dynamic case
891 892 893
         a_reps -- | trace (showSDoc (ppr a_reps_pushed_RAW)) False = error "???"
                | is_static = a_reps_pushed_RAW
                | otherwise = if null a_reps_pushed_RAW 
894
                              then panic "ByteCodeGen.generateCCall: dyn with no args"
895
                              else tail a_reps_pushed_RAW
896 897 898 899

         -- push the Addr#
         (push_Addr, d_after_Addr)
            | is_static
900 901
            = (toOL [PUSH_UBX (Right static_target_addr) addr_sizeW],
               d_after_args + addr_sizeW)
902
            | otherwise	-- is already on the stack
903
            = (nilOL, d_after_args)
904 905

         -- Push the return placeholder.  For a call returning nothing,
906 907
         -- this is a VoidArg (tag).
         r_sizeW   = cgRepSizeW r_rep
908
         d_after_r = d_after_Addr + r_sizeW
909 910 911
         r_lit     = mkDummyLiteral r_rep
         push_r    = (if   returns_void 
                      then nilOL 
912
                      else unitOL (PUSH_UBX (Left r_lit) r_sizeW))
913 914 915

         -- generate the marshalling code we're going to call
         r_offW       = 0 
916 917
         addr_offW    = r_sizeW
         arg1_offW    = r_sizeW + addr_sizeW
918
         args_offW    = map (arg1_offW +) 
919
                            (init (scanl (+) 0 (map cgRepSizeW a_reps)))
920 921 922 923
     in
         ioToBc (mkMarshalCode cconv
                    (r_offW, r_rep) addr_offW
                    (zip args_offW a_reps))	`thenBc` \ addr_of_marshaller ->
924
         recordMallocBc addr_of_marshaller	`thenBc_`
925
     let
926
	 -- Offset of the next stack frame down the stack.  The CCALL
927 928 929
 	 -- instruction needs to describe the chunk of stack containing
	 -- the ccall args to the GC, so it needs to know how large it
	 -- is.  See comment in Interpreter.c with the CCALL instruction.
930 931
	 stk_offset   = d_after_r - s

932
         -- do the call
933
         do_call      = unitOL (CCALL stk_offset (castPtr addr_of_marshaller))
934
         -- slide and return
935 936
         wrapup       = mkSLIDE r_sizeW (d_after_r - r_sizeW - s)
                        `snocOL` RETURN_UBX r_rep
937
     in
938
         --trace (show (arg1_offW, args_offW  ,  (map cgRepSizeW a_reps) )) $
939 940
         returnBc (
         push_args `appOL`
941
         push_Addr `appOL` push_r `appOL` do_call `appOL` wrapup
942
         )
943 944 945 946


-- Make a dummy literal, to be used as a placeholder for FFI return
-- values on the stack.
947
mkDummyLiteral :: CgRep -> Literal
948 949
mkDummyLiteral pr
   = case pr of
950 951 952
        NonPtrArg -> MachWord 0
        DoubleArg -> MachDouble 0
        FloatArg  -> MachFloat 0
953
        _         -> moan64 "mkDummyLiteral" (ppr pr)
954 955 956


-- Convert (eg) 
957 958
--     GHC.Prim.Char# -> GHC.Prim.State# GHC.Prim.RealWorld
--                   -> (# GHC.Prim.State# GHC.Prim.RealWorld, GHC.Prim.Int# #)
959
--
960
-- to  Just IntRep
961
-- and check that an unboxed pair is returned wherein the first arg is VoidArg'd.
962 963 964
--
-- Alternatively, for call-targets returning nothing, convert
--
965 966
--     GHC.Prim.Char# -> GHC.Prim.State# GHC.Prim.RealWorld
--                   -> (# GHC.Prim.State# GHC.Prim.RealWorld #)
967
--
968
-- to  Nothing
969

970
maybe_getCCallReturnRep :: Type -> Maybe CgRep
971
maybe_getCCallReturnRep fn_ty
972
   = let (a_tys, r_ty) = splitFunTys (dropForAlls fn_ty)
973
         maybe_r_rep_to_go  
sof's avatar
sof committed
974
            = if isSingleton r_reps then Nothing else Just (r_reps !! 1)
975 976
         (r_tycon, r_reps) 
            = case splitTyConApp_maybe (repType r_ty) of
977
                      (Just (tyc, tys)) -> (tyc, map typeCgRep tys)
978
                      Nothing -> blargh
979 980
         ok = ( ( r_reps `lengthIs` 2 && VoidArg == head r_reps)
                || r_reps == [VoidArg] )
981 982 983
              && isUnboxedTupleTyCon r_tycon
              && case maybe_r_rep_to_go of
                    Nothing    -> True