Costs.lhs 20.6 KB
Newer Older
1
%
2
% (c) The GRASP/AQUA Project, Glasgow University, 1994-1996
3 4 5 6 7 8 9 10 11 12
%     Hans Wolfgang Loidl
%
% ---------------------------------------------------------------------------

\section[Costs]{Evaluating the costs of computing some abstract C code}

This module provides all the functions necessary for computing, for a given
abstract~C program, the costs of executing that program. This is done by the
exported function:

\begin{quote}
 {\verb data CostRes = Cost (Int, Int, Int, Int, Int)}
 {\verb costs :: AbstractC -> CostRes }
\end{quote}

The meaning of the result tuple is:
\begin{itemize}
 \item The first component ({\tt i}) counts the number of integer,
   arithmetic and bit-manipulating instructions.
 \item The second component ({\tt b}) counts the number of branches (direct
   branches as well as indirect ones).
 \item The third component ({\tt l}) counts the number of load instructions.
 \item The fourth component ({\tt s}) counts the number of store
   instructions.
 \item The fifth component ({\tt f}) counts the number of floating point
   instructions.
\end{itemize}

This function is needed in GrAnSim for parallelism.

These are first suggestions for scaling the costs. However, this scaling
should be done in the RTS rather than in the compiler (it really should be
tunable!):

\begin{pseudocode}

#define LOAD_COSTS		2
#define STORE_COSTS		2
#define INT_ARITHM_COSTS	1
#define GMP_ARITHM_COSTS	3 {- any clue for GMP costs ? -}
#define FLOAT_ARITHM_COSTS	3 {- any clue for float costs ? -}
#define BRANCH_COSTS		2

\end{pseudocode}

\begin{code}
47
-- Sum of the five components of an (unwrapped) cost tuple.  The arguments
-- are parenthesised so that compound expressions can be passed safely.
#define ACCUM_COSTS(i,b,l,s,f)	((i)+(b)+(l)+(s)+(f))

-- Register count used by SAVE_COSTS and RESTORE_COSTS below;
-- cf. PprAbsCSyn.lhs and runtime/c-as-asm/CallWrap_C.lc
#define NUM_REGS		10

-- Restoring costs NUM_REGS loads, saving costs NUM_REGS stores.
#define RESTORE_COSTS		(Cost (0, 0, NUM_REGS, 0, 0)  :: CostRes)
#define SAVE_COSTS		(Cost (0, 0, 0, NUM_REGS, 0)  :: CostRes)

-- Flat guess (50 arithmetic instructions) for the body of a ccall.
#define CCALL_COSTS_GUESS	(Cost (50, 0, 0, 0, 0)	      :: CostRes)
53 54

-- Export list: the two costing functions, the cost record with its
-- constructor, the zero cost and the Side tag.
module Costs
    ( costs
    , addrModeCosts
    , CostRes(Cost)
    , nullCosts
    , Side(..)
    ) where

58
#include "HsVersions.h"
59

60
import AbsCSyn
61
import PrimOp		( primOpNeedsWrapper, PrimOp(..) )
62
import GlaExts		( trace )
63 64 65 66 67 68 69

-- --------------------------------------------------------------------------
-- A cost record.  The five counters are, in order:
--   i: integer, arithmetic and bit-manipulating instructions
--   b: branches (direct as well as indirect)
--   l: load instructions
--   s: store instructions
--   f: floating point instructions
data CostRes = Cost (Int, Int, Int, Int, Int)
	       deriving (Text)

-- The zero cost: all five counters are 0.
nullCosts :: CostRes
nullCosts = Cost (0, 0, 0, 0, 0)

-- Flat charge used for a CInitHdr statement.
initHdrCosts :: CostRes
initHdrCosts = Cost (2, 0, 0, 1, 0)

-- Recognisably bogus value; just for debugging.
errorCosts :: CostRes
errorCosts = Cost (-1, -1, -1, -1, -1)

-- Exactly one integer/arithmetic instruction.
oneArithm :: CostRes
oneArithm = Cost (1, 0, 0, 0, 0)

-- Two cost records are equal iff all five counters agree.  The wrapped
-- 5-tuples are compared directly.
instance Eq CostRes where
  Cost t1 == Cost t2 = t1 == t2
77 78 79 80 81

-- Cost arithmetic is component-wise on the five counters.
instance Num CostRes where
  (+)    = binOp (+)
  (-)    = binOp (-)
  (*)    = binOp (*)
  negate = mapOp negate
  abs    = mapOp abs
  signum = mapOp signum
  -- A literal n denotes n in every component.  Together with the
  -- component-wise (*) above, n * c scales every counter of c by n, and
  -- the literal 0 coincides with nullCosts.  Without this method any
  -- numeric literal used at type CostRes would fail at run time.
  fromInteger n = Cost (k, k, k, k, k)
    where k = fromInteger n
85 86 87 88 89 90 91 92

-- Apply one function to each of the five counters of a cost record.
mapOp :: (Int -> Int) -> CostRes -> CostRes
mapOp fn cost =
  case cost of
    Cost (ints, brs, lds, sts, flts) ->
      Cost (fn ints, fn brs, fn lds, fn sts, fn flts)

-- Right fold over the five counters of a cost record, taken in the order
-- i, b, l, s, f.
foldrOp :: (Int -> a -> a) -> a -> CostRes -> a
foldrOp step seed (Cost (ints, brs, lds, sts, flts)) =
  foldr step seed [ints, brs, lds, sts, flts]

93
-- Combine two cost records counter by counter with the given operator.
binOp :: (Int -> Int -> Int) -> CostRes -> CostRes -> CostRes
binOp o (Cost (i1, b1, l1, s1, f1)) (Cost (i2, b2, l2, s2, f2)) =
  Cost (i1 `o` i2, b1 `o` b2, l1 `o` l2, s1 `o` s2, f1 `o` f2)
96

97
-- Like binOp, but the operator may produce any result type, so the five
-- results are returned as a plain tuple rather than a CostRes.
binOp' :: (Int -> Int -> a) -> CostRes -> CostRes -> (a,a,a,a,a)
binOp' o (Cost (i1, b1, l1, s1, f1)) (Cost (i2, b2, l2, s2, f2)) =
  (i1 `o` i2, b1 `o` b2, l1 `o` l2, s1 `o` s2, f1 `o` f2)
100 101 102

-- --------------------------------------------------------------------------

103
-- The side of an assignment on which an addressing mode occurs.  In
-- addrModeCosts, Lhs modes are charged as stores and Rhs modes as loads.
data Side = Lhs | Rhs
            deriving (Eq)

-- --------------------------------------------------------------------------

-- Estimate the cost of executing a piece of abstract C.
--
-- Code constructs are costed structurally: the costs of sub-statements
-- and of the addressing modes involved are added up, plus a fixed charge
-- per construct.  Data declarations cost nothing.
costs :: AbstractC -> CostRes

costs absC =
  case absC of
    AbsCNop                       -> nullCosts

    AbsCStmts absC1 absC2         -> costs absC1 + costs absC2

    CAssign (CReg _) (CReg _)     -> Cost (1,0,0,0,0)  -- typ.: mov %reg1,%reg2

    CAssign (CReg _) (CTemp _ _)  -> Cost (1,0,0,0,0)

    CAssign (CReg _) (CAddr _)    -> Cost (1,0,0,0,0)  -- typ.: add %reg1,<adr>,%reg2

    -- general assignment: the target is costed as Lhs, the source as Rhs
    CAssign target_m source_m     -> addrModeCosts target_m Lhs +
                                     addrModeCosts source_m Rhs

    CJump (CLbl _ _)              -> one_branch        -- no ld for call necessary

    CJump mode                    -> addrModeCosts mode Rhs + one_branch

    CFallThrough mode             -> addrModeCosts mode Rhs + one_branch  -- chu' 0.24

    CReturn mode info ->
      case info of
        DirectReturn ->
            addrModeCosts mode Rhs + one_branch
            -- i.e. ld address to reg and call reg

        DynamicVectoredReturn mode' ->
            addrModeCosts mode Rhs +
            addrModeCosts mode' Rhs +
            Cost (0,1,1,0,0)
            {- generates code like this:
                 JMP_(<mode>)[RVREL(<mode'>)];
               i.e. 1 possb ld for mode'
                    1 ld for RVREL
                    1 possb ld for mode
                    1 call                              -}

        StaticVectoredReturn _ ->
            addrModeCosts mode Rhs + Cost (0,1,1,0,0)
            -- as above with mode' fixed to CLit
            -- typically 2 ld + 1 call; 1st ld due
            -- to CVal as mode

    CSwitch _ _ _                 -> nullCosts
            {- for handling costs of all branches of
               a CSwitch see PprAbsC.
               Basically:
                Costs for branch =
                   Costs before CSwitch +
                   addrModeCosts of head +
                   Costs for 1 cond branch +
                   Costs for body of branch
            -}

    CCodeBlock _ body             -> costs body

    CInitHdr _ _ _ _              -> initHdrCosts

            {- This is more fancy but superfluous: The addr modes
               are fixed and so the costs are const!

            argCosts + initHdrCosts
            where argCosts = addrModeCosts (CAddr reg_rel) Rhs +
                             addrModeCosts base_lbl +    -- CLbl!
                             3*addrModeCosts (mkIntCLit 1{- any val -})
            -}
            {- this extends to something like
                SET_SPEC_HDR(...)
               For costing the args of this macro
               see PprAbsC.lhs where args are inserted -}

    COpStmt modes_res primOp modes_args _ _ ->
        {-
           let
                n = length modes_res
           in
                (0, 0, n, n, 0) +
                primOpCosts primOp +
                if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
                                             else nullCosts
           -- ^^HWL
        -}
        -- results are costed as Lhs, arguments as Rhs; ops that need a
        -- wrapper additionally pay for saving and restoring registers
        sum_costs [addrModeCosts mode Lhs | mode <- modes_res]  +
        sum_costs [addrModeCosts mode Rhs | mode <- modes_args] +
        primOpCosts primOp +
        (if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
                                      else nullCosts)

    CSimultaneous body            -> costs body

    CMacroStmt macro modes        -> stmtMacroCosts macro modes

    CCallProfCtrMacro _ _         -> nullCosts
            {- we don't count profiling in GrAnSim -}

    CCallProfCCMacro _ _          -> nullCosts
            {- we don't count profiling in GrAnSim -}

    -- *** the next three [or so...] are DATA (those above are CODE) ***
    -- as they are data rather than code they all have nullCosts   -- HWL

    CStaticClosure _ _ _ _        -> nullCosts

    CClosureInfoAndCode _ _ _ _ _ _ -> nullCosts

    CRetVector _ _ _              -> nullCosts

    CRetUnVector _ _              -> nullCosts

    CFlatRetVector _ _            -> nullCosts

    CCostCentreDecl _ _           -> nullCosts

    CClosureUpdInfo _             -> nullCosts

    CSplitMarker                  -> nullCosts
  where
    -- a single branch instruction
    one_branch = Cost (0,1,0,0,0)

    -- total of a list of costs; the empty list costs nothing
    sum_costs  = foldr (+) nullCosts
231 232 233 234 235 236 237 238 239 240

-- ---------------------------------------------------------------------------

-- Cost of accessing an operand through the given addressing mode.  The
-- Side says whether the operand is written (Lhs) or read (Rhs).
addrModeCosts :: CAddrMode -> Side -> CostRes

-- addrModeCosts _ _ = nullCosts

addrModeCosts addr_mode side =
  case addr_mode of
    CVal _ _       -> mem_access

    CAddr _        -> mem_access         -- ??unchecked

    CReg _         -> nullCosts  {- loading from, storing to reg is free ! -}
                                 {- for costing CReg->Creg ops see special -}
                                 {- case in costs fct -}

    -- both sub-modes are costed on the same side as the whole entry
    CTableEntry base_mode offset_mode _ ->
        addrModeCosts base_mode side +
        addrModeCosts offset_mode side +
        Cost (1,0,1,0,0)

    CTemp _ _      -> nullCosts  {- if lhs then Cost (0, 0, 0, 1, 0)
                                           else Cost (0, 0, 1, 0, 0)  -}
        -- ``Temporaries'' correspond to local variables in C, and registers in
        -- native code.
        -- I assume they can be somewhat optimized by gcc -- HWL

    CLbl _ _       -> label_ref
        -- Rhs: typically: sethi %hi(lbl),%tmp_reg
        --                 or    %tmp_reg,%lo(lbl),%target_reg

    CUnVecLbl _ _  -> label_ref          -- same as CLbl

    --  Check the following 3 (checked from CLit on)

    CCharLike _    -> mem_access

    CIntLike _     -> mem_access

    CString _      -> mem_access

    CLit _         -> literal            -- Lhs should never occur
                                         -- Rhs typ.: mov lit,%reg

    CLitLit _ _    -> literal            -- same as CLit

    COffset _      -> literal            -- same as CLit

    CCode absC     -> costs absC

    CLabelledCode _ absC -> costs absC

    CJoinPoint _ _ -> mem_access

    CMacroExpr _ macro mode_list -> exprMacroCosts side macro mode_list

    CCostCentre _ _ -> nullCosts
  where
    lhs = side == Lhs

    -- one store when written to, one load when read from
    mem_access = if lhs then Cost (0, 0, 0, 1, 0)
                        else Cost (0, 0, 1, 0, 0)

    -- a label: one store on the Lhs, two arithmetic instructions on the Rhs
    label_ref  = if lhs then Cost (0, 0, 0, 1, 0)
                        else Cost (2, 0, 0, 0, 0)

    -- a literal: free on the Lhs, one arithmetic instruction on the Rhs
    literal    = if lhs then nullCosts
                        else Cost (1, 0, 0, 0, 0)

-- ---------------------------------------------------------------------------

-- Cost of an expression macro: the cost of all its arguments (always
-- costed as Rhs) plus a fixed charge that depends on the macro and on the
-- side the macro expression itself occurs on.
exprMacroCosts :: Side -> CExprMacro -> [CAddrMode] -> CostRes

exprMacroCosts side macro mode_list =
  arg_costs + macro_costs
  where
    arg_costs = foldr (+) nullCosts
                      [addrModeCosts mode Rhs | mode <- mode_list]

    -- one store on the Lhs, one load on the Rhs
    mem_access = if side == Lhs then Cost (0, 0, 0, 1, 0)
                                else Cost (0, 0, 1, 0, 0)

    macro_costs =
      case macro of
        INFO_PTR   -> mem_access
        ENTRY_CODE -> nullCosts
        INFO_TAG   -> mem_access
        EVAL_TAG   -> mem_access + Cost (1, 0, 0, 0, 0)
                      -- costs of INFO_TAG + (1,0,0,0,0)

-- ---------------------------------------------------------------------------

-- Flat cost estimate for a statement macro.
--
-- NB: the argument modes are not consulted; every macro is charged a
-- constant.  (An earlier version computed the summed Rhs costs of `modes'
-- in a local binding but never used it; that dead binding is gone.)
-- Macros without an entry are reported via trace and cost nothing.
stmtMacroCosts :: CStmtMacro -> [CAddrMode] -> CostRes

stmtMacroCosts macro modes =
  case macro of
    ARGS_CHK_A_LOAD_NODE  ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
                -- p=probability of PAP (instead of AP): + p*(3,1,0,0,0)
    ARGS_CHK_A            ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
                -- p=probability of PAP (instead of AP): + p*(0,1,0,0,0)
    ARGS_CHK_B_LOAD_NODE  ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
    ARGS_CHK_B            ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
    HEAP_CHK              ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
    -- STK_CHK               ->  (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
    STK_CHK               ->  Cost (0, 0, 0, 0, 0)       {- StgMacros.lh  -}
    UPD_CAF               ->  Cost (7, 0, 1, 3, 0)       {- SMupdate.lh   -}
    UPD_IND               ->  Cost (8, 2, 2, 0, 0)       {- SMupdate.lh
                                updatee in old-gen: Cost (4, 1, 1, 0, 0)
                                updatee in new-gen: Cost (4, 1, 1, 0, 0)
                                NB: we include costs for checking if there is
                                    a BQ, but we omit costs for awakening BQ
                                    (these probably differ between old-gen and
                                    new gen) -}
    UPD_INPLACE_NOPTRS    ->  Cost (13, 3, 3, 2, 0)       {- SMupdate.lh
                                common for both:    Cost (4, 1, 1, 0, 0)
                                updatee in old-gen: Cost (14, 3, 2, 4, 0)
                                updatee in new-gen: Cost (4, 1, 1, 0, 0)   -}
    UPD_INPLACE_PTRS      ->  Cost (13, 3, 3, 2, 0)       {- SMupdate.lh
                                common for both:    Cost (4, 1, 1, 0, 0)
                                updatee in old-gen: Cost (14, 3, 2, 4, 0)
                                updatee in new-gen: Cost (4, 1, 1, 0, 0)   -}

    UPD_BH_UPDATABLE      ->  Cost (3, 0, 0, 1, 0)       {- SMupdate.lh   -}
    UPD_BH_SINGLE_ENTRY   ->  Cost (3, 0, 0, 1, 0)       {- SMupdate.lh   -}
    PUSH_STD_UPD_FRAME    ->  Cost (3, 0, 0, 4, 0)       {- SMupdate.lh   -}
    POP_STD_UPD_FRAME     ->  Cost (1, 0, 3, 0, 0)       {- SMupdate.lh   -}
    SET_TAG               ->  nullCosts             {- COptRegs.lh -}
    GRAN_FETCH                  ->  nullCosts     {- GrAnSim bookkeeping -}
    GRAN_RESCHEDULE             ->  nullCosts     {- GrAnSim bookkeeping -}
    GRAN_FETCH_AND_RESCHEDULE   ->  nullCosts     {- GrAnSim bookkeeping -}
    GRAN_YIELD                  ->  nullCosts     {- GrAnSim bookkeeping -- added SOF -}
    THREAD_CONTEXT_SWITCH       ->  nullCosts     {- GrAnSim bookkeeping -}
    _ -> trace ("Costs.stmtMacroCosts: "++show macro) nullCosts
372 373 374

-- ---------------------------------------------------------------------------

375
-- The PrimOps that primOpCosts treats as floating point operations: any
-- of these without a special-case equation there is charged one floating
-- point instruction.
floatOps :: [PrimOp]
floatOps =
  [   FloatGtOp  , FloatGeOp  , FloatEqOp  , FloatNeOp  , FloatLtOp  , FloatLeOp
    , DoubleGtOp , DoubleGeOp , DoubleEqOp , DoubleNeOp , DoubleLtOp , DoubleLeOp
    , FloatAddOp , FloatSubOp , FloatMulOp , FloatDivOp , FloatNegOp
    , Float2IntOp , Int2FloatOp
    , FloatExpOp   , FloatLogOp   , FloatSqrtOp
    , FloatSinOp   , FloatCosOp   , FloatTanOp
    , FloatAsinOp  , FloatAcosOp  , FloatAtanOp
    , FloatSinhOp  , FloatCoshOp  , FloatTanhOp
    , FloatPowerOp
    , DoubleAddOp , DoubleSubOp , DoubleMulOp , DoubleDivOp , DoubleNegOp
    , Double2IntOp , Int2DoubleOp
    , Double2FloatOp , Float2DoubleOp
    , DoubleExpOp   , DoubleLogOp   , DoubleSqrtOp
    , DoubleSinOp   , DoubleCosOp   , DoubleTanOp
    , DoubleAsinOp  , DoubleAcosOp  , DoubleAtanOp
    , DoubleSinhOp  , DoubleCoshOp  , DoubleTanhOp
    , DoublePowerOp
    , FloatEncodeOp  , FloatDecodeOp
    , DoubleEncodeOp , DoubleDecodeOp
  ]

398 399
-- The Integer PrimOps; primOpCosts charges each of them the same guessed
-- flat cost.
gmpOps :: [PrimOp]
gmpOps =
  [   IntegerAddOp , IntegerSubOp , IntegerMulOp
    , IntegerQuotRemOp , IntegerDivModOp , IntegerNegOp
    , IntegerCmpOp
    , Integer2IntOp  , Int2IntegerOp
    , Addr2IntegerOp
  ]


408
abs_costs :: CostRes
abs_costs = nullCosts   -- NB:  This is normal STG code with costs already
                        --      included; no need to add costs again.

-- Costs of the integer multiply, remainder and divide operations proper;
-- primOpCosts adds the argument/result shuffling on top.
umul_costs, rem_costs, div_costs :: CostRes
umul_costs = Cost (21,4,0,0,0)     -- due to spy counts
rem_costs  = Cost (30,15,0,0,0)    -- due to spy counts
div_costs  = Cost (30,15,0,0,0)    -- due to spy counts
414 415 416 417 418

-- Cost of executing a primitive operation itself; operands and results
-- are costed separately by the caller.  Special cases come first, and
-- everything else is classified by the final catch-all equation.
primOpCosts :: PrimOp -> CostRes

-- Special cases

primOpCosts (CCallOp _ _ _ _ _) = SAVE_COSTS + RESTORE_COSTS
                                  -- don't guess costs of ccall proper
                                  -- for exact costing use a GRAN_EXEC
                                  -- in the C code

-- Usually 3 mov instructions are needed to get args and res in right place.

primOpCosts IntMulOp  = Cost (3, 1, 0, 0, 0)  + umul_costs
primOpCosts IntQuotOp = Cost (3, 1, 0, 0, 0)  + div_costs
primOpCosts IntRemOp  = Cost (3, 1, 0, 0, 0)  + rem_costs
primOpCosts IntNegOp  = Cost (1, 1, 0, 0, 0) -- translates into 1 sub
primOpCosts IntAbsOp  = Cost (0, 1, 0, 0, 0) -- abs closure already costed

primOpCosts FloatGtOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
primOpCosts FloatGeOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
primOpCosts FloatEqOp  = Cost (0, 0, 0, 0, 2) -- cheap f-comp
primOpCosts FloatNeOp  = Cost (0, 0, 0, 0, 2) -- cheap f-comp
primOpCosts FloatLtOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
primOpCosts FloatLeOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
primOpCosts DoubleGtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
primOpCosts DoubleGeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
primOpCosts DoubleEqOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
primOpCosts DoubleNeOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
primOpCosts DoubleLtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
primOpCosts DoubleLeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp

-- NOTE(review): only the Float versions of the math functions get this
-- special charge; the Double versions fall through to the floatOps guard
-- below.  Confirm whether that is intended.
primOpCosts FloatExpOp    = Cost (2, 1, 4, 4, 3)
primOpCosts FloatLogOp    = Cost (2, 1, 4, 4, 3)
primOpCosts FloatSqrtOp   = Cost (2, 1, 4, 4, 3)
primOpCosts FloatSinOp    = Cost (2, 1, 4, 4, 3)
primOpCosts FloatCosOp    = Cost (2, 1, 4, 4, 3)
primOpCosts FloatTanOp    = Cost (2, 1, 4, 4, 3)
primOpCosts FloatAsinOp   = Cost (2, 1, 4, 4, 3)
primOpCosts FloatAcosOp   = Cost (2, 1, 4, 4, 3)
primOpCosts FloatAtanOp   = Cost (2, 1, 4, 4, 3)
primOpCosts FloatSinhOp   = Cost (2, 1, 4, 4, 3)
primOpCosts FloatCoshOp   = Cost (2, 1, 4, 4, 3)
primOpCosts FloatTanhOp   = Cost (2, 1, 4, 4, 3)
--primOpCosts FloatAsinhOp  = Cost (2, 1, 4, 4, 3)
--primOpCosts FloatAcoshOp  = Cost (2, 1, 4, 4, 3)
--primOpCosts FloatAtanhOp  = Cost (2, 1, 4, 4, 3)
primOpCosts FloatPowerOp  = Cost (2, 1, 4, 4, 3)

{- There should be special handling of the Array PrimOps in here   HWL -}

-- Everything else: one floating point instruction for the remaining
-- floatOps, a guessed flat cost for gmpOps, one arithmetic instruction
-- otherwise.
primOpCosts primOp
  | primOp `elem` floatOps = Cost (0, 0, 0, 0, 1)  :: CostRes
  | primOp `elem` gmpOps   = Cost (30, 5, 10, 10, 0) :: CostRes  -- GUESS; check it
  | otherwise              = Cost (1, 0, 0, 0, 0)
468 469 470 471

-- ---------------------------------------------------------------------------
{- HWL: currently unused

costsByKind :: PrimRep -> Side -> CostRes

-- The following PrimKinds say that the data is already in a reg

costsByKind CharRep	_ = nullCosts
costsByKind IntRep	_ = nullCosts
costsByKind WordRep	_ = nullCosts
costsByKind AddrRep	_ = nullCosts
costsByKind FloatRep	_ = nullCosts
costsByKind DoubleRep	_ = nullCosts
-}
-- ---------------------------------------------------------------------------
\end{code}

486
This is the data structure of {\tt PrimOp} copied from prelude/PrimOp.lhs.
I include here some comments about the estimated costs for these @PrimOps@.
Compare with the @primOpCosts@ fct above.  -- HWL

\begin{pseudocode}
data PrimOp
    -- I assume all these basic comparisons take just one ALU instruction
    -- Checked that for Char, Int; Word, Addr should be the same as Int.

495 496 497 498
    = CharGtOp	 | CharGeOp   | CharEqOp   | CharNeOp	| CharLtOp   | CharLeOp
    | IntGtOp	 | IntGeOp    | IntEqOp	   | IntNeOp	| IntLtOp    | IntLeOp
    | WordGtOp	 | WordGeOp   | WordEqOp   | WordNeOp	| WordLtOp   | WordLeOp
    | AddrGtOp	 | AddrGeOp   | AddrEqOp   | AddrNeOp	| AddrLtOp   | AddrLeOp
499 500 501 502

    -- Analogously, these take one FP unit instruction
    -- Haven't checked that, yet.

503
    | FloatGtOp	 | FloatGeOp  | FloatEqOp  | FloatNeOp	| FloatLtOp  | FloatLeOp
504 505 506 507 508 509
    | DoubleGtOp | DoubleGeOp | DoubleEqOp | DoubleNeOp | DoubleLtOp | DoubleLeOp

    -- 1 ALU op; unchecked
    | OrdOp | ChrOp

    -- these just take 1 ALU op; checked
510
    | IntAddOp | IntSubOp
511 512 513 514

    -- but these take more than that; see special cases in primOpCosts
    -- I counted the generated ass. instructions for these -> checked
    | IntMulOp | IntQuotOp
515
    | IntRemOp | IntNegOp | IntAbsOp
516 517 518 519

    -- Rest is unchecked so far -- HWL

    -- Word#-related ops:
520
    | AndOp   | OrOp  | NotOp | XorOp | ShiftLOp | ShiftROp
521 522 523 524 525 526 527 528 529
    | Int2WordOp | Word2IntOp -- casts

    -- Addr#-related ops:
    | Int2AddrOp | Addr2IntOp -- casts

    -- Float#-related ops:
    | FloatAddOp | FloatSubOp | FloatMulOp | FloatDivOp | FloatNegOp
    | Float2IntOp | Int2FloatOp

530 531
    | FloatExpOp   | FloatLogOp	  | FloatSqrtOp
    | FloatSinOp   | FloatCosOp	  | FloatTanOp
532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567
    | FloatAsinOp  | FloatAcosOp  | FloatAtanOp
    | FloatSinhOp  | FloatCoshOp  | FloatTanhOp
    -- not all machines have these available conveniently:
    -- | FloatAsinhOp | FloatAcoshOp | FloatAtanhOp
    | FloatPowerOp -- ** op

    -- Double#-related ops:
    | DoubleAddOp | DoubleSubOp | DoubleMulOp | DoubleDivOp | DoubleNegOp
    | Double2IntOp | Int2DoubleOp
    | Double2FloatOp | Float2DoubleOp

    | DoubleExpOp   | DoubleLogOp   | DoubleSqrtOp
    | DoubleSinOp   | DoubleCosOp   | DoubleTanOp
    | DoubleAsinOp  | DoubleAcosOp  | DoubleAtanOp
    | DoubleSinhOp  | DoubleCoshOp  | DoubleTanhOp
    -- not all machines have these available conveniently:
    -- | DoubleAsinhOp | DoubleAcoshOp | DoubleAtanhOp
    | DoublePowerOp -- ** op

    -- Integer (and related...) ops:
    -- slightly weird -- to match GMP package.
    | IntegerAddOp | IntegerSubOp | IntegerMulOp
    | IntegerQuotRemOp | IntegerDivModOp | IntegerNegOp

    | IntegerCmpOp

    | Integer2IntOp  | Int2IntegerOp
    | Addr2IntegerOp -- "Addr" is *always* a literal string
    -- ?? gcd, etc?

    | FloatEncodeOp  | FloatDecodeOp
    | DoubleEncodeOp | DoubleDecodeOp

    -- primitive ops for primitive arrays

    | NewArrayOp
568
    | NewByteArrayOp PrimRep
569 570 571 572 573 574

    | SameMutableArrayOp
    | SameMutableByteArrayOp

    | ReadArrayOp | WriteArrayOp | IndexArrayOp -- for arrays of Haskell ptrs

575 576 577 578 579 580
    | ReadByteArrayOp	PrimRep
    | WriteByteArrayOp	PrimRep
    | IndexByteArrayOp	PrimRep
    | IndexOffAddrOp	PrimRep
	-- PrimRep can be one of {Char,Int,Addr,Float,Double}Kind.
	-- This is just a cheesy encoding of a bunch of ops.
581 582
	-- Note that ForeignObjRep is not included -- the only way of
	-- creating a ForeignObj is with a ccall or casm.
583 584 585 586 587 588 589

    | UnsafeFreezeArrayOp | UnsafeFreezeByteArrayOp

    | MakeStablePtrOp | DeRefStablePtrOp
\end{pseudocode}

A special ``trap-door'' to use in making calls direct to C functions:
Note: From the GrAn point of view, a CCall is probably very expensive.
      The programmer can specify the costs of the CCall by inserting
      a GRAN_EXEC(a,b,l,s,f) at the end of the C code, specifying the
      number of arithmetic, branch, load, store and floating point
      instructions.
      -- HWL

\begin{pseudocode}
597 598 599 600 601 602
    | CCallOp	String	-- An "unboxed" ccall# to this named function
		Bool	-- True <=> really a "casm"
		Bool	-- True <=> might invoke Haskell GC
		[Type]	-- Unboxed argument; the state-token
			-- argument will have been put *first*
		Type	-- Return type; one of the "StateAnd<blah>#" types
603 604 605

    -- (... to be continued ... )
\end{pseudocode}