Costs.lhs 22.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
%
% (c) The GRASP/AQUA Project, Glasgow University, 1994-1995
%     Hans Wolfgang Loidl
%
% ---------------------------------------------------------------------------

\section[Costs]{Evaluating the costs of computing some abstract C code}

This module   provides all necessary  functions for   computing for a given
abstract~C Program the costs of executing that program. This is done by the
exported function:

\begin{quote} 
 {\verb type CostRes = (Int, Int, Int, Int, Int)}
 {\verb costs :: AbstractC -> CostRes }
\end{quote}

The meaning of the result tuple is:
\begin{itemize}
 \item The first component ({\tt i}) counts the number of integer,
   arithmetic and bit-manipulating instructions.
 \item The second component ({\tt b}) counts the number of branches (direct
   branches as well as indirect ones).
 \item The third component ({\tt l}) counts the number of load instructions.
 \item The fourth component ({\tt s}) counts the number of store
   instructions.
 \item The fifth component ({\tt f}) counts the number of floating point
   instructions. 
\end{itemize}
 
This function is needed in GrAnSim for parallelism.

These are first suggestions for scaling the costs. But, this scaling should be done in the RTS rather than the compiler (this really should be tunable!):

\begin{pseudocode}

#define LOAD_COSTS		2
#define STORE_COSTS		2
#define INT_ARITHM_COSTS	1
#define GMP_ARITHM_COSTS	3 {- any clue for GMP costs ? -}
#define FLOAT_ARITHM_COSTS	3 {- any clue for float costs ? -}
#define BRANCH_COSTS 		2

\end{pseudocode}

\begin{code}
#include "HsVersions.h"

#define ACCUM_COSTS(i,b,l,s,f)  (i+b+l+s+f)

#define NUM_REGS		10 {- PprAbsCSyn.lhs -}       {- runtime/c-as-asm/CallWrap_C.lc -}
#define RESTORE_COSTS		(Cost (0, 0, NUM_REGS, 0, 0)  :: CostRes)
#define SAVE_COSTS		(Cost (0, 0, 0, NUM_REGS, 0)  :: CostRes)
#define CCALL_COSTS_GUESS	(Cost (50, 0, 0, 0, 0)        :: CostRes)

module Costs( costs,
              addrModeCosts, CostRes(Cost), nullCosts, Side(..)
    ) where

import AbsCFuns
import AbsCSyn
import AbsPrel
import PrimOps
import TyCon
import Util

-- --------------------------------------------------------------------------
#ifndef GRAN
-- a module of "stubs" that don't do anything
data CostRes = Cost (Int, Int, Int, Int, Int)
data Side = Lhs | Rhs 

nullCosts    = Cost (0, 0, 0, 0, 0) :: CostRes

costs :: AbstractC -> CostRes
addrModeCosts :: CAddrMode -> Side -> CostRes
costs _ = nullCosts
addrModeCosts _ _ = nullCosts

instance Eq CostRes; instance Text CostRes

instance Num CostRes where
    x + y = nullCosts

#else {-GRAN-}
-- the real thing

data CostRes = Cost (Int, Int, Int, Int, Int)
	       deriving (Text)

nullCosts    = Cost (0, 0, 0, 0, 0) :: CostRes
initHdrCosts = Cost (2, 0, 0, 1, 0) :: CostRes
errorCosts   = Cost (-1, -1, -1, -1, -1)  -- just for debugging 

oneArithm = Cost (1, 0, 0, 0, 0) :: CostRes

instance Eq CostRes where
 (==) t1 t2 = i && b && l && s && f
             where (i,b,l,s,f) = binOp' (==) t1 t2

instance Num CostRes where
 (+) = binOp (+)
 (-) = binOp (-)
 (*) = binOp (*)
 negate  = mapOp negate
 abs     = mapOp abs
 signum  = mapOp signum

mapOp :: (Int -> Int) -> CostRes -> CostRes
mapOp g ( Cost (i, b, l, s, f) )  = Cost (g i, g b, g l, g s, g f)

foldrOp :: (Int -> a -> a) -> a -> CostRes -> a
foldrOp o x  ( Cost (i1, b1, l1, s1, f1) )   =
	i1 `o` ( b1 `o` ( l1 `o` ( s1 `o` ( f1 `o` x))))

binOp :: (Int -> Int -> Int) -> CostRes -> CostRes -> CostRes 
binOp o ( Cost (i1, b1, l1, s1, f1) ) ( Cost  (i2, b2, l2, s2, f2) )  =
        ( Cost (i1 `o` i2, b1 `o` b2, l1 `o` l2, s1 `o` s2, f1 `o` f2) )

binOp' :: (Int -> Int -> a) -> CostRes -> CostRes -> (a,a,a,a,a) 
binOp' o ( Cost (i1, b1, l1, s1, f1) ) ( Cost  (i2, b2, l2, s2, f2) )  =
         (i1 `o` i2, b1 `o` b2, l1 `o` l2, s1 `o` s2, f1 `o` f2) 

-- --------------------------------------------------------------------------

data Side = Lhs | Rhs 
	    deriving (Eq)

-- --------------------------------------------------------------------------

costs :: AbstractC -> CostRes

costs absC = 
  case absC of
   AbsCNop               	->  nullCosts

   AbsCStmts absC1 absC2 	-> costs absC1 + costs absC2

   CAssign (CReg _) (CReg _)  	-> Cost (1,0,0,0,0)   -- typ.: mov %reg1,%reg2

   CAssign (CReg _) (CTemp _ _) -> Cost (1,0,0,0,0)  

   CAssign (CReg _) (CAddr _)   -> Cost (1,0,0,0,0)  -- typ.: add %reg1,<adr>,%reg2

   CAssign target_m source_m  	-> addrModeCosts target_m Lhs +
                                   addrModeCosts source_m Rhs 

   CJump (CLbl _  _)	      	-> Cost (0,1,0,0,0)  -- no ld for call necessary

   CJump mode                 	-> addrModeCosts mode Rhs +
				   Cost (0,1,0,0,0)

   CFallThrough mode  -> addrModeCosts mode Rhs +               -- chu' 0.24
		  	 Cost (0,1,0,0,0)
	
   CReturn mode info  -> case info of
    	                  DirectReturn -> addrModeCosts mode Rhs +
                                          Cost (0,1,0,0,0)

		     	    -- i.e. ld address to reg and call reg 

	                  DynamicVectoredReturn mode' -> 
					addrModeCosts mode Rhs + 
					addrModeCosts mode' Rhs +
                                        Cost (0,1,1,0,0)
				
			    {- generates code like this:
				JMP_(<mode>)[RVREL(<mode'>)];
			       i.e. 1 possb ld for mode' 
		     		    1 ld for RVREL
				    1 possb ld for mode
				    1 call				-}

	                  StaticVectoredReturn _ -> addrModeCosts mode Rhs +
                                                  Cost (0,1,1,0,0)

			    -- as above with mode' fixed to CLit
		            -- typically 2 ld + 1 call; 1st ld due
		            -- to CVal as mode

   CSwitch mode alts absC     -> nullCosts
				 {- for handling costs of all branches of
				    a CSwitch see PprAbsC.
				    Basically: 
				     Costs for branch = 
					Costs before CSwitch + 
					addrModeCosts of head +
					Costs for 1 cond branch +
					Costs for body of branch
				 -}

   CCodeBlock _ absC          -> costs absC

   CInitHdr cl_info reg_rel cost_centre inplace_upd -> initHdrCosts

			{- This is more fancy but superflous: The addr modes
			   are fixed and so the costs are const!

                        argCosts + initHdrCosts
			where argCosts = addrModeCosts (CAddr reg_rel) Rhs +
					 addrModeCosts base_lbl +    -- CLbl!
					 3*addrModeCosts (mkIntCLit 1{- any val -}) 
			-}
			{- this extends to something like
			    SET_SPEC_HDR(...)
		           For costing the args of this macro
			   see PprAbsC.lhs where args are inserted -}

   COpStmt modes_res primOp modes_args _ _ ->
	{- 
           let
		n = length modes_res 
	   in 
		(0, 0, n, n, 0) +
                primOpCosts primOp +
                if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
                                             else nullCosts
	   -- ^^HWL
	-}
        foldl (+) nullCosts [addrModeCosts mode Lhs | mode <- modes_res]  +
        foldl (+) nullCosts [addrModeCosts mode Rhs | mode <- modes_args]  +
        primOpCosts primOp +
        if primOpNeedsWrapper primOp then SAVE_COSTS + RESTORE_COSTS
                                     else nullCosts
                
   CSimultaneous absC        -> costs absC

   CMacroStmt   macro modes  -> stmtMacroCosts macro modes

   CCallProfCtrMacro   _ _   -> nullCosts  
				  {- we don't count profiling in GrAnSim -}

   CCallProfCCMacro    _ _   -> nullCosts  
				  {- we don't count profiling in GrAnSim -}

  -- *** the next three [or so...] are DATA (those above are CODE) ***
  -- as they are data rather than code they all have nullCosts         -- HWL

   CStaticClosure _ _ _ _    -> nullCosts
			     
241
   CClosureInfoAndCode _ _ _ _ _ _ -> nullCosts
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
			     
   CRetVector _ _ _          -> nullCosts
			     
   CRetUnVector _ _          -> nullCosts
			     
   CFlatRetVector _ _        -> nullCosts
			     
   CCostCentreDecl _ _       -> nullCosts
			     
   CClosureUpdInfo _         -> nullCosts

   CSplitMarker              -> nullCosts

-- ---------------------------------------------------------------------------

addrModeCosts :: CAddrMode -> Side -> CostRes

-- addrModeCosts _ _ = nullCosts

addrModeCosts addr_mode side =
  let
    lhs = side == Lhs
  in 
  case addr_mode of
    CVal _ _ -> if lhs then Cost (0, 0, 0, 1, 0)
                       else Cost (0, 0, 1, 0, 0)

    CAddr _  -> if lhs then Cost (0, 0, 0, 1, 0)  -- ??unchecked
                       else Cost (0, 0, 1, 0, 0)

    CReg _   -> nullCosts        {- loading from, storing to reg is free ! -}
				 {- for costing CReg->Creg ops see special -}
				 {- case in costs fct -}					
    CTableEntry base_mode offset_mode kind ->
                addrModeCosts base_mode side + 
		addrModeCosts offset_mode side +
		Cost (1,0,1,0,0)

    CTemp _ _  -> nullCosts	{- if lhs then Cost (0, 0, 0, 1, 0)
		                          else Cost (0, 0, 1, 0, 0)  -}
	-- ``Temporaries'' correspond to local variables in C, and registers in
	-- native code.
	-- I assume they can be somewhat optimized by gcc -- HWL

    CLbl _ _   -> if lhs then Cost (0, 0, 0, 1, 0)
                         else Cost (2, 0, 0, 0, 0)
		  -- Rhs: typically: sethi %hi(lbl),%tmp_reg
		  --	     	     or    %tmp_reg,%lo(lbl),%target_reg

    CUnVecLbl _ _ -> if lhs then Cost (0, 0, 0, 1, 0)
                            else Cost (2, 0, 0, 0, 0)
		     -- same as CLbl

    --  Check the following 3 (checked form CLit on)

    CCharLike mode -> if lhs then Cost (0, 0, 0, 1, 0)
                             else Cost (0, 0, 1, 0, 0)

    CIntLike mode  -> if lhs then Cost (0, 0, 0, 1, 0)
                             else Cost (0, 0, 1, 0, 0)

    CString _      -> if lhs then Cost (0, 0, 0, 1, 0)
                             else Cost (0, 0, 1, 0, 0)

    CLit    _      -> if lhs then nullCosts            -- should never occur
                             else Cost (1, 0, 0, 0, 0) -- typ.: mov lit,%reg

    CLitLit _  _   -> if lhs then nullCosts       
                             else Cost (1, 0, 0, 0, 0) 
		      -- same es CLit

    COffset _      -> if lhs then nullCosts       
                             else Cost (1, 0, 0, 0, 0) 
		      -- same es CLit

    CCode absC     -> costs absC

    CLabelledCode _ absC  ->  costs absC

    CJoinPoint _ _        -> if lhs then Cost (0, 0, 0, 1, 0)
                                    else Cost (0, 0, 1, 0, 0)

    CMacroExpr _ macro mode_list -> exprMacroCosts side macro mode_list

    CCostCentre _ _ -> nullCosts

-- ---------------------------------------------------------------------------

exprMacroCosts :: Side -> CExprMacro -> [CAddrMode] -> CostRes

exprMacroCosts side macro mode_list = 
  let
    arg_costs = foldl (+) nullCosts 
		      (map (\ x -> addrModeCosts x Rhs) mode_list)
  in
  arg_costs +
  case macro of
    INFO_PTR   -> if side == Lhs then Cost (0, 0, 0, 1, 0)
                                 else Cost (0, 0, 1, 0, 0)
    ENTRY_CODE -> nullCosts                   
    INFO_TAG   -> if side == Lhs then Cost (0, 0, 0, 1, 0)
                                 else Cost (0, 0, 1, 0, 0)
    EVAL_TAG   -> if side == Lhs then Cost (1, 0, 0, 1, 0)
                                 else Cost (1, 0, 1, 0, 0)
		  -- costs of INFO_TAG + (1,0,0,0,0)

-- ---------------------------------------------------------------------------

stmtMacroCosts :: CStmtMacro -> [CAddrMode] -> CostRes

stmtMacroCosts macro modes =
  let 
    arg_costs =   foldl (+) nullCosts 
			[addrModeCosts mode Rhs | mode <- modes] 
  in
  case macro of
    ARGS_CHK_A_LOAD_NODE  ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
		-- p=probability of PAP (instead of AP): + p*(3,1,0,0,0)
    ARGS_CHK_A            ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
		-- p=probability of PAP (instead of AP): + p*(0,1,0,0,0)
    ARGS_CHK_B_LOAD_NODE  ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
    ARGS_CHK_B            ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
    HEAP_CHK              ->  Cost (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
    -- STK_CHK               ->  (2, 1, 0, 0, 0)       {- StgMacros.lh  -}
    STK_CHK               ->  Cost (0, 0, 0, 0, 0)       {- StgMacros.lh  -}
    UPD_CAF               ->  Cost (7, 0, 1, 3, 0)       {- SMupdate.lh  -}
    UPD_IND               ->  Cost (8, 2, 2, 0, 0)       {- SMupdate.lh  
				updatee in old-gen: Cost (4, 1, 1, 0, 0)
				updatee in new-gen: Cost (4, 1, 1, 0, 0)
                                NB: we include costs fo checking if there is
				    a BQ, but we omit costs for awakening BQ
				    (these probably differ between old-gen and
				    new gen) -} 
    UPD_INPLACE_NOPTRS    ->  Cost (13, 3, 3, 2, 0)       {- SMupdate.lh  
				common for both:    Cost (4, 1, 1, 0, 0)
  				updatee in old-gen: Cost (14, 3, 2, 4, 0) 
				updatee in new-gen: Cost (4, 1, 1, 0, 0)   -}
    UPD_INPLACE_PTRS      ->  Cost (13, 3, 3, 2, 0)       {- SMupdate.lh  
				common for both:    Cost (4, 1, 1, 0, 0)
  				updatee in old-gen: Cost (14, 3, 2, 4, 0) 
				updatee in new-gen: Cost (4, 1, 1, 0, 0)   -}

    UPD_BH_UPDATABLE      ->  Cost (3, 0, 0, 1, 0)       {- SMupdate.lh  -}
    UPD_BH_SINGLE_ENTRY   ->  Cost (3, 0, 0, 1, 0)       {- SMupdate.lh  -}
    PUSH_STD_UPD_FRAME    ->  Cost (3, 0, 0, 4, 0)       {- SMupdate.lh  -}
    POP_STD_UPD_FRAME     ->  Cost (1, 0, 3, 0, 0)       {- SMupdate.lh  -}
    SET_ARITY             ->  nullCosts             {- StgMacros.lh  -}
    CHK_ARITY             ->  nullCosts             {- StgMacros.lh  -}
    SET_TAG               ->  nullCosts             {- COptRegs.lh -}
    GRAN_FETCH            	->  nullCosts     {- GrAnSim bookkeeping -}
    GRAN_RESCHEDULE   		->  nullCosts     {- GrAnSim bookkeeping -}
    GRAN_FETCH_AND_RESCHEDULE	->  nullCosts     {- GrAnSim bookkeeping -}
    THREAD_CONTEXT_SWITCH   	->  nullCosts     {- GrAnSim bookkeeping -}

-- ---------------------------------------------------------------------------

floatOps :: [PrimOp] 
floatOps =
  [   FloatGtOp  , FloatGeOp  , FloatEqOp  , FloatNeOp  , FloatLtOp  , FloatLeOp
    , DoubleGtOp , DoubleGeOp , DoubleEqOp , DoubleNeOp , DoubleLtOp , DoubleLeOp
    , FloatAddOp , FloatSubOp , FloatMulOp , FloatDivOp , FloatNegOp
    , Float2IntOp , Int2FloatOp
    , FloatExpOp   , FloatLogOp   , FloatSqrtOp
    , FloatSinOp   , FloatCosOp   , FloatTanOp
    , FloatAsinOp  , FloatAcosOp  , FloatAtanOp
    , FloatSinhOp  , FloatCoshOp  , FloatTanhOp
    , FloatPowerOp
    , DoubleAddOp , DoubleSubOp , DoubleMulOp , DoubleDivOp , DoubleNegOp
    , Double2IntOp , Int2DoubleOp
    , Double2FloatOp , Float2DoubleOp
    , DoubleExpOp   , DoubleLogOp   , DoubleSqrtOp
    , DoubleSinOp   , DoubleCosOp   , DoubleTanOp
    , DoubleAsinOp  , DoubleAcosOp  , DoubleAtanOp
    , DoubleSinhOp  , DoubleCoshOp  , DoubleTanhOp
    , DoublePowerOp
    , FloatEncodeOp  , FloatDecodeOp
    , DoubleEncodeOp , DoubleDecodeOp
  ]

gmpOps :: [PrimOp] 
gmpOps  =
  [   IntegerAddOp , IntegerSubOp , IntegerMulOp
    , IntegerQuotRemOp , IntegerDivModOp , IntegerNegOp
    , IntegerCmpOp
    , Integer2IntOp  , Int2IntegerOp
    , Addr2IntegerOp 
  ]


-- Haven't found the .umul .div .rem macros yet
-- If they are not Haskell cde, they are not costed, yet

abs_costs = nullCosts  -- NB:  This is normal STG code with costs already 
			--	included; no need to add costs again.

umul_costs = Cost (21,4,0,0,0)     -- due to spy counts
rem_costs =  Cost (30,15,0,0,0)    -- due to spy counts
div_costs =  Cost (30,15,0,0,0)    -- due to spy counts

primOpCosts :: PrimOp -> CostRes

-- Special cases

primOpCosts (CCallOp _ _ _ _ _) = SAVE_COSTS + CCALL_COSTS_GUESS + 
				  RESTORE_COSTS  	-- GUESS; check it

-- Usually 3 mov instructions are needed to get args and res in right place.

primOpCosts IntMulOp  = Cost (3, 1, 0, 0, 0)  + umul_costs
primOpCosts IntQuotOp = Cost (3, 1, 0, 0, 0)  + div_costs
primOpCosts IntRemOp  = Cost (3, 1, 0, 0, 0)  + rem_costs
primOpCosts IntNegOp  = Cost (1, 1, 0, 0, 0) -- translates into 1 sub
primOpCosts IntAbsOp  = Cost (0, 1, 0, 0, 0) -- abs closure already costed

primOpCosts FloatGtOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
primOpCosts FloatGeOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
primOpCosts FloatEqOp  = Cost (0, 0, 0, 0, 2) -- cheap f-comp
primOpCosts FloatNeOp  = Cost (0, 0, 0, 0, 2) -- cheap f-comp
primOpCosts FloatLtOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
primOpCosts FloatLeOp  = Cost (2, 0, 0, 0, 2) -- expensive f-comp
primOpCosts DoubleGtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
primOpCosts DoubleGeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp
primOpCosts DoubleEqOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
primOpCosts DoubleNeOp = Cost (0, 0, 0, 0, 2) -- cheap f-comp
primOpCosts DoubleLtOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp 
primOpCosts DoubleLeOp = Cost (2, 0, 0, 0, 2) -- expensive f-comp

primOpCosts FloatExpOp    = Cost (2, 1, 4, 4, 3)   
primOpCosts FloatLogOp    = Cost (2, 1, 4, 4, 3)   
primOpCosts FloatSqrtOp   = Cost (2, 1, 4, 4, 3)   
primOpCosts FloatSinOp    = Cost (2, 1, 4, 4, 3)   
primOpCosts FloatCosOp    = Cost (2, 1, 4, 4, 3)   
primOpCosts FloatTanOp    = Cost (2, 1, 4, 4, 3)   
primOpCosts FloatAsinOp   = Cost (2, 1, 4, 4, 3)   
primOpCosts FloatAcosOp   = Cost (2, 1, 4, 4, 3)   
primOpCosts FloatAtanOp   = Cost (2, 1, 4, 4, 3)   
primOpCosts FloatSinhOp   = Cost (2, 1, 4, 4, 3)   
primOpCosts FloatCoshOp   = Cost (2, 1, 4, 4, 3)   
primOpCosts FloatTanhOp   = Cost (2, 1, 4, 4, 3)   
--primOpCosts FloatAsinhOp  = Cost (2, 1, 4, 4, 3)   
--primOpCosts FloatAcoshOp  = Cost (2, 1, 4, 4, 3)   
--primOpCosts FloatAtanhOp  = Cost (2, 1, 4, 4, 3)   
primOpCosts FloatPowerOp  = Cost (2, 1, 4, 4, 3)   

{- There should be special handling of the Array PrimOps in here   HWL -}

primOpCosts primOp 
  | primOp `elem` floatOps = Cost (0, 0, 0, 0, 1)  :: CostRes
  | primOp `elem` gmpOps   = Cost (50, 5, 10, 10, 0) :: CostRes  -- GUESS; check it
  | otherwise              = Cost (1, 0, 0, 0, 0)

-- ---------------------------------------------------------------------------
{- HWL: currently unused

costsByKind :: PrimKind -> Side -> CostRes

-- The following PrimKinds say that the data is already in a reg

costsByKind CharKind    _ = nullCosts
costsByKind IntKind     _ = nullCosts
costsByKind WordKind    _ = nullCosts
costsByKind AddrKind    _ = nullCosts
costsByKind FloatKind   _ = nullCosts
costsByKind DoubleKind	_ = nullCosts
-}
-- ---------------------------------------------------------------------------

#endif {-GRAN-}
\end{code}

This is the data structure of {\tt PrimOp} copied from prelude/PrimOps.lhs.
I include here some comments about the estimated costs for these @PrimOps@.
Compare with the @primOpCosts@ fct above.  -- HWL

\begin{pseudocode}
data PrimOp
    -- I assume all these basic comparisons take just one ALU instruction
    -- Checked that for Char, Int; Word, Addr should be the same as Int.

    = CharGtOp   | CharGeOp   | CharEqOp   | CharNeOp   | CharLtOp   | CharLeOp
    | IntGtOp    | IntGeOp    | IntEqOp    | IntNeOp    | IntLtOp    | IntLeOp
    | WordGtOp   | WordGeOp   | WordEqOp   | WordNeOp   | WordLtOp   | WordLeOp
    | AddrGtOp   | AddrGeOp   | AddrEqOp   | AddrNeOp   | AddrLtOp   | AddrLeOp

    -- Analogously, these take one FP unit instruction
    -- Haven't checked that, yet.

    | FloatGtOp  | FloatGeOp  | FloatEqOp  | FloatNeOp  | FloatLtOp  | FloatLeOp
    | DoubleGtOp | DoubleGeOp | DoubleEqOp | DoubleNeOp | DoubleLtOp | DoubleLeOp

    -- 1 ALU op; unchecked
    | OrdOp | ChrOp

    -- these just take 1 ALU op; checked
    | IntAddOp | IntSubOp 

    -- but these take more than that; see special cases in primOpCosts
    -- I counted the generated ass. instructions for these -> checked
    | IntMulOp | IntQuotOp
541
    | IntRemOp | IntNegOp | IntAbsOp
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627

    -- Rest is unchecked so far -- HWL

    -- Word#-related ops:
    | AndOp   | OrOp  | NotOp | ShiftLOp | ShiftROp
    | Int2WordOp | Word2IntOp -- casts

    -- Addr#-related ops:
    | Int2AddrOp | Addr2IntOp -- casts

    -- Float#-related ops:
    | FloatAddOp | FloatSubOp | FloatMulOp | FloatDivOp | FloatNegOp
    | Float2IntOp | Int2FloatOp

    | FloatExpOp   | FloatLogOp   | FloatSqrtOp
    | FloatSinOp   | FloatCosOp   | FloatTanOp
    | FloatAsinOp  | FloatAcosOp  | FloatAtanOp
    | FloatSinhOp  | FloatCoshOp  | FloatTanhOp
    -- not all machines have these available conveniently:
    -- | FloatAsinhOp | FloatAcoshOp | FloatAtanhOp
    | FloatPowerOp -- ** op

    -- Double#-related ops:
    | DoubleAddOp | DoubleSubOp | DoubleMulOp | DoubleDivOp | DoubleNegOp
    | Double2IntOp | Int2DoubleOp
    | Double2FloatOp | Float2DoubleOp

    | DoubleExpOp   | DoubleLogOp   | DoubleSqrtOp
    | DoubleSinOp   | DoubleCosOp   | DoubleTanOp
    | DoubleAsinOp  | DoubleAcosOp  | DoubleAtanOp
    | DoubleSinhOp  | DoubleCoshOp  | DoubleTanhOp
    -- not all machines have these available conveniently:
    -- | DoubleAsinhOp | DoubleAcoshOp | DoubleAtanhOp
    | DoublePowerOp -- ** op

    -- Integer (and related...) ops:
    -- slightly weird -- to match GMP package.
    | IntegerAddOp | IntegerSubOp | IntegerMulOp
    | IntegerQuotRemOp | IntegerDivModOp | IntegerNegOp

    | IntegerCmpOp

    | Integer2IntOp  | Int2IntegerOp
    | Addr2IntegerOp -- "Addr" is *always* a literal string
    -- ?? gcd, etc?

    | FloatEncodeOp  | FloatDecodeOp
    | DoubleEncodeOp | DoubleDecodeOp

    -- primitive ops for primitive arrays

    | NewArrayOp
    | NewByteArrayOp PrimKind

    | SameMutableArrayOp
    | SameMutableByteArrayOp

    | ReadArrayOp | WriteArrayOp | IndexArrayOp -- for arrays of Haskell ptrs

    | ReadByteArrayOp   PrimKind
    | WriteByteArrayOp  PrimKind
    | IndexByteArrayOp  PrimKind
    | IndexOffAddrOp    PrimKind
        -- PrimKind can be one of {Char,Int,Addr,Float,Double}Kind.
        -- This is just a cheesy encoding of a bunch of ops.
        -- Note that MallocPtrKind is not included -- the only way of
        -- creating a MallocPtr is with a ccall or casm.

    | UnsafeFreezeArrayOp | UnsafeFreezeByteArrayOp

    | MakeStablePtrOp | DeRefStablePtrOp
\end{pseudocode}

A special ``trap-door'' to use in making calls direct to C functions:
Note: From GrAn point of view, CCall is probably very expensive -- HWL

\begin{pseudocode}
    | CCallOp   String          -- An "unboxed" ccall# to this named function
                Bool            -- True <=> really a "casm"
                Bool            -- True <=> might invoke Haskell GC
                [UniType]       -- Unboxed argument; the state-token
                                -- argument will have been put *first*
                UniType         -- Return type; one of the "StateAnd<blah>#" types

    -- (... to be continued ... )
\end{pseudocode}