AbsCSyn.lhs 18.9 KB
Newer Older
1
%
2
3
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%
4
% $Id: AbsCSyn.lhs,v 1.50 2002/09/13 15:02:25 simonpj Exp $
5
6
7
8
9
10
11
12
13
14
15
16
%
\section[AbstractC]{Abstract C: the last stop before machine code}

This ``Abstract C'' data type describes the raw Spineless Tagless
machine model at a C-ish level; it is ``abstract'' in that it only
includes C-like structures that we happen to need.  The conversion of
programs from @StgSyntax@ (basically a functional language) to
@AbstractC@ (basically imperative C) is the heart of code generation.
From @AbstractC@, one may convert to real C (for portability) or to
raw assembler/machine code.

\begin{code}
17
module AbsCSyn {- (
18
19
	-- export everything
	AbstractC(..),
20
	C_SRT(..)
21
22
23
24
25
	CStmtMacro(..),
	CExprMacro(..),
	CAddrMode(..),
	ReturnInfo(..),
	mkAbstractCs, mkAbsCStmts, mkAlgAltsCSwitch,
26
	mkIntCLit,
27
28
29
30
31
32
33
34
	mkAbsCStmtList,
	mkCCostCentre,

	-- RegRelatives
	RegRelative(..),

	-- registers
	MagicId(..), node, infoptr,
35
	isVolatileReg,
36
	CostRes(Cost)
37
    )-} where
38

39
40
#include "HsVersions.h"

sof's avatar
sof committed
41
import {-# SOURCE #-} ClosureInfo ( ClosureInfo )
42

43
import CLabel
44
import Constants   	( mAX_Vanilla_REG, mAX_Float_REG,
45
46
			  mAX_Double_REG, spRelToInt )
import CostCentre       ( CostCentre, CostCentreStack )
47
import Literal		( mkMachInt, Literal(..) )
48
import ForeignCall	( CCallSpec )
49
import PrimRep		( PrimRep(..) )
50
import MachOp		( MachOp(..) )
sof's avatar
sof committed
51
import Unique           ( Unique )
52
import StgSyn		( StgOp )
53
import TyCon		( TyCon )
54
import BitSet				-- for liveness masks
55
import FastTypes
56
import FastString
57
58
59
60
61
\end{code}

@AbstractC@ is a list of Abstract~C statements, but the data structure
is tree-ish, for easier and more efficient putting-together.
\begin{code}
62
63
-- The empty Abstract C statement; a lower-case alias for the AbsCNop
-- constructor.
absCNop :: AbstractC
absCNop = AbsCNop

64
65
66
67
68
69
70
71
72
73
74
data AbstractC
  = AbsCNop
  | AbsCStmts		AbstractC AbstractC

  -- and the individual stmts...
\end{code}

A note on @CAssign@: In general, the type associated with an assignment
is the type of the lhs.  However, when the lhs is a pointer to mixed
types (e.g. SpB relative), the type of the assignment is the type of
the rhs for float types, or the generic StgWord for all other types.
(In particular, a CharRep on the rhs is promoted to IntRep when
stored in a mixed type location.)

\begin{code}
  | CAssign
80
81
	!CAddrMode 	-- target
	!CAddrMode	-- source
82
83
84

  | CJump
	CAddrMode	-- Put this in the program counter
85
			-- eg `CJump (CReg (VanillaReg PtrRep 1))' puts Ret1 in PC
86
87
88
89
90
91
92
93
			-- Enter can be done by:
			--	  CJump (CVal NodeRel zeroOff)

  | CFallThrough
	CAddrMode	-- Fall through into this routine
    	    	    	-- (for the benefit of the native code generators)
    	    	    	-- Equivalent to CJump in C land

94
95
96
  | CReturn		-- Perform a return
    	CAddrMode   	-- Address of a RET_<blah> info table
    	ReturnInfo  	-- Whether it's a direct or vectored return
97

98
  | CSwitch !CAddrMode
99
	[(Literal, AbstractC)]	-- alternatives
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
	AbstractC		-- default; if there is no real Abstract C in here
				-- (e.g., all comments; see function "nonemptyAbsC"),
				-- then that means the default _cannot_ occur.
				-- If there is only one alternative & no default code,
				-- then there is no need to check the tag.
				-- Therefore, e.g.:
				--  CSwitch m [(tag,code)] AbsCNop == code

  | CCodeBlock CLabel AbstractC
			-- A labelled block of code; this "statement" is not
			-- executed; rather, the labelled code will be hoisted
			-- out to the top level (out of line) & it can be
			-- jumped to.

  | CInitHdr		-- to initialise the header of a closure (both fixed/var parts)
	ClosureInfo
116
	CAddrMode	-- address of the info ptr
117
	!CAddrMode	-- cost centre to place in closure
118
			--   CReg CurCostCentre or CC_HDR(R1.p{-Node-})
119
	Int		-- size of closure, for profiling
120

121
122
123
  -- NEW CASES FOR EXPANDED PRIMOPS

  | CMachOpStmt			-- Machine-level operation
124
	CAddrMode		-- result
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
	MachOp
	[CAddrMode]		-- Arguments
        (Maybe [MagicId])	-- list of regs which need to be preserved
	-- across the primop.  This is allowed to be Nothing only if
	-- machOpIsDefinitelyInline returns True.  And that in turn may
	-- only return True if we are absolutely sure that the mach op
	-- can be done inline on all platforms.  

  | CSequential		-- Do the nested AbstractCs sequentially.
	[AbstractC]	-- In particular, as far as the AbsCUtils.doSimultaneously
			-- is concerned, these stmts are to be treated as atomic
			-- and are not to be reordered.

  -- end of NEW CASES FOR EXPANDED PRIMOPS

140
141
  | COpStmt
	[CAddrMode]	-- Results
142
	StgOp
143
144
145
146
147
148
149
	[CAddrMode]	-- Arguments
	[MagicId]	-- Potentially volatile/live registers
			-- (to save/restore around the call/op)

	-- INVARIANT: When a PrimOp which can cause GC is used, the
	-- only live data is tidily on the STG stacks or in the STG
	-- registers (the code generator ensures this).
150
	--
151
152
153
154
	-- Why this?  Because if the arguments were arbitrary
	-- addressing modes, they might be things like (Hp+6) which
	-- will get utterly spongled by GC.

155
  | CSimultaneous	-- Perform simultaneously all the statements
156
157
158
159
160
161
162
	AbstractC	-- in the nested AbstractC.  They are only
			-- allowed to be CAssigns, COpStmts and AbsCNops, so the
			-- "simultaneous" part just concerns making
			-- sure that permutations work.
			-- For example { a := b, b := a }
			-- 	needs to go via (at least one) temporary

163
164
165
166
167
168
  | CCheck 		-- heap or stack checks, or both.  
	CCheckMacro 	-- These might include some code to fill in tags 
	[CAddrMode]	-- on the stack, so we can't use CMacroStmt below.
	AbstractC

  | CRetDirect			-- Direct return
169
        !Unique			-- for making labels
170
	AbstractC   		-- return code
171
	C_SRT			-- SRT info
172
173
	Liveness		-- stack liveness at the return point

174
175
  -- see the notes about these next few; they follow below...
  | CMacroStmt		CStmtMacro	[CAddrMode]
176
177
  | CCallProfCtrMacro	FastString	[CAddrMode]
  | CCallProfCCMacro	FastString	[CAddrMode]
178

sof's avatar
sof committed
179
180
181
182
183
184
185
186
187
    {- The presence of this constructor is a makeshift solution;
       it being used to work around a gcc-related problem of
       handling typedefs within statement blocks (or, rather,
       the inability to do so.)
       
       The AbstractC flattener takes care of lifting out these
       typedefs if needs be (i.e., when generating .hc code and
       compiling 'foreign import dynamic's)
    -}
sof's avatar
sof committed
188
  | CCallTypedef Bool {- True => use "typedef"; False => use "extern"-}
189
  		 CCallSpec Unique [CAddrMode] [CAddrMode]
sof's avatar
sof committed
190

191
192
193
  -- *** the next three [or so...] are DATA (those above are CODE) ***

  | CStaticClosure
194
	ClosureInfo		-- Todo: maybe info_lbl & closure_lbl instead?
195
196
	CAddrMode		-- cost centre identifier to place in closure
	[CAddrMode]		-- free vars; ptrs, then non-ptrs.
197

198
199
200
  | CSRT CLabel [CLabel]  	-- SRT declarations: basically an array of 
				-- pointers to static closures.
  
ken's avatar
ken committed
201
202
  | CBitmap CLabel LivenessMask	-- A bitmap to be emitted if and only if
				-- it is larger than a target machine word.
203
204

  | CClosureInfoAndCode
205
206
	ClosureInfo		-- Explains placement and layout of closure
	AbstractC		-- Slow entry point code
207
	(Maybe AbstractC)
208
209
210
211
212
213
214
215
				-- Fast entry point code, if any
	String			-- Closure description; NB we can't get this
				-- from ClosureInfo, because the latter refers 
				-- to the *right* hand side of a defn, whereas
				-- the  "description" refers to *left* hand side

  | CRetVector			-- A labelled block of static data
	CLabel
216
	[CAddrMode]
217
	C_SRT			-- SRT info
218
	Liveness		-- stack liveness at the return point
219

220
221
222
  | CClosureTbl 		-- table of constructors for enumerated types
	TyCon			-- which TyCon this table is for

223
  | CModuleInitBlock		-- module initialisation block
224
225
	CLabel			-- "plain" label for init block
	CLabel			-- label for init block (with ver + way info)
226
227
	AbstractC		-- initialisation code

228
229
230
  | CCostCentreDecl		-- A cost centre *declaration*
	Bool			-- True  <=> local => full declaration
				-- False <=> extern; just say so
231
232
	CostCentre

233
234
235
236
  | CCostCentreStackDecl	-- A cost centre stack *declaration*
	CostCentreStack		-- this is the declaration for a
				-- pre-defined singleton CCS (see 
				-- CostCentre.lhs)
237

238
  | CSplitMarker		-- Split into separate object modules here
239
240
241
242
243
244
245
246
247
248

-- C_SRT is what StgSyn.SRT gets translated to...
-- we add a label for the table, and expect only the 'offset/length' form.
--
--   NoC_SRT            : no SRT is attached (needsSRT answers False)
--   C_SRT lbl off len  : the label of the table, then the offset and the
--                        length (both strict Ints)

data C_SRT = NoC_SRT
	   | C_SRT CLabel !Int{-offset-} !Int{-length-}

-- True exactly when the descriptor carries a real table (the C_SRT
-- form); False for NoC_SRT.
needsSRT :: C_SRT -> Bool
needsSRT srt = case srt of
  NoC_SRT     -> False
  C_SRT _ _ _ -> True
249
250
251
252
253
254
255
256
257
258
259
260
261
262
\end{code}

About @CMacroStmt@, etc.: notionally, they all just call some
arbitrary C~macro or routine, passing the @CAddrModes@ as arguments.
However, we distinguish between various flavours of these things,
mostly just to keep things somewhat less wild and wooly.

\begin{description}
\item[@CMacroStmt@:]
Some {\em essential} bits of the STG execution model are done with C
macros.  An example is @STK_CHK@, which checks for stack-space
overflow.  This enumeration type lists all such macros:
\begin{code}
-- The C macros that implement essential bits of the STG execution model.
-- A CMacroStmt names one of these and supplies its CAddrMode arguments.
data CStmtMacro
  = ARGS_CHK                            -- arg satisfaction check
  | ARGS_CHK_LOAD_NODE                  -- arg check for top-level functions
  | UPD_CAF                             -- update CAF closure with indirection
  | UPD_BH_UPDATABLE                    -- eager blackholing
  | UPD_BH_SINGLE_ENTRY                 -- more eager blackholing
  | PUSH_UPD_FRAME                      -- push update frame
  | PUSH_SEQ_FRAME                      -- push seq frame
  | UPDATE_SU_FROM_UPD_FRAME            -- pull Su out of the update frame
  | SET_TAG                             -- set TagReg if it exists

      -- dataToTag# primop -- *only* used in unregisterised builds.
      -- (see AbsCUtils.dsCOpStmt)
  | DATA_TO_TAGZH

  | REGISTER_FOREIGN_EXPORT             -- register a foreign exported fun
  | REGISTER_IMPORT                     -- register an imported module
  | REGISTER_DIMPORT                    -- register an imported module from
                                        -- another DLL

  | GRAN_FETCH                          -- for GrAnSim only  -- HWL
  | GRAN_RESCHEDULE                     -- for GrAnSim only  -- HWL
  | GRAN_FETCH_AND_RESCHEDULE           -- for GrAnSim only  -- HWL
  | THREAD_CONTEXT_SWITCH               -- for GrAnSim only  -- HWL
  | GRAN_YIELD                          -- for GrAnSim only  -- HWL
286
287
\end{code}

288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
Heap/Stack checks.  There are far too many of these.

\begin{code}
-- The heap/stack check macros that a CCheck statement can name.
-- The constructors fall into three groups, marked by the comments below:
-- checks used when node points to the closure (the _NP variants), checks
-- used when it does not, and heap checks for case alternatives, which
-- differ in which registers are live.
data CCheckMacro

  = HP_CHK_NP				-- heap/stack checks when
  | STK_CHK_NP				-- node points to the closure
  | HP_STK_CHK_NP
  | HP_CHK_SEQ_NP			-- for 'seq' style case alternatives

  | HP_CHK				-- heap/stack checks when
  | STK_CHK				-- node doesn't point
  | HP_STK_CHK
					-- case alternative heap checks:

  | HP_CHK_NOREGS			--   no registers live
  | HP_CHK_UNPT_R1			--   R1 is boxed/unlifted
  | HP_CHK_UNBX_R1			--   R1 is unboxed
  | HP_CHK_F1				--   FloatReg1 (only) is live 
  | HP_CHK_D1				--   DblReg1   (only) is live
  | HP_CHK_L1				--   LngReg1   (only) is live
  | HP_CHK_UT_ALT			--   unboxed tuple return.

  | HP_CHK_GEN				-- generic heap check
\end{code}

314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
\item[@CCallProfCtrMacro@:]
The @FastString@ names a macro that, if \tr{#define}d, will bump one/some
of the STG-event profiling counters.

\item[@CCallProfCCMacro@:]
The @FastString@ names a macro that, if \tr{#define}d, will perform some
cost-centre-profiling-related action.
\end{description}

%************************************************************************
%*									*
\subsection[CAddrMode]{C addressing modes}
%*									*
%************************************************************************

\begin{code}
-- Addressing modes: the operands of Abstract C statements.  What a mode
-- denotes can depend on whether it appears on the left or the right of
-- an assignment; see the per-constructor notes.
data CAddrMode
  = CVal  RegRelative PrimRep
                        -- On RHS of assign: Contents of Magic[n]
                        -- On LHS of assign: location Magic[n]
                        -- (ie at addr Magic+n)

  | CAddr RegRelative
                        -- On RHS of assign: Address of Magic[n]; ie Magic+n
                        --      n=0 gets the Magic location itself
                        --      (NB: n=0 case superseded by CReg)
                        -- On LHS of assign: only sensible if n=0,
                        --      which gives the magic location itself
                        --      (NB: superseded by CReg)

             -- JRS 2002-02-05: CAddr is really scummy and should be fixed.
             -- The effect is that the semantics of CAddr depend on what the
             -- contained RegRelative is; it is decidedly non-orthogonal.

  | CReg MagicId        -- To replace (CAddr MagicId 0)

  | CTemp !Unique !PrimRep      -- Temporary locations
        -- ``Temporaries'' correspond to local variables in C, and registers in
        -- native code.

  | CLbl    CLabel      -- Labels in the runtime system, etc.
            PrimRep     -- the kind is so we can generate accurate C decls

  | CCharLike CAddrMode -- The address of a static char-like closure for
                        -- the specified character.  It is guaranteed to be in
                        -- the range mIN_CHARLIKE..mAX_CHARLIKE

  | CIntLike CAddrMode  -- The address of a static int-like closure for the
                        -- specified small integer.  It is guaranteed to be in
                        -- the range mIN_INTLIKE..mAX_INTLIKE

  | CLit    Literal

  | CJoinPoint          -- This is used as the amode of a let-no-escape-bound
                        -- variable.
        VirtualSpOffset   -- Sp value after any volatile free vars
                          -- of the rhs have been saved on stack.
                          -- Just before the code for the thing is jumped to,
                          -- Sp will be set to this value,
                          -- and then any stack-passed args pushed,
                          -- then the code for this thing will be entered
  | CMacroExpr
        !PrimRep        -- the kind of the result
        CExprMacro      -- the macro to generate a value
        [CAddrMode]     -- and its arguments

  | CBytesPerWord       -- Word size, in bytes, on this platform
                        -- required for: half-word loads (used in fishing tags
                        -- out of info tables), and sizeofByteArray#.
383
384
385
386
387
388
389
\end{code}

Various C macros for values which are dependent on the back-end layout.

\begin{code}

-- C macros that produce a value (used by the CMacroExpr addressing mode),
-- for quantities that depend on the back-end layout.
data CExprMacro
  = ENTRY_CODE
  | ARG_TAG                     -- stack argument tagging
  | GET_TAG                     -- get current constructor tag
  | UPD_FRAME_UPDATEE
  | CCS_HDR
  | BYTE_ARR_CTS                -- used when passing a ByteArray# to a ccall
  | PTRS_ARR_CTS                -- similarly for an Array#
  | ForeignObj_CLOSURE_DATA     -- and again for a ForeignObj#
398
399
\end{code}

400
401
Convenience functions:

402
403
404
\begin{code}
-- Wrap an Int as a literal addressing mode: widen to Integer, make a
-- machine-int literal with mkMachInt, then inject it with CLit.
mkIntCLit :: Int -> CAddrMode
mkIntCLit = CLit . mkMachInt . toInteger
405

406
-- Wrap a FastString as a string-literal addressing mode (CLit of MachStr).
mkCString :: FastString -> CAddrMode
mkCString str = CLit (MachStr str)

409
410
411
412
413
-- The addressing mode that refers to a cost centre: its label (from
-- mkCC_Label), tagged as a data pointer.
mkCCostCentre :: CostCentre -> CAddrMode
mkCCostCentre cc = CLbl ccLabel DataPtrRep
  where
    ccLabel = mkCC_Label cc

-- The addressing mode that refers to a cost-centre stack: its label (from
-- mkCCS_Label), tagged as a data pointer.
mkCCostCentreStack :: CostCentreStack -> CAddrMode
mkCCostCentreStack ccs = CLbl ccsLabel DataPtrRep
  where
    ccsLabel = mkCCS_Label ccs
414
415
416
417
418
419
420
421
422
423
\end{code}

%************************************************************************
%*									*
\subsection[RegRelative]{@RegRelatives@: ???}
%*									*
%************************************************************************

\begin{code}
-- A location named relative to a register (or, for CIndex, by indexing
-- off another addressing mode).
data RegRelative
  = HpRel       FastInt -- }
  | SpRel       FastInt -- }- offsets in StgWords
  | NodeRel     FastInt -- }
  | CIndex      CAddrMode CAddrMode PrimRep     -- pointer arithmetic :-)
                                                -- CIndex a b k === (k*)a[b]
429
430
431
432
433

-- The flavour of return performed by a CReturn statement: direct, or
-- vectored with either a statically known or a dynamically computed tag.
data ReturnInfo
  = DirectReturn    	    	    	-- Jump directly, if possible
  | StaticVectoredReturn Int		-- Fixed tag, starting at zero
  | DynamicVectoredReturn CAddrMode	-- Dynamic tag given by amode, starting at zero
434
435
436
437

-- Build an Hp-relative location from two virtual heap offsets: the result
-- holds the difference (Hp offset minus the offset of the thing).
hpRel :: VirtualHeapOffset      -- virtual offset of Hp
      -> VirtualHeapOffset      -- virtual offset of The Thing
      -> RegRelative            -- integer offset
hpRel hp off = HpRel (iUnbox (hp - off))
439
440
441
442

-- Build an Sp-relative location from two virtual stack offsets; the
-- actual offset is whatever spRelToInt computes from the pair.
spRel :: VirtualSpOffset        -- virtual offset of Sp
      -> VirtualSpOffset        -- virtual offset of The Thing
      -> RegRelative            -- integer offset
spRel sp off = SpRel (iUnbox (spRelToInt sp off))
444
445
446

-- Build a Node-relative location at the given offset.
nodeRel :: VirtualHeapOffset
        -> RegRelative
nodeRel off = NodeRel (iUnbox off)
448

449
450
451
452
\end{code}

%************************************************************************
%*									*
453
\subsection[Liveness]{Liveness Masks}
454
455
456
%*									*
%************************************************************************

457
458
459
We represent liveness bitmaps as a BitSet (whose internal
representation really is a bitmap).  These are pinned onto case return
vectors to indicate the state of the stack for the garbage collector.
460

ken's avatar
ken committed
461
462
463
464
465
466
467
468
In the compiled program, liveness bitmaps that fit inside a single
word (StgWord) are stored as a single word, while larger bitmaps are
stored as a pointer to an array of words.  When we compile via C
(especially when we bootstrap via HC files), we generate identical C
code regardless of whether words are 32- or 64-bit on the target
machine, by postponing the decision of how to store each liveness
bitmap to C compilation time (or rather, C preprocessing time).

469
470
\begin{code}
-- A liveness bitmap, held as a list of BitSets.
-- NOTE(review): presumably one BitSet per chunk of the bitmap, so that
-- bitmaps wider than a single BitSet can be represented -- confirm.
type LivenessMask = [BitSet]
471

ken's avatar
ken committed
472
-- A liveness mask together with a CLabel.
-- NOTE(review): the label presumably names the bitmap emitted for masks
-- that do not fit in one word (cf. CBitmap) -- confirm.
data Liveness = Liveness CLabel LivenessMask
473
\end{code}
474

475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
%************************************************************************
%*									*
\subsection[HeapOffset]{@Heap Offsets@}
%*									*
%************************************************************************

This used to be a grotesquely complicated datatype in an attempt to
hide the details of header sizes from the compiler itself.  Now these
constants are imported from the RTS, and we deal in real Ints.

\begin{code}
-- All offsets are plain Ints; the synonyms below only document intent
-- and provide no type safety.
type HeapOffset = Int			-- ToDo: remove

-- Virtual offsets, as taken by hpRel, spRel and nodeRel.
type VirtualHeapOffset	= HeapOffset
type VirtualSpOffset	= Int

-- NOTE(review): presumably offsets already made relative to Hp / Sp;
-- neither synonym is used in this part of the file -- confirm.
type HpRelOffset	= HeapOffset
type SpRelOffset	= Int
\end{code}

%************************************************************************
%*									*
\subsection[MagicId]{@MagicIds@: registers and such}
%*									*
%************************************************************************
500
501
502
503
504
505
506

\begin{code}
-- The registers of the abstract machine.  Equality on MagicId is defined
-- by a hand-written Eq instance, so any constructor added here must be
-- given a tag there as well.
data MagicId
  = BaseReg             -- mentioned only in nativeGen

  -- Argument and return registers
  | VanillaReg          -- pointers, unboxed ints and chars
        PrimRep
        FastInt         -- its number (1 .. mAX_Vanilla_REG)

  | FloatReg            -- single-precision floating-point registers
        FastInt         -- its number (1 .. mAX_Float_REG)

  | DoubleReg           -- double-precision floating-point registers
        FastInt         -- its number (1 .. mAX_Double_REG)

  -- STG registers
  | Sp                  -- Stack ptr; points to last occupied stack location.
  | Su                  -- Stack update frame pointer
  | SpLim               -- Stack limit
  | Hp                  -- Heap ptr; points to last occupied heap location.
  | HpLim               -- Heap limit register
  | CurCostCentre       -- current cost centre register.
  | VoidReg             -- see "VoidPrim" type; just a placeholder;
                        --   no actual register
  | LongReg             -- long int registers (64-bit, really)
        PrimRep         -- Int64Rep or Word64Rep
        FastInt         -- its number (1 .. mAX_Long_REG)

  | CurrentTSO          -- pointer to current thread's TSO
  | CurrentNursery      -- pointer to allocation area
  | HpAlloc             -- allocation count for heap check failure
532

533

534
535
-- Well-known vanilla registers.
node, tagreg :: MagicId
node 	= VanillaReg PtrRep     (_ILIT 1) -- A convenient alias for Node
tagreg  = VanillaReg WordRep    (_ILIT 2) -- A convenient alias for TagReg
536

537
-- The Node register as an addressing mode.
nodeReg :: CAddrMode
nodeReg = CReg node
538
539
540
541
542
543
\end{code}

We need magical @Eq@ because @VanillaReg@s come in multiple flavors.

\begin{code}
-- Equality on registers compares a small integer tag computed for each
-- register.  The PrimRep carried by VanillaReg and LongReg is ignored on
-- purpose: two registers with the same number are the same register
-- whatever representation they are currently viewed at.
--
-- Every constructor of MagicId must have a tag equation.  The fixed
-- registers take tags 0..10; the numbered register families follow in
-- consecutive ranges, relying on register numbers starting at 1 and not
-- exceeding the corresponding mAX_*_REG.
instance Eq MagicId where
    reg1 == reg2 = tag reg1 ==# tag reg2
     where
        tag BaseReg          = (_ILIT(0) :: FastInt)
        tag Sp               = _ILIT(1)
        tag Su               = _ILIT(2)
        tag SpLim            = _ILIT(3)
        tag Hp               = _ILIT(4)
        tag HpLim            = _ILIT(5)
        tag CurCostCentre    = _ILIT(6)
        tag VoidReg          = _ILIT(7)
        -- These three had no equation before, so comparing them failed
        -- with a pattern-match error at run time.
        tag CurrentTSO       = _ILIT(8)
        tag CurrentNursery   = _ILIT(9)
        tag HpAlloc          = _ILIT(10)

        tag (VanillaReg _ i) = _ILIT(11) +# i

        tag (FloatReg i)     = _ILIT(11) +# maxv +# i
        tag (DoubleReg i)    = _ILIT(11) +# maxv +# maxf +# i
        tag (LongReg _ i)    = _ILIT(11) +# maxv +# maxf +# maxd +# i

        maxv = iUnbox mAX_Vanilla_REG
        maxf = iUnbox mAX_Float_REG
        maxd = iUnbox mAX_Double_REG
564
565
566
567
568
\end{code}

Returns True for any register that {\em potentially} dies across
C calls (or anything near equivalent).  We just say @True@ and
let the (machine-specific) registering macros sort things out...
569

570
571
\begin{code}
-- Conservatively treat every register as volatile; the argument is not
-- inspected.
isVolatileReg :: MagicId -> Bool
isVolatileReg _ = True
573
\end{code}