CmmLex.x 8.22 KB
Newer Older
1
-----------------------------------------------------------------------------
Simon Marlow's avatar
Simon Marlow committed
2
3
--
-- (c) The University of Glasgow, 2004-2006
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
--
-- Lexer for concrete Cmm.  We try to stay close to the C-- spec, but there
-- are a few minor differences:
--
--   * extra keywords for our macros, and float32/float64 types
--   * global registers (Sp,Hp, etc.)
--
-----------------------------------------------------------------------------

{
module CmmLex (
   CmmToken(..), cmmlex,
  ) where

#include "HsVersions.h"

import Cmm
import Lexer

import SrcLoc
import UniqFM
import StringBuffer
import FastString
import Ctype
Simon Marlow's avatar
Simon Marlow committed
28
import Util
29
30
31
--import TRACE
}

32
$whitechar   = [\ \t\n\r\f\v\xa0] -- \xa0 is Unicode no-break space
33
34
35
$white_no_nl = $whitechar # \n

$ascdigit  = 0-9
36
$unidigit  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
37
38
39
40
$digit     = [$ascdigit $unidigit]
$octit	   = 0-7
$hexit     = [$digit A-F a-f]

41
$unilarge  = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
42
43
44
$asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
$large     = [$asclarge $unilarge]

45
$unismall  = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
46
47
48
$ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
$small     = [$ascsmall $unismall \_]

49
$namebegin = [$large $small \. \$ \@]
50
51
52
53
54
55
56
57
58
$namechar  = [$namebegin $digit]

@decimal     = $digit+
@octal       = $octit+
@hexadecimal = $hexit+
@exponent    = [eE] [\-\+]? @decimal

@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent

59
@escape      = \\ ([abfnrt\\\'\"\?] | x $hexit{1,2} | $octit{1,3})
60
61
62
63
64
@strchar     = ($printable # [\"\\]) | @escape

cmm :-

$white_no_nl+		;
65
^\# pragma .* \n        ; -- Apple GCC 3.3 CPP generates pragmas in its output
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101

^\# (line)? 		{ begin line_prag }

-- single-line line pragmas, of the form
--    # <line> "<file>" <extra-stuff> \n
<line_prag> $digit+			{ setLine line_prag1 }
<line_prag1> \" ($printable # \")* \"	{ setFile line_prag2 }
<line_prag2> .*				{ pop }

<0> {
  \n			;

  [\:\;\{\}\[\]\(\)\=\`\~\/\*\%\-\+\&\^\|\>\<\,\!]	{ special_char }
  
  ".." 			{ kw CmmT_DotDot }
  "::" 			{ kw CmmT_DoubleColon }
  ">>"			{ kw CmmT_Shr }
  "<<"			{ kw CmmT_Shl }
  ">="			{ kw CmmT_Ge }
  "<="			{ kw CmmT_Le }
  "=="			{ kw CmmT_Eq }
  "!="			{ kw CmmT_Ne }
  "&&"			{ kw CmmT_BoolAnd }
  "||"			{ kw CmmT_BoolOr }
  
  R@decimal		{ global_regN VanillaReg }
  F@decimal		{ global_regN FloatReg }
  D@decimal		{ global_regN DoubleReg }
  L@decimal		{ global_regN LongReg }
  Sp			{ global_reg Sp }
  SpLim			{ global_reg SpLim }
  Hp			{ global_reg Hp }
  HpLim			{ global_reg HpLim }
  CurrentTSO		{ global_reg CurrentTSO }
  CurrentNursery	{ global_reg CurrentNursery }
  HpAlloc		{ global_reg HpAlloc }
102
  BaseReg		{ global_reg BaseReg }
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
  
  $namebegin $namechar*	{ name }
  
  0 @octal		{ tok_octal }
  @decimal		{ tok_decimal }
  0[xX] @hexadecimal	{ tok_hexadecimal }
  @floating_point	{ strtoken tok_float }
  
  \" @strchar* \"	{ strtoken tok_string }
}

{
data CmmToken
  = CmmT_SpecChar  Char
  | CmmT_DotDot
  | CmmT_DoubleColon
  | CmmT_Shr
  | CmmT_Shl
  | CmmT_Ge
  | CmmT_Le
  | CmmT_Eq
  | CmmT_Ne
  | CmmT_BoolAnd
  | CmmT_BoolOr
  | CmmT_CLOSURE
  | CmmT_INFO_TABLE
  | CmmT_INFO_TABLE_RET
  | CmmT_INFO_TABLE_FUN
  | CmmT_INFO_TABLE_CONSTR
  | CmmT_INFO_TABLE_SELECTOR
  | CmmT_else
  | CmmT_export
  | CmmT_section
  | CmmT_align
  | CmmT_goto
  | CmmT_if
  | CmmT_jump
  | CmmT_foreign
141
  | CmmT_prim
142
  | CmmT_return
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
  | CmmT_import
  | CmmT_switch
  | CmmT_case
  | CmmT_default
  | CmmT_bits8
  | CmmT_bits16
  | CmmT_bits32
  | CmmT_bits64
  | CmmT_float32
  | CmmT_float64
  | CmmT_GlobalReg GlobalReg
  | CmmT_Name	   FastString
  | CmmT_String	   String
  | CmmT_Int	   Integer
  | CmmT_Float     Rational
  | CmmT_EOF
#ifdef DEBUG
  deriving (Show)
#endif

-- -----------------------------------------------------------------------------
-- Lexer actions

type Action = SrcSpan -> StringBuffer -> Int -> P (Located CmmToken)

begin :: Int -> Action
begin code _span _str _len = do pushLexState code; lexToken

pop :: Action
pop _span _buf _len = do popLexState; lexToken

special_char :: Action
special_char span buf len = return (L span (CmmT_SpecChar (currentChar buf)))

kw :: CmmToken -> Action
kw tok span buf len = return (L span tok)

global_regN :: (Int -> GlobalReg) -> Action
global_regN con span buf len 
  = return (L span (CmmT_GlobalReg (con (fromIntegral n))))
  where buf' = stepOn buf
184
	n = parseUnsignedInteger buf' (len-1) 10 octDecDigit
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216

global_reg :: GlobalReg -> Action
global_reg r span buf len = return (L span (CmmT_GlobalReg r))

strtoken :: (String -> CmmToken) -> Action
strtoken f span buf len = 
  return (L span $! (f $! lexemeToString buf len))

name :: Action
name span buf len = 
  case lookupUFM reservedWordsFM fs of
	Just tok -> return (L span tok)
	Nothing  -> return (L span (CmmT_Name fs))
  where
	fs = lexemeToFastString buf len

reservedWordsFM = listToUFM $
	map (\(x, y) -> (mkFastString x, y)) [
	( "CLOSURE",		CmmT_CLOSURE ),
	( "INFO_TABLE",		CmmT_INFO_TABLE ),
	( "INFO_TABLE_RET",	CmmT_INFO_TABLE_RET ),
	( "INFO_TABLE_FUN",	CmmT_INFO_TABLE_FUN ),
	( "INFO_TABLE_CONSTR",	CmmT_INFO_TABLE_CONSTR ),
	( "INFO_TABLE_SELECTOR",CmmT_INFO_TABLE_SELECTOR ),
	( "else",		CmmT_else ),
	( "export",		CmmT_export ),
	( "section",		CmmT_section ),
	( "align",		CmmT_align ),
	( "goto",		CmmT_goto ),
	( "if",			CmmT_if ),
	( "jump",		CmmT_jump ),
	( "foreign",		CmmT_foreign ),
217
	( "prim",		CmmT_prim ),
218
	( "return",		CmmT_return ),
219
220
221
222
223
224
225
226
227
228
229
230
231
	( "import",		CmmT_import ),
	( "switch",		CmmT_switch ),
	( "case",		CmmT_case ),
	( "default",		CmmT_default ),
	( "bits8",		CmmT_bits8 ),
	( "bits16",		CmmT_bits16 ),
	( "bits32",		CmmT_bits32 ),
	( "bits64",		CmmT_bits64 ),
	( "float32",		CmmT_float32 ),
	( "float64",		CmmT_float64 )
	]

tok_decimal span buf len 
232
  = return (L span (CmmT_Int  $! parseUnsignedInteger buf len 10 octDecDigit))
233
234

tok_octal span buf len 
235
  = return (L span (CmmT_Int  $! parseUnsignedInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit))
236
237

tok_hexadecimal span buf len 
238
  = return (L span (CmmT_Int  $! parseUnsignedInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
239
240
241
242
243
244
245
246
247
248
249

tok_float str = CmmT_Float $! readRational str

tok_string str = CmmT_String (read str)
		 -- urk, not quite right, but it'll do for now

-- -----------------------------------------------------------------------------
-- Line pragmas

setLine :: Int -> Action
setLine code span buf len = do
250
  let line = parseUnsignedInteger buf len 10 octDecDigit
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
  setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
	-- subtract one: the line number refers to the *following* line
  -- trace ("setLine "  ++ show line) $ do
  popLexState
  pushLexState code
  lexToken

setFile :: Int -> Action
setFile code span buf len = do
  let file = lexemeToFastString (stepOn buf) (len-2)
  setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
  popLexState
  pushLexState code
  lexToken

-- -----------------------------------------------------------------------------
-- This is the top-level function: called from the parser each time a
-- new token is to be read from the input.

cmmlex :: (Located CmmToken -> P a) -> P a
cmmlex cont = do
  tok@(L _ tok__) <- lexToken
  --trace ("token: " ++ show tok__) $ do
  cont tok

lexToken :: P (Located CmmToken)
lexToken = do
  inp@(loc1,buf) <- getInput
  sc <- getLexState
  case alexScan inp sc of
    AlexEOF -> do let span = mkSrcSpan loc1 loc1
282
		  setLastToken span 0 0
283
284
285
286
287
288
289
290
		  return (L span CmmT_EOF)
    AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
    AlexSkip inp2 _ -> do
	setInput inp2
	lexToken
    AlexToken inp2@(end,buf2) len t -> do
	setInput inp2
	let span = mkSrcSpan loc1 end
291
	span `seq` setLastToken span len len
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
	t span buf len

-- -----------------------------------------------------------------------------
-- Monad stuff

-- Stuff that Alex needs to know about our input type:
type AlexInput = (SrcLoc,StringBuffer)

alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (_,s) = prevChar s '\n'

alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
alexGetChar (loc,s) 
  | atEnd s   = Nothing
  | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
  where c = currentChar s
        loc' = advanceSrcLoc loc c
	s'   = stepOn s

getInput :: P AlexInput
getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)

setInput :: AlexInput -> P ()
setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()
}