OccName.lhs 20.1 KB
Newer Older
1
%
2 3 4 5 6 7 8
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%

\section[OccName]{@OccName@}

\begin{code}
module OccName (
9
	-- The NameSpace type; abstract
10 11
	NameSpace, tcName, clsName, tcClsName, dataName, varName, ipName,
	tvName, uvName, nameSpaceString, 
12 13 14

	-- The OccName type
	OccName, 	-- Abstract, instance of Outputable
15
	pprOccName, 
16

17
	mkOccFS, mkSysOcc, mkSysOccFS, mkCCallOcc, mkVarOcc, mkKindOccFS,
18
	mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
19
	mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
20
 	mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
21
	mkGenOcc1, mkGenOcc2, 
22
	
23
	isSysOcc, isTvOcc, isUvOcc, isDataOcc, isDataSymOcc, isSymOcc, isIPOcc, isValOcc,
24 25 26

	occNameFS, occNameString, occNameUserString, occNameSpace, occNameFlavour, 
	setOccNameSpace,
27 28 29 30

	-- Tidying up
	TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,

31
	-- Encoding
sof's avatar
sof committed
32
	EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
33 34 35 36 37

	-- The basic form of names
	isLexCon, isLexVar, isLexId, isLexSym,
	isLexConId, isLexConSym, isLexVarId, isLexVarSym,
	isLowerISO, isUpperISO
38 39 40 41 42

    ) where

#include "HsVersions.h"

43
import Char	( isDigit, isUpper, isLower, ISALPHANUM, ord, chr, digitToInt )
44 45 46 47 48 49
import Util	( thenCmp )
import FiniteMap ( FiniteMap, emptyFM, lookupFM, addToFM, elemFM )
import Outputable
import GlaExts
\end{code}

50 51 52 53 54 55 56 57 58 59 60 61
We hold both module names and identifier names in a 'Z-encoded' form
that makes them acceptable both as a C identifier and as a Haskell
(prefix) identifier. 

They can always be decoded again when printing error messages
or anything else for the user, but it does make sense for it
to be represented here in encoded form, so that when generating
code the encoding operation is not performed on each occurrence.

These type synonyms help documentation.

\begin{code}
62
-- Documentation-only synonyms: they record whether a string is still in
-- the form the user wrote it, or has already been Z-encoded.
type UserFS        = FAST_STRING    -- Fast string, as the user typed it
type EncodedFS     = FAST_STRING    -- Fast string, in encoded form

type UserString    = String         -- Ordinary string, as the user typed it
type EncodedString = String         -- Ordinary string, in encoded form


-- Print an encoded fast string.  In a user style the name is decoded
-- first; in every other style the encoded form is printed as it stands.
pprEncodedFS :: EncodedFS -> SDoc
pprEncodedFS fs = getPprStyle render
  where
    render sty
        -- ptext (decodeFS fs) would needlessly pack the string again
      | userStyle sty = text (decode (_UNPK_ fs))
      | otherwise     = ptext fs
76 77 78 79 80 81 82 83 84 85
\end{code}

%************************************************************************
%*									*
\subsection{Name space}
%*									*
%************************************************************************

\begin{code}
-- The name space an occurrence name lives in.
-- NB: the order of the constructors determines the derived Ord instance,
-- so do not reorder them casually.
data NameSpace
  = VarName     -- Variables
  | IPName      -- Implicit parameters
  | DataName    -- Data constructors
  | TvName      -- Type variables
  | UvName      -- Usage variables
  | TcClsName   -- Type constructors and classes; Haskell has them
                -- in the same name space for now.
  deriving( Eq, Ord )

-- NameSpace is exported abstractly; these constants are how the name
-- spaces are referred to.  Type constructors and classes currently share
-- one name space, but giving them separate constants here means they can
-- be split apart later without changing any callers.
tcName, clsName, tcClsName, dataName, tvName, uvName, varName, ipName
  :: NameSpace

tcName    = TcClsName   -- Type constructors
clsName   = TcClsName   -- Classes
tcClsName = TcClsName   -- Not sure which!

varName   = VarName
ipName    = IPName
dataName  = DataName
tvName    = TvName
uvName    = UvName
105 106 107 108 109


-- Human-readable description of a name space.
nameSpaceString :: NameSpace -> String
nameSpaceString sp = case sp of
    DataName  -> "Data constructor"
    VarName   -> "Variable"
    IPName    -> "Implicit Param"
    TvName    -> "Type variable"
    UvName    -> "Usage variable"
    TcClsName -> "Type constructor or class"
114 115 116 117 118 119 120 121 122 123
\end{code}


%************************************************************************
%*									*
\subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
%*									*
%************************************************************************

\begin{code}
124 125 126 127 128 129 130 131 132 133 134 135 136
-- An occurrence name: the name space it belongs to, paired with the text
-- of the name.  The text is normally held in encoded form; see
-- mkCCallOcc and mkKindOccFS for the constructors that bypass encoding.
data OccName = OccName 
			NameSpace	-- Which name space the name lives in
			EncodedFS	-- The text of the name
\end{code}


\begin{code}
-- Equality and ordering both look at the name text first and only then
-- at the name space.
instance Eq OccName where
    a == b = occNameFS a    == occNameFS b
          && occNameSpace a == occNameSpace b

instance Ord OccName where
    compare a b = (occNameFS a    `compare` occNameFS b)
                  `thenCmp`
                  (occNameSpace a `compare` occNameSpace b)
137 138 139 140 141 142 143 144 145 146 147
\end{code}


%************************************************************************
%*									*
\subsection{Printing}
%*									*
%************************************************************************
 
\begin{code}
instance Outputable OccName where
    ppr occ = pprOccName occ

-- Print just the text of the name; the name space is not shown.
pprOccName :: OccName -> SDoc
pprOccName = pprEncodedFS . occNameFS
152 153 154 155 156 157 158 159 160
\end{code}


%************************************************************************
%*									*
\subsection{Construction}
%*									*
%************************************************************************

161 162
*Sys* things do no encoding; the caller should ensure that the thing is
already encoded
163

164 165
\begin{code}
-- Build an OccName from a String that the caller has already encoded.
-- The ASSERT checks that claim (as far as alreadyEncoded is able to).
mkSysOcc :: NameSpace -> EncodedString -> OccName
mkSysOcc sp encoded
  = ASSERT2( alreadyEncoded encoded, text encoded )
    OccName sp (_PK_ encoded)

-- As mkSysOcc, but starting from a fast string.
mkSysOccFS :: NameSpace -> EncodedFS -> OccName
mkSysOccFS sp encoded_fs
  = ASSERT2( alreadyEncodedFS encoded_fs, ppr encoded_fs )
    OccName sp encoded_fs

173 174 175 176 177 178 179
mkCCallOcc :: EncodedString -> OccName
-- A variant of mkSysOcc that skips the already-encoded check.  The string
-- will be something like "{__ccall f dyn Int# -> Int#}", which packs a lot
-- of information into something that then parses like an Id; the braces
-- (among other things) would make alreadyEncoded complain.
mkCCallOcc str = OccName varName packed
  where
    packed = _PK_ str

180 181 182 183 184 185
-- Kind constructors get a special function.  Uniquely, they are not encoded,
-- so that they have names like '*'.  This means that *even in interface files*
-- we'll get kinds like (* -> (* -> *)).  We can't use mkSysOcc because it
-- has an ASSERT that doesn't hold for such names.
mkKindOccFS :: NameSpace -> EncodedFS -> OccName
mkKindOccFS = OccName
186 187
\end{code}

188
*Source-code* things are encoded.
189 190

\begin{code}
191 192
-- Build an OccName from user-written text, encoding it on the way in.
mkOccFS :: NameSpace -> UserFS -> OccName
mkOccFS occ_sp = mkSysOccFS occ_sp . encodeFS

-- mkOccFS specialised to the variable name space.
mkVarOcc :: UserFS -> OccName
mkVarOcc = mkOccFS varName
196 197 198
\end{code}


199

200 201
%************************************************************************
%*									*
202
\subsection{Predicates and taking them apart}
203 204 205
%*									*
%************************************************************************

206 207 208
\begin{code} 
-- The text of the name, as stored (a fast string, in encoded form).
occNameFS :: OccName -> EncodedFS
occNameFS (OccName _ fs) = fs

-- The stored text, unpacked to an ordinary String (still encoded).
occNameString :: OccName -> EncodedString
occNameString occ = _UNPK_ (occNameFS occ)

-- The name decoded back to user form.
occNameUserString :: OccName -> UserString
occNameUserString = decode . occNameString

occNameSpace :: OccName -> NameSpace
occNameSpace (OccName ns _) = ns

-- Move a name into a different name space, keeping its text.
setOccNameSpace :: OccName -> NameSpace -> OccName
setOccNameSpace (OccName _ fs) ns = OccName ns fs

-- occNameFlavour is used only to generate good error messages
occNameFlavour :: OccName -> String
occNameFlavour = nameSpaceString . occNameSpace
\end{code}
226

227
\begin{code}
228
isTvOcc, isDataSymOcc, isSymOcc, isUvOcc :: OccName -> Bool
229

230 231
-- Is this a type variable?
isTvOcc occ = occNameSpace occ == TvName

-- Is this a usage variable?
isUvOcc occ = occNameSpace occ == UvName

-- Is this a value-level name (a variable or a data constructor)?
isValOcc :: OccName -> Bool
isValOcc occ = case occNameSpace occ of
    VarName  -> True
    DataName -> True
    _        -> False

240 241 242 243
-- Data constructor operator: a data constructor whose decoded name passes
-- isLexConSym.  Pretty inefficient, because the name has to be decoded
-- before it can be tested.
isDataSymOcc occ = isDataOcc occ && isLexConSym (decodeFS (occNameFS occ))

-- Is this a data constructor?
isDataOcc :: OccName -> Bool
isDataOcc occ = occNameSpace occ == DataName
247

248 249 250 251
-- Any operator (data constructor or variable).
-- Pretty inefficient, because the name has to be decoded before testing.
--
-- Names in any other name space (type variables, usage variables, type
-- constructors/classes, implicit parameters) are reported as non-operators.
-- Previously there was no equation for them, so asking the question of
-- such a name died with a pattern-match failure.
isSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
isSymOcc (OccName VarName s)  = isLexSym (decodeFS s)
isSymOcc other                = False
252 253 254

-- Is this an implicit parameter?
isIPOcc :: OccName -> Bool
isIPOcc occ = occNameSpace occ == IPName
255
\end{code}
256 257


258 259 260 261 262
%************************************************************************
%*									*
\subsection{Making system names}
%*									*
%************************************************************************
263

264
Here's our convention for splitting up the interface file name space:
265

266 267
	d...		dictionary identifiers
			(local variables, so no name-clash worries)
268

269
	$f...		dict-fun identifiers (from inst decls)
270
	$dm...		default methods
271 272 273 274 275
	$p...		superclass selectors
	$w...		workers
	:T...		compiler-generated tycons for dictionaries
	:D...		...ditto data cons
	$sf..		specialised version of f
276

277
	in encoded form these appear as zdfxxx etc
278

279
	:...		keywords (export:, letrec: etc.)
280

281 282 283 284 285
This knowledge is encoded in the following functions.


@mk_deriv@ generates an @OccName@ from a prefix (such as @$w@ or @$dm@)
and a string.  The prefix is encoded by @mk_deriv@ itself.
NB: The string must already be encoded!
286 287

\begin{code}
288 289 290 291 292
mk_deriv :: NameSpace
         -> String          -- Distinguishes one sort of derived name from
                            -- another; given in user form and encoded here
         -> EncodedString   -- Must be already encoded!!  We don't want to
                            -- encode it a second time because encoding
                            -- isn't idempotent
         -> OccName
mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp derived
  where
    derived = encode sys_prefix ++ str
\end{code}
296

297
\begin{code}
298
mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
    mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
    mkForeignExportOcc, mkGenOcc1, mkGenOcc2
  :: OccName -> OccName

-- Derived variables: the '$' prefix is one that no Haskell value could have
mkWorkerOcc        occ = mk_simple_deriv varName  "$w"     occ
mkDefaultMethodOcc occ = mk_simple_deriv varName  "$dm"    occ
mkDictOcc          occ = mk_simple_deriv varName  "$d"     occ
mkIPOcc            occ = mk_simple_deriv varName  "$i"     occ
mkSpecOcc          occ = mk_simple_deriv varName  "$s"     occ
mkForeignExportOcc occ = mk_simple_deriv varName  "$f"     occ
mkGenOcc1          occ = mk_simple_deriv varName  "$gfrom" occ  -- Generics
mkGenOcc2          occ = mk_simple_deriv varName  "$gto"   occ  -- Generics

-- Derived tycons and datacons: the ':' prefix makes sure the result
-- classifies as a tycon/datacon
mkDerivedTyConOcc  occ = mk_simple_deriv tcName   ":"      occ
mkClassTyConOcc    occ = mk_simple_deriv tcName   ":T"     occ
mkClassDataConOcc  occ = mk_simple_deriv dataName ":D"     occ

-- Derive a name from an existing OccName: the new name is the (encoded)
-- prefix followed by the old name's encoded text.
mk_simple_deriv :: NameSpace -> String -> OccName -> OccName
mk_simple_deriv ns prefix = mk_deriv ns prefix . occNameString
315 316 317 318 319 320 321


-- True for all the '$' things, i.e. names whose decoded form starts with
-- '$'.  We don't care about the ':' ones: isSysOcc is only called for Ids
-- anyway.
isSysOcc ::  OccName -> Bool
isSysOcc occ = take 1 (occNameUserString occ) == "$"
322 323 324 325 326 327 328
\end{code}

\begin{code}
-- Name of a superclass selector: "$p", then the superclass index, then
-- the (encoded) class name.
mkSuperDictSelOcc :: Int        -- Index of superclass, eg 3
                  -> OccName    -- Class, eg "Ord"
                  -> OccName    -- eg "$p3Ord"
mkSuperDictSelOcc index cls_occ = mk_deriv varName "$p" suffix
  where
    suffix = show index ++ occNameString cls_occ
330 331 332 333
\end{code}


\begin{code}
334 335 336 337 338 339 340 341 342
-- Name of a dictionary function: "$f", then the index (omitted when it is
-- zero), then the supplied string.
mkDFunOcc :: EncodedString      -- Typically the class and type glommed together e.g. "OrdMaybe"
          -> Int                -- Unique to distinguish dfuns which share the previous two
                                --      eg 3
          -- The requirement is that the (string,index) pair be unique in this module

          -> OccName            -- "$f3OrdMaybe" (or "$fOrdMaybe" for index 0);
                                -- note that the index comes *before* the string

mkDFunOcc string index
  = mk_deriv varName "$f" (show_index ++ string)
  where
    -- Index 0 is left out altogether, so the first dfun for a given
    -- string gets the plain "$f<string>" name
    show_index | index == 0 = ""
               | otherwise  = show index
\end{code}

348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369
We used to add a '$m' to indicate a method, but that gives rise to bad
error messages from the type checker when we print the function name or pattern
of an instance-decl binding.  Why? Because the binding is zapped
to use the method name in place of the selector name.
(See TcClassDcl.tcMethodBind)

The way it is now, -ddump-xx output may look confusing, but
you can always say -dppr-debug to get the uniques.

However, we *do* have to zap the first character to be lower case,
because overloaded constructors (blarg) generate methods too.
And convert to VarName space

e.g. a call to constructor MkFoo where
	data (Ord a) => Foo a = MkFoo a

If this is necessary, we do it by prefixing '$m'.  These 
guys never show up in error messages.  What a hack.

\begin{code}
-- A name already in the variable name space is returned untouched; a name
-- from any other name space is moved into the variable name space with a
-- "$m" prefix.
mkMethodOcc :: OccName -> OccName
mkMethodOcc occ = case occNameSpace occ of
    VarName -> occ
    _       -> mk_simple_deriv varName "$m" occ
371 372
\end{code}

373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395

%************************************************************************
%*									*
\subsection{Tidying them up}
%*									*
%************************************************************************

Before we print chunks of code we like to rename it so that
we don't have to print lots of silly uniques in it.  But we mustn't
accidentally introduce name clashes!  So the idea is that we leave the
OccName alone unless it accidentally clashes with one that is already
in scope; if so, we tack on '1' at the end and try again, then '2', and
so on till we find a unique one.

There's a wrinkle for operators.  Consider '>>='.  We can't use '>>=1' 
because that isn't a single lexeme.  So we encode it to 'zgzgze' and *then*
tack on the '1', if necessary.

\begin{code}
-- The in-scope OccNames, keyed by their text.  The Int is the next
-- numeric suffix to try when that text clashes (see tidyOccName).
type TidyOccEnv = FiniteMap FAST_STRING Int
emptyTidyOccEnv = emptyFM

-- Initialise with names to avoid!
initTidyOccEnv :: [OccName] -> TidyOccEnv
initTidyOccEnv occs = foldl avoid emptyTidyOccEnv occs
  where
    avoid env (OccName _ fs) = addToFM env fs 1
397 398 399

-- Give an OccName a text that clashes with nothing in the environment,
-- returning the extended environment along with the (possibly renamed)
-- OccName.
tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)
tidyOccName in_scope occ@(OccName occ_sp fs)
  = case lookupFM in_scope fs of
      Nothing -> (addToFM in_scope fs 1, occ)     -- First occurrence
      Just _  -> search in_scope (_UNPK_ fs)      -- Already occurs
  where
    search env candidate
      = case lookupFM env packed of
          -- Clash: bump the counter for this text, tack the old counter
          -- on the end, and go round again, just in case "t3" (say)
          -- clashes with a "t3" that's already in scope
          Just n  -> search (addToFM env packed (n+1)) (candidate ++ show n)

          -- candidate is now unique
          Nothing -> (addToFM env packed 1, mkSysOccFS occ_sp packed)
      where
        packed = _PK_ candidate
\end{code}


%************************************************************************
%*									*
422
\subsection{The 'Z' encoding}
423 424 425
%*									*
%************************************************************************

426 427 428 429 430 431 432 433
This is the main name-encoding and decoding function.  It encodes any
string into a string that is acceptable as a C name.  This is the name
by which things are known right through the compiler.

The basic encoding scheme is this.  

* Tuples (,,,) are coded as Z3T

434
* Alphabetic characters (upper and lower) and digits
435 436 437 438 439
	all translate to themselves; 
	except 'Z', which translates to 'ZZ'
	and    'z', which translates to 'zz'
  We need both so that we can preserve the variable/tycon distinction

440
* Most other printable characters translate to 'zx' or 'Zx' for some
441 442
	alphabetic character x

443 444
* The others translate as 'znnnU' where 'nnn' is the decimal number
        of the character
445 446 447 448

	Before		After
	--------------------------
	Trak		Trak
449 450 451 452 453 454
	foo_wib		foozuwib
	>		zg
	>1		zg1
	foo#		foozh
	foo##		foozhzh
	foo##1		foozhzh1
455
	fooZ		fooZZ	
456
	:+		ZCzp
457 458 459
	()		Z0T
	(,,,,)		Z4T

460 461

\begin{code}
462 463 464 465 466 467
-- alreadyEncoded is used in ASSERTs to check for encoded
-- strings.  It isn't fail-safe, of course, because, say 'zh' might
-- be encoded or not.
alreadyEncoded :: String -> Bool
alreadyEncoded = all ok
  where
    -- Accepting ' ' is a bit of a lie; if we really wanted spaces
    -- in names we'd have to encode them.  But we do put
    -- spaces in ccall "occurrences", and we don't want to
    -- reject them here
    ok ch = ch == ' ' || ISALPHANUM ch

-- The same check on a fast string.
alreadyEncodedFS :: FAST_STRING -> Bool
alreadyEncodedFS fs = alreadyEncoded (_UNPK_ fs)

-- Encode a user string.  A string that is exactly a tuple constructor
-- gets the compact ZnT form; anything else is encoded one character at
-- a time.
encode :: UserString -> EncodedString
encode cs = maybe (concatMap encode_ch cs) tuple_code (maybe_tuple cs)
  where
    tuple_code n = 'Z' : show n ++ "T"      -- Tuples go to Z2T etc

-- ToDo: Unboxed tuples too, perhaps?

-- Recognise a tuple constructor name: '(' followed by nothing but commas
-- and then a final ')'.  The result is the number of commas.
maybe_tuple :: String -> Maybe Int
maybe_tuple str = case str of
    '(' : rest -> check_tuple (0::Int) rest
    _          -> Nothing

-- Count commas up to the closing ')'; the first argument is the number
-- of commas seen so far.
check_tuple :: Int -> String -> Maybe Int
check_tuple n str = case str of
    ',' : rest -> check_tuple (n+1) rest
    ")"        -> Just n
    _          -> Nothing

-- Encode a fast string.  When no character needs encoding the original
-- fast string is handed back as is, which saves re-packing it.
encodeFS :: UserFS -> EncodedFS
encodeFS fast_str
  = if any (not . unencodedChar) str
        then _PK_ (encode str)
        else fast_str
  where
    str = _UNPK_ fast_str

-- True for chars that don't need encoding: the alphanumerics, apart from
-- 'Z' and 'z', which are the escape characters.
unencodedChar :: Char -> Bool
unencodedChar c = c /= 'Z' && c /= 'z' && ISALPHANUM c

-- Encode a single character.
encode_ch :: Char -> EncodedString
encode_ch c
  | unencodedChar c = [c]       -- Common case first
  | otherwise       = case c of
        -- Constructors
        '('  -> "ZL"    -- Needed for things like (,), and (->)
        ')'  -> "ZR"    -- For symmetry with (
        '['  -> "ZM"
        ']'  -> "ZN"
        ':'  -> "ZC"
        'Z'  -> "ZZ"

        -- Variables
        'z'  -> "zz"
        '&'  -> "za"
        '|'  -> "zb"
        '^'  -> "zc"
        '$'  -> "zd"
        '='  -> "ze"
        '>'  -> "zg"
        '#'  -> "zh"
        '.'  -> "zi"
        '<'  -> "zl"
        '-'  -> "zm"
        '!'  -> "zn"
        '+'  -> "zp"
        '\'' -> "zq"
        '\\' -> "zr"
        '/'  -> "zs"
        '*'  -> "zt"
        '_'  -> "zu"
        '%'  -> "zv"

        -- Everything else: 'z', the decimal character code, then 'U'
        _    -> 'z' : shows (ord c) "U"
538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557
\end{code}

Decode is used for user printing.

\begin{code}
-- Decode a fast string: unpack, decode, and pack again.
decodeFS :: FAST_STRING -> FAST_STRING
decodeFS fs = _PK_ (decode (_UNPK_ fs))

-- Decode an encoded string.  'Z' and 'z' introduce an escape, which is
-- handed to decode_escape; every other character stands for itself.
decode :: EncodedString -> UserString
decode str = case str of
    []       -> []
    c : rest
      | c == 'Z' || c == 'z' -> decode_escape rest
      | otherwise            -> c : decode rest

-- Decode the remainder of a string, given that the character just before
-- it was an escape introducer ('Z' or 'z').  The first character of the
-- argument selects what the escape stands for; decoding then carries on
-- with the rest via decode.
decode_escape :: EncodedString -> UserString

-- An escape introducer right at the end of the string is malformed.
-- Treat it like any other unrecognised escape (see the last equation):
-- trace it and carry on, rather than dying with a pattern-match failure.
decode_escape [] = pprTrace "decode_escape" (text "escape character at end of string") []

-- Constructors
decode_escape ('L' : rest) = '(' : decode rest
decode_escape ('R' : rest) = ')' : decode rest
decode_escape ('M' : rest) = '[' : decode rest
decode_escape ('N' : rest) = ']' : decode rest
decode_escape ('C' : rest) = ':' : decode rest
decode_escape ('Z' : rest) = 'Z' : decode rest

-- Variables
decode_escape ('z' : rest) = 'z' : decode rest
decode_escape ('a' : rest) = '&' : decode rest
decode_escape ('b' : rest) = '|' : decode rest
decode_escape ('c' : rest) = '^' : decode rest
decode_escape ('d' : rest) = '$' : decode rest
decode_escape ('e' : rest) = '=' : decode rest
decode_escape ('g' : rest) = '>' : decode rest
decode_escape ('h' : rest) = '#' : decode rest
decode_escape ('i' : rest) = '.' : decode rest
decode_escape ('l' : rest) = '<' : decode rest
decode_escape ('m' : rest) = '-' : decode rest
decode_escape ('n' : rest) = '!' : decode rest
decode_escape ('p' : rest) = '+' : decode rest
decode_escape ('q' : rest) = '\'' : decode rest
decode_escape ('r' : rest) = '\\' : decode rest
decode_escape ('s' : rest) = '/' : decode rest
decode_escape ('t' : rest) = '*' : decode rest
decode_escape ('u' : rest) = '_' : decode rest
decode_escape ('v' : rest) = '%' : decode rest

-- Tuples are coded as Z23T
-- Characters not having a specific code are coded as z224U
decode_escape (c : rest)
  | isDigit c = go (digitToInt c) rest
  where
    -- Accumulate the decimal number, then act on the terminating letter
    go n (d : ds) | isDigit d = go (10*n + digitToInt d) ds
    go n ('T' : ds)           = '(' : replicate n ',' ++ ')' : decode ds
    go n ('U' : ds)           = chr n : decode ds
    -- The panic message shows the whole escape, starting at its first digit
    go n other                = pprPanic "decode_escape" (ppr n <+> text (c:rest))

-- Unrecognised escape: trace it, drop the character, and carry on
decode_escape (c : rest) = pprTrace "decode_escape" (char c) (decode rest)
592 593 594 595 596
\end{code}


%************************************************************************
%*									*
597
\subsection{Lexical categories}
598 599 600
%*									*
%************************************************************************

601 602
These functions test strings to see if they fit the lexical categories
defined in the Haskell report.
603 604

\begin{code}
605 606
isLexCon,   isLexVar,    isLexId,    isLexSym    :: FAST_STRING -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FAST_STRING -> Bool

-- Constructor (prefix or infix)
isLexCon cs
  | isLexConId cs = True
  | otherwise     = isLexConSym cs

-- Variable (prefix or infix)
isLexVar cs
  | isLexVarId cs = True
  | otherwise     = isLexVarSym cs

-- Prefix identifier (constructor or variable)
isLexId cs
  | isLexConId cs = True
  | otherwise     = isLexVarId cs

-- Infix symbol (constructor or variable)
isLexSym cs
  | isLexConSym cs = True
  | otherwise      = isLexVarSym cs
613

614
-------------
615

616 617 618
-- Each test rejects the empty string and otherwise (special cases apart)
-- looks only at the first character.

-- Prefix type or data constructors, e.g. "Foo", "[]", "(,)"
isLexConId cs
  = not (_NULL_ cs) && (cs == SLIT("[]") || startsConId (_HEAD_ cs))

-- Ordinary prefix identifiers, e.g. "x", "_x"
isLexVarId cs
  = not (_NULL_ cs) && startsVarId (_HEAD_ cs)

-- Infix type or data constructors, e.g. ":-:", ":", "->"
isLexConSym cs
  = not (_NULL_ cs) && (cs == SLIT("->") || startsConSym (_HEAD_ cs))

-- Infix identifiers, e.g. "+"
isLexVarSym cs
  = not (_NULL_ cs) && startsVarSym (_HEAD_ cs)
633 634

-------------
635 636 637 638 639 640 641
-- Classify a name by its first character.
startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool

-- Infix Ids
startsVarSym c
  | isSymbolASCII c = True
  | otherwise       = isSymbolISO c

-- Infix data constructors
startsConSym = (== ':')

-- Ordinary Ids
startsVarId c = c == '_' || isLower c || isLowerISO c

-- Ordinary type constructors and data constructors
startsConId c = c == '(' || isUpper c || isUpperISO c


642 643 644 645 646 647
-- ASCII characters that may appear in an operator symbol
isSymbolASCII :: Char -> Bool
isSymbolASCII c = elem c "!#$%&*+./<=>?@\\^|~-"

-- Symbol characters from the upper half of the 8-bit range:
-- 0xa1..0xbf, plus 0xd7 and 0xf7
isSymbolISO :: Char -> Bool
isSymbolISO c = oc == 0xd7 || oc == 0xf7 || (0xa1 <= oc && oc <= 0xbf)
  where
    oc = ord c
-- Range tests on the character code, written with unboxed comparisons.
-- The comment after each definition gives the same test in ordinary
-- boxed form.

-- Codes 0xc0..0xde, excluding 0xd7
isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
	--0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
-- Codes 0xdf..0xff, excluding 0xf7
isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
	--0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
648
\end{code}