OccName.lhs 23.3 KB
Newer Older
1
{-% DrIFT (Automatic class derivations for Haskell) v1.1 %-}
2
%
3
4
5
6
7
8
9
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%

\section[OccName]{@OccName@}

\begin{code}
module OccName (
10
	-- The NameSpace type; abstact
11
	NameSpace, tcName, clsName, tcClsName, dataName, varName, 
12
	tvName, srcDataName, nameSpaceString, 
13
14
15

	-- The OccName type
	OccName, 	-- Abstract, instance of Outputable
16
	pprOccName, 
17

18
19
	mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
	mkVarOcc, mkVarOccEncoded,
20
	mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
21
	mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
22
 	mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
23
24
	mkGenOcc1, mkGenOcc2, mkLocalOcc, 
	mkDataConWrapperOcc, mkDataConWorkerOcc,
25
	
26
	isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
27
	reportIfUnused,
28

29
30
	occNameFS, occNameString, occNameUserString, occNameSpace, 
	occNameFlavour, briefOccNameFlavour,
31
	setOccNameSpace,
32
33
34
35

	-- Tidying up
	TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,

36
	-- Encoding
sof's avatar
sof committed
37
	EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
38
39
40
41
42

	-- The basic form of names
	isLexCon, isLexVar, isLexId, isLexSym,
	isLexConId, isLexConSym, isLexVarId, isLexVarSym,
	isLowerISO, isUpperISO
43
44
45
46
47

    ) where

#include "HsVersions.h"

sof's avatar
sof committed
48
import Char	( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
49
import Util	( thenCmp )
50
import Unique	( Unique )
51
import FiniteMap ( FiniteMap, emptyFM, lookupFM, addToFM, elemFM )
52
import FastString
53
import Outputable
54
55
import Binary

56
import GLAEXTS
57
58
\end{code}

59
60
61
62
63
64
65
66
67
68
69
70
We hold both module names and identifier names in a 'Z-encoded' form
that makes them acceptable both as a C identifier and as a Haskell
(prefix) identifier. 

They can always be decoded again when printing error messages
or anything else for the user, but it does make sense for it
to be represented here in encoded form, so that when generating
code the encoding operation is not performed on each occurrence.

These type synonyms help documentation.

\begin{code}
71
72
type UserFS    = FastString	-- As the user typed it
type EncodedFS = FastString	-- Encoded form
73
74
75
76
77
78

type UserString = String	-- As the user typed it
type EncodedString = String	-- Encoded form


pprEncodedFS :: EncodedFS -> SDoc
79
80
pprEncodedFS fs
  = getPprStyle 	$ \ sty ->
81
    if userStyle sty
82
83
84
	-- ftext (decodeFS fs) would needlessly pack the string again
	then text (decode (unpackFS fs))
        else ftext fs
85
86
87
88
89
90
91
92
93
\end{code}

%************************************************************************
%*									*
\subsection{Name space}
%*									*
%************************************************************************

\begin{code}
94
95
data NameSpace = VarName	-- Variables, including "source" data constructors
	       | DataName	-- "Real" data constructors 
96
97
	       | TvName		-- Type variables
	       | TcClsName	-- Type constructors and classes; Haskell has them
98
				-- in the same name space for now.
99
	       deriving( Eq, Ord )
100
   {-! derive: Binary !-}
101

102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
-- Note [Data Constructors]  
-- see also: Note [Data Constructor Naming] in DataCon.lhs
-- 
--	"Source" data constructors are the data constructors mentioned
--	in Haskell source code
--
--	"Real" data constructors are the data constructors of the
--	representation type, which may not be the same as the source
--	type

-- Example:
--	data T = T !(Int,Int)
--
-- The source datacon has type (Int,Int) -> T
-- The real   datacon has type Int -> Int -> T
-- GHC chooses a representation based on the strictness etc.


120
121
122
123
124
125
-- Though type constructors and classes are in the same name space now,
-- the NameSpace type is abstract, so we can easily separate them later
tcName    = TcClsName		-- Type constructors
clsName   = TcClsName		-- Classes
tcClsName = TcClsName		-- Not sure which!

126
127
128
dataName    = DataName
srcDataName = DataName	-- Haskell-source data constructors should be
			-- in the Data name space
129

130
131
tvName      = TvName
varName     = VarName
132
133
134
135
136
137

nameSpaceString :: NameSpace -> String
nameSpaceString DataName  = "Data constructor"
nameSpaceString VarName   = "Variable"
nameSpaceString TvName    = "Type variable"
nameSpaceString TcClsName = "Type constructor or class"
138
139
140
141
142
143
144
145
146
147
\end{code}


%************************************************************************
%*									*
\subsection[Name-pieces-datatypes]{The @OccName@ datatypes}
%*									*
%************************************************************************

\begin{code}
148
149
150
data OccName = OccName 
			NameSpace
			EncodedFS
151
   {-! derive : Binary !-}
152
153
154
155
156
157
158
159
160
161
\end{code}


\begin{code}
instance Eq OccName where
    (OccName sp1 s1) == (OccName sp2 s2) = s1 == s2 && sp1 == sp2

instance Ord OccName where
    compare (OccName sp1 s1) (OccName sp2 s2) = (s1  `compare` s2) `thenCmp`
						(sp1 `compare` sp2)
162
163
164
165
166
167
168
169
170
171
172
\end{code}


%************************************************************************
%*									*
\subsection{Printing}
%*									*
%************************************************************************
 
\begin{code}
instance Outputable OccName where
173
    ppr = pprOccName
174
175

pprOccName :: OccName -> SDoc
176
pprOccName (OccName sp occ) = pprEncodedFS occ
177
178
179
180
181
182
183
184
185
\end{code}


%************************************************************************
%*									*
\subsection{Construction}
%*									*
%************************************************************************

186
187
*Sys* things do no encoding; the caller should ensure that the thing is
already encoded
188

189
190
\begin{code}
mkSysOcc :: NameSpace -> EncodedString -> OccName
191
mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
192
		      OccName occ_sp (mkFastString str)
193
194
195
196
197

mkSysOccFS :: NameSpace -> EncodedFS -> OccName
mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
		       OccName occ_sp fs

198
mkFCallOcc :: EncodedString -> OccName
199
200
201
202
-- This version of mkSysOcc doesn't check that the string is already encoded,
-- because it will be something like "{__ccall f dyn Int# -> Int#}" 
-- This encodes a lot into something that then parses like an Id.
-- But then alreadyEncoded complains about the braces!
203
mkFCallOcc str = OccName varName (mkFastString str)
204

205
-- Kind constructors get a special function.  Uniquely, they are not encoded,
206
207
208
209
210
-- so that they have names like '*'.  This means that *even in interface files*
-- we'll get kinds like (* -> (* -> *)).  We can't use mkSysOcc because it
-- has an ASSERT that doesn't hold.
mkKindOccFS :: NameSpace -> EncodedFS -> OccName
mkKindOccFS occ_sp fs = OccName occ_sp fs
211
212
\end{code}

213
*Source-code* things are encoded.
214
215

\begin{code}
216
217
mkOccFS :: NameSpace -> UserFS -> OccName
mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
218

219
220
mkVarOcc :: UserFS -> OccName
mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
221
222
223

mkVarOccEncoded :: EncodedFS -> OccName
mkVarOccEncoded fs = mkSysOccFS varName fs
224
225
226
\end{code}


227

228
229
%************************************************************************
%*									*
230
\subsection{Predicates and taking them apart}
231
232
233
%*									*
%************************************************************************

234
235
236
\begin{code} 
occNameFS :: OccName -> EncodedFS
occNameFS (OccName _ s) = s
237

238
occNameString :: OccName -> EncodedString
239
occNameString (OccName _ s) = unpackFS s
240

241
242
occNameUserString :: OccName -> UserString
occNameUserString occ = decode (occNameString occ)
243

244
245
occNameSpace :: OccName -> NameSpace
occNameSpace (OccName sp _) = sp
246

247
248
setOccNameSpace :: NameSpace -> OccName -> OccName
setOccNameSpace sp (OccName _ occ) = OccName sp occ
249

250
251
-- occNameFlavour is used only to generate good error messages
occNameFlavour :: OccName -> String
252
occNameFlavour (OccName DataName _)  = "Data constructor"
253
254
255
256
257
258
259
260
261
262
occNameFlavour (OccName TvName _)    = "Type variable"
occNameFlavour (OccName TcClsName _) = "Type constructor or class"
occNameFlavour (OccName VarName s)   = "Variable"

-- briefOccNameFlavour is used in debug-printing of names
briefOccNameFlavour :: OccName -> String
briefOccNameFlavour (OccName DataName _)    = "d"
briefOccNameFlavour (OccName VarName _)     = "v"
briefOccNameFlavour (OccName TvName _)      = "tv"
briefOccNameFlavour (OccName TcClsName _)   = "tc"
263
\end{code}
264

265
\begin{code}
266
isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
267

268
269
isTvOcc (OccName TvName _) = True
isTvOcc other              = False
270

271
272
273
isTcOcc (OccName TcClsName _) = True
isTcOcc other                 = False

274
275
276
277
isValOcc (OccName VarName  _) = True
isValOcc (OccName DataName _) = True
isValOcc other		      = False

278
279
280
-- Data constructor operator (starts with ':', or '[]')
-- Pretty inefficient!
isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
281
isDataSymOcc (OccName VarName s)  = isLexConSym (decodeFS s)
282
isDataSymOcc other		  = False
283

284
isDataOcc (OccName DataName _) = True
285
isDataOcc (OccName VarName s)  = isLexCon (decodeFS s)
286
isDataOcc other		       = False
287

288
289
290
291
292
-- Any operator (data constructor or variable)
-- Pretty inefficient!
isSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
isSymOcc (OccName VarName s)  = isLexSym (decodeFS s)
\end{code}
293
294


295
296
297
298
299
300
301
302
303
304
305
\begin{code}
reportIfUnused :: OccName -> Bool
  -- Haskell 98 encourages compilers to suppress warnings about
  -- unused names in a pattern if they start with "_".
reportIfUnused occ = case occNameUserString occ of
			('_' : _) -> False
			zz_other  -> True
\end{code}



306
307
308
309
310
%************************************************************************
%*									*
\subsection{Making system names}
%*									*
%************************************************************************
311

312
Here's our convention for splitting up the interface file name space:
313

314
315
	d...		dictionary identifiers
			(local variables, so no name-clash worries)
316

317
	$f...		dict-fun identifiers (from inst decls)
318
	$dm...		default methods
319
320
	$p...		superclass selectors
	$w...		workers
apt's avatar
apt committed
321
322
	:T...		compiler-generated tycons for dictionaries
	:D...		...ditto data cons
323
	$sf..		specialised version of f
324

325
	in encoded form these appear as Zdfxxx etc
326

327
	:...		keywords (export:, letrec: etc.)
apt's avatar
apt committed
328
--- I THINK THIS IS WRONG!
329

330
331
332
This knowledge is encoded in the following functions.


apt's avatar
apt committed
333
@mk_deriv@ generates an @OccName@ from the prefix and a string.
334
NB: The string must already be encoded!
335
336

\begin{code}
337
338
339
mk_deriv :: NameSpace 
	 -> String		-- Distinguishes one sort of derived name from another
	 -> EncodedString	-- Must be already encoded!!  We don't want to encode it a 
340
				-- second time because encoding isn't idempotent
341
	 -> OccName
342

343
344
mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
\end{code}
345

346
\begin{code}
347
mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
348
 	   mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
349
350
   :: OccName -> OccName

351
-- These derived variables have a prefix that no Haskell value could have
352
mkDataConWrapperOcc = mk_simple_deriv varName  "$W"
353
354
355
356
mkWorkerOcc         = mk_simple_deriv varName  "$w"
mkDefaultMethodOcc  = mk_simple_deriv varName  "$dm"
mkDerivedTyConOcc   = mk_simple_deriv tcName   ":"	-- The : prefix makes sure it classifies
mkClassTyConOcc     = mk_simple_deriv tcName   ":T"	-- as a tycon/datacon
357
358
mkClassDataConOcc   = mk_simple_deriv dataName ":D"	-- We go straight to the "real" data con
							-- for datacons from classes
359
360
361
362
mkDictOcc	    = mk_simple_deriv varName  "$d"
mkIPOcc		    = mk_simple_deriv varName  "$i"
mkSpecOcc	    = mk_simple_deriv varName  "$s"
mkForeignExportOcc  = mk_simple_deriv varName  "$f"
363
364
mkGenOcc1           = mk_simple_deriv varName  "$gfrom"      -- Generics
mkGenOcc2           = mk_simple_deriv varName  "$gto"        -- Generics
365
mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
366
367
368
369
370
371


-- Data constructor workers are made by setting the name space
-- of the data constructor OccName (which should be a DataName)
-- to DataName
mkDataConWorkerOcc datacon_occ = setOccNameSpace varName datacon_occ 
372
373
374
375
376
\end{code}

\begin{code}
mkSuperDictSelOcc :: Int 	-- Index of superclass, eg 3
		  -> OccName 	-- Class, eg "Ord"
377
		  -> OccName	-- eg "$p3Ord"
378
mkSuperDictSelOcc index cls_occ
379
  = mk_deriv varName "$p" (show index ++ occNameString cls_occ)
380
381
382
383
384

mkLocalOcc :: Unique 		-- Unique
	   -> OccName		-- Local name (e.g. "sat")
	   -> OccName		-- Nice unique version ("$L23sat")
mkLocalOcc uniq occ
385
386
387
   = mk_deriv varName ("$L" ++ show uniq) (occNameString occ)
	-- The Unique might print with characters 
	-- that need encoding (e.g. 'z'!)
388
389
390
391
\end{code}


\begin{code}
392
mkDFunOcc :: EncodedString	-- Typically the class and type glommed together e.g. "OrdMaybe"
393
	  -> OccName		-- "$fOrdMaybe"
394

395
mkDFunOcc string = mk_deriv VarName "$f" string
396
397
\end{code}

398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
We used to add a '$m' to indicate a method, but that gives rise to bad
error messages from the type checker when we print the function name or pattern
of an instance-decl binding.  Why? Because the binding is zapped
to use the method name in place of the selector name.
(See TcClassDcl.tcMethodBind)

The way it is now, -ddump-xx output may look confusing, but
you can always say -dppr-debug to get the uniques.

However, we *do* have to zap the first character to be lower case,
because overloaded constructors (blarg) generate methods too.
And convert to VarName space

e.g. a call to constructor MkFoo where
	data (Ord a) => Foo a = MkFoo a

If this is necessary, we do it by prefixing '$m'.  These 
guys never show up in error messages.  What a hack.

\begin{code}
mkMethodOcc :: OccName -> OccName
mkMethodOcc occ@(OccName VarName fs) = occ
420
mkMethodOcc occ			     = mk_simple_deriv varName "$m" occ
421
422
\end{code}

423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441

%************************************************************************
%*									*
\subsection{Tidying them up}
%*									*
%************************************************************************

Before we print chunks of code we like to rename it so that
we don't have to print lots of silly uniques in it.  But we mustn't
accidentally introduce name clashes!  So the idea is that we leave the
OccName alone unless it accidentally clashes with one that is already
in scope; if so, we tack on '1' at the end and try again, then '2', and
so on till we find a unique one.

There's a wrinkle for operators.  Consider '>>='.  We can't use '>>=1' 
because that isn't a single lexeme.  So we encode it to 'lle' and *then*
tack on the '1', if necessary.

\begin{code}
442
type TidyOccEnv = FiniteMap FastString Int	-- The in-scope OccNames
443
444
445
emptyTidyOccEnv = emptyFM

initTidyOccEnv :: [OccName] -> TidyOccEnv	-- Initialise with names to avoid!
446
initTidyOccEnv = foldl (\env (OccName _ fs) -> addToFM env fs 1) emptyTidyOccEnv
447
448
449

tidyOccName :: TidyOccEnv -> OccName -> (TidyOccEnv, OccName)

450
451
452
tidyOccName in_scope occ@(OccName occ_sp fs)
  | not (fs `elemFM` in_scope)
  = (addToFM in_scope fs 1, occ)	-- First occurrence
453
454

  | otherwise				-- Already occurs
455
  = go in_scope (unpackFS fs)
456
457
458
459
460
461
462
  where

    go in_scope str = case lookupFM in_scope pk_str of
			Just n  -> go (addToFM in_scope pk_str (n+1)) (str ++ show n)
				-- Need to go round again, just in case "t3" (say) 
				-- clashes with a "t3" that's already in scope

463
			Nothing -> (addToFM in_scope pk_str 1, mkSysOccFS occ_sp pk_str)
464
465
				-- str is now unique
		    where
466
		      pk_str = mkFastString str
467
468
469
470
471
\end{code}


%************************************************************************
%*									*
472
\subsection{The 'Z' encoding}
473
474
475
%*									*
%************************************************************************

476
477
478
479
480
481
482
483
This is the main name-encoding and decoding function.  It encodes any
string into a string that is acceptable as a C name.  This is the name
by which things are known right through the compiler.

The basic encoding scheme is this.  

* Tuples (,,,) are coded as Z3T

484
* Alphabetic characters (upper and lower) and digits
485
486
487
488
489
	all translate to themselves; 
	except 'Z', which translates to 'ZZ'
	and    'z', which translates to 'zz'
  We need both so that we can preserve the variable/tycon distinction

490
* Most other printable characters translate to 'zx' or 'Zx' for some
491
492
	alphabetic character x

493
494
* The others translate as 'znnnU' where 'nnn' is the decimal number
        of the character
495
496
497
498

	Before		After
	--------------------------
	Trak		Trak
499
500
501
502
503
504
	foo_wib		foozuwib
	>		zg
	>1		zg1
	foo#		foozh
	foo##		foozhzh
	foo##1		foozhzh1
505
	fooZ		fooZZ	
apt's avatar
apt committed
506
507
508
509
510
511
	:+		ZCzp
	()		Z0T	0-tuple
	(,,,,)		Z5T	5-tuple  
	(# #)           Z1H     unboxed 1-tuple	(note the space)
	(#,,,,#)	Z5H	unboxed 5-tuple
		(NB: There is no Z1T nor Z0H.)
512
513

\begin{code}
514
515
516
517
518
519
-- alreadyEncoded is used in ASSERTs to check for encoded
-- strings.  It isn't fail-safe, of course, because, say 'zh' might
-- be encoded or not.
alreadyEncoded :: String -> Bool
alreadyEncoded s = all ok s
		 where
520
521
522
523
524
		   ok ' ' = True
			-- This is a bit of a lie; if we really wanted spaces
			-- in names we'd have to encode them.  But we do put
			-- spaces in ccall "occurrences", and we don't want to
			-- reject them here
sof's avatar
sof committed
525
		   ok ch  = isAlphaNum ch
526

527
528
alreadyEncodedFS :: FastString -> Bool
alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
529
530
531

encode :: UserString -> EncodedString
encode cs = case maybe_tuple cs of
532
		Just n  -> n		-- Tuples go to Z2T etc
533
534
535
536
537
		Nothing -> go cs
	  where
		go []     = []
		go (c:cs) = encode_ch c ++ go cs

apt's avatar
apt committed
538
maybe_tuple "(# #)" = Just("Z1H")
539
maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
apt's avatar
apt committed
540
				 (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
541
				 other		     -> Nothing
apt's avatar
apt committed
542
maybe_tuple "()" = Just("Z0T")
543
maybe_tuple ('(' : cs)       = case count_commas (0::Int) cs of
apt's avatar
apt committed
544
				 (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
545
546
				 other	       -> Nothing
maybe_tuple other    	     = Nothing
547

548
549
550
count_commas :: Int -> String -> (Int, String)
count_commas n (',' : cs) = count_commas (n+1) cs
count_commas n cs	  = (n,cs)
551
552
553

encodeFS :: UserFS -> EncodedFS
encodeFS fast_str  | all unencodedChar str = fast_str
554
		   | otherwise	           = mkFastString (encode str)
555
		   where
556
		     str = unpackFS fast_str
557
558
559
560

unencodedChar :: Char -> Bool	-- True for chars that don't need encoding
unencodedChar 'Z' = False
unencodedChar 'z' = False
561
562
563
unencodedChar c   =  c >= 'a' && c <= 'z'
	          || c >= 'A' && c <= 'Z'
		  || c >= '0' && c <= '9'
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579

encode_ch :: Char -> EncodedString
encode_ch c | unencodedChar c = [c]	-- Common case first

-- Constructors
encode_ch '('  = "ZL"	-- Needed for things like (,), and (->)
encode_ch ')'  = "ZR"	-- For symmetry with (
encode_ch '['  = "ZM"
encode_ch ']'  = "ZN"
encode_ch ':'  = "ZC"
encode_ch 'Z'  = "ZZ"

-- Variables
encode_ch 'z'  = "zz"
encode_ch '&'  = "za"
encode_ch '|'  = "zb"
580
encode_ch '^'  = "zc"
581
582
583
584
585
586
587
588
589
590
591
592
593
encode_ch '$'  = "zd"
encode_ch '='  = "ze"
encode_ch '>'  = "zg"
encode_ch '#'  = "zh"
encode_ch '.'  = "zi"
encode_ch '<'  = "zl"
encode_ch '-'  = "zm"
encode_ch '!'  = "zn"
encode_ch '+'  = "zp"
encode_ch '\'' = "zq"
encode_ch '\\' = "zr"
encode_ch '/'  = "zs"
encode_ch '*'  = "zt"
594
encode_ch '_'  = "zu"
595
encode_ch '%'  = "zv"
596
encode_ch c    = 'z' : shows (ord c) "U"
597
598
599
600
601
\end{code}

Decode is used for user printing.

\begin{code}
602
603
decodeFS :: FastString -> FastString
decodeFS fs = mkFastString (decode (unpackFS fs))
604
605
606
607
608
609
610
611
612
613
614
615
616

decode :: EncodedString -> UserString
decode [] = []
decode ('Z' : rest) = decode_escape rest
decode ('z' : rest) = decode_escape rest
decode (c   : rest) = c : decode rest

decode_escape :: EncodedString -> UserString

decode_escape ('L' : rest) = '(' : decode rest
decode_escape ('R' : rest) = ')' : decode rest
decode_escape ('M' : rest) = '[' : decode rest
decode_escape ('N' : rest) = ']' : decode rest
617
618
decode_escape ('C' : rest) = ':' : decode rest
decode_escape ('Z' : rest) = 'Z' : decode rest
619
620
621
622

decode_escape ('z' : rest) = 'z' : decode rest
decode_escape ('a' : rest) = '&' : decode rest
decode_escape ('b' : rest) = '|' : decode rest
623
decode_escape ('c' : rest) = '^' : decode rest
624
625
626
627
628
629
630
631
632
633
634
635
636
decode_escape ('d' : rest) = '$' : decode rest
decode_escape ('e' : rest) = '=' : decode rest
decode_escape ('g' : rest) = '>' : decode rest
decode_escape ('h' : rest) = '#' : decode rest
decode_escape ('i' : rest) = '.' : decode rest
decode_escape ('l' : rest) = '<' : decode rest
decode_escape ('m' : rest) = '-' : decode rest
decode_escape ('n' : rest) = '!' : decode rest
decode_escape ('p' : rest) = '+' : decode rest
decode_escape ('q' : rest) = '\'' : decode rest
decode_escape ('r' : rest) = '\\' : decode rest
decode_escape ('s' : rest) = '/' : decode rest
decode_escape ('t' : rest) = '*' : decode rest
637
decode_escape ('u' : rest) = '_' : decode rest
638
decode_escape ('v' : rest) = '%' : decode rest
639
640

-- Tuples are coded as Z23T
641
-- Characters not having a specific code are coded as z224U
642
643
644
645
decode_escape (c : rest)
  | isDigit c = go (digitToInt c) rest
  where
    go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
apt's avatar
apt committed
646
647
648
649
    go 0 ('T' : rest) 		= "()" ++ (decode rest)
    go n ('T' : rest)		= '(' : replicate (n-1) ',' ++ ')' : decode rest
    go 1 ('H' : rest)		= "(# #)" ++ (decode rest)
    go n ('H' : rest)		= '(' : '#' : replicate (n-1) ',' ++ '#' : ')' : decode rest
650
    go n ('U' : rest)           = chr n : decode rest
651
652
653
    go n other = pprPanic "decode_escape" (ppr n <+> text (c:rest))

decode_escape (c : rest) = pprTrace "decode_escape" (char c) (decode rest)
654
decode_escape []	 = pprTrace "decode_escape" (text "empty") ""
655
656
657
658
659
\end{code}


%************************************************************************
%*									*
apt's avatar
apt committed
660
\subsection{Lexical categories}
661
662
663
%*									*
%************************************************************************

664
665
These functions test strings to see if they fit the lexical categories
defined in the Haskell report.
666
667

\begin{code}
668
669
isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
670

671
672
isLexCon cs = isLexConId  cs || isLexConSym cs
isLexVar cs = isLexVarId  cs || isLexVarSym cs
673

674
675
isLexId  cs = isLexConId  cs || isLexVarId  cs
isLexSym cs = isLexConSym cs || isLexVarSym cs
676

677
-------------
678

679
isLexConId cs				-- Prefix type or data constructors
680
  | nullFastString cs = False		-- 	e.g. "Foo", "[]", "(,)" 
681
  | cs == FSLIT("[]") = True
682
  | otherwise	      = startsConId (headFS cs)
683

684
isLexVarId cs				-- Ordinary prefix identifiers
685
686
  | nullFastString cs = False		-- 	e.g. "x", "_x"
  | otherwise         = startsVarId (headFS cs)
687

688
isLexConSym cs				-- Infix type or data constructors
689
  | nullFastString cs = False		--	e.g. ":-:", ":", "->"
690
  | cs == FSLIT("->") = True
691
  | otherwise	      = startsConSym (headFS cs)
692
693

isLexVarSym cs				-- Infix identifiers
694
695
  | nullFastString cs = False		-- 	e.g. "+"
  | otherwise         = startsVarSym (headFS cs)
696
697

-------------
698
699
700
701
702
703
704
startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
startsVarSym c = isSymbolASCII c || isSymbolISO c	-- Infix Ids
startsConSym c = c == ':'				-- Infix data constructors
startsVarId c  = isLower c || isLowerISO c || c == '_'	-- Ordinary Ids
startsConId c  = isUpper c || isUpperISO c || c == '('	-- Ordinary type constructors and data constructors


705
706
707
708
709
710
isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
isSymbolISO   c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
	--0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
	--0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
711
\end{code}
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
\begin{code}
{-* Generated by DrIFT-v1.0 : Look, but Don't Touch. *-}
instance Binary NameSpace where
    put_ bh VarName = do
	    putByte bh 0
    put_ bh DataName = do
	    putByte bh 1
    put_ bh TvName = do
	    putByte bh 2
    put_ bh TcClsName = do
	    putByte bh 3
    get bh = do
	    h <- getByte bh
	    case h of
	      0 -> do return VarName
	      1 -> do return DataName
	      2 -> do return TvName
	      _ -> do return TcClsName

instance Binary OccName where
    put_ bh (OccName aa ab) = do
	    put_ bh aa
	    put_ bh ab
    get bh = do
	  aa <- get bh
	  ab <- get bh
	  return (OccName aa ab)

--  Imported from other files :-

\end{code}