Commit 61bc7af5 authored by Ian Lynagh

Remove special handling for character types of characters >= 128, <= 255

Many of the character types were wrong. Now the asc* names really do mean
ASCII, rather than latin-1.
parent e761a777
......@@ -219,133 +219,5 @@ charType c = case c of
'\125' -> cAny -- }
'\126' -> cAny + cSymbol -- ~
'\127' -> 0 -- \177
'\128' -> 0 -- \200
'\129' -> 0 -- \201
'\130' -> 0 -- \202
'\131' -> 0 -- \203
'\132' -> 0 -- \204
'\133' -> 0 -- \205
'\134' -> 0 -- \206
'\135' -> 0 -- \207
'\136' -> 0 -- \210
'\137' -> 0 -- \211
'\138' -> 0 -- \212
'\139' -> 0 -- \213
'\140' -> 0 -- \214
'\141' -> 0 -- \215
'\142' -> 0 -- \216
'\143' -> 0 -- \217
'\144' -> 0 -- \220
'\145' -> 0 -- \221
'\146' -> 0 -- \222
'\147' -> 0 -- \223
'\148' -> 0 -- \224
'\149' -> 0 -- \225
'\150' -> 0 -- \226
'\151' -> 0 -- \227
'\152' -> 0 -- \230
'\153' -> 0 -- \231
'\154' -> 0 -- \232
'\155' -> 0 -- \233
'\156' -> 0 -- \234
'\157' -> 0 -- \235
'\158' -> 0 -- \236
'\159' -> 0 -- \237
'\160' -> cSpace --
'\161' -> cAny + cSymbol -- ¡
'\162' -> cAny + cSymbol -- ¢
'\163' -> cAny + cSymbol -- £
'\164' -> cAny + cSymbol -- ¤
'\165' -> cAny + cSymbol -- ¥
'\166' -> cAny + cSymbol -- ¦
'\167' -> cAny + cSymbol -- §
'\168' -> cAny + cSymbol -- ¨
'\169' -> cAny + cSymbol -- ©
'\170' -> cAny + cSymbol -- ª
'\171' -> cAny + cSymbol -- «
'\172' -> cAny + cSymbol -- ¬
'\173' -> cAny + cSymbol -- ­
'\174' -> cAny + cSymbol -- ®
'\175' -> cAny + cSymbol -- ¯
'\176' -> cAny + cSymbol -- °
'\177' -> cAny + cSymbol -- ±
'\178' -> cAny + cSymbol -- ²
'\179' -> cAny + cSymbol -- ³
'\180' -> cAny + cSymbol -- ´
'\181' -> cAny + cSymbol -- µ
'\182' -> cAny + cSymbol -- ¶
'\183' -> cAny + cSymbol -- ·
'\184' -> cAny + cSymbol -- ¸
'\185' -> cAny + cSymbol -- ¹
'\186' -> cAny + cSymbol -- º
'\187' -> cAny + cSymbol -- »
'\188' -> cAny + cSymbol -- ¼
'\189' -> cAny + cSymbol -- ½
'\190' -> cAny + cSymbol -- ¾
'\191' -> cAny + cSymbol -- ¿
'\192' -> cAny + cIdent + cUpper -- À
'\193' -> cAny + cIdent + cUpper -- Á
'\194' -> cAny + cIdent + cUpper -- Â
'\195' -> cAny + cIdent + cUpper -- Ã
'\196' -> cAny + cIdent + cUpper -- Ä
'\197' -> cAny + cIdent + cUpper -- Å
'\198' -> cAny + cIdent + cUpper -- Æ
'\199' -> cAny + cIdent + cUpper -- Ç
'\200' -> cAny + cIdent + cUpper -- È
'\201' -> cAny + cIdent + cUpper -- É
'\202' -> cAny + cIdent + cUpper -- Ê
'\203' -> cAny + cIdent + cUpper -- Ë
'\204' -> cAny + cIdent + cUpper -- Ì
'\205' -> cAny + cIdent + cUpper -- Í
'\206' -> cAny + cIdent + cUpper -- Î
'\207' -> cAny + cIdent + cUpper -- Ï
'\208' -> cAny + cIdent + cUpper -- Ð
'\209' -> cAny + cIdent + cUpper -- Ñ
'\210' -> cAny + cIdent + cUpper -- Ò
'\211' -> cAny + cIdent + cUpper -- Ó
'\212' -> cAny + cIdent + cUpper -- Ô
'\213' -> cAny + cIdent + cUpper -- Õ
'\214' -> cAny + cIdent + cUpper -- Ö
'\215' -> cAny + cSymbol + cLower -- ×
'\216' -> cAny + cIdent + cUpper -- Ø
'\217' -> cAny + cIdent + cUpper -- Ù
'\218' -> cAny + cIdent + cUpper -- Ú
'\219' -> cAny + cIdent + cUpper -- Û
'\220' -> cAny + cIdent + cUpper -- Ü
'\221' -> cAny + cIdent + cUpper -- Ý
'\222' -> cAny + cIdent + cUpper -- Þ
'\223' -> cAny + cIdent -- ß
'\224' -> cAny + cIdent + cLower -- à
'\225' -> cAny + cIdent + cLower -- á
'\226' -> cAny + cIdent + cLower -- â
'\227' -> cAny + cIdent + cLower -- ã
'\228' -> cAny + cIdent + cLower -- ä
'\229' -> cAny + cIdent + cLower -- å
'\230' -> cAny + cIdent + cLower -- æ
'\231' -> cAny + cIdent + cLower -- ç
'\232' -> cAny + cIdent + cLower -- è
'\233' -> cAny + cIdent + cLower -- é
'\234' -> cAny + cIdent + cLower -- ê
'\235' -> cAny + cIdent + cLower -- ë
'\236' -> cAny + cIdent + cLower -- ì
'\237' -> cAny + cIdent + cLower -- í
'\238' -> cAny + cIdent + cLower -- î
'\239' -> cAny + cIdent + cLower -- ï
'\240' -> cAny + cIdent + cLower -- ð
'\241' -> cAny + cIdent + cLower -- ñ
'\242' -> cAny + cIdent + cLower -- ò
'\243' -> cAny + cIdent + cLower -- ó
'\244' -> cAny + cIdent + cLower -- ô
'\245' -> cAny + cIdent + cLower -- õ
'\246' -> cAny + cIdent + cLower -- ö
'\247' -> cAny + cSymbol -- ÷
'\248' -> cAny + cIdent -- ø
'\249' -> cAny + cIdent + cLower -- ù
'\250' -> cAny + cIdent + cLower -- ú
'\251' -> cAny + cIdent + cLower -- û
'\252' -> cAny + cIdent + cLower -- ü
'\253' -> cAny + cIdent + cLower -- ý
'\254' -> cAny + cIdent + cLower -- þ
'\255' -> cAny + cIdent + cLower -- ÿ
_ -> panic ("charType: " ++ show c)
\end{code}
......@@ -68,7 +68,7 @@ import Compat.Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper )
}
$unispace = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
$whitechar = [\ \n\r\f\v\xa0 $unispace]
$whitechar = [\ \n\r\f\v $unispace]
$white_no_nl = $whitechar # \n
$tab = \t
......@@ -78,16 +78,16 @@ $decdigit = $ascdigit -- for now, should really be $digit (ToDo)
$digit = [$ascdigit $unidigit]
$special = [\(\)\,\;\[\]\`\{\}]
$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~ \xa1-\xbf \xd7 \xf7]
$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
$unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\']
$unilarge = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
$asclarge = [A-Z \xc0-\xd6 \xd8-\xde]
$asclarge = [A-Z]
$large = [$asclarge $unilarge]
$unismall = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
$ascsmall = [a-z \xdf-\xf6 \xf8-\xff]
$ascsmall = [a-z]
$small = [$ascsmall $unismall \_]
$unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
......@@ -1218,7 +1218,7 @@ lex_char c inp = do
c | isAny c -> do setInput inp; return c
_other -> lit_error
isAny c | c > '\xff' = isPrint c
isAny c | c > '\x7f' = isPrint c
| otherwise = is_any c
lex_escape :: P Char
......@@ -1486,7 +1486,7 @@ alexGetChar (AI loc ofs s)
adj_c
| c <= '\x06' = non_graphic
| c <= '\xff' = c
| c <= '\x7f' = c
-- Alex doesn't handle Unicode, so when a Unicode
-- character is encountered we output these values
-- with the actual character value hidden in the state.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment