Commit 8e59ba46 authored by Simon Marlow

Add support for Data.Char.generalCategory to libcompat

This is so that the stage1 compiler has proper support for Unicode.
Should fix these errors:

  lexical error in string/character literal at character '\8759'

when building the stage2 compiler.
parent 080c9600
......@@ -47,6 +47,12 @@ import DATA_BITS
import Data.Char
import Ratio
--import TRACE
#if __GLASGOW_HASKELL__ >= 605
import Data.Char ( GeneralCategory(..), generalCategory )
#else
import Compat.Unicode ( GeneralCategory(..), generalCategory )
#endif
}
$unispace = \x05
......@@ -1182,9 +1188,6 @@ alexGetChar (AI loc ofs s)
other_graphic = '\x6'
adj_c
#if __GLASGOW_HASKELL__ < 605
= c -- no Unicode support
#else
| c <= '\x06' = non_graphic
| c <= '\xff' = c
| otherwise =
......@@ -1213,7 +1216,6 @@ alexGetChar (AI loc ofs s)
OtherSymbol -> symbol
Space -> space
_other -> non_graphic
#endif
-- This version does not squash unicode characters, it is used when
-- lexing strings.
......
{-# OPTIONS -cpp #-}
module Compat.Unicode (
GeneralCategory(..), generalCategory,
) where
#if __GLASGOW_HASKELL__ > 604
import Data.Char (GeneralCategory(..), generalCategory)
#else
import Foreign.C ( CInt )
import Data.Char ( ord )
-- | The Unicode general category of a character (column 2 of the
-- UnicodeData table).
--
-- Constructors are declared in the order the Unicode standard lists them.
-- The derived 'Enum' instance numbers them in declaration order, so the
-- order must not be changed.
data GeneralCategory
  = UppercaseLetter       -- ^ Lu: Letter, Uppercase
  | LowercaseLetter       -- ^ Ll: Letter, Lowercase
  | TitlecaseLetter       -- ^ Lt: Letter, Titlecase
  | ModifierLetter        -- ^ Lm: Letter, Modifier
  | OtherLetter           -- ^ Lo: Letter, Other
  | NonSpacingMark        -- ^ Mn: Mark, Non-Spacing
  | SpacingCombiningMark  -- ^ Mc: Mark, Spacing Combining
  | EnclosingMark         -- ^ Me: Mark, Enclosing
  | DecimalNumber         -- ^ Nd: Number, Decimal
  | LetterNumber          -- ^ Nl: Number, Letter
  | OtherNumber           -- ^ No: Number, Other
  | ConnectorPunctuation  -- ^ Pc: Punctuation, Connector
  | DashPunctuation       -- ^ Pd: Punctuation, Dash
  | OpenPunctuation       -- ^ Ps: Punctuation, Open
  | ClosePunctuation      -- ^ Pe: Punctuation, Close
  | InitialQuote          -- ^ Pi: Punctuation, Initial quote
  | FinalQuote            -- ^ Pf: Punctuation, Final quote
  | OtherPunctuation      -- ^ Po: Punctuation, Other
  | MathSymbol            -- ^ Sm: Symbol, Math
  | CurrencySymbol        -- ^ Sc: Symbol, Currency
  | ModifierSymbol        -- ^ Sk: Symbol, Modifier
  | OtherSymbol           -- ^ So: Symbol, Other
  | Space                 -- ^ Zs: Separator, Space
  | LineSeparator         -- ^ Zl: Separator, Line
  | ParagraphSeparator    -- ^ Zp: Separator, Paragraph
  | Control               -- ^ Cc: Other, Control
  | Format                -- ^ Cf: Other, Format
  | Surrogate             -- ^ Cs: Other, Surrogate
  | PrivateUse            -- ^ Co: Other, Private Use
  | NotAssigned           -- ^ Cn: Other, Not Assigned
  deriving (Show, Read, Eq, Ord, Enum, Bounded)
-- | Look up the Unicode general category of a character.
--
-- The character's code point is passed to the C routine @u_gencat@ and the
-- integer it returns is converted to a 'GeneralCategory' with 'toEnum'.
generalCategory :: Char -> GeneralCategory
generalCategory = toEnum . wgencat . fromIntegral . ord

-- Binding for the C function @u_gencat@, imported as a pure, unsafe call.
-- NOTE(review): the result is declared as Int while the argument is CInt;
-- confirm this matches the C prototype of u_gencat.
foreign import ccall unsafe "u_gencat"
  wgencat :: CInt -> Int
#endif
......@@ -68,6 +68,9 @@ Distribution/ParseUtils.$(way_)o : $(FPTOOLS_TOP)/libraries/Cabal/Distribution/
Distribution/Compiler.$(way_)o : $(FPTOOLS_TOP)/libraries/Cabal/Distribution/Compiler.hs
Distribution/Version.$(way_)o : $(FPTOOLS_TOP)/libraries/Cabal/Distribution/Version.hs
Language/Haskell/Extension.$(way_)o : $(FPTOOLS_TOP)/libraries/Cabal/Language/Haskell/Extension.hs
# cbits/unicode.o must be rebuilt whenever base's WCsubst.c or WCsubst.h changes.
cbits/unicode.o : $(FPTOOLS_TOP)/libraries/base/cbits/WCsubst.c $(FPTOOLS_TOP)/libraries/base/include/WCsubst.h
# Put base's cbits/ and include/ directories on the C include search path.
SRC_CC_OPTS += -I$(FPTOOLS_TOP)/libraries/base/cbits -I$(FPTOOLS_TOP)/libraries/base/include
# Make the #includes in the stubs independent of the current location
SRC_HC_OPTS += -I$(FPTOOLS_TOP)/libraries
......
/* Textually include WCsubst.c when __GLASGOW_HASKELL__ is below 604.
 * NOTE(review): Compat.Unicode imports "u_gencat" through the FFI whenever
 * __GLASGOW_HASKELL__ <= 604, so at exactly 604 the symbol has to come from
 * somewhere other than this include -- presumably the installed base
 * library; confirm. */
#if __GLASGOW_HASKELL__ < 604
#include "WCsubst.c"
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment