CodePage.hs 6.08 KB
Newer Older
1
{-# LANGUAGE Trustworthy #-}
2
{-# LANGUAGE CPP, BangPatterns, NoImplicitPrelude,
3
             NondecreasingIndentation, MagicHash #-}
dterei's avatar
dterei committed
4

5
module GHC.IO.Encoding.CodePage(
ian@well-typed.com's avatar
ian@well-typed.com committed
6
#if defined(mingw32_HOST_OS)
7
8
                        codePageEncoding, mkCodePageEncoding,
                        localeEncoding, mkLocaleEncoding
ian@well-typed.com's avatar
ian@well-typed.com committed
9
#endif
10
11
                            ) where

ian@well-typed.com's avatar
ian@well-typed.com committed
12
13
14
#if !defined(mingw32_HOST_OS)
import GHC.Base () -- Build ordering
#else
15
import GHC.Base
16
import GHC.Show
17
18
19
20
import GHC.Num
import GHC.Enum
import GHC.Word
import GHC.IO (unsafePerformIO)
21
import GHC.IO.Encoding.Failure
22
23
24
25
import GHC.IO.Encoding.Types
import GHC.IO.Buffer
import Data.Bits
import Data.Maybe
26
import Data.OldList (lookup)
27

28
import qualified GHC.IO.Encoding.CodePage.API as API
29
30
import GHC.IO.Encoding.CodePage.Table

31
32
33
import GHC.IO.Encoding.UTF8 (mkUTF8)
import GHC.IO.Encoding.UTF16 (mkUTF16le, mkUTF16be)
import GHC.IO.Encoding.UTF32 (mkUTF32le, mkUTF32be)
34

35
36
37
38
39
40
41
42
43
44
#ifdef mingw32_HOST_OS
# if defined(i386_HOST_ARCH)
#  define WINDOWS_CCONV stdcall
# elif defined(x86_64_HOST_ARCH)
#  define WINDOWS_CCONV ccall
# else
#  error Unknown mingw32 arch
# endif
#endif

45
46
47
48
49
50
51
52
53
54
-- note CodePage = UInt which might not work on Win64.  But the Win32 package
-- also has this issue.
getCurrentCodePage :: IO Word32
getCurrentCodePage = do
    conCP <- getConsoleCP
    if conCP > 0
        then return conCP
        else getACP

-- Since the Win32 package depends on base, we have to import these ourselves:
55
foreign import WINDOWS_CCONV unsafe "windows.h GetConsoleCP"
56
57
    getConsoleCP :: IO Word32

58
foreign import WINDOWS_CCONV unsafe "windows.h GetACP"
59
60
    getACP :: IO Word32

61
62
63
64
{-# NOINLINE currentCodePage #-}
currentCodePage :: Word32
currentCodePage = unsafePerformIO getCurrentCodePage

65
localeEncoding :: TextEncoding
66
67
68
69
70
localeEncoding = mkLocaleEncoding ErrorOnCodingFailure

mkLocaleEncoding :: CodingFailureMode -> TextEncoding
mkLocaleEncoding cfm = mkCodePageEncoding cfm currentCodePage

71
72

codePageEncoding :: Word32 -> TextEncoding
73
74
75
76
77
78
79
80
codePageEncoding = mkCodePageEncoding ErrorOnCodingFailure

mkCodePageEncoding :: CodingFailureMode -> Word32 -> TextEncoding
mkCodePageEncoding cfm 65001 = mkUTF8 cfm
mkCodePageEncoding cfm 1200 = mkUTF16le cfm
mkCodePageEncoding cfm 1201 = mkUTF16be cfm
mkCodePageEncoding cfm 12000 = mkUTF32le cfm
mkCodePageEncoding cfm 12001 = mkUTF32be cfm
81
mkCodePageEncoding cfm cp = maybe (API.mkCodePageEncoding cfm cp) (buildEncoding cfm cp) (lookup cp codePageMap)
82
83
84

buildEncoding :: CodingFailureMode -> Word32 -> CodePageArrays -> TextEncoding
buildEncoding cfm cp SingleByteCP {decoderArray = dec, encoderArray = enc}
85
  = TextEncoding {
86
87
88
      textEncodingName = "CP" ++ show cp
    , mkTextDecoder = return $ simpleCodec (recoverDecode cfm) $ decodeFromSingleByte dec
    , mkTextEncoder = return $ simpleCodec (recoverEncode cfm) $ encodeToSingleByte enc
89
90
91
    }

simpleCodec :: (Buffer from -> Buffer to -> IO (Buffer from, Buffer to))
92
            -> (Buffer from -> Buffer to -> IO (CodingProgress, Buffer from, Buffer to))
93
                -> BufferCodec from to ()
94
95
96
97
98
99
100
simpleCodec r f = BufferCodec {
    encode = f,
    recover = r,
    close = return (),
    getState = return (),
    setState = return
  }
101
102
103
104
105
106

decodeFromSingleByte :: ConvArray Char -> DecodeBuffer
decodeFromSingleByte convArr
    input@Buffer  { bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
    output@Buffer { bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
  = let
107
108
109
110
        done why !ir !ow = return (why,
                                   if ir==iw then input{ bufL=0, bufR=0}
                                             else input{ bufL=ir},
                                   output {bufR=ow})
111
        loop !ir !ow
112
113
            | ow >= os  = done OutputUnderflow ir ow
            | ir >= iw  = done InputUnderflow ir ow
114
115
116
117
118
119
120
            | otherwise = do
                b <- readWord8Buf iraw ir
                let c = lookupConv convArr b
                if c=='\0' && b /= 0 then invalid else do
                ow' <- writeCharBuf oraw ow c
                loop (ir+1) ow'
          where
121
            invalid = done InvalidSequence ir ow
122
123
124
125
126
127
128
129
130
    in loop ir0 ow0

encodeToSingleByte :: CompactArray Char Word8 -> EncodeBuffer
encodeToSingleByte CompactArray { encoderMax = maxChar,
                         encoderIndices = indices,
                         encoderValues = values }
    input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
    output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
  = let
131
132
133
134
        done why !ir !ow = return (why,
                                   if ir==iw then input { bufL=0, bufR=0 }
                                             else input { bufL=ir },
                                   output {bufR=ow})
135
        loop !ir !ow
136
137
            | ow >= os  = done OutputUnderflow ir ow
            | ir >= iw  = done InputUnderflow ir ow
138
139
140
141
142
143
144
145
146
            | otherwise = do
                (c,ir') <- readCharBuf iraw ir
                case lookupCompact maxChar indices values c of
                    Nothing -> invalid
                    Just 0 | c /= '\0' -> invalid
                    Just b -> do
                        writeWord8Buf oraw ow b
                        loop ir' (ow+1)
            where
147
                invalid = done InvalidSequence ir ow
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
    in
    loop ir0 ow0


--------------------------------------------
-- Array access functions

-- {-# INLINE lookupConv #-}
lookupConv :: ConvArray Char -> Word8 -> Char
lookupConv a = indexChar a . fromEnum

{-# INLINE lookupCompact #-}
lookupCompact :: Char -> ConvArray Int -> ConvArray Word8 -> Char -> Maybe Word8
lookupCompact maxVal indexes values x
    | x > maxVal = Nothing
    | otherwise = Just $ indexWord8 values $ j + (i .&. mask)
  where
    i = fromEnum x
    mask = (1 `shiftL` n) - 1
    k = i `shiftR` n
    j = indexInt indexes k
    n = blockBitSize

{-# INLINE indexInt #-}
indexInt :: ConvArray Int -> Int -> Int
indexInt (ConvArray p) (I# i) = I# (indexInt16OffAddr# p i)

{-# INLINE indexWord8 #-}
indexWord8 :: ConvArray Word8 -> Int -> Word8
indexWord8 (ConvArray p) (I# i) = W8# (indexWord8OffAddr# p i)

{-# INLINE indexChar #-}
indexChar :: ConvArray Char -> Int -> Char
indexChar (ConvArray p) (I# i) = C# (chr# (indexInt16OffAddr# p i))

#endif
dterei's avatar
dterei committed
184