Encoding.hs 18.6 KB
Newer Older
bos's avatar
bos committed
1
{-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface, GeneralizedNewtypeDeriving, MagicHash,
2
    UnliftedFFITypes #-}
3
4
5
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
6
7
-- |
-- Module      : Data.Text.Encoding
bos's avatar
bos committed
8
9
10
-- Copyright   : (c) 2009, 2010, 2011 Bryan O'Sullivan,
--               (c) 2009 Duncan Coutts,
--               (c) 2008, 2009 Tom Harper
11
12
--
-- License     : BSD-style
bos's avatar
bos committed
13
-- Maintainer  : bos@serpentine.com
14
15
16
17
-- Stability   : experimental
-- Portability : portable
--
-- Functions for converting 'Text' values to and from 'ByteString',
bos's avatar
bos committed
18
19
-- using several standard encodings.
--
bos's avatar
bos committed
20
21
-- To gain access to a much larger family of encodings, use the
-- @text-icu@ package: <http://hackage.haskell.org/package/text-icu>
22
23
24
25

module Data.Text.Encoding
    (
    -- * Decoding ByteStrings to Text
bos's avatar
bos committed
26
    -- $strict
27
      decodeASCII
28
    , decodeLatin1
29
30
31
32
33
    , decodeUtf8
    , decodeUtf16LE
    , decodeUtf16BE
    , decodeUtf32LE
    , decodeUtf32BE
bos's avatar
bos committed
34

bos's avatar
bos committed
35
36
37
    -- ** Catchable failure
    , decodeUtf8'

38
39
40
41
42
43
    -- ** Controllable error handling
    , decodeUtf8With
    , decodeUtf16LEWith
    , decodeUtf16BEWith
    , decodeUtf32LEWith
    , decodeUtf32BEWith
44

bos's avatar
bos committed
45
46
47
48
49
    -- ** Stream oriented decoding
    -- $stream
    , streamDecodeUtf8
    , streamDecodeUtf8With
    , Decoding(..)
50

51
52
53
54
55
56
    -- * Encoding Text to ByteStrings
    , encodeUtf8
    , encodeUtf16LE
    , encodeUtf16BE
    , encodeUtf32LE
    , encodeUtf32BE
57

bos's avatar
bos committed
58
#if MIN_VERSION_bytestring(0,10,4)
59
60
61
62
63
    -- * Encoding Text using ByteString Builders
    -- | /Note/ that these functions are only available if built against
    -- @bytestring >= 0.10.4.0@.
    , encodeUtf8Builder
    , encodeUtf8BuilderEscaped
bos's avatar
bos committed
64
#endif
65
    ) where
bos's avatar
bos committed
66

bos's avatar
bos committed
67
68
69
#if __GLASGOW_HASKELL__ >= 702
import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO)
#else
70
import Control.Monad.ST (unsafeIOToST, unsafeSTToIO)
bos's avatar
bos committed
71
#endif
72

bos's avatar
bos committed
73
#if MIN_VERSION_bytestring(0,10,4)
74
75
import Data.Bits ((.&.))
import Data.Text.Internal.Unsafe.Char (ord)
bos's avatar
bos committed
76
import qualified Data.ByteString.Builder as B
bos's avatar
bos committed
77
import qualified Data.ByteString.Builder.Internal as B hiding (empty, append)
bos's avatar
bos committed
78
import qualified Data.ByteString.Builder.Prim as BP
79
import qualified Data.ByteString.Builder.Prim.Internal as BP
80
import qualified Data.Text.Internal.Encoding.Utf16 as U16
bos's avatar
bos committed
81
#endif
82

bos's avatar
bos committed
83
84
85
86
import Control.Exception (ErrorCall(..), evaluate, throwIO, try)
import Control.Monad.ST (runST)
import Data.ByteString as B
import Data.ByteString.Internal as B hiding (c2w)
import Data.Text ()
import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
import Data.Text.Internal (Text(..), safe, text)
import Data.Text.Internal.Private (runText)
import Data.Text.Internal.Unsafe.Char (unsafeWrite)
import Data.Text.Internal.Unsafe.Shift (shiftR)
import Data.Text.Unsafe (unsafeDupablePerformIO)
import Data.Word (Word8, Word32)
import Foreign.C.Types (CSize(..))
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Marshal.Utils (with)
import Foreign.Ptr (Ptr, minusPtr, nullPtr, plusPtr)
import Foreign.Storable (Storable, peek, poke)
import GHC.Base (ByteArray#, MutableByteArray#)
import qualified Data.Text.Array as A
import qualified Data.Text.Internal.Encoding.Fusion as E
import qualified Data.Text.Internal.Fusion as F
104

bos's avatar
bos committed
105
106
#include "text_cbits.h"

bos's avatar
bos committed
107
108
109
110
111
112
113
114
115
116
117
118
119
-- $strict
--
-- All of the single-parameter functions for decoding bytestrings
-- encoded in one of the Unicode Transformation Formats (UTF) operate
-- in a /strict/ mode: each will throw an exception if given invalid
-- input.
--
-- Each function has a variant, whose name is suffixed with -'With',
-- that gives greater control over the handling of decoding errors.
-- For instance, 'decodeUtf8' will throw an exception, but
-- 'decodeUtf8With' allows the programmer to determine what to do on a
-- decoding error.

120
121
-- | /Deprecated/.  Decode a 'ByteString' containing 7-bit ASCII
-- encoded text.
--
-- This is defined as 'decodeUtf8': the input is decoded as UTF-8 and
-- no separate check that it is pure 7-bit ASCII is performed.
decodeASCII :: ByteString -> Text
decodeASCII = decodeUtf8
{-# DEPRECATED decodeASCII "Use decodeUtf8 instead" #-}
125

126
127
128
129
130
-- | Decode a 'ByteString' containing Latin-1 (aka ISO-8859-1) encoded text.
--
-- 'decodeLatin1' is semantically equivalent to
--  @Data.Text.pack . Data.ByteString.Char8.unpack@
decodeLatin1 :: ByteString -> Text
decodeLatin1 (PS fp off len) = text a 0 len
 where
  -- The result has exactly as many array elements as the input has
  -- bytes, so the destination can be allocated up front.
  a = A.run (A.new len >>= unsafeIOToST . go)
  -- Let the C routine fill @dest@ from the byte range
  -- [ptr + off, ptr + off + len) of the input buffer.
  go dest = withForeignPtr fp $ \ptr -> do
    c_decode_latin1 (A.maBA dest) (ptr `plusPtr` off) (ptr `plusPtr` (off+len))
    return dest

bos's avatar
bos committed
138
-- | Decode a 'ByteString' containing UTF-8 encoded text.
--
-- Whenever a byte cannot be decoded, the 'OnDecodeError' handler is
-- called with that byte.  If it returns 'Nothing' the byte is skipped;
-- if it returns @'Just' c@ then @c@ is written in its place.
--
-- /NOTE/: a replacement character must lie within the Basic
-- Multilingual Plane (at most @U+FFFF@).  The output buffer provides
-- one 16-bit slot per input byte, and a character beyond the BMP needs
-- two, so such a replacement raises an 'ErrorCall' instead of writing
-- past the end of the buffer.
decodeUtf8With :: OnDecodeError -> ByteString -> Text
decodeUtf8With onErr (PS fp off len) = runText $ \done -> do
  let go dest = withForeignPtr fp $ \ptr ->
        with (0::CSize) $ \destOffPtr -> do
          let end = ptr `plusPtr` (off + len)
              loop curPtr = do
                curPtr' <- c_decode_utf8 (A.maBA dest) destOffPtr curPtr end
                if curPtr' == end
                  then do
                    -- All input consumed: finish with the number of
                    -- array elements written so far.
                    n <- peek destOffPtr
                    unsafeSTToIO (done dest (fromIntegral n))
                  else do
                    -- Decoding stopped early: hand the offending byte
                    -- to the handler, then resume after it.
                    x <- peek curPtr'
                    case onErr desc (Just x) of
                      Nothing -> loop $ curPtr' `plusPtr` 1
                      Just c
                        -- Two slots for one input byte could overflow
                        -- the @len@-element destination.
                        | c > '\xFFFF' -> throwUnsupportedReplChar
                        | otherwise -> do
                            destOff <- peek destOffPtr
                            w <- unsafeSTToIO $
                                 unsafeWrite dest (fromIntegral destOff) (safe c)
                            poke destOffPtr (destOff + fromIntegral w)
                            loop $ curPtr' `plusPtr` 1
          loop (ptr `plusPtr` off)
  (unsafeIOToST . go) =<< A.new len
 where
  desc = "Data.Text.Internal.Encoding.decodeUtf8: Invalid UTF-8 stream"

  throwUnsupportedReplChar = throwIO $
    ErrorCall "decodeUtf8With: non-BMP replacement characters not supported"
-- NOTE(review): the next line opens with "{-" rather than "{-#", so it
-- is an ordinary block comment and no INLINE pragma is in effect for
-- this function.  Confirm whether that is intentional before changing it.
{- INLINE[0] decodeUtf8With #-}
bos's avatar
bos committed
165

bos's avatar
bos committed
166
167
168
169
170
171
-- $stream
--
-- The 'streamDecodeUtf8' and 'streamDecodeUtf8With' functions accept
-- a 'ByteString' that represents a possibly incomplete input (e.g. a
-- packet from a network stream) that may not end on a UTF-8 boundary.
--
-- Each function returns a 'Decoding' value, which has three components:
--
-- 1. The maximal prefix of 'Text' that could be decoded from the
--    given input.
--
-- 2. The suffix of the 'ByteString' that could not be decoded due to
--    insufficient input.
--
-- 3. A function that accepts another 'ByteString'.  That string will
--    be assumed to directly follow the string that was passed as
--    input to the original function, and it will in turn be decoded.
bos's avatar
bos committed
181
182
183
184
185
186
187
188
189
190
191
192
--
-- To help understand the use of these functions, consider the Unicode
-- string @\"hi &#9731;\"@. If encoded as UTF-8, this becomes @\"hi
-- \\xe2\\x98\\x83\"@; the final @\'&#9731;\'@ is encoded as 3 bytes.
--
-- Now suppose that we receive this encoded string as 3 packets that
-- are split up on untidy boundaries: @[\"hi \\xe2\", \"\\x98\",
-- \"\\x83\"]@. We cannot decode the entire Unicode string until we
-- have received all three packets, but we would like to make progress
-- as we receive each one.
--
-- @
193
194
195
-- ghci> let s0\@('Some' _ _ f0) = 'streamDecodeUtf8' \"hi \\xe2\"
-- ghci> s0
-- 'Some' \"hi \" \"\\xe2\" _
bos's avatar
bos committed
196
197
198
199
200
-- @
--
-- We use the continuation @f0@ to decode our second packet.
--
-- @
201
202
203
-- ghci> let s1\@('Some' _ _ f1) = f0 \"\\x98\"
-- ghci> s1
-- 'Some' \"\" \"\\xe2\\x98\" _
bos's avatar
bos committed
204
205
206
207
208
209
210
-- @
--
-- We could not give @f0@ enough input to decode anything, so it
-- returned an empty string. Once we feed our second continuation @f1@
-- the last byte of input, it will make progress.
--
-- @
211
212
213
-- ghci> let s2\@('Some' _ _ f2) = f1 \"\\x83\"
-- ghci> s2
-- 'Some' \"\\x2603\" \"\" _
bos's avatar
bos committed
214
215
216
217
218
219
-- @
--
-- If given invalid input, an exception will be thrown by the function
-- or continuation where it is encountered.

-- | A stream oriented decoding result.
--
-- The fields of 'Some' are, in order: the text that could be decoded
-- from the input seen so far, the trailing bytes that could not yet be
-- decoded, and a continuation to apply to the next chunk of input.
data Decoding = Some Text ByteString (ByteString -> Decoding)
bos's avatar
bos committed
221
222

-- | The continuation cannot be shown, so it is rendered as @_@ after
-- the decoded text and the undecoded bytes.
instance Show Decoding where
    showsPrec d (Some t bs _) = showParen (d > prec) $
                                showString "Some " . showsPrec prec' t .
                                showChar ' ' . showsPrec prec' bs .
                                showString " _"
      -- 10 is the precedence of function application; the arguments
      -- are shown one level tighter.
      where prec = 10; prec' = prec + 1
228

bos's avatar
bos committed
229
230
-- The code point accumulator carried between chunks by
-- 'streamDecodeUtf8With'.  'Storable' lets it be passed to the C
-- decoder through a pointer; 'Num' allows the literal @0@ as the
-- initial value.
newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable)
-- The decoder state carried between chunks by 'streamDecodeUtf8With';
-- it is compared against the @UTF8_ACCEPT@ and @UTF8_REJECT@ constants.
-- 'Storable' and 'Num' serve the same purposes as for 'CodePoint'.
newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable)
231

bos's avatar
bos committed
232
233
234
235
236
237
238
239
240
241
242
243
244
-- | Stream oriented decoding of a 'ByteString' holding UTF-8 encoded
-- text that is known to be valid.
--
-- Should the input turn out to contain invalid UTF-8 data, an exception
-- that cannot be caught in pure code is thrown, either by this function
-- or by one of the continuations it returns.  Use
-- 'streamDecodeUtf8With' for more control over how invalid data is
-- handled.
streamDecodeUtf8 :: ByteString -> Decoding
streamDecodeUtf8 chunk = streamDecodeUtf8With strictDecode chunk

-- | Decode, in a stream oriented way, a 'ByteString' containing UTF-8
-- encoded text.
--
-- Whenever a byte cannot be decoded, the 'OnDecodeError' handler is
-- called with that byte.  If it returns 'Nothing' the byte is skipped;
-- if it returns @'Just' c@ then @c@ is written in its place.
--
-- /NOTE/: a replacement character must lie within the Basic
-- Multilingual Plane (at most @U+FFFF@).  The per-chunk output buffer
-- is sized from the number of input bytes, and a character beyond the
-- BMP needs two 16-bit slots, so such a replacement raises an
-- 'ErrorCall' instead of writing past the end of the buffer.
streamDecodeUtf8With :: OnDecodeError -> ByteString -> Decoding
streamDecodeUtf8With onErr = decodeChunk B.empty 0 0
 where
  -- We create a slightly larger than necessary buffer to accommodate a
  -- potential surrogate pair started in the last buffer
  decodeChunk :: ByteString -> CodePoint -> DecoderState -> ByteString
              -> Decoding
  decodeChunk undecoded0 codepoint0 state0 bs@(PS fp off len) =
    runST $ (unsafeIOToST . decodeChunkToBuffer) =<< A.new (len+1)
   where
    decodeChunkToBuffer :: A.MArray s -> IO Decoding
    decodeChunkToBuffer dest = withForeignPtr fp $ \ptr ->
      with (0::CSize) $ \destOffPtr ->
      with codepoint0 $ \codepointPtr ->
      with state0 $ \statePtr ->
      with nullPtr $ \curPtrPtr ->
        let end = ptr `plusPtr` (off + len)
            loop curPtr = do
              poke curPtrPtr curPtr
              curPtr' <- c_decode_utf8_with_state (A.maBA dest) destOffPtr
                         curPtrPtr end codepointPtr statePtr
              state <- peek statePtr
              case state of
                UTF8_REJECT -> do
                  -- We encountered an encoding error
                  x <- peek curPtr'
                  poke statePtr 0
                  case onErr desc (Just x) of
                    Nothing -> loop $ curPtr' `plusPtr` 1
                    Just c
                      -- Two slots for one input byte could overflow
                      -- the (len+1)-element destination.
                      | c > '\xFFFF' -> throwUnsupportedReplChar
                      | otherwise -> do
                          destOff <- peek destOffPtr
                          w <- unsafeSTToIO $
                               unsafeWrite dest (fromIntegral destOff) (safe c)
                          poke destOffPtr (destOff + fromIntegral w)
                          loop $ curPtr' `plusPtr` 1

                _ -> do
                  -- We encountered the end of the buffer while decoding
                  n <- peek destOffPtr
                  codepoint <- peek codepointPtr
                  chunkText <- unsafeSTToIO $ do
                      arr <- A.unsafeFreeze dest
                      return $! text arr 0 (fromIntegral n)
                  lastPtr <- peek curPtrPtr
                  -- NOTE(review): @left@ is measured from this
                  -- iteration's @curPtr@ but is then dropped from the
                  -- start of the whole chunk @bs@, and @undecoded0@ is
                  -- always prepended.  After an error recovery within
                  -- the same chunk @curPtr@ is no longer the start of
                  -- @bs@ -- confirm the reported undecoded bytes are
                  -- right in that case.
                  let left = lastPtr `minusPtr` curPtr
                      !undecoded = case state of
                        UTF8_ACCEPT -> B.empty
                        _           -> B.append undecoded0 (B.drop left bs)
                  return $ Some chunkText undecoded
                           (decodeChunk undecoded codepoint state)
        in loop (ptr `plusPtr` off)
  desc = "Data.Text.Internal.Encoding.streamDecodeUtf8With: Invalid UTF-8 stream"

  throwUnsupportedReplChar = throwIO $
    ErrorCall "streamDecodeUtf8With: non-BMP replacement characters not supported"
296

bos's avatar
bos committed
297
298
-- | Decode a 'ByteString' containing UTF-8 encoded text that is known
-- to be valid.
--
-- If the input contains any invalid UTF-8 data, an exception will be
-- thrown that cannot be caught in pure code.  For more control over
-- the handling of invalid data, use 'decodeUtf8'' or
-- 'decodeUtf8With'.
decodeUtf8 :: ByteString -> Text
decodeUtf8 = decodeUtf8With strictDecode
{-# INLINE[0] decodeUtf8 #-}
-- The rewrite rule below is active from phase 1, while inlining of
-- 'decodeUtf8' is held back until phase 0.
{-# RULES "STREAM stream/decodeUtf8 fusion" [1]
    forall bs. F.stream (decodeUtf8 bs) = E.streamUtf8 strictDecode bs #-}
bos's avatar
bos committed
309

bos's avatar
bos committed
310
-- | Decode a 'ByteString' containing UTF-8 encoded text.
--
-- If the input contains any invalid UTF-8 data, the relevant
-- exception will be returned, otherwise the decoded text.
decodeUtf8' :: ByteString -> Either UnicodeException Text
-- The strict decode is forced with 'evaluate' so that a decoding
-- exception is raised inside 'try' and turned into a 'Left'.
decodeUtf8' = unsafeDupablePerformIO . try . evaluate . decodeUtf8With strictDecode
{-# INLINE decodeUtf8' #-}

bos's avatar
bos committed
318
319
#if MIN_VERSION_bytestring(0,10,4)

320
321
322
-- | Produce a ByteString 'B.Builder' holding the UTF-8 encoding of the
-- given text.
encodeUtf8Builder :: Text -> B.Builder
encodeUtf8Builder = encodeUtf8BuilderEscaped asciiPrim
  where
    -- Primitive used for the ASCII range: a plain 'BP.word8' lifted to
    -- a bounded primitive.
    asciiPrim = BP.liftFixedToBounded BP.word8
323

bos's avatar
bos committed
324
-- | Encode text using UTF-8 encoding and escape the ASCII characters using
-- a 'BP.BoundedPrim'.
--
-- Use this function to implement efficient encoders for text-based formats
-- like JSON or HTML.
{-# INLINE encodeUtf8BuilderEscaped #-}
-- TODO: Extend documentation with references to source code in @blaze-html@
-- or @aeson@ that uses this function.
encodeUtf8BuilderEscaped :: BP.BoundedPrim Word8 -> Text -> B.Builder
encodeUtf8BuilderEscaped be =
    -- manual eta-expansion to ensure inlining works as expected
    \txt -> B.builder (mkBuildstep txt)
  where
    -- Output bytes reserved per step of the inner loop: a non-ASCII
    -- step writes at most 4 bytes, an ASCII step at most what @be@
    -- declares.
    bound = max 4 $ BP.sizeBound be

    mkBuildstep (Text arr off len) !k =
        outerLoop off
      where
        iend = off + len

        outerLoop !i0 !br@(B.BufferRange op0 ope)
          | i0 >= iend       = k br
          | outRemaining > 0 = goPartial (i0 + min outRemaining inpRemaining)
          -- TODO: Use a loop with an integrated bound's check if outRemaining
          -- is smaller than 8, as this will save on divisions.
          | otherwise        = return $ B.bufferFull bound op0 (outerLoop i0)
          where
            -- Number of @bound@-sized slots left in the output buffer.
            outRemaining = (ope `minusPtr` op0) `div` bound
            inpRemaining = iend - i0

            -- Encode input up to index @iendTmp@ without any further
            -- output bounds checks, then return to 'outerLoop'.
            goPartial !iendTmp = go i0 op0
              where
                go !i !op
                  | i < iendTmp = case A.unsafeIndex arr i of
                      -- ASCII: delegate to the caller-supplied primitive.
                      w | w <= 0x7F -> do
                            BP.runB be (fromIntegral w) op >>= go (i + 1)
                        -- Two-byte UTF-8 sequence.
                        | w <= 0x7FF -> do
                            poke8 0 $ (w `shiftR` 6) + 0xC0
                            poke8 1 $ (w .&. 0x3f) + 0x80
                            go (i + 1) (op `plusPtr` 2)
                        -- High surrogate: combine with the following
                        -- code unit and emit a four-byte sequence.
                        | 0xD800 <= w && w <= 0xDBFF -> do
                            let c = ord $ U16.chr2 w (A.unsafeIndex arr (i+1))
                            poke8 0 $ (c `shiftR` 18) + 0xF0
                            poke8 1 $ ((c `shiftR` 12) .&. 0x3F) + 0x80
                            poke8 2 $ ((c `shiftR` 6) .&. 0x3F) + 0x80
                            poke8 3 $ (c .&. 0x3F) + 0x80
                            go (i + 2) (op `plusPtr` 4)
                        -- Remaining code units: three-byte sequence.
                        | otherwise -> do
                            poke8 0 $ (w `shiftR` 12) + 0xE0
                            poke8 1 $ ((w `shiftR` 6) .&. 0x3F) + 0x80
                            poke8 2 $ (w .&. 0x3F) + 0x80
                            go (i + 1) (op `plusPtr` 3)
                  | otherwise =
                      outerLoop i (B.BufferRange op ope)
                  where
                    -- Write one byte at offset @j@ from the current
                    -- output position.
                    poke8 j v = poke (op `plusPtr` j) (fromIntegral v :: Word8)
380
#endif
bos's avatar
bos committed
381

382
383
-- | Encode text using UTF-8 encoding.
encodeUtf8 :: Text -> ByteString
encodeUtf8 (Text arr off len)
  | len == 0  = B.empty
  | otherwise = unsafeDupablePerformIO $ do
  -- Reserve four bytes of output for every element of the input array.
  fp <- mallocByteString (len*4)
  withForeignPtr fp $ \ptr ->
    with ptr $ \destPtr -> do
      -- The C routine is handed a pointer to the output pointer; the
      -- value read back afterwards marks the end of the written data.
      c_encode_utf8 destPtr (A.aBA arr) (fromIntegral off) (fromIntegral len)
      newDest <- peek destPtr
      let utf8len = newDest `minusPtr` ptr
      -- Keep the original buffer when enough of it is in use;
      -- otherwise copy the result into an exactly sized one.
      -- NOTE(review): the buffer holds len*4 bytes while the threshold
      -- is len/2 -- confirm this is the intended cut-off.
      if utf8len >= len `shiftR` 1
        then return (PS fp 0 utf8len)
        else do
          fp' <- mallocByteString utf8len
          withForeignPtr fp' $ \ptr' -> do
            memcpy ptr' ptr (fromIntegral utf8len)
            return (PS fp' 0 utf8len)
400

401
402
403
404
405
-- | Decode little endian UTF-16 data into 'Text'.  The supplied
-- 'OnDecodeError' handler is passed on to the underlying stream
-- decoder.
decodeUtf16LEWith :: OnDecodeError -> ByteString -> Text
decodeUtf16LEWith handler input = F.unstream $ E.streamUtf16LE handler input
{-# INLINE decodeUtf16LEWith #-}

406
-- | Decode text from little endian UTF-16 encoding.
--
-- If the input contains any invalid little endian UTF-16 data, an
-- exception will be thrown.  For more control over the handling of
-- invalid data, use 'decodeUtf16LEWith'.
decodeUtf16LE :: ByteString -> Text
decodeUtf16LE = decodeUtf16LEWith strictDecode
{-# INLINE decodeUtf16LE #-}

415
416
417
418
419
-- | Decode big endian UTF-16 data into 'Text'.  The supplied
-- 'OnDecodeError' handler is passed on to the underlying stream
-- decoder.
decodeUtf16BEWith :: OnDecodeError -> ByteString -> Text
decodeUtf16BEWith handler input = F.unstream $ E.streamUtf16BE handler input
{-# INLINE decodeUtf16BEWith #-}

420
-- | Decode text from big endian UTF-16 encoding.
--
-- If the input contains any invalid big endian UTF-16 data, an
-- exception will be thrown.  For more control over the handling of
-- invalid data, use 'decodeUtf16BEWith'.
decodeUtf16BE :: ByteString -> Text
decodeUtf16BE = decodeUtf16BEWith strictDecode
{-# INLINE decodeUtf16BE #-}

-- | Produce the little endian UTF-16 encoding of the given text.
encodeUtf16LE :: Text -> ByteString
encodeUtf16LE input = E.unstream $ E.restreamUtf16LE $ F.stream input
{-# INLINE encodeUtf16LE #-}

-- | Produce the big endian UTF-16 encoding of the given text.
encodeUtf16BE :: Text -> ByteString
encodeUtf16BE input = E.unstream $ E.restreamUtf16BE $ F.stream input
{-# INLINE encodeUtf16BE #-}

439
440
441
442
443
-- | Decode little endian UTF-32 data into 'Text'.  The supplied
-- 'OnDecodeError' handler is passed on to the underlying stream
-- decoder.
decodeUtf32LEWith :: OnDecodeError -> ByteString -> Text
decodeUtf32LEWith handler input = F.unstream $ E.streamUtf32LE handler input
{-# INLINE decodeUtf32LEWith #-}

444
-- | Decode text from little endian UTF-32 encoding.
--
-- If the input contains any invalid little endian UTF-32 data, an
-- exception will be thrown.  For more control over the handling of
-- invalid data, use 'decodeUtf32LEWith'.
decodeUtf32LE :: ByteString -> Text
decodeUtf32LE = decodeUtf32LEWith strictDecode
{-# INLINE decodeUtf32LE #-}

453
454
455
456
457
-- | Decode big endian UTF-32 data into 'Text'.  The supplied
-- 'OnDecodeError' handler is passed on to the underlying stream
-- decoder.
decodeUtf32BEWith :: OnDecodeError -> ByteString -> Text
decodeUtf32BEWith handler input = F.unstream $ E.streamUtf32BE handler input
{-# INLINE decodeUtf32BEWith #-}

458
-- | Decode text from big endian UTF-32 encoding.
--
-- If the input contains any invalid big endian UTF-32 data, an
-- exception will be thrown.  For more control over the handling of
-- invalid data, use 'decodeUtf32BEWith'.
decodeUtf32BE :: ByteString -> Text
decodeUtf32BE = decodeUtf32BEWith strictDecode
{-# INLINE decodeUtf32BE #-}

-- | Produce the little endian UTF-32 encoding of the given text.
encodeUtf32LE :: Text -> ByteString
encodeUtf32LE input = E.unstream $ E.restreamUtf32LE $ F.stream input
{-# INLINE encodeUtf32LE #-}

-- | Produce the big endian UTF-32 encoding of the given text.
encodeUtf32BE :: Text -> ByteString
encodeUtf32BE input = E.unstream $ E.restreamUtf32BE $ F.stream input
{-# INLINE encodeUtf32BE #-}
476
477
478
479

-- Binding to the C routine @_hs_text_decode_utf8@.
--
-- Arguments, in order: the destination array, a pointer to the current
-- write offset into it, the first input byte to decode, and the end of
-- the input.  As used by 'decodeUtf8With', a result equal to the end
-- pointer means all input was consumed; any other result is treated as
-- the address of a byte that could not be decoded.
foreign import ccall unsafe "_hs_text_decode_utf8" c_decode_utf8
    :: MutableByteArray# s -> Ptr CSize
    -> Ptr Word8 -> Ptr Word8 -> IO (Ptr Word8)
480

481
482
-- Binding to the C routine @_hs_text_decode_utf8_state@, the resumable
-- decoder behind 'streamDecodeUtf8With'.
--
-- Arguments, in order: the destination array, a pointer to the current
-- write offset into it, a pointer to the current input position (read
-- back by the caller after the call), the end of the input, and
-- pointers to the code point and decoder state carried between calls.
-- When the state read back is @UTF8_REJECT@, the caller treats the
-- result as the address of the byte that could not be decoded.
foreign import ccall unsafe "_hs_text_decode_utf8_state" c_decode_utf8_with_state
    :: MutableByteArray# s -> Ptr CSize
    -> Ptr (Ptr Word8) -> Ptr Word8
    -> Ptr CodePoint -> Ptr DecoderState -> IO (Ptr Word8)
485

486
487
-- Binding to the C routine @_hs_text_decode_latin1@.
--
-- Arguments, in order: the destination array, the first input byte,
-- and the end of the input ('decodeLatin1' passes start + length).
foreign import ccall unsafe "_hs_text_decode_latin1" c_decode_latin1
    :: MutableByteArray# s -> Ptr Word8 -> Ptr Word8 -> IO ()
488
489
490

-- Binding to the C routine @_hs_text_encode_utf8@.
--
-- Arguments, in order: a pointer to the output pointer (which
-- 'encodeUtf8' reads back after the call to find the end of the
-- written bytes), the source array, and the offset and length of the
-- text within that array.
foreign import ccall unsafe "_hs_text_encode_utf8" c_encode_utf8
    :: Ptr (Ptr Word8) -> ByteArray# -> CSize -> CSize -> IO ()