Commit 1cc82d38, authored and committed by Ben Gamari
Browse files

utils: Lazily decode UTF8 strings

Reviewers: austin, hvr

Subscribers: rwbarton, thomie

GHC Trac Issues: #13527

Differential Revision: https://phabricator.haskell.org/D3442
parent 3d3975f2
......@@ -17,7 +17,8 @@ module Encoding (
utf8PrevChar,
utf8CharStart,
utf8DecodeChar,
utf8DecodeString,
utf8DecodeByteString,
utf8DecodeStringLazy,
utf8EncodeChar,
utf8EncodeString,
utf8EncodedLength,
......@@ -33,9 +34,15 @@ module Encoding (
) where
import Foreign
import Foreign.ForeignPtr.Unsafe
import Data.Char
import qualified Data.Char as Char
import Numeric
import GHC.IO
import Data.ByteString (ByteString)
import qualified Data.ByteString.Internal as BS
import GHC.Exts
-- -----------------------------------------------------------------------------
......@@ -115,19 +122,24 @@ utf8CharStart p = go p
then go (p `plusPtr` (-1))
else return p
utf8DecodeString :: Ptr Word8 -> Int -> IO [Char]
utf8DecodeString ptr len
= unpack ptr
-- | Decode the UTF-8 contents of a 'ByteString' into a list of characters.
-- The buffer, offset and length held by the 'BS.PS' constructor are passed
-- unchanged to 'utf8DecodeStringLazy'.
utf8DecodeByteString :: ByteString -> [Char]
utf8DecodeByteString bytes =
  case bytes of
    BS.PS fptr off n -> utf8DecodeStringLazy fptr off n
-- | Decode @len@ bytes of UTF-8, starting @offset@ bytes into the buffer
-- behind @fptr@, into a list of characters. The tail of the list is produced
-- through 'unsafeDupableInterleaveIO', so each further character is decoded
-- only when the rest of the list is demanded.
utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char]
utf8DecodeStringLazy fptr offset len
= unsafeDupablePerformIO $ unpack start
where
-- NOTE(review): the next binding refers to @ptr@, which is a parameter of
-- 'utf8DecodeString' rather than of this function; it looks like the
-- pre-change line of the diff, superseded by the @!end@ binding below.
!end = ptr `plusPtr` len
-- Raw address of the first byte to decode.
!start = unsafeForeignPtrToPtr fptr `plusPtr` offset
-- Address one past the last byte to decode.
!end = start `plusPtr` len
unpack p
-- NOTE(review): the following six lines look like the pre-change (strict)
-- decoder body: it recurses directly in IO and never touches @fptr@.
| p >= end = return []
| otherwise =
case utf8DecodeChar# (unPtr p) of
(# c#, nBytes# #) -> do
chs <- unpack (p `plusPtr#` nBytes#)
return (C# c# : chs)
-- Lazy variant. On reaching @end@ the foreign pointer is touched before the
-- empty list is returned -- presumably to keep the buffer alive while the raw
-- pointer from 'unsafeForeignPtrToPtr' is still being read; TODO confirm.
| p >= end = touchForeignPtr fptr >> return []
| otherwise =
-- 'utf8DecodeChar#' yields the decoded character and the number of bytes it
-- occupied; the latter advances the pointer for the next step.
case utf8DecodeChar# (unPtr p) of
(# c#, nBytes# #) -> do
-- Decoding of the remaining bytes is deferred until the tail is forced.
rest <- unsafeDupableInterleaveIO $ unpack (p `plusPtr#` nBytes#)
return (C# c# : rest)
countUTF8Chars :: Ptr Word8 -> Int -> IO Int
countUTF8Chars ptr len = go ptr 0
......
......@@ -485,9 +485,7 @@ nullFS f = BS.null (fs_bs f)
-- | Unpacks and decodes the FastString: the 'ByteString' held in its third
-- field is decoded as UTF-8 into a 'String'.
unpackFS :: FastString -> String
-- NOTE(review): the next three lines look like the pre-change body of the
-- diff, which decoded through 'utf8DecodeString' under 'inlinePerformIO'.
unpackFS (FastString _ _ bs _) =
inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
utf8DecodeString (castPtr ptr) len
-- Post-change body: hand the 'ByteString' straight to 'utf8DecodeByteString'.
unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS :: FastString -> [Word8]
......
......@@ -251,9 +251,7 @@ lexemeToString :: StringBuffer
-> String
lexemeToString _ 0 = ""
lexemeToString (StringBuffer buf _ cur) bytes =
inlinePerformIO $
withForeignPtr buf $ \ptr ->
utf8DecodeString (ptr `plusPtr` cur) bytes
utf8DecodeStringLazy buf cur bytes
lexemeToFastString :: StringBuffer
-> Int -- ^ @n@, the number of bytes
......
......@@ -3525,8 +3525,7 @@ listAround pan do_highlight = do
prefixed = zipWith ($) highlighted bs_line_nos
output = BS.intercalate (BS.pack "\n") prefixed
utf8Decoded <- liftIO $ BS.useAsCStringLen output
$ \(p,n) -> utf8DecodeString (castPtr p) n
let utf8Decoded = utf8DecodeByteString output
liftIO $ putStrLn utf8Decoded
where
file = GHC.srcSpanFile pan
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment