Skip to content
Snippets Groups Projects
Commit d3b41587 authored by judah's avatar judah
Browse files

Make the POSIX key reader more robust.

Previously we read one, fixed buffer of input from the console; but this could
cause a problem if a multibyte character crossed the end of the buffer boundary.

Now, we detect that case from iconv and recover by reading one byte of input at
a time until the conversion succeeds.
parent f7ee107e
No related branches found
No related tags found
No related merge requests found
......@@ -2,18 +2,18 @@ module System.Console.Haskeline.Backend.IConv(
setLocale,
getCodeset,
openEncoder,
openDecoder
openDecoder,
openPartialDecoder,
Result(..)
) where
import Foreign.C
import Foreign
import Data.ByteString (ByteString, useAsCStringLen, empty, append)
import Data.ByteString.Internal (createAndTrim)
import Control.Monad(when)
import Data.ByteString (ByteString, useAsCStringLen, append)
import Data.ByteString.Internal (createAndTrim')
import qualified Data.ByteString as B
import qualified Data.ByteString.UTF8 as UTF8
import System.Console.Haskeline.MonadException
#include <locale.h>
#include <langinfo.h>
#include <iconv.h>
......@@ -21,12 +21,27 @@ import System.Console.Haskeline.MonadException
openEncoder :: String -> IO (String -> IO ByteString)
openEncoder codeset = do
encodeT <- iconvOpen codeset "UTF-8"
return $ iconv encodeT . UTF8.fromString
return $ simpleIConv encodeT . UTF8.fromString
openDecoder :: String -> IO (ByteString -> IO String)
openDecoder codeset = do
decodeT <- iconvOpen "UTF-8" codeset
return $ fmap UTF8.toString . iconv decodeT
return $ fmap UTF8.toString . simpleIConv decodeT
-- handle errors by dropping unuseable chars.
simpleIConv :: IConvT -> ByteString -> IO ByteString
simpleIConv t bs = do
(cs,result) <- iconv t bs
case result of
Invalid rest -> fmap (cs `append`) $ simpleIConv t (B.drop 1 rest)
_ -> return cs
openPartialDecoder :: String -> IO (ByteString -> IO (String, Result))
openPartialDecoder codeset = do
decodeT <- iconvOpen "UTF-8" codeset
return $ \bs -> do
(s,result) <- iconv decodeT bs
return (UTF8.toString s,result)
---------------------
-- Setting the locale
......@@ -46,7 +61,6 @@ foreign import ccall nl_langinfo :: NLItem -> IO CString
getCodeset :: IO String
getCodeset = nl_langinfo (#const CODESET) >>= peekCString
----------------
-- Iconv
......@@ -70,7 +84,12 @@ foreign import ccall "&" iconv_close :: FunPtr (IConvTPtr -> IO ())
foreign import ccall "iconv" c_iconv :: IConvTPtr -> Ptr CString -> Ptr CSize
-> Ptr CString -> Ptr CSize -> IO CSize
iconv :: IConvT -> ByteString -> IO ByteString
data Result = Successful
| Invalid ByteString
| Incomplete ByteString
deriving Show
iconv :: IConvT -> ByteString -> IO (ByteString,Result)
iconv cd inStr = useAsCStringLen inStr $ \(inPtr, inBuffLen) ->
with inPtr $ \inBuff ->
with (toEnum inBuffLen) $ \inBytesLeft -> do
......@@ -81,31 +100,31 @@ iconv cd inStr = useAsCStringLen inStr $ \(inPtr, inBuffLen) ->
-- and also maybe a different starting buffer size?
biggerBuffer = (+1)
loop outSize inBuff inBytesLeft = do
bytesLeft <- peek inBytesLeft
if bytesLeft <= 0
then return empty
else do
bs <- partialIconv cd outSize inBuff inBytesLeft
bs' <- loop (biggerBuffer outSize) inBuff inBytesLeft
return (bs `append` bs')
partialIconv :: IConvT -> Int -> Ptr CString -> Ptr CSize -> IO ByteString
(bs, errno) <- partialIconv cd outSize inBuff inBytesLeft
inLeft <- fmap fromEnum $ peek inBytesLeft
let rest = B.drop (B.length inStr - inLeft) inStr
case errno of
Nothing -> return (bs,Successful)
Just err
| err == e2BIG -> do -- output buffer too small
(bs',result) <- loop (biggerBuffer outSize) inBuff inBytesLeft
-- TODO: is this efficient enough?
return (bs `append` bs', result)
| err == eINVAL -> return (bs,Incomplete rest)
| otherwise -> return (bs, Invalid rest)
partialIconv :: IConvT -> Int -> Ptr CString -> Ptr CSize -> IO (ByteString, Maybe Errno)
partialIconv cd outSize inBuff inBytesLeft =
withForeignPtr cd $ \cd_p ->
createAndTrim outSize $ \outPtr ->
createAndTrim' outSize $ \outPtr ->
with outPtr $ \outBuff ->
with (toEnum outSize) $ \outBytesLeft -> do
ret <- c_iconv cd_p inBuff inBytesLeft
c_iconv cd_p inBuff inBytesLeft
(castPtr outBuff) outBytesLeft
-- if there was a problem converting (either incomplete or invalid errs)
-- skip the next unprocessed byte.
when (ret == -1) $ do
errno <- getErrno
when (errno /= e2BIG) $ do
modifyPtr (`plusPtr` 1) inBuff
modifyPtr (subtract 1) inBytesLeft
outLeft <- fmap fromEnum $ peek outBytesLeft
return (outSize - outLeft)
inLeft <- peek inBytesLeft
errno <- if inLeft > 0
then fmap Just getErrno
else return Nothing
return (0,outSize - outLeft,errno)
modifyPtr :: Storable a => (a -> a) -> Ptr a -> IO ()
modifyPtr f p = peek p >>= poke p . f
......@@ -186,7 +186,7 @@ withHandler signal handler f = do
getEvent :: Encoders -> TreeMap Char Key -> Chan Event -> IO Event
getEvent enc baseMap = keyEventLoop readKeyEvents
where
bufferSize = 100
bufferSize = 32
readKeyEvents = do
-- Read at least one character of input, and more if available.
-- In particular, the characters making up a control sequence will all
......@@ -194,8 +194,21 @@ getEvent enc baseMap = keyEventLoop readKeyEvents
threadWaitRead stdInput -- hWaitForInput doesn't work with -threaded on
-- ghc < 6.10 (#2363 in ghc's trac)
bs <- B.hGetNonBlocking stdin bufferSize
cs <- localeToUnicode enc bs
cs <- convert bs
return $ map KeyInput $ lexKeys baseMap cs
-- try to convert to the locale encoding using iconv.
-- if the buffer has an incomplete shift sequence,
-- read another byte of input and try again.
convert bs = do
(cs,result) <- localeToUnicode enc bs
case result of
Incomplete rest -> do
extra <- B.hGetNonBlocking stdin 1
if B.null extra
then return cs -- ignore the incomplete shift sequence
-- since no more input is available.
else fmap (cs ++) $ convert (rest `B.append` extra)
_ -> return cs
-- fails if stdin is not a handle or if we couldn't access /dev/tty.
openTTY :: IO (Maybe Handle)
......@@ -212,7 +225,7 @@ posixRunTerm tOps = do
fileRT <- fileRunTerm
codeset <- getCodeset
ttyH <- openTTY
encoders <- liftM2 Encoders (openEncoder codeset) (openDecoder codeset)
encoders <- liftM2 Encoders (openEncoder codeset) (openPartialDecoder codeset)
case ttyH of
Nothing -> return fileRT
Just h -> return fileRT {
......@@ -224,7 +237,7 @@ posixRunTerm tOps = do
type PosixT m = ReaderT Encoders (ReaderT Handle m)
data Encoders = Encoders {unicodeToLocale :: String -> IO B.ByteString,
localeToUnicode :: B.ByteString -> IO String}
localeToUnicode :: B.ByteString -> IO (String, Result)}
posixEncode :: (MonadIO m, MonadReader Encoders m) => String -> m B.ByteString
posixEncode str = do
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment