diff --git a/Codec/Binary/UTF8/Generic.hs b/Codec/Binary/UTF8/Generic.hs index c807c4b6a5c0743c2e027c8018c1e8da4293400d..f03cec68ce8d0aaaf6a91bb810d421e4c891202e 100644 --- a/Codec/Binary/UTF8/Generic.hs +++ b/Codec/Binary/UTF8/Generic.hs @@ -32,6 +32,10 @@ import Prelude hiding (take,drop,splitAt,span,break,foldr,foldl,length,lines,nul import Codec.Binary.UTF8.String(encode) +#ifdef BYTESTRING_IN_BASE +import Data.ByteString.Base (unsafeHead, unsafeTail) +#endif + class (Num s, Ord s) => UTF8Bytes b s | b -> s where bsplit :: s -> b -> (b,b) bdrop :: s -> b -> b @@ -45,7 +49,7 @@ class (Num s, Ord s) => UTF8Bytes b s | b -> s where instance UTF8Bytes B.ByteString Int where bsplit = B.splitAt bdrop = B.drop - buncons = B.uncons + buncons = unconsB elemIndex = B.elemIndex empty = B.empty null = B.null @@ -55,7 +59,7 @@ instance UTF8Bytes B.ByteString Int where instance UTF8Bytes L.ByteString Int64 where bsplit = L.splitAt bdrop = L.drop - buncons = L.uncons + buncons = unconsL elemIndex = L.elemIndex empty = L.empty null = L.null @@ -267,3 +271,20 @@ lines' bs = case elemIndex 10 bs of in xs : lines' ys Nothing -> [bs] +----------- +-- Compatibility functions for base-2 + +unconsB :: B.ByteString -> Maybe (Word8,B.ByteString) +unconsL :: L.ByteString -> Maybe (Word8,L.ByteString) + +#ifdef BYTESTRING_IN_BASE +unconsB bs | B.null bs = Nothing + | otherwise = Just (unsafeHead bs, unsafeTail bs) + +unconsL bs = case L.toChunks bs of + (x:xs) | not (B.null x) -> Just (unsafeHead x, L.fromChunks (unsafeTail x:xs)) + _ -> Nothing +#else +unconsB = B.uncons +unconsL = L.uncons +#endif diff --git a/Data/ByteString/Lazy/UTF8.hs b/Data/ByteString/Lazy/UTF8.hs index 267f4b0821e064390642d1c885d9523fd6262ec3..b1ec7ad7419a3a3cbfdf6456f61f9371f8a0b376 100644 --- a/Data/ByteString/Lazy/UTF8.hs +++ b/Data/ByteString/Lazy/UTF8.hs @@ -28,6 +28,7 @@ import qualified Data.ByteString.Lazy as B import Prelude hiding (take,drop,splitAt,span,break,foldr,foldl,length,lines) import Codec.Binary.UTF8.String(encode) +import Codec.Binary.UTF8.Generic (buncons) -- | Converts a Haskell string into a UTF8 encoded bytestring. fromString :: String -> B.ByteString @@ -51,7 +52,7 @@ replacement_char = '\xfffd' -- XXX: Should we combine sequences of errors into a single replacement -- character? decode :: B.ByteString -> Maybe (Char,Int64) -decode bs = do (c,cs) <- B.uncons bs +decode bs = do (c,cs) <- buncons bs return (choose (fromEnum c) cs) where choose :: Int -> B.ByteString -> (Char, Int64) @@ -75,7 +76,7 @@ decode bs = do (c,cs) <- B.uncons bs {-# INLINE get_follower #-} get_follower :: Int -> B.ByteString -> Maybe (Int, B.ByteString) - get_follower acc cs = do (x,xs) <- B.uncons cs + get_follower acc cs = do (x,xs) <- buncons cs acc1 <- follower acc x return (acc1,xs) diff --git a/Data/ByteString/UTF8.hs b/Data/ByteString/UTF8.hs index e6741153ceeb4ef340a442c018d34930265fbebf..76acaed31e493571010e93785387cd74075826f2 100644 --- a/Data/ByteString/UTF8.hs +++ b/Data/ByteString/UTF8.hs @@ -27,6 +27,7 @@ import qualified Data.ByteString as B import Prelude hiding (take,drop,splitAt,span,break,foldr,foldl,length,lines) import Codec.Binary.UTF8.String(encode) +import Codec.Binary.UTF8.Generic (buncons) -- | Converts a Haskell string into a UTF8 encoded bytestring. fromString :: String -> B.ByteString @@ -50,7 +51,7 @@ replacement_char = '\xfffd' -- XXX: Should we combine sequences of errors into a single replacement -- character? decode :: B.ByteString -> Maybe (Char,Int) -decode bs = do (c,cs) <- B.uncons bs +decode bs = do (c,cs) <- buncons bs return (choose (fromEnum c) cs) where choose :: Int -> B.ByteString -> (Char, Int) @@ -74,7 +75,7 @@ decode bs = do (c,cs) <- B.uncons bs {-# INLINE get_follower #-} get_follower :: Int -> B.ByteString -> Maybe (Int, B.ByteString) - get_follower acc cs = do (x,xs) <- B.uncons cs + get_follower acc cs = do (x,xs) <- buncons cs acc1 <- follower acc x return (acc1,xs) diff --git a/System/IO/UTF8.hs b/System/IO/UTF8.hs index be4796c72bede0d41f2b83840edc60e1f209f308..e5f88fcf3b451407bdcd1380f238b3d105646932 100644 --- a/System/IO/UTF8.hs +++ b/System/IO/UTF8.hs @@ -36,6 +36,7 @@ import Prelude (String, (=<<), (.), map, Enum(toEnum, fromEnum), Read, Show(..)) import System.IO (Handle, IO, FilePath, IOMode(AppendMode, ReadMode, WriteMode)) import qualified System.IO as IO +import Control.Exception (bracket) import Codec.Binary.UTF8.String (encode, decode) @@ -83,7 +84,7 @@ openBinaryFile :: FilePath -> IOMode -> IO Handle openBinaryFile n m = IO.openBinaryFile (encodeString n) m withBinaryFile :: FilePath -> IOMode -> (Handle -> IO a) -> IO a -withBinaryFile n m f = IO.withBinaryFile (encodeString n) m f +withBinaryFile n m f = bracket (openBinaryFile n m) IO.hClose f -- | The 'readFile' function reads a file and -- returns the contents of the file as a UTF8 string. diff --git a/utf8-string.cabal b/utf8-string.cabal index 437d86529062e40b976c34bd5dc63e389966c3e1..0585cf35bb0503c18f91d0c3f6c7b275cb0d01ad 100644 --- a/utf8-string.cabal +++ b/utf8-string.cabal @@ -1,5 +1,5 @@ Name: utf8-string -Version: 0.3.3 +Version: 0.3.4 Author: Eric Mertens Maintainer: emertens@galois.com License: BSD3 @@ -21,9 +21,11 @@ library if flag(bytestring-in-base) build-depends: base >= 2.0 && < 2.2 + cpp-options: -DBYTESTRING_IN_BASE else build-depends: base < 2.0 || >= 3, bytestring >= 0.9 + Extensions: CPP Exposed-modules: Codec.Binary.UTF8.String Codec.Binary.UTF8.Generic System.IO.UTF8