Commit 872cfad3 authored by Bodigrim's avatar Bodigrim
Browse files

Speed up encodeUtf8 for strict and lazy Text

parent 3ae58ac1
......@@ -39,6 +39,7 @@ module Data.Text.Array
, unsafeFreeze
, unsafeIndex
, new
, newPinned
, unsafeWrite
) where
......@@ -71,6 +72,17 @@ new (I# len#)
(# s2#, marr# #) -> (# s2#, MutableByteArray marr# #)
{-# INLINE new #-}
-- | Create an uninitialized mutable pinned array.
newPinned :: forall s. Int -> ST s (MArray s)
newPinned (I# len#)
#if defined(ASSERTS)
| I# len# < 0 = error "Data.Text.Array.newPinned: size overflow"
#endif
| otherwise = ST $ \s1# ->
case newPinnedByteArray# len# s1# of
(# s2#, marr# #) -> (# s2#, MutableByteArray marr# #)
{-# INLINE newPinned #-}
-- | Freeze a mutable array. Do not mutate the 'MArray' afterwards!
unsafeFreeze :: MArray s -> ST s Array
unsafeFreeze (MutableByteArray marr) = ST $ \s1# ->
......
......@@ -65,7 +65,7 @@ import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO)
import Control.Exception (evaluate, try, throwIO, ErrorCall(ErrorCall))
import Control.Monad.ST (runST)
import Data.ByteString as B
import qualified Data.ByteString.Short.Internal as SBS
import qualified Data.ByteString.Internal as B
import Data.Foldable (traverse_)
import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode, lenientDecode)
import Data.Text.Internal (Text(..), safe, text)
......@@ -79,7 +79,8 @@ import Foreign.C.Types (CSize)
import Foreign.Marshal.Utils (with)
import Foreign.Ptr (Ptr, minusPtr, nullPtr, plusPtr)
import Foreign.Storable (Storable, peek, poke)
import GHC.Base (MutableByteArray#)
import GHC.Exts (MutableByteArray#, byteArrayContents#, unsafeCoerce#)
import GHC.ForeignPtr (ForeignPtr(..), ForeignPtrContents(PlainPtr))
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Internal as B hiding (empty, append)
import qualified Data.ByteString.Builder.Prim as BP
......@@ -436,9 +437,16 @@ encodeUtf8BuilderEscaped be =
-- | Encode text using UTF-8 encoding.
encodeUtf8 :: Text -> ByteString
encodeUtf8 (Text (A.ByteArray arr) off len)
encodeUtf8 (Text arr off len)
| len == 0 = B.empty
| otherwise = B.take len $ B.drop off $ SBS.fromShort $ SBS.SBS arr
-- It would be easier to use Data.ByteString.Short.fromShort and slice later,
-- but this is undesirable when len is significantly smaller than length arr.
| otherwise = unsafeDupablePerformIO $ do
marr@(A.MutableByteArray mba) <- unsafeSTToIO $ A.newPinned len
unsafeSTToIO $ A.copyI len marr 0 arr off
let fp = ForeignPtr (byteArrayContents# (unsafeCoerce# mba))
(PlainPtr mba)
pure $ B.fromForeignPtr fp 0 len
-- | Decode text from little endian UTF-16 encoding.
decodeUtf16LEWith :: OnDecodeError -> ByteString -> Text
......
......@@ -55,7 +55,6 @@ import Data.Text.Internal.Lazy (Text(..), chunk, empty, foldrChunks)
import Data.Word (Word8)
import qualified Data.ByteString as S
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Extra as B (safeStrategy, toLazyByteStringWith)
import qualified Data.ByteString.Builder.Prim as BP
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Internal as B
......@@ -139,17 +138,7 @@ decodeUtf8' bs = unsafeDupablePerformIO $ do
-- | Encode text using UTF-8 encoding.
encodeUtf8 :: Text -> B.ByteString
encodeUtf8 Empty = B.empty
encodeUtf8 lt@(Chunk t _) =
B.toLazyByteStringWith strategy B.empty $ encodeUtf8Builder lt
where
-- To improve our small string performance, we use a strategy that
-- allocates a buffer that is guaranteed to be large enough for the
-- encoding of the first chunk, but not larger than the default
-- B.smallChunkSize. We clamp the firstChunkSize to ensure that we don't
-- generate too large buffers which hamper streaming.
firstChunkSize = min B.smallChunkSize (4 * (T.length t + 1))
strategy = B.safeStrategy firstChunkSize B.defaultChunkSize
encodeUtf8 = foldrChunks (B.Chunk . TE.encodeUtf8) B.Empty
-- | Encode text to a ByteString 'B.Builder' using UTF-8 encoding.
--
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment