From 6dbd7d36a643f48416170430c7c5b361df2f0812 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan <bos@serpentine.com> Date: Tue, 7 Jan 2014 00:23:40 -0800 Subject: [PATCH] encodeUtf8_1: refactor the last loop body This requires a bit more torturing to maintain performance. For some unknown reason, doing the same refactoring on go4 decreases performance on russian-small.txt by half! --- Data/Text/Encoding.hs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/Data/Text/Encoding.hs b/Data/Text/Encoding.hs index 58078ca8..d6180a33 100644 --- a/Data/Text/Encoding.hs +++ b/Data/Text/Encoding.hs @@ -101,7 +101,7 @@ import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode) import Data.Text.Internal (Text(..), safe, textP) import Data.Text.Internal.Private (runText) import Data.Text.Internal.Unsafe.Char (ord, unsafeWrite) -import Data.Word (Word8, Word32) +import Data.Word (Word8, Word16, Word32) import Foreign.C.Types (CSize) import Foreign.ForeignPtr (ForeignPtr, touchForeignPtr, withForeignPtr) import Foreign.Marshal.Utils (with) @@ -450,6 +450,10 @@ encodeUtf8_1 (Text arr off len) fp' <- resize k m fp go n m fp' (unsafeForeignPtrToPtr fp') do1 ptr n m w k = poke8 ptr m w >> k (n+1) (m+1) + loop :: (Word16 -> Int -> Int -> ForeignPtr Word8 -> Ptr Word8 + -> (Int -> Int -> IO ByteString) + -> IO ByteString) -> Int -> Int -> ForeignPtr Word8 -> Ptr Word8 + -> IO ByteString loop act !n0 !m0 fp ptr = hot n0 m0 where hot !n !m | n == offLen = touchForeignPtr fp >> return (PS fp 0 m) @@ -478,17 +482,14 @@ encodeUtf8_1 (Text arr off len) poke8 ptr (m+1) $ ((w `shiftR` 6) .&. 0x3F) + 0x80 poke8 ptr (m+2) $ (w .&. 0x3F) + 0x80 k (n+1) (m+3) - go3 !n0 !m0 fp ptr = do - let hot !n !m - | n == offLen = touchForeignPtr fp >> return (PS fp 0 m) - | otherwise = do - case A.unsafeIndex arr n of - w| w <= 0x7F -> do1 ptr n m w hot - | w <= 0x7FF -> do2 ptr n m w hot - | w < 0xD800 -> do3 ptr n m w hot - | w > 0xDBFF -> do3 ptr n m w hot - | otherwise -> ensure 4 n m fp go4 - hot n0 m0 + go3 = loop body where body w !n !m fp ptr cont = + case w of + _| w <= 0x7F -> do1 ptr n m w cont + | w <= 0x7FF -> do2 ptr n m w cont + | w < 0xD800 -> do3 ptr n m w cont + | w > 0xDBFF -> do3 ptr n m w cont + | otherwise -> ensure 4 n m fp go4 + {-# INLINE body #-} go4 !n0 !m0 fp ptr = do let hot !n !m | n == offLen = touchForeignPtr fp >> return (PS fp 0 m) -- GitLab