From a31c0f7df8fea235633a2eb82fb8d89b85a2fd47 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@serpentine.com>
Date: Fri, 23 Dec 2011 12:24:17 -0800
Subject: [PATCH] Improve ASCII encoding performance in a safer way.

---
 Data/Text/Encoding.hs | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/Data/Text/Encoding.hs b/Data/Text/Encoding.hs
index e6a99e08..b0fd388e 100644
--- a/Data/Text/Encoding.hs
+++ b/Data/Text/Encoding.hs
@@ -153,8 +153,9 @@ encodeUtf8 (Text arr off len) = unsafePerformIO $ do
    where
     loop n1 m1 ptr = go n1 m1
      where
+      offLen = off + len
       go !n !m
-        | n == off+len = return (PS fp 0 m)
+        | n == offLen = return (PS fp 0 m)
         | otherwise = do
             let poke8 k v = poke (ptr `plusPtr` k) (fromIntegral v :: Word8)
                 ensure k act
@@ -168,8 +169,17 @@ encodeUtf8 (Text arr off len) = unsafePerformIO $ do
                 {-# INLINE ensure #-}
             case A.unsafeIndex arr n of
              w| w <= 0x7F  -> ensure 1 $ do
-                  poke8 m w
-                  go (n+1) (m+1)
+                  poke (ptr `plusPtr` m) (fromIntegral w :: Word8)
+                  -- A single ASCII octet is likely to start a run of
+                  -- them.  We see better performance when we
+                  -- special-case this assumption.
+                  let ascii !t !u
+                        | t == offLen || u == size || v >= 0x80 = go t u
+                        | otherwise = do
+                            poke (ptr `plusPtr` u) (fromIntegral v :: Word8)
+                            ascii (t+1) (u+1)
+                        where v = A.unsafeIndex arr t
+                  ascii (n+1) (m+1)
               | w <= 0x7FF -> ensure 2 $ do
                   poke8 m     $ (w `shiftR` 6) + 0xC0
                   poke8 (m+1) $ (w .&. 0x3f) + 0x80
-- 
GitLab