From 0f3435bad4f68725c12a70695ae0beac40d8efce Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@serpentine.com>
Date: Thu, 2 Jan 2014 16:55:19 -0800
Subject: [PATCH] Drop some special-casing for ASCII during UTF-8 encoding

I somehow forgot that we allocate the initial ByteString to contain
the same number of bytes as the Text contains code units. This means
that we never need to ensure that the ByteString is big enough, nor
(with this observation) does a special-cased ASCII-only loop help
performance.
---
 Data/Text/Encoding.hs | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/Data/Text/Encoding.hs b/Data/Text/Encoding.hs
index 73645244..501fda87 100644
--- a/Data/Text/Encoding.hs
+++ b/Data/Text/Encoding.hs
@@ -384,20 +384,9 @@ encodeUtf8 (Text arr off len) = unsafeDupablePerformIO $ do
                       start newSize n m fp'
                 {-# INLINE ensure #-}
             case A.unsafeIndex arr n of
-             w| w <= 0x7F  -> ensure 1 $ do
+             w| w <= 0x7F  -> do
                   poke (ptr `plusPtr` m) (fromIntegral w :: Word8)
-                  -- A single ASCII octet is likely to start a run of
-                  -- them.  We see better performance when we
-                  -- special-case this assumption.
-                  let end = ptr `plusPtr` size
-                      ascii !t !u
-                        | t == offLen || u == end || v >= 0x80 =
-                            go t (u `minusPtr` ptr)
-                        | otherwise = do
-                            poke u (fromIntegral v :: Word8)
-                            ascii (t+1) (u `plusPtr` 1)
-                        where v = A.unsafeIndex arr t
-                  ascii (n+1) (ptr `plusPtr` (m+1))
+                  go (n+1) (m+1)
               | w <= 0x7FF -> ensure 2 $ do
                   poke8 m     $ (w `shiftR` 6) + 0xC0
                   poke8 (m+1) $ (w .&. 0x3f) + 0x80
-- 
GitLab