diff --git a/Data/Text/Encoding.hs b/Data/Text/Encoding.hs
index e6a99e0839b3f582a95d8fcc0fc2e8e659d3e445..b0fd388eedc7c203bcb72ce27bb6b555ad25aa32 100644
--- a/Data/Text/Encoding.hs
+++ b/Data/Text/Encoding.hs
@@ -153,8 +153,9 @@ encodeUtf8 (Text arr off len) = unsafePerformIO $ do
    where
     loop n1 m1 ptr = go n1 m1
      where
+      offLen = off + len
       go !n !m
-        | n == off+len = return (PS fp 0 m)
+        | n == offLen = return (PS fp 0 m)
         | otherwise = do
             let poke8 k v = poke (ptr `plusPtr` k) (fromIntegral v :: Word8)
                 ensure k act
@@ -168,8 +169,17 @@ encodeUtf8 (Text arr off len) = unsafePerformIO $ do
                 {-# INLINE ensure #-}
             case A.unsafeIndex arr n of
              w| w <= 0x7F  -> ensure 1 $ do
-                  poke8 m w
-                  go (n+1) (m+1)
+                  poke (ptr `plusPtr` m) (fromIntegral w :: Word8)
+                  -- A single ASCII octet is likely to start a run of
+                  -- them.  We see better performance when we
+                  -- special-case this assumption.
+                  let ascii !t !u
+                        | t == offLen || u == size || v >= 0x80 = go t u
+                        | otherwise = do
+                            poke (ptr `plusPtr` u) (fromIntegral v :: Word8)
+                            ascii (t+1) (u+1)
+                        where v = A.unsafeIndex arr t
+                  ascii (n+1) (m+1)
               | w <= 0x7FF -> ensure 2 $ do
                   poke8 m     $ (w `shiftR` 6) + 0xC0
                   poke8 (m+1) $ (w .&. 0x3f) + 0x80