From b7aa0a0b51e79186980e55403cd9018795fe3692 Mon Sep 17 00:00:00 2001
From: "Thomas M. DuBuisson" <Thomas.DuBuisson@gmail.com>
Date: Thu, 17 Nov 2011 13:06:44 -0800
Subject: [PATCH] Add BangPatterns for an accumulator.  Inline decode. 
 Specialize take, drop.

When considering a simple benchmark [1]:

   let p = ((== "en") . U.take 2)
   print . length . filter p . U.lines =<< L.readFile . head =<< getArgs

It's apparent that we are underperforming (compare to the shell script
solution, this take twice as long).  The accumulator of splitAt wasn't
strict (used bang patterns).  Also, even with -O2, the construction /
destruction of the tuple for take & drop's lifting of splitAt wasn't
being optimized away.  Perhaps more investigation should be done re:
GHC, but the cheap solution is to just duplicate code for take and
drop, which is what this patch does.

Original: user    0m1.704s
Final: user    0m1.092s

[1] http://stackoverflow.com/questions/8172889/quickly-parse-large-utf-8-text-file-in-haskell
---
 Data/ByteString/Lazy/UTF8.hs | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/Data/ByteString/Lazy/UTF8.hs b/Data/ByteString/Lazy/UTF8.hs
index e4420fe..2056b85 100644
--- a/Data/ByteString/Lazy/UTF8.hs
+++ b/Data/ByteString/Lazy/UTF8.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP, BangPatterns #-}
 #if __GLASGOW_HASKELL__ >= 701
 {-# LANGUAGE Trustworthy #-}
 #endif
@@ -123,26 +123,35 @@ decode bs = do (c,cs) <- buncons bs
               _ -> (replacement_char, 3)
           _ -> (replacement_char, 2)
       _ -> (replacement_char, 1)
+{-# INLINE decode #-}
 
 
 -- | Split after a given number of characters.
 -- Negative values are treated as if they are 0.
 splitAt :: Int64 -> B.ByteString -> (B.ByteString,B.ByteString)
 splitAt x bs = loop 0 x bs
-  where loop a n _ | n <= 0 = B.splitAt a bs
-        loop a n bs1 = case decode bs1 of
+  where loop !a n _ | n <= 0 = B.splitAt a bs
+        loop !a n bs1 = case decode bs1 of
                          Just (_,y) -> loop (a+y) (n-1) (B.drop y bs1)
                          Nothing    -> (bs, B.empty)
 
 -- | @take n s@ returns the first @n@ characters of @s@.
 -- If @s@ has less than @n@ characters, then we return the whole of @s@.
 take :: Int64 -> B.ByteString -> B.ByteString
-take n bs = fst (splitAt n bs)
+take x bs = loop 0 x bs
+  where loop !a n _ | n <= 0 = B.take a bs
+        loop !a n bs1 = case decode bs1 of
+                         Just (_,y) -> loop (a+y) (n-1) (B.drop y bs1)
+                         Nothing    -> bs
 
 -- | @drop n s@ returns the @s@ without its first @n@ characters.
 -- If @s@ has less than @n@ characters, then we return an empty string.
 drop :: Int64 -> B.ByteString -> B.ByteString
-drop n bs = snd (splitAt n bs)
+drop x bs = loop 0 x bs
+  where loop !a n _ | n <= 0 = B.drop a bs
+        loop !a n bs1 = case decode bs1 of
+                         Just (_,y) -> loop (a+y) (n-1) (B.drop y bs1)
+                         Nothing    -> B.empty
 
 -- | Split a string into two parts:  the first is the longest prefix
 -- that contains only characters that satisfy the predicate; the second
-- 
GitLab