Commit cde3a77f authored by David Feuer's avatar David Feuer Committed by Joachim Breitner

Make Data.List.Inits fast

Fixes #9345. Use a modified banker's queue to achieve amortized optimal
performance for inits. The previous implementation was extremely slow.

Reviewed By: nomeata, ekmett, austin

Differential Revision: https://phabricator.haskell.org/D329
parent 4b69d96b
......@@ -208,6 +208,7 @@ module Data.OldList
) where
import Data.Maybe
import Data.Bits ( (.&.) )
import Data.Char ( isSpace )
import Data.Ord ( comparing )
import Data.Tuple ( fst, snd )
......@@ -767,11 +768,16 @@ groupBy eq (x:xs) = (x:ys) : groupBy eq zs
-- > inits "abc" == ["","a","ab","abc"]
--
-- Note that 'inits' has the following strictness property:
-- @inits (xs ++ _|_) = inits xs ++ _|_@
--
-- In particular,
-- @inits _|_ = [] : _|_@
inits :: [a] -> [[a]]
inits xs = [] : case xs of
[] -> []
x : xs' -> map (x :) (inits xs')
inits = map toListSB . scanl' snocSB emptySB
{-# NOINLINE inits #-}
-- We do not allow inits to inline, because it plays havoc with Call Arity
-- if it fuses with a consumer, and it would generally lead to serious
-- loss of sharing if allowed to fuse with a producer.
-- | The 'tails' function returns all final segments of the argument,
-- longest first. For example,
......@@ -1130,3 +1136,51 @@ unwords [] = ""
unwords [w] = w
unwords (w:ws) = w ++ ' ' : unwords ws
#endif
{- A "SnocBuilder" is a version of Chris Okasaki's banker's queue that supports
toListSB instead of uncons. In single-threaded use, its performance
characteristics are similar to John Hughes's functional difference lists, but
likely somewhat worse. In heavily persistent settings, however, it does much
better, because it takes advantage of sharing. The banker's queue guarantees
(amortized) O(1) snoc and O(1) uncons, meaning that we can think of toListSB as
an O(1) conversion to a list-like structure a constant factor slower than
normal lists--we pay the O(n) cost incrementally as we consume the list. Using
functional difference lists, on the other hand, we would have to pay the whole
cost up front for each output list. -}
{- We store a front list, a rear list, and the length of the queue. Because we
only snoc onto the queue and never uncons, we know it's time to rotate when the
length of the queue plus 1 is a power of 2. Note that we rely on the value of
the length field only for performance. In the unlikely event of overflow, the
performance will suffer but the semantics will remain correct. -}
data SnocBuilder a = SnocBuilder {-# UNPACK #-} !Word [a] [a]
{- Smart constructor that rotates the builder when lp is one minus a power of
2. Does not rotate very small builders because doing so is not worth the
trouble. The lp < 255 test goes first because the power-of-2 test gives awful
branch prediction for very small n (there are 5 powers of 2 between 1 and
16). Putting the well-predicted lp < 255 test first avoids branching on the
power-of-2 test until powers of 2 have become sufficiently rare to be predicted
well. -}
{-# INLINE sb #-}
sb :: Word -> [a] -> [a] -> SnocBuilder a
sb lp f r
| lp < 255 || (lp .&. (lp + 1)) /= 0 = SnocBuilder lp f r
| otherwise = SnocBuilder lp (f ++ reverse r) []
-- The empty builder
emptySB :: SnocBuilder a
emptySB = SnocBuilder 0 [] []
-- Add an element to the end of a queue.
snocSB :: SnocBuilder a -> a -> SnocBuilder a
snocSB (SnocBuilder lp f r) x = sb (lp + 1) f (x:r)
-- Convert a builder to a list
toListSB :: SnocBuilder a -> [a]
toListSB (SnocBuilder _ f r) = f ++ reverse r
......@@ -23,6 +23,7 @@ test('readInteger001', normal, compile_and_run, [''])
test('readFixed001', normal, compile_and_run, [''])
test('lex001', normal, compile_and_run, [''])
test('take001', extra_run_opts('1'), compile_and_run, [''])
test('inits', normal, compile_and_run, [''])
test('genericNegative001', extra_run_opts('-1'), compile_and_run, [''])
test('ix001', normal, compile_and_run, [''])
......
{-# LANGUAGE RankNTypes #-}
module Main (main) where
import Data.List
-- A simple implementation of inits that should be obviously correct.
{-# NOINLINE initsR #-}
initsR :: [a] -> [[a]]
initsR = map reverse . scanl (flip (:)) []
-- The inits implementation added in 7.10 uses a queue rotated around
-- powers of 2, starting the rotation only at size 255, so we want to check
-- around powers of 2 and around the switch.
ranges :: [Int]
ranges = [0..20] ++ [252..259] ++ [508..515]
simple :: (forall a . [a] -> [[a]]) -> [[[Int]]]
simple impl = [impl [1..n] | n <- ranges]
-- We want inits (xs ++ undefined) = inits xs ++ undefined
laziness :: Bool
laziness = [take (n+1) (inits $ [1..n] ++ undefined) | n <- ranges]
== simple inits
main :: IO ()
main | simple initsR /= simple inits = error "inits failed simple test"
| not laziness = error "inits failed laziness test"
| otherwise = return ()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment