Commit 55358a4c authored by Bodigrim's avatar Bodigrim
Browse files

Speed up lines and unlines

parent 634c5657
......@@ -1754,42 +1754,29 @@ isAsciiSpace :: Word8 -> Bool
isAsciiSpace w = w .&. 0x50 == 0 && w < 0x80 && (w == 0x20 || w - 0x09 < 5)
{-# INLINE isAsciiSpace #-}
-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at
-- newline 'Char's. The resulting strings do not contain newlines.
-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at newline characters
-- @'\\n'@ (LF, line feed). The resulting strings do not contain newlines.
--
-- 'lines' __does not__ treat @'\\r'@ (CR, carriage return) as a newline character.
lines :: Text -> [Text]
lines ps | null ps = []
| otherwise = h : if null t
then []
else lines (unsafeTail t)
where (# h,t #) = span_ (/= '\n') ps
lines (Text arr@(A.ByteArray arr#) off len) = go off
where
go !n
| n >= len + off = []
| delta < 0 = [Text arr n (len + off - n)]
| otherwise = Text arr n delta : go (n + delta + 1)
where
delta = cSsizeToInt $ unsafeDupablePerformIO $
memchr arr# (intToCSize n) (intToCSize (len + off - n)) 0x0A
{-# INLINE lines #-}
{-
-- | /O(n)/ Portably breaks a 'Text' up into a list of 'Text's at line
-- boundaries.
--
-- A line boundary is considered to be either a line feed, a carriage
-- return immediately followed by a line feed, or a carriage return.
-- This accounts for both Unix and Windows line ending conventions,
-- and for the old convention used on Mac OS 9 and earlier.
lines' :: Text -> [Text]
lines' ps | null ps = []
| otherwise = h : case uncons t of
Nothing -> []
Just (c,t')
| c == '\n' -> lines t'
| c == '\r' -> case uncons t' of
Just ('\n',t'') -> lines t''
_ -> lines t'
where (h,t) = span notEOL ps
notEOL c = c /= '\n' && c /= '\r'
{-# INLINE lines' #-}
-}
foreign import ccall unsafe "_hs_text_memchr" memchr
:: ByteArray# -> CSize -> CSize -> Word8 -> IO CSsize
-- | /O(n)/ Joins lines, after appending a terminating newline to
-- each.
unlines :: [Text] -> Text
unlines = concat . L.map (`snoc` '\n')
unlines = concat . L.foldr (\t acc -> t : singleton '\n' : acc) []
{-# INLINE unlines #-}
-- | /O(n)/ Joins words using single space characters.
......
......@@ -1411,13 +1411,32 @@ chunksOf k = go
| otherwise -> a : go b
{-# INLINE chunksOf #-}
-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at
-- newline 'Char's. The resulting strings do not contain newlines.
-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at newline characters
-- @'\\n'@ (LF, line feed). The resulting strings do not contain newlines.
--
-- 'lines' __does not__ treat @'\\r'@ (CR, carriage return) as a newline character.
lines :: Text -> [Text]
lines Empty = []
lines t = let (l,t') = break ((==) '\n') t
in l : if null t' then []
else lines (tail t')
lines (Chunk c cs)
| hasNlEnd c = P.map fromStrict (T.lines c) ++ lines cs
| otherwise = case T.lines c of
[] -> error "lines: unexpected empty chunk"
l : ls -> go l ls cs
where
go l [] Empty = [fromStrict l]
go l [] (Chunk x xs) = case T.lines x of
[] -> error "lines: unexpected empty chunk"
[xl]
| hasNlEnd x -> chunk l (fromStrict xl) : lines xs
| otherwise -> go (l `T.append` xl) [] xs
xl : yl : yls -> chunk l (fromStrict xl) :
if hasNlEnd x
then P.map fromStrict (yl : yls) ++ lines xs
else go yl yls xs
go l (m : ms) xs = fromStrict l : go m ms xs
hasNlEnd :: T.Text -> Bool
hasNlEnd (T.Text arr off len) = A.unsafeIndex arr (off + len - 1) == 0x0A
-- | /O(n)/ Breaks a 'Text' up into a list of words, delimited by 'Char's
-- representing white space.
......@@ -1428,7 +1447,7 @@ words = L.filter (not . null) . split isSpace
-- | /O(n)/ Joins lines, after appending a terminating newline to
-- each.
unlines :: [Text] -> Text
unlines = concat . L.map (`snoc` '\n')
unlines = concat . L.foldr (\t acc -> t : singleton '\n' : acc) []
{-# INLINE unlines #-}
-- | /O(n)/ Joins words using single space characters.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment