Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
Glasgow Haskell Compiler
Packages
text
Commits
55358a4c
Commit
55358a4c
authored
Aug 30, 2021
by
Bodigrim
Browse files
Speed up lines and unlines
parent
634c5657
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/Data/Text.hs
View file @
55358a4c
...
...
@@ -1754,42 +1754,29 @@ isAsciiSpace :: Word8 -> Bool
isAsciiSpace
w
=
w
.&.
0x50
==
0
&&
w
<
0x80
&&
(
w
==
0x20
||
w
-
0x09
<
5
)
{-# INLINE isAsciiSpace #-}
-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at
-- newline 'Char's. The resulting strings do not contain newlines.
-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at newline characters
-- @'\\n'@ (LF, line feed). The resulting strings do not contain newlines.
--
-- 'lines' __does not__ treat @'\\r'@ (CR, carriage return) as a newline character.
lines
::
Text
->
[
Text
]
lines
ps
|
null
ps
=
[]
|
otherwise
=
h
:
if
null
t
then
[]
else
lines
(
unsafeTail
t
)
where
(
#
h
,
t
#
)
=
span_
(
/=
'
\n
'
)
ps
lines
(
Text
arr
@
(
A
.
ByteArray
arr
#
)
off
len
)
=
go
off
where
go
!
n
|
n
>=
len
+
off
=
[]
|
delta
<
0
=
[
Text
arr
n
(
len
+
off
-
n
)]
|
otherwise
=
Text
arr
n
delta
:
go
(
n
+
delta
+
1
)
where
delta
=
cSsizeToInt
$
unsafeDupablePerformIO
$
memchr
arr
#
(
intToCSize
n
)
(
intToCSize
(
len
+
off
-
n
))
0x0A
{-# INLINE lines #-}
{-
-- | /O(n)/ Portably breaks a 'Text' up into a list of 'Text's at line
-- boundaries.
--
-- A line boundary is considered to be either a line feed, a carriage
-- return immediately followed by a line feed, or a carriage return.
-- This accounts for both Unix and Windows line ending conventions,
-- and for the old convention used on Mac OS 9 and earlier.
lines' :: Text -> [Text]
lines' ps | null ps = []
| otherwise = h : case uncons t of
Nothing -> []
Just (c,t')
| c == '\n' -> lines t'
| c == '\r' -> case uncons t' of
Just ('\n',t'') -> lines t''
_ -> lines t'
where (h,t) = span notEOL ps
notEOL c = c /= '\n' && c /= '\r'
{-# INLINE lines' #-}
-}
foreign
import
ccall
unsafe
"_hs_text_memchr"
memchr
::
ByteArray
#
->
CSize
->
CSize
->
Word8
->
IO
CSsize
-- | /O(n)/ Joins lines, after appending a terminating newline to
-- each.
unlines
::
[
Text
]
->
Text
unlines
=
concat
.
L
.
map
(`
snoc
`
'
\n
'
)
unlines
=
concat
.
L
.
foldr
(
\
t
acc
->
t
:
singleton
'
\n
'
:
acc
)
[]
{-# INLINE unlines #-}
-- | /O(n)/ Joins words using single space characters.
...
...
src/Data/Text/Lazy.hs
View file @
55358a4c
...
...
@@ -1411,13 +1411,32 @@ chunksOf k = go
|
otherwise
->
a
:
go
b
{-# INLINE chunksOf #-}
-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at
-- newline 'Char's. The resulting strings do not contain newlines.
-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at newline characters
-- @'\\n'@ (LF, line feed). The resulting strings do not contain newlines.
--
-- 'lines' __does not__ treat @'\\r'@ (CR, carriage return) as a newline character.
lines
::
Text
->
[
Text
]
lines
Empty
=
[]
lines
t
=
let
(
l
,
t'
)
=
break
((
==
)
'
\n
'
)
t
in
l
:
if
null
t'
then
[]
else
lines
(
tail
t'
)
lines
(
Chunk
c
cs
)
|
hasNlEnd
c
=
P
.
map
fromStrict
(
T
.
lines
c
)
++
lines
cs
|
otherwise
=
case
T
.
lines
c
of
[]
->
error
"lines: unexpected empty chunk"
l
:
ls
->
go
l
ls
cs
where
go
l
[]
Empty
=
[
fromStrict
l
]
go
l
[]
(
Chunk
x
xs
)
=
case
T
.
lines
x
of
[]
->
error
"lines: unexpected empty chunk"
[
xl
]
|
hasNlEnd
x
->
chunk
l
(
fromStrict
xl
)
:
lines
xs
|
otherwise
->
go
(
l
`
T
.
append
`
xl
)
[]
xs
xl
:
yl
:
yls
->
chunk
l
(
fromStrict
xl
)
:
if
hasNlEnd
x
then
P
.
map
fromStrict
(
yl
:
yls
)
++
lines
xs
else
go
yl
yls
xs
go
l
(
m
:
ms
)
xs
=
fromStrict
l
:
go
m
ms
xs
hasNlEnd
::
T
.
Text
->
Bool
hasNlEnd
(
T
.
Text
arr
off
len
)
=
A
.
unsafeIndex
arr
(
off
+
len
-
1
)
==
0x0A
-- | /O(n)/ Breaks a 'Text' up into a list of words, delimited by 'Char's
-- representing white space.
...
...
@@ -1428,7 +1447,7 @@ words = L.filter (not . null) . split isSpace
-- | /O(n)/ Joins lines, after appending a terminating newline to
-- each.
unlines
::
[
Text
]
->
Text
unlines
=
concat
.
L
.
map
(`
snoc
`
'
\n
'
)
unlines
=
concat
.
L
.
foldr
(
\
t
acc
->
t
:
singleton
'
\n
'
:
acc
)
[]
{-# INLINE unlines #-}
-- | /O(n)/ Joins words using single space characters.
...
...
Bodigrim
@Bodigrim
mentioned in commit
0630bd31
·
Oct 14, 2021
mentioned in commit
0630bd31
mentioned in commit 0630bd31a9d1c4c9b9adb411cc409958bb4956b4
Toggle commit list
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment