Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
Glasgow Haskell Compiler
Packages
text
Commits
872cfad3
Commit
872cfad3
authored
Jun 25, 2021
by
Bodigrim
Browse files
Speed up encodeUtf8 for strict and lazy Text
parent
3ae58ac1
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/Data/Text/Array.hs
View file @
872cfad3
...
...
@@ -39,6 +39,7 @@ module Data.Text.Array
,
unsafeFreeze
,
unsafeIndex
,
new
,
newPinned
,
unsafeWrite
)
where
...
...
@@ -71,6 +72,17 @@ new (I# len#)
(
#
s2
#
,
marr
#
#
)
->
(
#
s2
#
,
MutableByteArray
marr
#
#
)
{-# INLINE new #-}
-- | Create an uninitialized mutable pinned array.
newPinned
::
forall
s
.
Int
->
ST
s
(
MArray
s
)
newPinned
(
I
#
len
#
)
#
if
defined
(
ASSERTS
)
|
I
#
len
#
<
0
=
error
"Data.Text.Array.newPinned: size overflow"
#
endif
|
otherwise
=
ST
$
\
s1
#
->
case
newPinnedByteArray
#
len
#
s1
#
of
(
#
s2
#
,
marr
#
#
)
->
(
#
s2
#
,
MutableByteArray
marr
#
#
)
{-# INLINE newPinned #-}
-- | Freeze a mutable array. Do not mutate the 'MArray' afterwards!
unsafeFreeze
::
MArray
s
->
ST
s
Array
unsafeFreeze
(
MutableByteArray
marr
)
=
ST
$
\
s1
#
->
...
...
src/Data/Text/Encoding.hs
View file @
872cfad3
...
...
@@ -65,7 +65,7 @@ import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO)
import
Control.Exception
(
evaluate
,
try
,
throwIO
,
ErrorCall
(
ErrorCall
))
import
Control.Monad.ST
(
runST
)
import
Data.ByteString
as
B
import
qualified
Data.ByteString.
Short.
Internal
as
SBS
import
qualified
Data.ByteString.Internal
as
B
import
Data.Foldable
(
traverse_
)
import
Data.Text.Encoding.Error
(
OnDecodeError
,
UnicodeException
,
strictDecode
,
lenientDecode
)
import
Data.Text.Internal
(
Text
(
..
),
safe
,
text
)
...
...
@@ -79,7 +79,8 @@ import Foreign.C.Types (CSize)
import
Foreign.Marshal.Utils
(
with
)
import
Foreign.Ptr
(
Ptr
,
minusPtr
,
nullPtr
,
plusPtr
)
import
Foreign.Storable
(
Storable
,
peek
,
poke
)
import
GHC.Base
(
MutableByteArray
#
)
import
GHC.Exts
(
MutableByteArray
#
,
byteArrayContents
#
,
unsafeCoerce
#
)
import
GHC.ForeignPtr
(
ForeignPtr
(
..
),
ForeignPtrContents
(
PlainPtr
))
import
qualified
Data.ByteString.Builder
as
B
import
qualified
Data.ByteString.Builder.Internal
as
B
hiding
(
empty
,
append
)
import
qualified
Data.ByteString.Builder.Prim
as
BP
...
...
@@ -436,9 +437,16 @@ encodeUtf8BuilderEscaped be =
-- | Encode text using UTF-8 encoding.
encodeUtf8
::
Text
->
ByteString
encodeUtf8
(
Text
(
A
.
ByteArray
arr
)
off
len
)
encodeUtf8
(
Text
arr
off
len
)
|
len
==
0
=
B
.
empty
|
otherwise
=
B
.
take
len
$
B
.
drop
off
$
SBS
.
fromShort
$
SBS
.
SBS
arr
-- It would be easier to use Data.ByteString.Short.fromShort and slice later,
-- but this is undesirable when len is significantly smaller than length arr.
|
otherwise
=
unsafeDupablePerformIO
$
do
marr
@
(
A
.
MutableByteArray
mba
)
<-
unsafeSTToIO
$
A
.
newPinned
len
unsafeSTToIO
$
A
.
copyI
len
marr
0
arr
off
let
fp
=
ForeignPtr
(
byteArrayContents
#
(
unsafeCoerce
#
mba
))
(
PlainPtr
mba
)
pure
$
B
.
fromForeignPtr
fp
0
len
-- | Decode text from little endian UTF-16 encoding.
decodeUtf16LEWith
::
OnDecodeError
->
ByteString
->
Text
...
...
src/Data/Text/Lazy/Encoding.hs
View file @
872cfad3
...
...
@@ -55,7 +55,6 @@ import Data.Text.Internal.Lazy (Text(..), chunk, empty, foldrChunks)
import
Data.Word
(
Word8
)
import
qualified
Data.ByteString
as
S
import
qualified
Data.ByteString.Builder
as
B
import
qualified
Data.ByteString.Builder.Extra
as
B
(
safeStrategy
,
toLazyByteStringWith
)
import
qualified
Data.ByteString.Builder.Prim
as
BP
import
qualified
Data.ByteString.Lazy
as
B
import
qualified
Data.ByteString.Lazy.Internal
as
B
...
...
@@ -139,17 +138,7 @@ decodeUtf8' bs = unsafeDupablePerformIO $ do
-- | Encode text using UTF-8 encoding.
encodeUtf8
::
Text
->
B
.
ByteString
encodeUtf8
Empty
=
B
.
empty
encodeUtf8
lt
@
(
Chunk
t
_
)
=
B
.
toLazyByteStringWith
strategy
B
.
empty
$
encodeUtf8Builder
lt
where
-- To improve our small string performance, we use a strategy that
-- allocates a buffer that is guaranteed to be large enough for the
-- encoding of the first chunk, but not larger than the default
-- B.smallChunkSize. We clamp the firstChunkSize to ensure that we don't
-- generate too large buffers which hamper streaming.
firstChunkSize
=
min
B
.
smallChunkSize
(
4
*
(
T
.
length
t
+
1
))
strategy
=
B
.
safeStrategy
firstChunkSize
B
.
defaultChunkSize
encodeUtf8
=
foldrChunks
(
B
.
Chunk
.
TE
.
encodeUtf8
)
B
.
Empty
-- | Encode text to a ByteString 'B.Builder' using UTF-8 encoding.
--
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment