Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
Glasgow Haskell Compiler
Packages
text
Commits
2e632506
Commit
2e632506
authored
Jul 14, 2021
by
Bodigrim
Browse files
Enable asserts in ord{2,3,4} and inline
parent
6b1b8938
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/Data/Text/Internal/Encoding/Utf8.hs
View file @
2e632506
...
...
@@ -35,6 +35,10 @@ module Data.Text.Internal.Encoding.Utf8
,
validate4
)
where
#
if
defined
(
ASSERTS
)
import
Control.Exception
(
assert
)
import
GHC.Stack
(
HasCallStack
)
#
endif
import
Data.Bits
(
Bits
(
..
))
import
Data.Char
(
ord
)
import
GHC.Exts
...
...
@@ -71,34 +75,48 @@ utf8LengthByLeader w
|
w
<
0xF0
=
3
|
otherwise
=
4
ord2
::
Char
->
(
Word8
,
Word8
)
ord2
::
#
if
defined
(
ASSERTS
)
HasCallStack
=>
#
endif
Char
->
(
Word8
,
Word8
)
ord2
c
=
-- ord2 is used only in test suite to construct a deliberately invalid ByteString,
-- actually violating the assertion, so it is commented out
-- assert (n >= 0x80 && n <= 0x07ff)
#
if
defined
(
ASSERTS
)
assert
(
n
>=
0x80
&&
n
<=
0x07ff
)
#
endif
(
x1
,
x2
)
where
n
=
ord
c
x1
=
intToWord8
$
(
n
`
shiftR
`
6
)
+
0xC0
x2
=
intToWord8
$
(
n
.&.
0x3F
)
+
0x80
{-# INLINE ord2 #-}
ord3
::
Char
->
(
Word8
,
Word8
,
Word8
)
ord3
::
#
if
defined
(
ASSERTS
)
HasCallStack
=>
#
endif
Char
->
(
Word8
,
Word8
,
Word8
)
ord3
c
=
-- ord3 is used only in test suite to construct a deliberately invalid ByteString,
-- actually violating the assertion, so it is commented out
-- assert (n >= 0x0800 && n <= 0xffff)
#
if
defined
(
ASSERTS
)
assert
(
n
>=
0x0800
&&
n
<=
0xffff
)
#
endif
(
x1
,
x2
,
x3
)
where
n
=
ord
c
x1
=
intToWord8
$
(
n
`
shiftR
`
12
)
+
0xE0
x2
=
intToWord8
$
((
n
`
shiftR
`
6
)
.&.
0x3F
)
+
0x80
x3
=
intToWord8
$
(
n
.&.
0x3F
)
+
0x80
{-# INLINE ord3 #-}
ord4
::
Char
->
(
Word8
,
Word8
,
Word8
,
Word8
)
ord4
::
#
if
defined
(
ASSERTS
)
HasCallStack
=>
#
endif
Char
->
(
Word8
,
Word8
,
Word8
,
Word8
)
ord4
c
=
-- ord4 is used only in test suite to construct a deliberately invalid ByteString,
-- actually violating the assertion, so it is commented out
-- assert (n >= 0x10000)
#
if
defined
(
ASSERTS
)
assert
(
n
>=
0x10000
)
#
endif
(
x1
,
x2
,
x3
,
x4
)
where
n
=
ord
c
...
...
@@ -106,6 +124,7 @@ ord4 c =
x2
=
intToWord8
$
((
n
`
shiftR
`
12
)
.&.
0x3F
)
+
0x80
x3
=
intToWord8
$
((
n
`
shiftR
`
6
)
.&.
0x3F
)
+
0x80
x4
=
intToWord8
$
(
n
.&.
0x3F
)
+
0x80
{-# INLINE ord4 #-}
chr2
::
Word8
->
Word8
->
Char
chr2
(
W8
#
x1
#
)
(
W8
#
x2
#
)
=
C
#
(
chr
#
(
z1
#
+#
z2
#
))
...
...
@@ -127,7 +146,7 @@ chr3 (W8# x1#) (W8# x2#) (W8# x3#) = C# (chr# (z1# +# z2# +# z3#))
!
z3
#
=
y3
#
-#
0x80
#
{-# INLINE chr3 #-}
chr4
::
Word8
->
Word8
->
Word8
->
Word8
->
Char
chr4
::
Word8
->
Word8
->
Word8
->
Word8
->
Char
chr4
(
W8
#
x1
#
)
(
W8
#
x2
#
)
(
W8
#
x3
#
)
(
W8
#
x4
#
)
=
C
#
(
chr
#
(
z1
#
+#
z2
#
+#
z3
#
+#
z4
#
))
where
...
...
tests/Tests/Properties/Transcoding.hs
View file @
2e632506
...
...
@@ -7,7 +7,7 @@ module Tests.Properties.Transcoding
)
where
import
Control.Applicative
((
<$>
),
(
<*>
))
import
Data.Bits
((
.&.
))
import
Data.Bits
((
.&.
)
,
shiftR
)
import
Data.Char
(
chr
,
ord
)
import
Data.Text.Encoding.Error
(
UnicodeException
)
import
Data.Text.Internal.Encoding.Utf8
(
ord2
,
ord3
,
ord4
)
...
...
@@ -141,21 +141,21 @@ genInvalidUTF8 = B.pack <$> oneof [
-- short 4-byte sequence
,
(
:
)
<$>
choose
(
0xF0
,
0xF4
)
<*>
upTo
2
contByte
-- overlong encoding
,
do
k
<-
choose
(
0
,
0xFFFF
)
let
c
=
chr
k
,
do
k
<-
choose
(
0
::
Int
,
0xFFFF
)
case
k
of
_
|
k
<
0x80
->
oneof
[
let
(
w
,
x
)
=
ord2
c
in
return
[
w
,
x
]
,
let
(
w
,
x
,
y
)
=
ord3
c
in
return
[
w
,
x
,
y
]
,
let
(
w
,
x
,
y
,
z
)
=
ord4
c
in
return
[
w
,
x
,
y
,
z
]
]
|
k
<
0x7FF
->
oneof
[
let
(
w
,
x
,
y
)
=
ord3
c
in
return
[
w
,
x
,
y
]
,
let
(
w
,
x
,
y
,
z
)
=
ord4
c
in
return
[
w
,
x
,
y
,
z
]
]
|
otherwise
->
let
(
w
,
x
,
y
,
z
)
=
ord4
c
in
return
[
w
,
x
,
y
,
z
]
_
|
k
<
0x80
->
elements
[
ord2_
k
,
ord3_
k
,
ord4_
k
]
|
k
<
0x7FF
->
elements
[
ord3_
k
,
ord4_
k
]
|
otherwise
->
return
(
ord4_
k
)
]
where
contByte
=
(
0x80
+
)
<$>
choose
(
0
,
0x3f
)
upTo
n
gen
=
do
k
<-
choose
(
0
,
n
)
vectorOf
k
gen
-- Data.Text.Internal.Encoding.Utf8.ord{2,3,4} withous sanity checks
ord2_
n
=
map
fromIntegral
[(
n
`
shiftR
`
6
)
+
0xC0
,
(
n
.&.
0x3F
)
+
0x80
]
ord3_
n
=
map
fromIntegral
[(
n
`
shiftR
`
12
)
+
0xE0
,
((
n
`
shiftR
`
6
)
.&.
0x3F
)
+
0x80
,
(
n
.&.
0x3F
)
+
0x80
]
ord4_
n
=
map
fromIntegral
[(
n
`
shiftR
`
18
)
+
0xF0
,
((
n
`
shiftR
`
12
)
.&.
0x3F
)
+
0x80
,
((
n
`
shiftR
`
6
)
.&.
0x3F
)
+
0x80
,
(
n
.&.
0x3F
)
+
0x80
]
decodeLL
::
BL
.
ByteString
->
TL
.
Text
decodeLL
=
EL
.
decodeUtf8With
E
.
lenientDecode
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment