Commit 2e632506 authored by Bodigrim's avatar Bodigrim
Browse files

Enable asserts in ord{2,3,4} and inline

parent 6b1b8938
......@@ -35,6 +35,10 @@ module Data.Text.Internal.Encoding.Utf8
, validate4
) where
#if defined(ASSERTS)
import Control.Exception (assert)
import GHC.Stack (HasCallStack)
#endif
import Data.Bits (Bits(..))
import Data.Char (ord)
import GHC.Exts
......@@ -71,34 +75,48 @@ utf8LengthByLeader w
| w < 0xF0 = 3
| otherwise = 4
ord2 :: Char -> (Word8,Word8)
ord2 ::
#if defined(ASSERTS)
HasCallStack =>
#endif
Char -> (Word8,Word8)
ord2 c =
-- ord2 is used only in test suite to construct a deliberately invalid ByteString,
-- actually violating the assertion, so it is commented out
-- assert (n >= 0x80 && n <= 0x07ff)
#if defined(ASSERTS)
assert (n >= 0x80 && n <= 0x07ff)
#endif
(x1,x2)
where
n = ord c
x1 = intToWord8 $ (n `shiftR` 6) + 0xC0
x2 = intToWord8 $ (n .&. 0x3F) + 0x80
{-# INLINE ord2 #-}
ord3 :: Char -> (Word8,Word8,Word8)
ord3 ::
#if defined(ASSERTS)
HasCallStack =>
#endif
Char -> (Word8,Word8,Word8)
ord3 c =
-- ord3 is used only in test suite to construct a deliberately invalid ByteString,
-- actually violating the assertion, so it is commented out
-- assert (n >= 0x0800 && n <= 0xffff)
#if defined(ASSERTS)
assert (n >= 0x0800 && n <= 0xffff)
#endif
(x1,x2,x3)
where
n = ord c
x1 = intToWord8 $ (n `shiftR` 12) + 0xE0
x2 = intToWord8 $ ((n `shiftR` 6) .&. 0x3F) + 0x80
x3 = intToWord8 $ (n .&. 0x3F) + 0x80
{-# INLINE ord3 #-}
ord4 :: Char -> (Word8,Word8,Word8,Word8)
ord4 ::
#if defined(ASSERTS)
HasCallStack =>
#endif
Char -> (Word8,Word8,Word8,Word8)
ord4 c =
-- ord4 is used only in test suite to construct a deliberately invalid ByteString,
-- actually violating the assertion, so it is commented out
-- assert (n >= 0x10000)
#if defined(ASSERTS)
assert (n >= 0x10000)
#endif
(x1,x2,x3,x4)
where
n = ord c
......@@ -106,6 +124,7 @@ ord4 c =
x2 = intToWord8 $ ((n `shiftR` 12) .&. 0x3F) + 0x80
x3 = intToWord8 $ ((n `shiftR` 6) .&. 0x3F) + 0x80
x4 = intToWord8 $ (n .&. 0x3F) + 0x80
{-# INLINE ord4 #-}
chr2 :: Word8 -> Word8 -> Char
chr2 (W8# x1#) (W8# x2#) = C# (chr# (z1# +# z2#))
......@@ -127,7 +146,7 @@ chr3 (W8# x1#) (W8# x2#) (W8# x3#) = C# (chr# (z1# +# z2# +# z3#))
!z3# = y3# -# 0x80#
{-# INLINE chr3 #-}
chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) =
C# (chr# (z1# +# z2# +# z3# +# z4#))
where
......
......@@ -7,7 +7,7 @@ module Tests.Properties.Transcoding
) where
import Control.Applicative ((<$>), (<*>))
import Data.Bits ((.&.))
import Data.Bits ((.&.), shiftR)
import Data.Char (chr, ord)
import Data.Text.Encoding.Error (UnicodeException)
import Data.Text.Internal.Encoding.Utf8 (ord2, ord3, ord4)
......@@ -141,21 +141,21 @@ genInvalidUTF8 = B.pack <$> oneof [
-- short 4-byte sequence
, (:) <$> choose (0xF0, 0xF4) <*> upTo 2 contByte
-- overlong encoding
, do k <- choose (0,0xFFFF)
let c = chr k
, do k <- choose (0 :: Int, 0xFFFF)
case k of
_ | k < 0x80 -> oneof [ let (w,x) = ord2 c in return [w,x]
, let (w,x,y) = ord3 c in return [w,x,y]
, let (w,x,y,z) = ord4 c in return [w,x,y,z] ]
| k < 0x7FF -> oneof [ let (w,x,y) = ord3 c in return [w,x,y]
, let (w,x,y,z) = ord4 c in return [w,x,y,z] ]
| otherwise -> let (w,x,y,z) = ord4 c in return [w,x,y,z]
_ | k < 0x80 -> elements [ord2_ k, ord3_ k, ord4_ k]
| k < 0x7FF -> elements [ord3_ k, ord4_ k]
| otherwise -> return (ord4_ k)
]
where
contByte = (0x80 +) <$> choose (0, 0x3f)
upTo n gen = do
k <- choose (0,n)
vectorOf k gen
-- Data.Text.Internal.Encoding.Utf8.ord{2,3,4} withous sanity checks
ord2_ n = map fromIntegral [(n `shiftR` 6) + 0xC0, (n .&. 0x3F) + 0x80]
ord3_ n = map fromIntegral [(n `shiftR` 12) + 0xE0, ((n `shiftR` 6) .&. 0x3F) + 0x80, (n .&. 0x3F) + 0x80]
ord4_ n = map fromIntegral [(n `shiftR` 18) + 0xF0, ((n `shiftR` 12) .&. 0x3F) + 0x80, ((n `shiftR` 6) .&. 0x3F) + 0x80, (n .&. 0x3F) + 0x80]
decodeLL :: BL.ByteString -> TL.Text
decodeLL = EL.decodeUtf8With E.lenientDecode
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment