Commit 8dc09ca9 authored by Bodigrim's avatar Bodigrim
Browse files

Implement utf8LengthByLeader via bit magic

parent d057bd74
......@@ -39,7 +39,7 @@ module Data.Text.Internal.Encoding.Utf8
import Control.Exception (assert)
import GHC.Stack (HasCallStack)
#endif
import Data.Bits (Bits(..))
import Data.Bits (Bits(..), FiniteBits(..))
import Data.Char (ord)
import GHC.Exts
import GHC.Word (Word8(..))
......@@ -68,12 +68,21 @@ utf8Length :: Char -> Int
utf8Length (C# c) = I# ((1# +# geChar# c (chr# 0x80#)) +# (geChar# c (chr# 0x800#) +# geChar# c (chr# 0x10000#)))
{-# INLINE utf8Length #-}
-- This is a branchless version of
-- utf8LengthByLeader w
-- | w < 0x80 = 1
-- | w < 0xE0 = 2
-- | w < 0xF0 = 3
-- | otherwise = 4
--
-- c `xor` I# (c# <=# 0#) is a branchless equivalent of c `max` 1.
-- It is crucial to write c# <=# 0# and not c# ==# 0#, otherwise
-- GHC is tempted to "optimize" by introduction of branches.
utf8LengthByLeader :: Word8 -> Int
utf8LengthByLeader w
| w < 0x80 = 1
| w < 0xE0 = 2
| w < 0xF0 = 3
| otherwise = 4
utf8LengthByLeader w = c `xor` I# (c# <=# 0#)
where
!c@(I# c#) = countLeadingZeros (complement w)
{-# INLINE utf8LengthByLeader #-}
ord2 ::
#if defined(ASSERTS)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment