From 9ead2aba5ac72f4acc74b12ec45972f865600916 Mon Sep 17 00:00:00 2001 From: Moritz Angermann <moritz.angermann@gmail.com> Date: Sun, 1 Nov 2020 15:15:16 +0800 Subject: [PATCH] [Cmm Sized] Adjust Int/Word --- src/Data/Text/Array.hs | 13 +++++++++-- src/Data/Text/Internal/Encoding/Utf16.hs | 16 ++++++++++---- src/Data/Text/Internal/Encoding/Utf8.hs | 28 +++++++++++++++--------- src/Data/Text/Internal/Unsafe/Char.hs | 18 +++++++++++---- src/Data/Text/Internal/Unsafe/Shift.hs | 21 +++++++++++++----- text.cabal | 2 +- 6 files changed, 72 insertions(+), 26 deletions(-) diff --git a/src/Data/Text/Array.hs b/src/Data/Text/Array.hs index cf5cb8cb..5a1415be 100644 --- a/src/Data/Text/Array.hs +++ b/src/Data/Text/Array.hs @@ -78,6 +78,15 @@ import GHC.ST (ST(..), runST) import GHC.Word (Word16(..)) import Prelude hiding (length, read) +#if MIN_VERSION_ghc_prim(0,8,0) +import GHC.Base (narrowWord16#, extendWord16#) +#else +import GHC.Prim (Word#) +narrowWord16#, extendWord16# :: Word# -> Word# +narrowWord16# w = w +extendWord16# w = w +#endif + -- | Immutable array type. -- -- The 'Array' constructor is exposed since @text-1.1.1.3@ @@ -153,7 +162,7 @@ bytesInArray n = n `shiftL` 1 unsafeIndex :: Array -> Int -> Word16 unsafeIndex Array{..} i@(I# i#) = CHECK_BOUNDS("unsafeIndex",aLen,i) - case indexWord16Array# aBA i# of r# -> (W16# r#) + case indexWord16Array# aBA i# of r# -> (W16# (narrowWord16# r#)) {-# INLINE unsafeIndex #-} -- | Unchecked write of a mutable array. May return garbage or crash @@ -161,7 +170,7 @@ unsafeIndex Array{..} i@(I# i#) = unsafeWrite :: MArray s -> Int -> Word16 -> ST s () unsafeWrite MArray{..} i@(I# i#) (W16# e#) = ST $ \s1# -> CHECK_BOUNDS("unsafeWrite",maLen,i) - case writeWord16Array# maBA i# e# s1# of + case writeWord16Array# maBA i# (extendWord16# e#) s1# of s2# -> (# s2#, () #) {-# INLINE unsafeWrite #-} diff --git a/src/Data/Text/Internal/Encoding/Utf16.hs b/src/Data/Text/Internal/Encoding/Utf16.hs index e5e3c49e..d2ad6e48 100644 --- a/src/Data/Text/Internal/Encoding/Utf16.hs +++ b/src/Data/Text/Internal/Encoding/Utf16.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE MagicHash, BangPatterns #-} +{-# LANGUAGE MagicHash, BangPatterns, CPP #-} -- | -- Module : Data.Text.Internal.Encoding.Utf16 @@ -23,14 +23,22 @@ module Data.Text.Internal.Encoding.Utf16 , validate2 ) where -import GHC.Exts +import GHC.Exts hiding (extendWord16#) import GHC.Word (Word16(..)) +#if MIN_VERSION_ghc_prim(0,8,0) +import GHC.Base (extendWord16#) +#else +import GHC.Prim (Word#) +extendWord16# :: Word# -> Word# +extendWord16# w = w +#endif + chr2 :: Word16 -> Word16 -> Char chr2 (W16# a#) (W16# b#) = C# (chr# (upper# +# lower# +# 0x10000#)) where - !x# = word2Int# a# - !y# = word2Int# b# + !x# = word2Int# (extendWord16# a#) + !y# = word2Int# (extendWord16# b#) !upper# = uncheckedIShiftL# (x# -# 0xD800#) 10# !lower# = y# -# 0xDC00# {-# INLINE chr2 #-} diff --git a/src/Data/Text/Internal/Encoding/Utf8.hs b/src/Data/Text/Internal/Encoding/Utf8.hs index 5ec469b2..e378d8c1 100644 --- a/src/Data/Text/Internal/Encoding/Utf8.hs +++ b/src/Data/Text/Internal/Encoding/Utf8.hs @@ -43,9 +43,17 @@ import Control.Exception (assert) import Data.Bits ((.&.)) import Data.Text.Internal.Unsafe.Char (ord) import Data.Text.Internal.Unsafe.Shift (shiftR) -import GHC.Exts +import GHC.Exts hiding (extendWord8#) import GHC.Word (Word8(..)) +#if MIN_VERSION_ghc_prim(0,8,0) +import GHC.Base (extendWord8#) +#else +import GHC.Prim (Word#) +extendWord8# :: Word# -> Word# +extendWord8# w = w +#endif + default(Int) between :: Word8 -- ^ byte to check @@ -94,8 +102,8 @@ ord4 c = chr2 :: Word8 -> Word8 -> Char chr2 (W8# x1#) (W8# x2#) = C# (chr# (z1# +# z2#)) where - !y1# = word2Int# x1# - !y2# = word2Int# x2# + !y1# = word2Int# (extendWord8# x1#) + !y2# = word2Int# (extendWord8# x2#) !z1# = uncheckedIShiftL# (y1# -# 0xC0#) 6# !z2# = y2# -# 0x80# {-# INLINE chr2 #-} @@ -103,9 +111,9 @@ chr2 (W8# x1#) (W8# x2#) = C# (chr# (z1# +# z2#)) chr3 :: Word8 -> Word8 -> Word8 -> Char chr3 (W8# x1#) (W8# x2#) (W8# x3#) = C# (chr# (z1# +# z2# +# z3#)) where - !y1# = word2Int# x1# - !y2# = word2Int# x2# - !y3# = word2Int# x3# + !y1# = word2Int# (extendWord8# x1#) + !y2# = word2Int# (extendWord8# x2#) + !y3# = word2Int# (extendWord8# x3#) !z1# = uncheckedIShiftL# (y1# -# 0xE0#) 12# !z2# = uncheckedIShiftL# (y2# -# 0x80#) 6# !z3# = y3# -# 0x80# @@ -115,10 +123,10 @@ chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) = C# (chr# (z1# +# z2# +# z3# +# z4#)) where - !y1# = word2Int# x1# - !y2# = word2Int# x2# - !y3# = word2Int# x3# - !y4# = word2Int# x4# + !y1# = word2Int# (extendWord8# x1#) + !y2# = word2Int# (extendWord8# x2#) + !y3# = word2Int# (extendWord8# x3#) + !y4# = word2Int# (extendWord8# x4#) !z1# = uncheckedIShiftL# (y1# -# 0xF0#) 18# !z2# = uncheckedIShiftL# (y2# -# 0x80#) 12# !z3# = uncheckedIShiftL# (y3# -# 0x80#) 6# diff --git a/src/Data/Text/Internal/Unsafe/Char.hs b/src/Data/Text/Internal/Unsafe/Char.hs index d208e3f0..285fb50a 100644 --- a/src/Data/Text/Internal/Unsafe/Char.hs +++ b/src/Data/Text/Internal/Unsafe/Char.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP, MagicHash #-} +{-# LANGUAGE CPP, MagicHash, CPP #-} -- | -- Module : Data.Text.Internal.Unsafe.Char @@ -36,20 +36,30 @@ import GHC.Exts (Char(..), Int(..), chr#, ord#, word2Int#) import GHC.Word (Word8(..), Word16(..), Word32(..)) import qualified Data.Text.Array as A +#if MIN_VERSION_ghc_prim(0,8,0) +import GHC.Base (extendWord8#, extendWord16#, extendWord32#) +#else +import GHC.Prim (Word#) +extendWord8#, extendWord16#, extendWord32# :: Word# -> Word# +extendWord8# w = w +extendWord16# w = w +extendWord32# w = w +#endif + ord :: Char -> Int ord (C# c#) = I# (ord# c#) {-# INLINE ord #-} unsafeChr :: Word16 -> Char -unsafeChr (W16# w#) = C# (chr# (word2Int# w#)) +unsafeChr (W16# w#) = C# (chr# (word2Int# (extendWord16# w#))) {-# INLINE unsafeChr #-} unsafeChr8 :: Word8 -> Char -unsafeChr8 (W8# w#) = C# (chr# (word2Int# w#)) +unsafeChr8 (W8# w#) = C# (chr# (word2Int# (extendWord8# w#))) {-# INLINE unsafeChr8 #-} unsafeChr32 :: Word32 -> Char -unsafeChr32 (W32# w#) = C# (chr# (word2Int# w#)) +unsafeChr32 (W32# w#) = C# (chr# (word2Int# (extendWord32# w#))) {-# INLINE unsafeChr32 #-} -- | Write a character into the array at the given offset. Returns diff --git a/src/Data/Text/Internal/Unsafe/Shift.hs b/src/Data/Text/Internal/Unsafe/Shift.hs index b2fef9b6..1e0969ff 100644 --- a/src/Data/Text/Internal/Unsafe/Shift.hs +++ b/src/Data/Text/Internal/Unsafe/Shift.hs @@ -1,4 +1,5 @@ {-# LANGUAGE MagicHash #-} +{-# LANGUAGE CPP #-} -- | -- Module : Data.Text.Internal.Unsafe.Shift @@ -21,8 +22,18 @@ module Data.Text.Internal.Unsafe.Shift ) where -- import qualified Data.Bits as Bits -import GHC.Base import GHC.Word +#if MIN_VERSION_ghc_prim(0,8,0) +import GHC.Base +#else +import GHC.Base hiding (narrowWord16#, extendWord16#, narrowWord32#, extendWord32#) +narrowWord16#, extendWord16#, narrowWord32#, extendWord32# :: Word# -> Word# +narrowWord16# w = w +extendWord16# w = w +narrowWord32# w = w +extendWord32# w = w +#endif + -- | This is a workaround for poor optimisation in GHC 6.8.2. It -- fails to notice constant-width shifts, and adds a test and branch @@ -36,17 +47,17 @@ class UnsafeShift a where instance UnsafeShift Word16 where {-# INLINE shiftL #-} - shiftL (W16# x#) (I# i#) = W16# (narrow16Word# (x# `uncheckedShiftL#` i#)) + shiftL (W16# x#) (I# i#) = W16# (narrowWord16# (narrow16Word# ((extendWord16# x#) `uncheckedShiftL#` i#))) {-# INLINE shiftR #-} - shiftR (W16# x#) (I# i#) = W16# (x# `uncheckedShiftRL#` i#) + shiftR (W16# x#) (I# i#) = W16# (narrowWord16# ((extendWord16# x#) `uncheckedShiftRL#` i#)) instance UnsafeShift Word32 where {-# INLINE shiftL #-} - shiftL (W32# x#) (I# i#) = W32# (narrow32Word# (x# `uncheckedShiftL#` i#)) + shiftL (W32# x#) (I# i#) = W32# (narrowWord32# (narrow32Word# ((extendWord32# x#) `uncheckedShiftL#` i#))) {-# INLINE shiftR #-} - shiftR (W32# x#) (I# i#) = W32# (x# `uncheckedShiftRL#` i#) + shiftR (W32# x#) (I# i#) = W32# (narrowWord32# ((extendWord32# x#) `uncheckedShiftRL#` i#)) instance UnsafeShift Word64 where {-# INLINE shiftL #-} diff --git a/text.cabal b/text.cabal index b7060c2e..647fea02 100644 --- a/text.cabal +++ b/text.cabal @@ -162,7 +162,7 @@ library base >= 4.3 && < 5, binary >= 0.5 && < 0.9, deepseq >= 1.1 && < 1.5, - ghc-prim >= 0.2 && < 0.8, + ghc-prim >= 0.2 && < 0.9, template-haskell >= 2.5 && < 2.18 if flag(bytestring-builder) -- GitLab