Commit 271b3a66 authored by Bodigrim's avatar Bodigrim
Browse files

Bump version and update changelog

parent 7532a9f8
### 1.3
### 2.0
* [Switch internal representation of text from UTF-16 to UTF-8](https://github.com/haskell/text/pull/365):
* Functions in `Data.Text.Array` now operate over arrays of `Word8` instead of `Word16`.
* Rename constructors of `Array` and `MArray` to `ByteArray` and `MutableByteArray`.
* Rename functions and types in `Data.Text.Foreign` to reflect switch
from `Word16` to `Word8`.
* Rename slicing functions in `Data.Text.Unsafe` to reflect switch
from `Word16` to `Word8`.
* Rename `Data.Text.Internal.Unsafe.Char.unsafeChr` to `unsafeChr16`.
* Change semantics and order of arguments of `Data.Text.Array.copyI`:
pass length, not end offset.
* Extend `Data.Text.Internal.Encoding.Utf8` to provide more UTF-8 related routines.
* Extend interface of `Data.Text.Array` with more utility functions.
* Add `instance Show Data.Text.Unsafe.Iter`.
* Add `Data.Text.measureOff`.
* Extend `Data.Text.Unsafe` with `iterArray` and `reverseIterArray`.
* Export `Data.Text.Internal.Lazy.equal`.
* Export `Data.Text.Internal.append`.
* Add `Data.Text.Internal.Private.spanAscii_`.
* Replacement characters in `decodeUtf8With` are no longer limited to Basic Multilingual Plane.
* [Disable implicit fusion rules](https://github.com/haskell/text/pull/348)
* [Add `Data.Text.Encoding.decodeUtf8Lenient`](https://github.com/haskell/text/pull/342)
* [Remove `Data.Text.Internal.Unsafe.Shift`](https://github.com/haskell/text/pull/343)
......
......@@ -1225,6 +1225,8 @@ take n t@(Text arr off len)
--
-- This function is used to implement 'take', 'drop', 'splitAt' and 'length'
-- and is useful on its own in streaming and parsing libraries.
--
-- @since 2.0
measureOff :: Int -> Text -> Int
measureOff !n (Text (A.ByteArray arr) off len) = if len == 0 then 0 else
cSsizeToInt $ unsafeDupablePerformIO $
......
......@@ -80,6 +80,8 @@ new (I# len#)
{-# INLINE new #-}
-- | Create an uninitialized mutable pinned array.
--
-- @since 2.0
newPinned :: forall s. Int -> ST s (MArray s)
newPinned (I# len#)
#if defined(ASSERTS)
......@@ -90,6 +92,7 @@ newPinned (I# len#)
(# s2#, marr# #) -> (# s2#, MutableByteArray marr# #)
{-# INLINE newPinned #-}
-- | @since 2.0
newFilled :: Int -> Int -> ST s (MArray s)
newFilled (I# len#) (I# c#) = ST $ \s1# ->
case newByteArray# len# s1# of
......@@ -97,6 +100,7 @@ newFilled (I# len#) (I# c#) = ST $ \s1# ->
s3# -> (# s3#, MutableByteArray marr# #)
{-# INLINE newFilled #-}
-- | @since 2.0
tile :: MArray s -> Int -> ST s ()
tile marr tileLen = do
totalLen <- getSizeofMArray marr
......@@ -130,8 +134,10 @@ unsafeIndex (ByteArray arr) i@(I# i#) =
case indexWord8Array# arr i# of r# -> (W8# r#)
{-# INLINE unsafeIndex #-}
-- sizeofMutableByteArray# is deprecated, because it is unsafe in the presence of
-- shrinkMutableByteArray# and resizeMutableByteArray#.
-- | 'sizeofMutableByteArray#' is deprecated, because it is unsafe in the presence of
-- 'shrinkMutableByteArray#' and 'resizeMutableByteArray#'.
--
-- @since 2.0
getSizeofMArray :: MArray s -> ST s Int
getSizeofMArray (MutableByteArray marr) = ST $ \s0# ->
case getSizeofMutableByteArray# marr s0# of
......@@ -185,12 +191,14 @@ run2 k = runST (do
return (arr,b))
{-# INLINE run2 #-}
-- | @since 2.0
resizeM :: MArray s -> Int -> ST s (MArray s)
resizeM (MutableByteArray ma) i@(I# i#) = ST $ \s1# ->
case resizeMutableByteArray# ma i# s1# of
(# s2#, newArr #) -> (# s2#, MutableByteArray newArr #)
{-# INLINE resizeM #-}
-- | @since 2.0
shrinkM ::
#if defined(ASSERTS)
HasCallStack =>
......@@ -253,6 +261,8 @@ copyI count@(I# count#) (MutableByteArray dst#) dstOff@(I# dstOff#) (ByteArray s
{-# INLINE copyI #-}
-- | Copy from pointer.
--
-- @since 2.0
copyFromPointer
:: MArray s -- ^ Destination
-> Int -- ^ Destination offset
......@@ -270,6 +280,8 @@ copyFromPointer (MutableByteArray dst#) dstOff@(I# dstOff#) (Ptr src#) count@(I#
{-# INLINE copyFromPointer #-}
-- | Copy to pointer.
--
-- @since 2.0
copyToPointer
:: Array -- ^ Source
-> Int -- ^ Source offset
......@@ -293,6 +305,8 @@ equal src1 off1 src2 off2 count = compareInternal src1 off1 src2 off2 count == 0
{-# INLINE equal #-}
-- | Compare portions of two arrays. No bounds checking is performed.
--
-- @since 2.0
compare :: Array -> Int -> Array -> Int -> Int -> Ordering
compare src1 off1 src2 off2 count = compareInternal src1 off1 src2 off2 count `Prelude.compare` 0
{-# INLINE compare #-}
......
......@@ -234,7 +234,7 @@ decodeUtf8With2 onErr bs1@(B.length -> len1) bs2@(B.length -> len2) = runST $ do
, bs <- B.drop (srcOff - len1) (B.take guessUtf8Boundary bs2)
, isValidBS bs = do
withBS bs $ \fp _ -> unsafeIOToST $ unsafeWithForeignPtr fp $ \src ->
unsafeSTToIO $ A.copyP dst dstOff src (len1 + guessUtf8Boundary - srcOff)
unsafeSTToIO $ A.copyFromPointer dst dstOff src (len1 + guessUtf8Boundary - srcOff)
inner (len1 + guessUtf8Boundary) (dstOff + (len1 + guessUtf8Boundary - srcOff))
| dstOff + 4 > dstLen = do
......
......@@ -59,6 +59,8 @@ import qualified Data.Text.Array as A
-- the functions in the 'Data.Text.Encoding' module.
-- | A type representing a number of UTF-8 code units.
--
-- @since 2.0
newtype I8 = I8 Int
deriving (Bounded, Enum, Eq, Integral, Num, Ord, Read, Real, Show)
......@@ -86,6 +88,8 @@ fromPtr ptr (I8 len) = unsafeSTToIO $ do
-- If @n@ would cause the 'Text' to end inside a code point, the
-- end of the prefix will be advanced by several additional 'Word8' units
-- to maintain its validity.
--
-- @since 2.0
takeWord8 :: I8 -> Text -> Text
takeWord8 = (fst .) . splitAtWord8
......@@ -95,6 +99,8 @@ takeWord8 = (fst .) . splitAtWord8
-- If @n@ would cause the 'Text' to begin inside a code point, the
-- beginning of the suffix will be advanced by several additional 'Word8'
-- unit to maintain its validity.
--
-- @since 2.0
dropWord8 :: I8 -> Text -> Text
dropWord8 = (snd .) . splitAtWord8
......
......@@ -68,6 +68,8 @@ between x y z = x >= y && x <= z
-- | ord c < 0x800 = 2
-- | ord c < 0x10000 = 3
-- | otherwise = 4
-- | @since 2.0
utf8Length :: Char -> Int
utf8Length (C# c) = I# ((1# +# geChar# c (chr# 0x80#)) +# (geChar# c (chr# 0x800#) +# geChar# c (chr# 0x10000#)))
{-# INLINE utf8Length #-}
......@@ -82,6 +84,8 @@ utf8Length (C# c) = I# ((1# +# geChar# c (chr# 0x80#)) +# (geChar# c (chr# 0x800
-- c `xor` I# (c# <=# 0#) is a branchless equivalent of c `max` 1.
-- It is crucial to write c# <=# 0# and not c# ==# 0#, otherwise
-- GHC is tempted to "optimize" by introduction of branches.
-- | @since 2.0
utf8LengthByLeader :: Word8 -> Int
utf8LengthByLeader w = c `xor` I# (c# <=# 0#)
where
......@@ -256,11 +260,13 @@ updateState (ByteClass c) (DecoderState s) = DecoderState (W8# el#)
newtype CodePoint = CodePoint Int
-- | @since 2.0
data DecoderResult
= Accept !Char
| Incomplete !DecoderState !CodePoint
| Reject
-- | @since 2.0
utf8DecodeStart :: Word8 -> DecoderResult
utf8DecodeStart w
| st == utf8AcceptState = Accept (chr (word8ToInt w))
......@@ -271,6 +277,7 @@ utf8DecodeStart w
st = updateState cl utf8AcceptState
cp = word8ToInt $ (0xff `shiftR` word8ToInt cl') .&. w
-- | @since 2.0
utf8DecodeContinue :: Word8 -> DecoderState -> CodePoint -> DecoderResult
utf8DecodeContinue w st (CodePoint cp)
| st' == utf8AcceptState = Accept (chr cp')
......
......@@ -38,6 +38,8 @@ span_ p t@(Text arr off len) = (# hd,tl #)
-- | For the sake of performance this function does not check
-- that a char is in ASCII range; it is a responsibility of @p@.
--
-- @since 2.0
spanAscii_ :: (Word8 -> Bool) -> Text -> (# Text, Text #)
spanAscii_ p (Text arr off len) = (# hd, tl #)
where hd = text arr off k
......
......@@ -39,6 +39,7 @@ ord :: Char -> Int
ord (C# c#) = I# (ord# c#)
{-# INLINE ord #-}
-- | @since 2.0
unsafeChr16 :: Word16 -> Char
unsafeChr16 (W16# w#) = C# (chr# (word2Int# (word16ToWord# w#)))
{-# INLINE unsafeChr16 #-}
......
......@@ -79,6 +79,7 @@ iter ::
iter (Text arr off _len) i = iterArray arr (off + i)
{-# INLINE iter #-}
-- | @since 2.0
iterArray :: A.Array -> Int -> Iter
iterArray arr j = Iter chr l
where m0 = A.unsafeIndex arr j
......@@ -107,6 +108,7 @@ reverseIter :: Text -> Int -> Iter
reverseIter (Text arr off _len) i = reverseIterArray arr (off + i)
{-# INLINE reverseIter #-}
-- | @since 2.0
reverseIterArray :: A.Array -> Int -> Iter
reverseIterArray arr j
| m0 < 0x80 = Iter (unsafeChr8 m0) (-1)
......@@ -139,16 +141,22 @@ reverseIter_ (Text arr off _len) i
-- | /O(1)/ Return the length of a 'Text' in units of 'Word8'. This
-- is useful for sizing a target array appropriately before using
-- 'unsafeCopyToPtr'.
--
-- @since 2.0
lengthWord8 :: Text -> Int
lengthWord8 (Text _arr _off len) = len
{-# INLINE lengthWord8 #-}
-- | /O(1)/ Unchecked take of 'k' 'Word8's from the front of a 'Text'.
--
-- @since 2.0
takeWord8 :: Int -> Text -> Text
takeWord8 k (Text arr off _len) = Text arr off k
{-# INLINE takeWord8 #-}
-- | /O(1)/ Unchecked drop of 'k' 'Word8's from the front of a 'Text'.
--
-- @since 2.0
dropWord8 :: Int -> Text -> Text
dropWord8 k (Text arr off len) = Text arr (off+k) (len-k)
{-# INLINE dropWord8 #-}
cabal-version: 2.2
name: text
version: 1.2.5.0
version: 2.0
homepage: https://github.com/haskell/text
bug-reports: https://github.com/haskell/text/issues
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment