Commit 634c5657 authored by Bodigrim's avatar Bodigrim
Browse files

Employ lexicographical comparison for compare

parent ce9916fb
......@@ -411,17 +411,11 @@ textDataType = mkDataType "Data.Text.Text" [packConstr]
-- | /O(n)/ Compare two 'Text' values lexicographically.
compareText :: Text -> Text -> Ordering
compareText ta@(Text _arrA _offA lenA) tb@(Text _arrB _offB lenB)
| lenA == 0 && lenB == 0 = EQ
| otherwise = go 0 0
where
go !i !j
| i >= lenA || j >= lenB = compare lenA lenB
| a < b = LT
| a > b = GT
| otherwise = go (i+di) (j+dj)
where Iter a di = iter ta i
Iter b dj = iter tb j
compareText (Text arrA offA lenA) (Text arrB offB lenB) =
A.compare arrA offA arrB offB (min lenA lenB) <> compare lenA lenB
-- This is not a mistake: on contrary to UTF-16 (https://github.com/haskell/text/pull/208),
-- lexicographic ordering of UTF-8 encoded strings matches lexicographic ordering
-- of underlying bytearrays, no decoding is needed.
-- -----------------------------------------------------------------------------
-- * Conversion to/from 'Text'
......
......@@ -33,6 +33,7 @@ module Data.Text.Array
, copyI
, empty
, equal
, compare
, run
, run2
, toList
......@@ -56,7 +57,8 @@ import Foreign.C.Types (CInt(..))
import GHC.Exts hiding (toList)
import GHC.ST (ST(..), runST)
import GHC.Word (Word8(..))
import Prelude hiding (length, read)
import qualified Prelude
import Prelude hiding (length, read, compare)
-- | Immutable array type.
data Array = ByteArray ByteArray#
......@@ -250,13 +252,23 @@ copyI count@(I# count#) (MutableByteArray dst#) dstOff@(I# dstOff#) (ByteArray s
-- | Compare portions of two arrays for equality. No bounds checking
-- is performed.
equal :: Array -- ^ First
equal :: Array -> Int -> Array -> Int -> Int -> Bool
equal src1 off1 src2 off2 count = compareInternal src1 off1 src2 off2 count == 0
{-# INLINE equal #-}
-- | Compare portions of two arrays. No bounds checking is performed.
compare :: Array -> Int -> Array -> Int -> Int -> Ordering
compare src1 off1 src2 off2 count = compareInternal src1 off1 src2 off2 count `Prelude.compare` 0
{-# INLINE compare #-}
compareInternal
:: Array -- ^ First
-> Int -- ^ Offset into first
-> Array -- ^ Second
-> Int -- ^ Offset into second
-> Int -- ^ Count
-> Bool
equal (ByteArray src1#) (I# off1#) (ByteArray src2#) (I# off2#) (I# count#) = i == 0
-> Int
compareInternal (ByteArray src1#) (I# off1#) (ByteArray src2#) (I# off2#) (I# count#) = i
where
#if MIN_VERSION_base(4,11,0)
i = I# (compareByteArrays# src1# off1# src2# off2# count#)
......@@ -266,4 +278,4 @@ equal (ByteArray src1#) (I# off1#) (ByteArray src2#) (I# off2#) (I# count#) = i
foreign import ccall unsafe "_hs_text_memcmp" memcmp
:: ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> IO CInt
#endif
{-# INLINE equal #-}
{-# INLINE compareInternal #-}
......@@ -215,6 +215,7 @@ import Data.Monoid (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import qualified Data.Text as T
import qualified Data.Text.Array as A
import qualified Data.Text.Internal as T
import qualified Data.Text.Internal.Fusion.Common as S
import qualified Data.Text.Unsafe as T
......@@ -286,18 +287,14 @@ compareText :: Text -> Text -> Ordering
compareText Empty Empty = EQ
compareText Empty _ = LT
compareText _ Empty = GT
compareText (Chunk a0 as) (Chunk b0 bs) = outer a0 b0
where
outer ta@(T.Text arrA offA lenA) tb@(T.Text arrB offB lenB) = go 0 0
where
go !i !j
| i >= lenA = compareText as (chunk (T.Text arrB (offB+j) (lenB-j)) bs)
| j >= lenB = compareText (chunk (T.Text arrA (offA+i) (lenA-i)) as) bs
| a < b = LT
| a > b = GT
| otherwise = go (i+di) (j+dj)
where T.Iter a di = T.iter ta i
T.Iter b dj = T.iter tb j
compareText (Chunk (T.Text arrA offA lenA) as) (Chunk (T.Text arrB offB lenB) bs) =
A.compare arrA offA arrB offB (min lenA lenB) <> case lenA `compare` lenB of
LT -> compareText as (Chunk (T.Text arrB (offB + lenA) (lenB - lenA)) bs)
EQ -> compareText as bs
GT -> compareText (Chunk (T.Text arrA (offA + lenB) (lenA - lenB)) as) bs
-- This is not a mistake: on contrary to UTF-16 (https://github.com/haskell/text/pull/208),
-- lexicographic ordering of UTF-8 encoded strings matches lexicographic ordering
-- of underlying bytearrays, no decoding is needed.
instance Show Text where
showsPrec p ps r = showsPrec p (unpack ps) r
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment