Commit eba656a7 authored by Bodigrim's avatar Bodigrim
Browse files

Speed up strict and lazy reading of numbers

parent 55358a4c
......@@ -13,12 +13,14 @@ module Data.Text.Internal.Private
(
runText
, span_
, spanAscii_
) where
import Control.Monad.ST (ST, runST)
import Data.Text.Internal (Text(..), text)
import Data.Text.Unsafe (Iter(..), iter)
import qualified Data.Text.Array as A
import Data.Word (Word8)
#if defined(ASSERTS)
import GHC.Stack (HasCallStack)
......@@ -34,6 +36,17 @@ span_ p t@(Text arr off len) = (# hd,tl #)
where Iter c d = iter t i
{-# INLINE span_ #-}
-- | For the sake of performance this function does not check
-- that a char is in ASCII range; it is a responsibility of @p@.
spanAscii_ :: (Word8 -> Bool) -> Text -> (# Text, Text #)
spanAscii_ p (Text arr off len) = (# hd, tl #)
where hd = text arr off k
tl = text arr (off + k) (len - k)
!k = loop 0
loop !i | i < len && p (A.unsafeIndex arr (off + i)) = loop (i + 1)
| otherwise = i
{-# INLINE spanAscii_ #-}
runText ::
#if defined(ASSERTS)
HasCallStack =>
......
......@@ -61,9 +61,20 @@ perhaps def m = P $ \t -> case runP m t of
hexDigitToInt :: Char -> Int
hexDigitToInt c
| c >= '0' && c <= '9' = ord c - ord '0'
| c >= 'a' && c <= 'f' = ord c - (ord 'a' - 10)
| otherwise = ord c - (ord 'A' - 10)
| to0 < 10 = wordToInt to0
| toa < 6 = wordToInt toa + 10
| otherwise = wordToInt toA + 10
where
ordW = intToWord (ord c)
to0 = ordW - intToWord (ord '0')
toa = ordW - intToWord (ord 'a')
toA = ordW - intToWord (ord 'A')
digitToInt :: Char -> Int
digitToInt c = ord c - ord '0'
intToWord :: Int -> Word
intToWord = fromIntegral
wordToInt :: Word -> Int
wordToInt = fromIntegral
{-# LANGUAGE OverloadedStrings, CPP #-}
{-# LANGUAGE Safe #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE UnboxedTuples #-}
-- |
-- Module : Data.Text.Lazy.Read
......@@ -21,11 +23,15 @@ module Data.Text.Lazy.Read
) where
import Control.Monad (liftM)
import Data.Char (isDigit, isHexDigit)
import Data.Char (ord)
import Data.Int (Int8, Int16, Int32, Int64)
import Data.Ratio ((%))
import Data.Text.Internal.Read
import Data.Text.Array as A
import Data.Text.Lazy as T
import Data.Text.Internal.Lazy as T (Text(..))
import qualified Data.Text.Internal as T (Text(..))
import qualified Data.Text.Internal.Private as T (spanAscii_)
import Data.Word (Word, Word8, Word16, Word32, Word64)
-- | Read some text. If the read succeeds, return its value and the
......@@ -59,7 +65,7 @@ decimal :: Integral a => Reader a
decimal txt
| T.null h = Left "input does not start with a digit"
| otherwise = Right (T.foldl' go 0 h, t)
where (h,t) = T.span isDigit txt
where (# h, t #) = spanAscii_ (\w -> w - ord8 '0' < 10) txt
go n d = (n * 10 + fromIntegral (digitToInt d))
-- | Read a hexadecimal integer, consisting of an optional leading
......@@ -97,7 +103,7 @@ hex :: Integral a => Reader a
hex txt
| T.null h = Left "input does not start with a hexadecimal digit"
| otherwise = Right (T.foldl' go 0 h, t)
where (h,t) = T.span isHexDigit txt
where (# h, t #) = spanAscii_ (\w -> w - ord8 '0' < 10 || w - ord8 'A' < 6 || w - ord8 'a' < 6) txt
go n d = (n * 16 + fromIntegral (hexDigitToInt d))
-- | Read an optional leading sign character (@\'-\'@ or @\'+\'@) and
......@@ -156,26 +162,30 @@ signa :: Num a => Parser a -> Parser a
{-# SPECIALIZE signa :: Parser Int64 -> Parser Int64 #-}
{-# SPECIALIZE signa :: Parser Integer -> Parser Integer #-}
signa p = do
sign <- perhaps '+' $ char (\c -> c == '-' || c == '+')
if sign == '+' then p else negate `liftM` p
sign <- perhaps (ord8 '+') $ charAscii (\c -> c == ord8 '-' || c == ord8 '+')
if sign == ord8 '+' then p else negate `liftM` p
char :: (Char -> Bool) -> Parser Char
char p = P $ \t -> case T.uncons t of
Just (c,t') | p c -> Right (c,t')
_ -> Left "character does not match"
charAscii :: (Word8 -> Bool) -> Parser Word8
charAscii p = P $ \case
Empty -> Left "character does not match"
-- len is > 0, unless the internal invariant of Text is violated
Chunk (T.Text arr off len) ts -> let c = A.unsafeIndex arr off in
if p c
then Right (c, if len <= 1 then ts else Chunk (T.Text arr (off + 1) (len - 1)) ts)
else Left "character does not match"
floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Reader a
{-# INLINE floaty #-}
floaty f = runP $ do
sign <- perhaps '+' $ char (\c -> c == '-' || c == '+')
sign <- perhaps (ord8 '+') $ charAscii (\c -> c == ord8 '-' || c == ord8 '+')
real <- P decimal
T fraction fracDigits <- perhaps (T 0 0) $ do
_ <- char (=='.')
digits <- P $ \t -> Right (int64ToInt . T.length $ T.takeWhile isDigit t, t)
_ <- charAscii (== ord8 '.')
digits <- P $ \t -> Right (let (# hd, _ #) = spanAscii_ (\w -> w - ord8 '0' < 10) t in int64ToInt (T.length hd), t)
n <- P decimal
return $ T n digits
let e c = c == 'e' || c == 'E'
power <- perhaps 0 (char e >> signa (P decimal) :: Parser Int)
let e c = c == ord8 'e' || c == ord8 'E'
power <- perhaps 0 (charAscii e >> signa (P decimal) :: Parser Int)
let n = if fracDigits == 0
then if power == 0
then fromInteger real
......@@ -183,9 +193,23 @@ floaty f = runP $ do
else if power == 0
then f real fraction (10 ^ fracDigits)
else f real fraction (10 ^ fracDigits) * (10 ^^ power)
return $! if sign == '+'
return $! if sign == ord8 '+'
then n
else -n
int64ToInt :: Int64 -> Int
int64ToInt = fromIntegral
ord8 :: Char -> Word8
ord8 = fromIntegral . ord
-- | For the sake of performance this function does not check
-- that a char is in ASCII range; it is a responsibility of @p@.
spanAscii_ :: (Word8 -> Bool) -> Text -> (# Text, Text #)
spanAscii_ p = loop
where
loop Empty = (# Empty, Empty #)
loop (Chunk t ts) = let (# t', t''@(T.Text _ _ len) #) = T.spanAscii_ p t in
if len == 0
then let (# ts', ts'' #) = loop ts in (# Chunk t ts', ts'' #)
else (# Chunk t' Empty, Chunk t'' ts #)
......@@ -21,11 +21,13 @@ module Data.Text.Read
) where
import Control.Monad (liftM)
import Data.Char (isDigit, isHexDigit)
import Data.Char (ord)
import Data.Int (Int8, Int16, Int32, Int64)
import Data.Ratio ((%))
import Data.Text as T
import Data.Text.Internal.Private (span_)
import Data.Text.Internal as T (Text(..))
import Data.Text.Array as A
import Data.Text.Internal.Private (spanAscii_)
import Data.Text.Internal.Read
import Data.Word (Word, Word8, Word16, Word32, Word64)
......@@ -60,7 +62,7 @@ decimal :: Integral a => Reader a
decimal txt
| T.null h = Left "input does not start with a digit"
| otherwise = Right (T.foldl' go 0 h, t)
where (# h,t #) = span_ isDigit txt
where (# h,t #) = spanAscii_ (\w -> w - ord8 '0' < 10) txt
go n d = (n * 10 + fromIntegral (digitToInt d))
-- | Read a hexadecimal integer, consisting of an optional leading
......@@ -107,7 +109,7 @@ hex :: Integral a => Reader a
hex txt
| T.null h = Left "input does not start with a hexadecimal digit"
| otherwise = Right (T.foldl' go 0 h, t)
where (# h,t #) = span_ isHexDigit txt
where (# h,t #) = spanAscii_ (\w -> w - ord8 '0' < 10 || w - ord8 'A' < 6 || w - ord8 'a' < 6) txt
go n d = (n * 16 + fromIntegral (hexDigitToInt d))
-- | Read an optional leading sign character (@\'-\'@ or @\'+\'@) and
......@@ -166,26 +168,27 @@ signa :: Num a => Parser a -> Parser a
{-# SPECIALIZE signa :: Parser Int64 -> Parser Int64 #-}
{-# SPECIALIZE signa :: Parser Integer -> Parser Integer #-}
signa p = do
sign <- perhaps '+' $ char (\c -> c == '-' || c == '+')
if sign == '+' then p else negate `liftM` p
sign <- perhaps (ord8 '+') $ charAscii (\c -> c == ord8 '-' || c == ord8 '+')
if sign == ord8 '+' then p else negate `liftM` p
char :: (Char -> Bool) -> Parser Char
char p = P $ \t -> case T.uncons t of
Just (c,t') | p c -> Right (c,t')
_ -> Left "character does not match"
charAscii :: (Word8 -> Bool) -> Parser Word8
charAscii p = P $ \(Text arr off len) -> let c = A.unsafeIndex arr off in
if len > 0 && p c
then Right (c, Text arr (off + 1) (len - 1))
else Left "character does not match"
floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Reader a
{-# INLINE floaty #-}
floaty f = runP $ do
sign <- perhaps '+' $ char (\c -> c == '-' || c == '+')
sign <- perhaps (ord8 '+') $ charAscii (\c -> c == ord8 '-' || c == ord8 '+')
real <- P decimal
T fraction fracDigits <- perhaps (T 0 0) $ do
_ <- char (=='.')
digits <- P $ \t -> Right (T.length $ T.takeWhile isDigit t, t)
_ <- charAscii (== ord8 '.')
digits <- P $ \t -> Right (let (# hd, _ #) = spanAscii_ (\w -> w - ord8 '0' < 10) t in T.length hd, t)
n <- P decimal
return $ T n digits
let e c = c == 'e' || c == 'E'
power <- perhaps 0 (char e >> signa (P decimal) :: Parser Int)
let e c = c == ord8 'e' || c == ord8 'E'
power <- perhaps 0 (charAscii e >> signa (P decimal) :: Parser Int)
let n = if fracDigits == 0
then if power == 0
then fromInteger real
......@@ -193,6 +196,9 @@ floaty f = runP $ do
else if power == 0
then f real fraction (10 ^ fracDigits)
else f real fraction (10 ^ fracDigits) * (10 ^^ power)
return $! if sign == '+'
return $! if sign == ord8 '+'
then n
else -n
ord8 :: Char -> Word8
ord8 = fromIntegral . ord
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment