Skip to content
Snippets Groups Projects
Commit 82b8f491 authored by Alec Theriault's avatar Alec Theriault
Browse files

Faster 'Text' driven parser combinators

Perf only change:

  * use 'getParserState'/'setParserState' to make 'Text'-optimized
    parser combinators
  * minimize uses of 'Data.Text.{pack,unpack,cons,snoc}'
parent d9c87e47
No related branches found
No related tags found
No related merge requests found
......@@ -360,32 +360,34 @@ table = do
parseFirstRow :: Parser Text
parseFirstRow = do
skipHorizontalSpace
-- upper-left corner is +
c <- Parsec.char '+'
cs <- some (Parsec.char '-' <|> Parsec.char '+')
cs <- takeWhile (\c -> c == '-' || c == '+')
-- upper right corner is + too
guard (last cs == '+')
-- upper-left and upper-right corners are `+`
guard (T.length cs >= 2 &&
T.head cs == '+' &&
T.last cs == '+')
-- trailing space
skipHorizontalSpace
_ <- Parsec.newline
return (T.cons c $ T.pack cs)
return cs
parseRestRows :: Int -> Parser Text
parseRestRows l = do
skipHorizontalSpace
bs <- scan predicate l
c <- Parsec.char '|' <|> Parsec.char '+'
bs <- scan predicate (l - 2)
c2 <- Parsec.char '|' <|> Parsec.char '+'
-- Left and right edges are `|` or `+`
guard (T.length bs >= 2 &&
(T.head bs == '|' || T.head bs == '+') &&
(T.last bs == '|' || T.last bs == '+'))
-- trailing space
skipHorizontalSpace
_ <- Parsec.newline
return (T.cons c (T.snoc bs c2))
return bs
where
predicate n c
| n <= 0 = Nothing
......@@ -662,7 +664,7 @@ nonSpace xs
-- Doesn't discard the trailing newline.
takeNonEmptyLine :: Parser Text
takeNonEmptyLine = do
l <- takeWhile1 (Parsec.noneOf "\n") >>= nonSpace
l <- takeWhile1 (/= '\n') >>= nonSpace
_ <- "\n"
pure (l <> "\n")
......@@ -732,7 +734,7 @@ nonEmptyLine :: Parser Text
nonEmptyLine = try (mfilter (T.any (not . isSpace)) takeLine)
takeLine :: Parser Text
takeLine = try (takeWhile (Parsec.noneOf "\n") <* endOfLine)
takeLine = try (takeWhile (/= '\n') <* endOfLine)
endOfLine :: Parser ()
endOfLine = void "\n" <|> Parsec.eof
......@@ -742,7 +744,7 @@ endOfLine = void "\n" <|> Parsec.eof
-- >>> snd <$> parseOnly property "prop> hello world"
-- Right (DocProperty "hello world")
property :: Parser (DocH mod a)
property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (Parsec.noneOf "\n"))
property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (/= '\n'))
-- |
-- Paragraph level codeblock. Anything between the two delimiting \@ is parsed
......@@ -816,7 +818,7 @@ linkParser = flip Hyperlink <$> label <*> (whitespace *> url)
autoUrl :: Parser (DocH mod a)
autoUrl = mkLink <$> url
where
url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (Parsec.satisfy (not . isSpace))
url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (not . isSpace)
mkLink :: Text -> DocH mod a
mkLink s = case T.unsnoc s of
......
......@@ -9,10 +9,15 @@ module Documentation.Haddock.Parser.Monad where
import qualified Text.Parsec.Char as Parsec
import qualified Text.Parsec as Parsec
import Text.Parsec.Pos ( updatePosChar )
import Text.Parsec ( State(..)
, getParserState, setParserState )
import qualified Data.Text as T
import Data.Text ( Text )
import Control.Monad ( mfilter )
import Data.Functor ( ($>) )
import Data.String ( IsString(..) )
import Data.Bits ( Bits(..) )
import Data.Char ( ord )
......@@ -20,7 +25,11 @@ import Data.List ( foldl' )
import Control.Applicative as App
import Documentation.Haddock.Types ( Version )
import Prelude hiding (takeWhile)
-- | The only bit of information we really care about truding along with us
-- through parsing is the version attached to a @\@since@ annotation - if
-- the doc even contained one.
newtype ParserState = ParserState {
parserStateSince :: Maybe Version
} deriving (Eq, Show)
......@@ -29,7 +38,7 @@ initialParserState :: ParserState
initialParserState = ParserState Nothing
setSince :: Version -> Parser ()
setSince since = Parsec.modifyState (\st -> st {parserStateSince = Just since})
setSince since = Parsec.modifyState (\st -> st{ parserStateSince = Just since })
type Parser = Parsec.Parsec Text ParserState
......@@ -44,38 +53,75 @@ parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of
-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not
-- consume input.
--
-- Equivalent to @Parsec.optionMaybe . Parsec.lookAhead $ Parsec.anyChar@, but
-- more efficient.
peekChar :: Parser (Maybe Char)
peekChar = Parsec.optionMaybe . Parsec.try . Parsec.lookAhead $ Parsec.anyChar
peekChar = headOpt . stateInput <$> getParserState
where headOpt t | T.null t = Nothing
| otherwise = Just (T.head t)
{-# INLINE peekChar #-}
-- | Fails if at the end of input. Does not consume input.
--
-- Equivalent to @Parsec.lookAhead Parsec.anyChar@, but more efficient.
peekChar' :: Parser Char
peekChar' = Parsec.lookAhead Parsec.anyChar
peekChar' = headFail . stateInput =<< getParserState
where headFail t | T.null t = Parsec.parserFail "peekChar': reached EOF"
| otherwise = App.pure (T.head t)
{-# INLINE peekChar' #-}
-- | Parses the given string. Returns the parsed string.
--
-- Equivalent to @Parsec.string (T.unpack t) $> t@, but more efficient.
string :: Text -> Parser Text
string t = Parsec.string (T.unpack t) *> App.pure t
string t = do
s@State{ stateInput = inp, statePos = pos } <- getParserState
case T.stripPrefix t inp of
Nothing -> Parsec.parserFail "string: Failed to match the input string"
Just inp' ->
let pos' = T.foldl updatePosChar pos t
s' = s{ stateInput = inp', statePos = pos' }
in setParserState s' $> t
-- | Keep matching characters as long as the predicate function holds (and
-- return them).
--
-- Equivalent to @fmap T.pack . Parsec.many@, but more efficient.
takeWhile :: (Char -> Bool) -> Parser Text
takeWhile f = do
s@State{ stateInput = inp, statePos = pos } <- getParserState
let (t, inp') = T.span f inp
pos' = T.foldl updatePosChar pos t
s' = s{ stateInput = inp', statePos = pos' }
setParserState s' $> t
-- | Like 'takeWhile', but fails if no characters matched.
--
-- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient.
takeWhile1 :: (Char -> Bool) -> Parser Text
takeWhile1 = mfilter (not . T.null) . takeWhile
-- | Scan the input text, accumulating characters as long as the scanning
-- function returns true.
scan :: (s -> Char -> Maybe s) -- ^ scan function
-> s -- ^ initial state
-> Parser Text
scan f = fmap T.pack . go
where go s1 = do { cOpt <- peekChar
; case cOpt >>= f s1 of
Nothing -> pure ""
Just s2 -> (:) <$> Parsec.anyChar <*> go s2
}
-- | Apply a parser for a character zero or more times and collect the result in
-- a string.
takeWhile :: Parser Char -> Parser Text
takeWhile = fmap T.pack . Parsec.many
-- | Apply a parser for a character one or more times and collect the result in
-- a string.
takeWhile1 :: Parser Char -> Parser Text
takeWhile1 = fmap T.pack . Parsec.many1
scan f st = do
s@State{ stateInput = inp, statePos = pos } <- getParserState
go inp st pos 0 $ \inp' pos' n ->
let s' = s{ Parsec.stateInput = inp', Parsec.statePos = pos' }
in setParserState s' $> T.take n inp
where
go inp s !pos !n cont
= case T.uncons inp of
Nothing -> cont inp pos n -- ran out of input
Just (c, inp') ->
case f s c of
Nothing -> cont inp pos n -- scan function failed
Just s' -> go inp' s' (updatePosChar pos c) (n+1) cont
-- | Parse a decimal number.
decimal :: Integral a => Parser a
......
......@@ -40,7 +40,7 @@ skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace)
-- | Take leading horizontal space
takeHorizontalSpace :: Parser Text
takeHorizontalSpace = takeWhile (Parsec.oneOf horizontalSpace)
takeHorizontalSpace = takeWhile (`elem` horizontalSpace)
makeLabeled :: (String -> Maybe String -> a) -> Text -> a
makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment