Skip to content
Snippets Groups Projects
Commit 76dac21f authored by Alex Biehl's avatar Alex Biehl Committed by GitHub
Browse files

Consequently use inClass and notInClass in haddock-library (#617)

These allow attoparsec to do some clever lookup optimization
parent 6d9c8cb8
No related branches found
No related tags found
No related merge requests found
......@@ -143,7 +143,7 @@ specialChar = "_/<@\"&'`# "
-- to ensure that we have already given a chance to more meaningful parsers
-- before capturing their characers.
string' :: Parser (DocH mod a)
string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (notInClass specialChar)
where
unescape "" = ""
unescape ('\\':x:xs) = x : unescape xs
......@@ -153,7 +153,7 @@ string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialCh
-- This is done to skip over any special characters belonging to other
-- elements but which were not deemed meaningful at their positions.
skipSpecialChar :: Parser (DocH mod a)
skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
skipSpecialChar = DocString . return <$> satisfy (inClass specialChar)
-- | Emphasis parser.
--
......@@ -215,7 +215,7 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"')
-- accept {small | large | digit | ' } here. But as we can't
-- match on unicode characters, this is currently not possible.
-- Note that we allow ‘#’ to suport anchors.
<*> (decodeUtf8 <$> takeWhile (`notElem` (" .&[{}(=*)+]!|@/;,^?\"\n"::String)))
<*> (decodeUtf8 <$> takeWhile (notInClass " .&[{}(=*)+]!|@/;,^?\"\n"))
-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
-- a title for the picture.
......@@ -338,7 +338,7 @@ definitionList :: BS.ByteString -> Parser (DocH mod Identifier)
definitionList indent = DocDefList <$> p
where
p = do
label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` ("]\n" :: String))) <* ("]" <* optional ":")
label <- "[" *> (parseStringBS <$> takeWhile1 (notInClass "]\n")) <* ("]" <* optional ":")
c <- takeLine
(cs, items) <- more indent p
let contents = parseString . dropNLs . unlines $ c : cs
......@@ -561,7 +561,7 @@ autoUrl = mkLink <$> url
url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
mkLink :: BS.ByteString -> DocH mod a
mkLink s = case unsnoc s of
Just (xs, x) | x `elem` (",.!?" :: String) -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x]
Just (xs, x) | inClass ",.!?" x -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x]
_ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
-- | Parses strings between identifier delimiters. Consumes all input that it
......@@ -570,8 +570,13 @@ autoUrl = mkLink <$> url
parseValid :: Parser String
parseValid = p some
where
idChar = satisfy (`elem` ("_.!#$%&*+/<=>?@\\|-~:^"::String))
<|> digit <|> letter_ascii
idChar =
satisfy (\c -> isAlpha_ascii c
|| isDigit c
-- N.B. '-' is placed first otherwise attoparsec thinks
-- it belongs to a character class
|| inClass "-_.!#$%&*+/<=>?@\\|~:^" c)
p p' = do
vs' <- p' $ utf8String "⋆" <|> return <$> idChar
let vs = concat vs'
......@@ -594,4 +599,4 @@ identifier = do
e <- idDelim
return $ DocIdentifier (o, vid, e)
where
idDelim = char '\'' <|> char '`'
idDelim = satisfy (\c -> c == '\'' || c == '`')
......@@ -22,7 +22,7 @@ module Documentation.Haddock.Parser.Util (
import Control.Applicative
import Control.Monad (mfilter)
import Documentation.Haddock.Parser.Monad
import Documentation.Haddock.Parser.Monad hiding (isHorizontalSpace)
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS
import Prelude hiding (takeWhile)
......@@ -40,11 +40,14 @@ unsnoc bs
strip :: String -> String
strip = (\f -> f . f) $ dropWhile isSpace . reverse
isHorizontalSpace :: Char -> Bool
isHorizontalSpace = inClass " \t\f\v\r"
skipHorizontalSpace :: Parser ()
skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r")
skipHorizontalSpace = skipWhile isHorizontalSpace
takeHorizontalSpace :: Parser BS.ByteString
takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
takeHorizontalSpace = takeWhile isHorizontalSpace
makeLabeled :: (String -> Maybe String -> a) -> String -> a
makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment