Commit cf7baddf authored by Duncan Coutts
Browse files

Check for invalid UTF8 when parsing .cabal files

This assumes that a permissive UTF-8 decoder has already inserted '\xfffd' as a
replacement character. Ideally we should check strictly for errors when we
decode instead. However, it is nicer to do the check where we have some kind of
error reporting infrastructure, which is why I've added it to the parser for
the moment. The current error message is not too bad, as it reports the line number.
parent 25f40183
......@@ -90,6 +90,7 @@ type LineNo = Int
-- | The ways in which parsing can fail. Every constructor except
-- 'FromString' always carries the line number the error refers to.
data PError = AmbigousParse String LineNo -- ^ ambiguous parse of the named field
| NoParse String LineNo -- ^ the named field could not be parsed
| TabsError LineNo -- ^ a tab was used as indentation
| UTFError LineNo -- ^ invalid UTF-8 text was found
| FromString String (Maybe LineNo) -- ^ free-form message, optionally located
deriving Show
......@@ -129,6 +130,7 @@ locatedErrorMsg :: PError -> (Maybe LineNo, String)
-- Turn a parse error into an optional line number paired with a
-- human-readable message. Only 'FromString' may lack a line number.
locatedErrorMsg perr = case perr of
  AmbigousParse field lineNo -> (Just lineNo, "Ambiguous parse in field '" ++ field ++ "'")
  NoParse field lineNo       -> (Just lineNo, "Parse of field '" ++ field ++ "' failed: ")
  TabsError lineNo           -> (Just lineNo, "Tab used as indentation.")
  UTFError lineNo            -> (Just lineNo, "Invalid UTF-8 text.")
  FromString msg mLineNo     -> (mLineNo, msg)
syntaxError :: LineNo -> String -> ParseResult a
......@@ -137,6 +139,9 @@ syntaxError n s = ParseFailed $ FromString s (Just n)
-- | Fail the parse with a 'TabsError' located at the given line.
tabsError :: LineNo -> ParseResult a
tabsError = ParseFailed . TabsError
-- | Fail the parse with a 'UTFError' located at the given line.
utf8Error :: LineNo -> ParseResult a
utf8Error = ParseFailed . UTFError
-- | Succeed with @()@, attaching the given message to the result as the
-- sole entry of the list carried by 'ParseOk'.
warning :: String -> ParseResult ()
warning msg = ParseOk [msg] ()
......@@ -263,15 +268,18 @@ fName (Section _ n _ _) = n
fName _ = error "fname: not a field or section"
readFields :: String -> ParseResult [Field]
-- NOTE(review): the body directly below looks like the pre-commit side of
-- this diff hunk (it has no UTF-8 check); the revised body follows it.
-- Confirm against the repository before relying on either.
--
-- It normalises line endings, splits the input into lines, passes them
-- through trimLines and tokeniseLine, hands the tokens to mkTree, maps
-- mkField 0 over the result and finally applies ifelse.
readFields input =
ifelse
=<< mapM (mkField 0)
=<< mkTree (tokenise input)
where tokenise = concatMap tokeniseLine
. trimLines
. lines
. normaliseLineEndings
-- Parse the input into fields, first rejecting any input that contains
-- '\xfffd' (the character the commit message says a permissive decoder
-- inserts as a replacement). The error reports the first offending line,
-- counting lines from 1.
readFields input =
  case badLineNos of
    []         -> ifelse =<< mapM (mkField 0) =<< mkTree tokens
    lineNo : _ -> utf8Error lineNo
  where
    -- Split once; the same list feeds both the check and the tokeniser,
    -- so reported line numbers refer to the lines that would be tokenised.
    sourceLines = lines (normaliseLineEndings input)
    -- Numbers of all lines that contain the replacement character.
    badLineNos  = map fst (filter (elem '\xfffd' . snd) (zip [1 ..] sourceLines))
    tokens      = concatMap tokeniseLine (trimLines sourceLines)
-- attach line number and determine indentation
trimLines :: [String] -> [(LineNo, Indent, HasTabs, String)]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment