Commit d95e4b59 authored by Duncan Coutts's avatar Duncan Coutts
Browse files

Rewrite the parser for the configuration structure to allow laout or braces

Joint work with Thomas Schilling.
The sections and indeed fields (and if/else) can now use either explicit
brace {} style layout or indentation eg:
> library
>   exposed-modules: Blah
or
> library {
>   exposed-modules:
> }
layout style can be nested within explict braces style and vica versa.
Also add some more checks and relax the tab checks.
Unrecognised sections, like unrecognised fields, are not fatal errors,
so we could add sections in future without breaking old cabal.
parent e78f34e8
......@@ -814,26 +814,7 @@ readHookedBuildInfo verbosity = readAndParseFile verbosity parseHookedBuildInfo
readPackageDescription :: Verbosity -> FilePath -> IO GenericPackageDescription
readPackageDescription verbosity =
readAndParseFile verbosity parseDescription
{-
parseDescription :: String -> ParseResult PackageDescription
parseDescription str = do
all_fields0 <- readFields str
-- detectCabalFormat all_fields0
all_fields <- mapM deprecField all_fields0
let (st:sts) = stanzas all_fields
pkg <- parseFields basic_field_descrs emptyPackageDescription st
foldM parseExtraStanza pkg sts
where
parseExtraStanza pkg st@((F _lineNo "executable" _eName):_) = do
exe <- parseFields executableFieldDescrs emptyExecutable st
return pkg{executables= executables pkg ++ [exe]}
parseExtraStanza _ x = error ("This shouldn't happen!" ++ show x)
basic_field_descrs :: [FieldDescr PackageDescription]
basic_field_descrs = pkgDescrFieldDescrs ++ map liftToPkg libFieldDescrs
where liftToPkg = liftField (fromMaybe emptyLibrary . library)
(\lib pkg -> pkg{library = Just lib})
-}
stanzas :: [Field] -> [[Field]]
stanzas [] = []
stanzas (f:fields) = (f:this) : stanzas rest
......@@ -894,17 +875,6 @@ buildInfoNames :: [String]
buildInfoNames = map fieldName binfoFieldDescrs
++ map fst deprecatedFieldsBuildInfo
{-
-- Just to make the structure explicit
data CabalFile = MkCabalFile
{ headerFields :: [Field]
, cfFlags :: [Flag]
, exeFields :: [(String,CondTree ConfVar Dependency Field)]
, libFields :: CondTree ConfVar Dependency Field
} -- deriving Show
-}
-- A minimal implementation of the StateT monad transformer to avoid depending
-- on the 'mtl' package.
newtype StT s m a = StT { runStT :: s -> m (a,s) }
......@@ -954,16 +924,13 @@ parseDescription file = do
fields0 <- readFields file `catchParseError` \err ->
case err of
-- In case of a TabsError report them all at once.
TabsError _ -> reportTabsError tabs
TabsError tabLineNo -> reportTabsError
-- but only report the ones including and following
-- the one that caused the actual error
[ t | t@(lineNo',_) <- tabs
, lineNo' >= tabLineNo ]
_ -> parseFail err
-- Parsing might have been successful, but if the new syntax was used with
-- tabs we can't be quite sure the parse was correct. (It is possible to
-- allow tabs in non-indented fields, but that would be inconsistent so we
-- disallow tabs as indentation alltogether.)
when (not (oldSyntax fields0) && not (null tabs)) $
reportTabsError tabs
let sf = sectionizeFields fields0
fields <- mapSimpleFields deprecField sf
......@@ -1004,16 +971,14 @@ parseDescription file = do
let (hdr0, exes0) = break ((=="executable") . fName) fs
(hdr, libfs0) = partition (not . (`elem` libFieldNames) . fName) hdr0
-- XXX: In traditional cabal files, dependencies are global.
-- However, we now have library dependencies and
-- per-executable dependencies, of which only the library
-- dependencies are used for flag resolution.
--
-- The right solution would be to add global dependencies to
-- each non-empty section and resolve dependencies for each.
-- The workaround, for now, is to allow library sections that
-- only consist of dependency specifications.
--
-- The 'build-depends' field was global so far. Now it's
-- supported in each section.
-- XXX: we actially have two options here
-- (1) put all dependencies into the library section, if the
-- library section would be empty, mark it as not buildable
-- (2) duplicate all dependencies in each section, libraries
-- and executables
-- Right now we go with (1)
(deps, libfs1) = partition ((`elem` constraintFieldNames) . fName) libfs0
libfs = if null libfs1 && not (null deps)
-- mark library as not buildable
......@@ -1048,32 +1013,33 @@ parseDescription file = do
-- all simple fields at the beginning of the file are (considered) header
-- fields
getHeader :: [Field] -> PM [Field]
getHeader acc = peekField >>= \mf -> case mf of
Just f@(F _ _ _) -> skipField >> getHeader (f:acc)
_ -> return (reverse acc)
--
-- body ::= [ flags ] { library | executable }+ -- at most one lib
-- body ::= flag* { library | executable }+ -- at most one lib
--
-- The body consists of an optional sequence of flag declarations and after
-- that an arbitrary number of executables and an optional library. The
-- order of the latter doesn't play a role.
getBody :: PM ([Flag]
,Maybe (CondTree ConfVar [Dependency] Library)
,[(String, CondTree ConfVar [Dependency] Executable)])
getBody = do
mf <- peekField
case mf of
-- Just f@(F l n v)
-- | n `elem` libFieldNames -> compatParse f -- old-style format
-- | n == "executable" -> compatParse f
-- | otherwise -> error $ "???" ++ show f -- XXX
Just (Section l sn _label _fields)
Just (Section _ sn _label _fields)
| sn == "flag" -> do
-- don't skipField here. it's simpler to let getFlags do it
-- itself
flags <- getFlags []
(lib, exes) <- getLibOrExe (Lit True)
(lib, exes) <- getLibOrExe
return (flags, lib, exes)
| sn `elem` ["library", "executable"] -> do
(lib,exes) <- getLibOrExe (Lit True)
| otherwise -> do
(lib,exes) <- getLibOrExe
return ([], lib, exes)
| otherwise ->
lift $ syntaxError l $ "Unknown section type: " ++ sn
Nothing -> do lift $ warning "No library or executable specified"
return ([], Nothing, [])
Just f -> lift $ syntaxError (lineNo f) $
......@@ -1082,6 +1048,7 @@ parseDescription file = do
--
-- flags ::= "flag:" name { flag_prop }
--
getFlags :: [Flag] -> StT [Field] ParseResult [Flag]
getFlags acc = peekField >>= \mf -> case mf of
Just (Section _ sn sl fs)
| sn == "flag" -> do
......@@ -1091,24 +1058,33 @@ parseDescription file = do
fs
skipField >> getFlags (fl : acc)
_ -> return (reverse acc)
getLibOrExe cond = peekField >>= \mf -> case mf of
Just (Section _ sn sl fs)
getLibOrExe :: PM (Maybe (CondTree ConfVar [Dependency] Library)
,[(String, CondTree ConfVar [Dependency] Executable)])
getLibOrExe = peekField >>= \mf -> case mf of
Just (Section n sn sl fs)
| sn == "executable" -> do
when (null sl) $ lift $
syntaxError n "'executable' needs one argument (the executable's name)"
exename <- lift $ runP n "executable" parseTokenQ sl
flds <- collectFields parseExeFields fs
skipField
(lib, exes) <- getLibOrExe cond
return (lib, exes ++ [(sl, flds)])
(lib, exes) <- getLibOrExe
return (lib, exes ++ [(exename, flds)])
| sn == "library" -> do
when (not (null sl)) $ lift $
syntaxError n "'library' expects no argument"
flds <- collectFields parseLibFields fs
skipField
(lib, exes) <- getLibOrExe cond
(lib, exes) <- getLibOrExe
return (maybe (Just flds)
(const (error "Multiple libraries specified"))
lib
, exes)
Just x -> lift $ syntaxError (lineNo x) $
"Library or Executable section expected."
| otherwise -> do
lift $ warning $ "Unknown section type: " ++ sn ++ " ignoring..."
return (Nothing, []) -- yep
Just x -> lift $ syntaxError (lineNo x) $ "Section expected."
Nothing -> return (Nothing, [])
-- extracts all fields in a block, possibly add dependencies to the
......@@ -1136,8 +1112,10 @@ parseDescription file = do
return (cnd, t', e')
processIfs _ = bug "processIfs called with wrong field type"
parseLibFields :: [Field] -> StT s ParseResult Library
parseLibFields = lift . parseFields libFieldDescrs nullLibrary
parseExeFields :: [Field] -> StT s ParseResult Executable
parseExeFields = lift . parseFields executableFieldDescrs nullExecutable
......@@ -1342,7 +1320,6 @@ bug msg = error $ msg ++ ". Consider this a bug."
-- * Testing
-- ------------------------------------------------------------
#ifdef DEBUG
-- disabled for now
compatTestPkgDesc :: String
compatTestPkgDesc = unlines [
......
......@@ -66,9 +66,17 @@ import Distribution.Package ( parsePackageName )
import Distribution.Compat.ReadP as ReadP hiding (get)
import Language.Haskell.Extension (Extension)
import Text.PrettyPrint.HughesPJ
import Text.PrettyPrint.HughesPJ hiding (braces)
import Data.Char (isSpace, isUpper, toLower, isAlphaNum)
import Data.Maybe ( fromMaybe)
import Data.List (intersperse)
#ifdef DEBUG
import Test.HUnit (Test(..), assertBool, Assertion, runTestTT, Counts, assertEqual)
import IO
import System.Environment ( getArgs )
import Control.Monad ( zipWithM_ )
#endif
-- -----------------------------------------------------------------------------
......@@ -186,13 +194,6 @@ optsField name flavor get set =
------------------------------------------------------------------------------
trimTrailingSpaces :: String -> String
trimTrailingSpaces = reverse . dropWhile isSpace . reverse
dropSpaces :: String -> String
dropSpaces = dropWhile isSpace
-- The data type for our three syntactic categories
data Field
= F LineNo String String
......@@ -227,168 +228,224 @@ lineNo (IfBlock n _ _ _) = n
fName :: Field -> String
fName (F _ n _) = n
fName (Section _ n _ _) = n
fName _ = undefined
fName _ = error "fname: not a field or section"
-- sectionname ::= "library" | "executable"
sectionNames :: [String]
sectionNames = ["library", "executable", "flag"]
-- |Split a file into "Field: value" groups
readFields :: String -> ParseResult [Field]
readFields = mkStanza
-- . merge
. filter validLine
. zip [1..]
. map trimTrailingSpaces . lines
where validLine (_,s) = case dropSpaces s of
'-':'-':_ -> False -- Comment
[] -> False -- blank line
_ -> True
mkStanza :: [(Int,String)] -> ParseResult [Field]
mkStanza lines0 = parseLines lines0 []
where
parseLines [] fs = return (reverse fs)
parseLines ls fs = do (f, ls') <- getField ls
parseLines ls' $ maybe fs (:fs) f
-- parses:
--
-- field ::= '#' directive value '\n'
-- | [<indent>] fieldname ':' space* fieldvalue
-- | "if" space cond
-- | sectionname section
--
getField :: [(Int,String)] -> ParseResult (Maybe Field,[(Int,String)])
getField [] = return (Nothing, [])
getField ((_,[]):ls) = return (Nothing,ls)
getField ((n,'#':xs):ls) | not (isSpace (head xs)) = do
return (Just $ F n ('#':dir) (dropSpaces val), ls)
where (dir,val) = break isSpace xs
getField ((lineno,line0):lines0) =
let (spaces,line) = span (==' ') line0
indent = length spaces in
case break (`elem` " :{") line of
('\t':_,_) -> tabsError lineno
(fld0, ':':val0) -> do -- regular field
let fld = map toLower fld0
(val, lines') <- getFieldValue indent (dropWhile isSpace val0) lines0
return (Just $ F lineno fld val, lines')
(blkName, ' ':rest)
| map toLower blkName == "if" -> getIf (lineno,rest) lines0
| map toLower blkName `elem` sectionNames ->
getSection (map toLower blkName) (lineno,rest) lines0
| otherwise -> syntaxError lineno $
"Missing colon after field label or invalid section name"
(blkName, '{':rest)
| map toLower blkName `elem` sectionNames ->
getSection (map toLower blkName) (lineno,'{':rest) lines0
("","") -> return (Nothing,lines0)
(_,_) -> syntaxError lineno $
"Unrecognized field format: '" ++ line ++ "'"
-- parses:
--
-- cond ::= (any - '}')* block [ space* "else" block ]
--
getIf :: (Int,String) -> [(Int,String)] -> ParseResult (Maybe Field,[(Int,String)])
getIf (n,rest) ls = do
(cond, ifBlock, lines') <-
case break (=='{') (dropSpaces rest) of
(cond, '{':cs) ->
do (b,ls') <- getBlock (n,'{':cs) ls
return (cond, b, ls')
(_, _) -> -- condition spans more than one line
syntaxError n "Multi-line conditions currently not supported."
(elseBlock, lines'') <- tryElseBlock lines'
return (Just $ IfBlock n cond ifBlock elseBlock, lines'')
where
tryElseBlock [] = return ([], [])
tryElseBlock ((m,l):ls') =
if all isSpace l then return ([],ls')
else case (splitAt 4 . dropSpaces) l of
(kw, rst) ->
if kw == "else" then
getBlock (m,dropSpaces rst) ls'
else syntaxError m "Only 'else' may appear after an if-Block"
-- parses:
--
-- block ::= space* '{' space* '\n'
-- field*
-- space* '}' space* '\n'
--
getBlock :: (Int,String) -> [(Int,String)] -> ParseResult ([Field],[(Int,String)])
getBlock (lnum,rest) lines0 = do
lines' <- checkBlockStart (lnum,dropSpaces rest) lines0
munchTillEndOfBlock lines' []
where
checkBlockStart (n,'{':cs) ls =
if all isSpace cs then return ls
else syntaxError n "Invalid characters after '{'"
checkBlockStart (_,[]) ((n,l):ls) =
checkBlockStart (n,dropSpaces l) ls
checkBlockStart (n,_) _ = syntaxError n "'{' expected"
munchTillEndOfBlock [] _ = syntaxError (-1) "missing '}' at end of file"
munchTillEndOfBlock lines1@((n,l):ls) fs =
case break (=='}') l of
(spaces, '}':rst) ->
if all isSpace spaces
then return ( reverse fs
, (n, rst):ls)
else syntaxError n "'}' must be first character on the line"
_ -> do (f,ls') <- getField lines1
munchTillEndOfBlock ls' $ maybe fs (:fs) f
-- parses:
--
-- section ::= space* [ blocklabel ] space* block
--
getSection :: String -> (Int,String) -> [(Int,String)]
-> ParseResult (Maybe Field,[(Int,String)])
getSection sectName (n,l) lines0 =
case break (=='{') (dropSpaces l) of
(sectLabel, '{':rest) ->
do (b,lines') <- getBlock (n,'{':rest) lines0
return (Just $ Section n sectName (trimTrailingSpaces sectLabel) b, lines')
(_,_) -> error "getSection got a line without a '{'. Consider this a bug."
-- Get the field value of a field at given indentation
getFieldValue :: Int -> String -> [(Int,String)]
-> ParseResult (String,[(Int,String)])
getFieldValue indent val lines0 = do
(valrest, lines') <- getValRest lines0
let v = val' ++ rest valrest
return ( v
, lines')
where
val' = dropWhile isSpace val
rest valrest =
-- don't include initial newline if it would be the first
-- character
(if val' == "" then safeTail else id) $
concatMap (getContinuation) valrest
safeTail (_:xs) = xs
safeTail [] = []
-- the continuation of a field value is everything that is indented
-- relative to the field's label
getValRest [] = return ([],[])
getValRest lls@((n,l):ls) =
let (ind, v') = span isSpace l in
case () of
_ | indent > 0 && '\t' `elem` ind -> tabsError n
| length ind <= indent || null v' -> return ([], lls)
| otherwise ->
do (valrest, lines') <- getValRest ls
return (v':valrest, lines')
getContinuation line = '\n':stripDot (dropWhile isSpace line)
stripDot "." = ""
stripDot s = s
readFields input =
ifelse
=<< mapM (mkField 0)
=<< mkTree (tokenise input)
where tokenise = concatMap tokeniseLine
. trimLines
. lines
. normaliseLineEndings
-- TODO: should decode UTF8
-- attach line number and determine indentation
trimLines :: [String] -> [(LineNo, Indent, HasTabs, String)]
trimLines ls = [ (lineno, indent, hastabs, (trimTrailing l'))
| (lineno, l) <- zip [1..] ls
, let (sps, l') = span isSpace l
indent = length sps
hastabs = '\t' `elem` sps
, validLine l' ]
where validLine ('-':'-':_) = False -- Comment
validLine [] = False -- blank line
validLine _ = True
-- | We parse generically based on indent level and braces '{' '}'. To do that
-- we split into lines and then '{' '}' tokens and other spans within a line.
data Token =
-- | The 'Line' token is for bits that /start/ a line, eg:
--
-- > "\n blah blah { blah"
--
-- tokenises to:
--
-- > [Line n 2 False "blah blah", OpenBracket, Span n "blah"]
--
-- so lines are the only ones that can have nested layout, since they
-- have a known indentation level.
--
-- eg: we can't have this:
-- > if ... {
-- > } else
-- > other
--
-- because other cannot nest under else, since else doesn't start a line
-- so cannot have nested layout. It'd have to be:
--
-- > if ... {
-- > }
-- > else
-- > other
--
-- but that's not so common, people would normally use layout or
-- brackets not both in a single @if else@ construct.
--
-- > if ... { foo : bar }
-- > else
-- > other
--
-- this is ok
Line LineNo Indent HasTabs String
| Span LineNo String -- ^ span in a line, following brackets
| OpenBracket LineNo | CloseBracket LineNo
type Indent = Int
type HasTabs = Bool
-- | Tokenise a single line, splitting on '{' '}' and the spans inbetween.
-- Also trims leading & trailing space on those spans within the line.
tokeniseLine :: (LineNo, Indent, HasTabs, String) -> [Token]
tokeniseLine (n0, i, t, l) = case split n0 l of
(Span _ l':ss) -> Line n0 i t l' :ss
cs -> cs
where split _ "" = []
split n s = case span (\c -> c /='}' && c /= '{') s of
("", '{' : s') -> OpenBracket n : split n s'
(w , '{' : s') -> mkspan n w (OpenBracket n : split n s')
("", '}' : s') -> CloseBracket n : split n s'
(w , '}' : s') -> mkspan n w (CloseBracket n : split n s')
(w , _) -> mkspan n w []
mkspan n s ss | null s' = ss
| otherwise = Span n s' : ss
where s' = trimTrailing (trimLeading s)
trimLeading, trimTrailing :: String -> String
trimLeading = dropWhile isSpace
trimTrailing = reverse . dropWhile isSpace . reverse
-- | Fix different systems silly line ending conventions
normaliseLineEndings :: String -> String
normaliseLineEndings [] = []
normaliseLineEndings ('\r':'\n':s) = '\n' : normaliseLineEndings s -- windows
normaliseLineEndings ('\r':s) = '\n' : normaliseLineEndings s -- old osx
normaliseLineEndings ( c :s) = c : normaliseLineEndings s
type SyntaxTree = Tree (LineNo, HasTabs, String)
-- | Parse the stream of tokens into a tree of them, based on indent / layout
mkTree :: [Token] -> ParseResult [SyntaxTree]
mkTree toks =
layout 0 [] toks >>= \(trees, trailing) -> case trailing of
[] -> return trees
OpenBracket n:_ -> syntaxError n "mismatched backets, unexpected {"
CloseBracket n:_ -> syntaxError n "mismatched backets, unexpected }"
-- the following two should never happen:
Span n l :_ -> syntaxError n $ "unexpected span: " ++ show l
Line n _ _ l :_ -> syntaxError n $ "unexpected line: " ++ show l
-- | Parse the stream of tokens into a tree of them, based on indent
-- This parse state expect to be in a layout context, though possibly
-- nested within a braces context so we may still encounter closing braces.
layout :: Indent -- ^ indent level of the parent/previous line
-> [SyntaxTree] -- ^ accumulating param, trees in this level
-> [Token] -- ^ remaining tokens
-> ParseResult ([SyntaxTree], [Token])
-- ^ collected trees on this level and trailing tokens
layout _ a [] = return (reverse a, [])
layout i a (s@(Line _ i' _ _):ss) | i' < i = return (reverse a, s:ss)
layout i a (Line n _ t l:OpenBracket n':ss) = do
(sub, ss') <- braces n' [] ss
layout i (Node (n,t,l) sub:a) ss'
layout i a (Span n l:OpenBracket n':ss) = do
(sub, ss') <- braces n' [] ss
layout i (Node (n,False,l) sub:a) ss'
-- look ahead to see if following lines are more indented, giving a sub-tree
layout i a (Line n i' t l:ss) = do
lookahead <- layout (i'+1) [] ss
case lookahead of
([], _) -> layout i (Node (n,t,l) [] :a) ss
(ts, ss') -> layout i (Node (n,t,l) ts :a) ss'
layout _ _ ( OpenBracket n :_) = syntaxError n $ "unexpected '{'"
layout _ a (s@(CloseBracket _):ss) = return (reverse a, s:ss)
layout _ _ ( Span n l : _) = syntaxError n $ "unexpected span: "
++ show l
-- | Parse the stream of tokens into a tree of them, based on explicit braces
-- This parse state expects to find a closing bracket.
braces :: LineNo -- ^ line of the '{', used for error messages
-> [SyntaxTree] -- ^ accumulating param, trees in this level
-> [Token] -- ^ remaining tokens
-> ParseResult ([SyntaxTree],[Token])
-- ^ collected trees on this level and trailing tokens
braces m a (Line n _ t l:OpenBracket n':ss) = do
(sub, ss') <- braces n' [] ss
braces m (Node (n,t,l) sub:a) ss'
braces m a (Span n l:OpenBracket n':ss) = do
(sub, ss') <- braces n' [] ss
braces m (Node (n,False,l) sub:a) ss'
braces m a (Line n i t l:ss) = do
lookahead <- layout (i+1) [] ss
case lookahead of
([], _) -> braces m (Node (n,t,l) [] :a) ss
(ts, ss') -> braces m (Node (n,t,l) ts :a) ss'
braces m a (Span n l:ss) = braces m (Node (n,False,l) []:a) ss
braces _ a (CloseBracket _:ss) = return (reverse a, ss)
braces n _ [] = syntaxError n $ "opening brace '{'"
++ "has no matching closing brace '}'"
braces _ _ (OpenBracket n:_) = syntaxError n "unexpected '{'"
-- | Convert the parse tree into the Field AST
-- Also check for dodgy uses of tabs in indentation.
mkField :: Int -> SyntaxTree -> ParseResult Field
mkField d (Node (n,t,_) _) | d >= 1 && t = tabsError n
mkField d (Node (n,_,l) ts) = case span (\c -> isAlphaNum c || c == '-') l of
([], _) -> syntaxError n $ "unrecognised field or section: " ++ show l
(name, rest) -> case trimLeading rest of
(':':rest') -> do let followingLines = concatMap flatten ts
tabs = not (null [()| (_,True,_) <- followingLines ])
if tabs && d >= 1
then tabsError n
else return $ F n (map toLower name)
(fieldValue rest' followingLines)
rest' -> do ts' <- mapM (mkField (d+1)) ts
return (Section n (map toLower name) rest' ts')
where fieldValue firstLine followingLines =
let firstLine' = trimLeading firstLine
followingLines' = map (\(_,_,s) -> stripDot s) followingLines
allLines | null firstLine' = followingLines'
| otherwise = firstLine' : followingLines'
in (concat . intersperse "\n") allLines
stripDot "." = ""
stripDot s = s
-- | Convert if/then/else 'Section's to 'IfBlock's
ifelse :: [Field] -> ParseResult [Field]
ifelse [] = return []
ifelse (Section n "if" cond thenpart
:Section _ "else" as elsepart:fs)