Commit 512db606 authored by Oleg Grenrus's avatar Oleg Grenrus
Browse files

Parsec parser

- Initial import of stuff
- Add hackage index parsing test for parsec
- We can parse name and version
- Add generics-sop dep
- Use D.C.Prelude
- Add StructDiff
- Fix integral
- Add Parsec -class
- Add licence parsing
- Free text fields (parsed as ascii atm)
- Better StructDiff
- Parsec BuildType
- Implement VersionRange parser
- Section parsing first steps
- Sketch parseCondTree
- more sec parsing
- Module name
- hs-source-dirs
    ::: composition/0.1/composition.cabal
    ::: composition/0.2/composition.cabal
    ::: composition/0.2.1/composition.cabal
    ::: composition/1.0.0.0/composition.cabal
- PError (Position 20 1) "Unknown section: \"source-repository\""
- Parses condition
- Add some notes, address few comments
- Install alex on linux travis
- Make ParseResult strict state monad
- Use withinVersion
- No warnings
- Move to Distribution.PackageDescription.Parsec.FieldDescr
- extensions
- SourceRepo (sans kind) parsing
- RepoKind
- Few more stuff
- Add Distribution.Compat.Parsec
- We can parse all 'composition*' packages cabal files \o/
- fromUtf8BS
- Clean up abit
- More stuff
- And more stuff
- Traling whitespace :(
- Executables
- Benchmarks
- spaceListField
- Parse flag sections
- a ~ Char,
- tested with
- package description x-fields
- Conditions
- reformat
- Handle old syntax
- More fields
- More stuff
- ^co ok,
- more stuff
- ^c ok
- some of hackage ok
- A-Z ok
- Works [A-al)
- to the h
- rest of Hackage
- Introduce parsec -flag
- Count warnings
- Verify we get not less warnings from parsec parser
- fixup! Introduce parsec -flag
- Warn about old/new syntax and cabal-version
- Invalid UTF warning
- Deprecated fields
- Fix meta
- Move transformers dep under parsec flag
- Add parsec travis job
parent 5af51562
......@@ -46,6 +46,9 @@ matrix:
- env: GHCVER=8.0.1 SCRIPT=solver-debug-flags
sudo: required
os: linux
- env: GHCVER=8.0.1 SCRIPT=script PARSEC=YES
os: linux
sudo: required
- env: GHCVER=8.0.1 SCRIPT=bootstrap
sudo: required
os: linux
......@@ -89,6 +92,7 @@ before_install:
- export PATH=$HOME/.local/bin:$PATH
- export PATH=/opt/cabal/1.24/bin:$PATH
- export PATH=/opt/happy/1.19.5/bin:$PATH
- export PATH=/opt/alex/3.1.7/bin:$PATH
- ./travis-install.sh
# Set up deployment to the haskell/cabal-website repo.
......
......@@ -322,6 +322,11 @@ flag old-directory
description: Use directory < 1.2 and old-time
default: False
flag parsec
description: Use parsec parser
default: False
manual: True
library
build-depends:
array >= 0.1 && < 0.6,
......@@ -482,6 +487,30 @@ library
Language.Haskell.Extension
Distribution.Compat.Binary
if flag(parsec)
cpp-options: -DCABAL_PARSEC
build-depends:
transformers,
parsec >= 3.1.9 && <3.2
build-tools:
alex >= 3.1.4
exposed-modules:
Distribution.Compat.Parsec
Distribution.PackageDescription.Parsec
Distribution.PackageDescription.Parsec.FieldDescr
Distribution.Parsec.Class
Distribution.Parsec.ConfVar
Distribution.Parsec.Lexer
Distribution.Parsec.LexerMonad
Distribution.Parsec.Parser
Distribution.Parsec.Types.Common
Distribution.Parsec.Types.Field
Distribution.Parsec.Types.FieldDescr
Distribution.Parsec.Types.ParseResult
Distribution.Compat.DList
-- Move DList to other-module if/when D.C.Lens is done
other-modules:
Distribution.Backpack.PreExistingComponent
Distribution.Backpack.ReadyComponent
......@@ -612,3 +641,29 @@ test-suite package-tests
ghc-options: -Wall -rtsopts
default-extensions: CPP
default-language: Haskell2010
test-suite parser-tests
if !flag(parsec)
buildable: False
type: exitcode-stdio-1.0
main-is: ParserTests.hs
other-modules:
DiffInstances
StructDiff
hs-source-dirs: tests
build-depends:
base,
containers,
tar >=0.5 && <0.6,
bytestring,
directory,
filepath,
generics-sop ==0.2.*,
these >=0.7.1 && <0.8,
singleton-bool >=0.1.1.0 && <0.2,
keys,
Cabal
ghc-options: -Wall -rtsopts
default-extensions: CPP
default-language: Haskell2010
{-# LANGUAGE FlexibleContexts #-}
module Distribution.Compat.Parsec (
P.Parsec,
P.ParsecT,
P.Stream,
(P.<?>),
P.runParser,
-- * Combinators
P.between,
P.option,
P.optional,
P.optionMaybe,
P.try,
P.sepBy,
P.sepBy1,
P.choice,
-- * Char
integral,
P.char,
P.anyChar,
P.satisfy,
P.space,
P.spaces,
P.string,
munch,
munch1,
P.oneOf,
) where
import Distribution.Compat.Prelude
import Prelude ()
import qualified Text.Parsec as P
import qualified Text.Parsec.Pos as P
integral :: (P.Stream s m Char, Integral a) => P.ParsecT s u m a
integral = toNumber <$> some d P.<?> "integral"
where
toNumber = foldl' (\a b -> a * 10 + b) 0
d = P.tokenPrim
(\c -> show [c])
(\pos c _cs -> P.updatePosChar pos c)
f
f '0' = Just 0
f '1' = Just 1
f '2' = Just 2
f '3' = Just 3
f '4' = Just 4
f '5' = Just 5
f '6' = Just 6
f '7' = Just 7
f '8' = Just 8
f '9' = Just 9
f _ = Nothing
-- | Greedely munch characters while predicate holds.
munch1
:: P.Stream s m Char
=> (Char -> Bool)
-> P.ParsecT s u m String
munch1 = some . P.satisfy
-- | Greedely munch characters while predicate holds.
munch
:: P.Stream s m Char
=> (Char -> Bool)
-> P.ParsecT s u m String
munch = many . P.satisfy
......@@ -13,7 +13,7 @@
-- Data type for Haskell module names.
module Distribution.ModuleName (
ModuleName,
ModuleName (..), -- TODO: move Parsec instance here, don't export constructor
fromString,
fromComponents,
components,
......
This diff is collapsed.
This diff is collapsed.
{-# LANGUAGE FlexibleContexts #-}
module Distribution.Parsec.Class (
Parsec(..),
-- * Warnings
parsecWarning,
-- * Utilities
parsecTestedWith,
parsecPkgconfigDependency,
parsecBuildTool,
parsecToken,
parsecToken',
parsecFilePath,
parsecQuoted,
parsecMaybeQuoted,
parsecCommaList,
parsecOptCommaList,
) where
import Distribution.Compat.Prelude
import Prelude ()
import Data.Functor.Identity (Identity)
import Distribution.Parsec.Types.Common
(PWarnType (..), PWarning (..), Position (..))
import qualified Distribution.Compat.Parsec as P
import qualified Text.Parsec as Parsec
import qualified Text.Parsec.Language as Parsec
import qualified Text.Parsec.Token as Parsec
-- Instances
import Distribution.Compiler
(CompilerFlavor (..), classifyCompilerFlavor)
import Distribution.License (License (..))
import Distribution.ModuleName (ModuleName)
import Distribution.Package
(Dependency (..), PackageName, mkPackageName)
import Distribution.System
(Arch (..), ClassificationStrictness (..), OS (..),
classifyArch, classifyOS)
import Distribution.Text (display)
import Distribution.Types.BenchmarkType
(BenchmarkType (..))
import Distribution.Types.BuildType (BuildType (..))
import Distribution.Types.GenericPackageDescription (FlagName (..))
import Distribution.Types.ModuleReexport
(ModuleReexport (..))
import Distribution.Types.SourceRepo
(RepoKind, RepoType, classifyRepoKind, classifyRepoType)
import Distribution.Types.TestType (TestType (..))
import Distribution.Version
(Version, mkVersion, VersionRange (..), anyVersion, earlierVersion,
intersectVersionRanges, laterVersion, noVersion,
orEarlierVersion, orLaterVersion, thisVersion,
unionVersionRanges, withinVersion)
import Language.Haskell.Extension
(Extension, Language, classifyExtension, classifyLanguage)
import qualified Distribution.ModuleName as ModuleName
-------------------------------------------------------------------------------
-- Class
-------------------------------------------------------------------------------
-- |
--
-- TODO: implementation details: should be careful about consuming trailing whitespace?
-- Should we always consume it?
class Parsec a where
parsec :: P.Stream s Identity Char => P.Parsec s [PWarning] a
-- | 'parsec' /could/ consume trailing spaces, this function /must/ consume.
lexemeParsec :: P.Stream s Identity Char => P.Parsec s [PWarning] a
lexemeParsec = parsec <* P.spaces
parsecWarning :: PWarnType -> String -> P.Parsec s [PWarning] ()
parsecWarning t w =
Parsec.modifyState (PWarning t (Position 0 0) w :)
-------------------------------------------------------------------------------
-- Instances
-------------------------------------------------------------------------------
-- TODO: use lexemeParsec
instance Parsec PackageName where
-- todo
parsec = mkPackageName . intercalate "-" <$> P.sepBy1 component (P.char '-')
where
component :: P.Stream s Identity Char => P.Parsec s [PWarning] String
component = do
cs <- P.munch1 isAlphaNum
if all isDigit cs then fail "all digits PackageName" else return cs
instance Parsec ModuleName where
parsec = ModuleName.fromComponents <$> P.sepBy1 component (P.char '.')
where
component = do
c <- P.satisfy isUpper
cs <- P.munch validModuleChar
return (c:cs)
validModuleChar :: Char -> Bool
validModuleChar c = isAlphaNum c || c == '_' || c == '\''
instance Parsec FlagName where
parsec = FlagName . map toLower . intercalate "-" <$> P.sepBy1 component (P.char '-')
where
-- http://hackage.haskell.org/package/cabal-debian-4.24.8/cabal-debian.cabal
-- has flag with all digit component: pretty-112
component :: P.Stream s Identity Char => P.Parsec s [PWarning] String
component = P.munch1 (\c -> isAlphaNum c || c `elem` "_")
instance Parsec Dependency where
parsec = do
name <- lexemeParsec
ver <- parsec <|> pure anyVersion
return (Dependency name ver)
instance Parsec Version where
parsec = mkVersion <$>
P.sepBy1 P.integral (P.char '.')
<* tags
where
tags = do
ts <- P.optionMaybe $ some $ P.char '-' *> some (P.satisfy isAlphaNum)
case ts of
Nothing -> pure ()
-- TODO: make this warning severe
Just _ -> parsecWarning PWTVersionTag "version with tags"
-- TODO: this is not good parsec code
-- use lexer, also see D.P.ConfVar
instance Parsec VersionRange where
parsec = expr
where
expr = do P.spaces
t <- term
P.spaces
(do _ <- P.string "||"
P.spaces
e <- expr
return (unionVersionRanges t e)
<|>
return t)
term = do f <- factor
P.spaces
(do _ <- P.string "&&"
P.spaces
t <- term
return (intersectVersionRanges f t)
<|>
return f)
factor = P.choice
$ parens expr
: parseAnyVersion
: parseNoVersion
: parseWildcardRange
: map parseRangeOp rangeOps
parseAnyVersion = P.string "-any" >> return anyVersion
parseNoVersion = P.string "-none" >> return noVersion
parseWildcardRange = P.try $ do
_ <- P.string "=="
P.spaces
branch <- some (P.integral <* P.char '.')
_ <- P.char '*'
return (withinVersion (mkVersion branch))
parens p = P.between
(P.char '(' >> P.spaces)
(P.char ')' >> P.spaces)
(do a <- p
P.spaces
return (VersionRangeParens a))
-- | TODO: make those non back-tracking
parseRangeOp (s,f) = P.try (P.string s *> P.spaces *> fmap f parsec)
rangeOps = [ ("<", earlierVersion),
("<=", orEarlierVersion),
(">", laterVersion),
(">=", orLaterVersion),
("==", thisVersion) ]
instance Parsec Language where
parsec = classifyLanguage <$> P.munch1 isAlphaNum
instance Parsec Extension where
parsec = classifyExtension <$> P.munch1 isAlphaNum
instance Parsec RepoType where
parsec = classifyRepoType <$> P.munch1 isIdent
instance Parsec RepoKind where
parsec = classifyRepoKind <$> P.munch1 isIdent
instance Parsec License where
parsec = do
name <- P.munch1 isAlphaNum
version <- P.optionMaybe (P.char '-' *> parsec)
return $! case (name, version :: Maybe Version) of
("GPL", _ ) -> GPL version
("LGPL", _ ) -> LGPL version
("AGPL", _ ) -> AGPL version
("BSD2", Nothing) -> BSD2
("BSD3", Nothing) -> BSD3
("BSD4", Nothing) -> BSD4
("ISC", Nothing) -> ISC
("MIT", Nothing) -> MIT
("MPL", Just version') -> MPL version'
("Apache", _ ) -> Apache version
("PublicDomain", Nothing) -> PublicDomain
("AllRightsReserved", Nothing) -> AllRightsReserved
("OtherLicense", Nothing) -> OtherLicense
_ -> UnknownLicense $ name ++
maybe "" (('-':) . display) version
instance Parsec BuildType where
parsec = do
name <- P.munch1 isAlphaNum
return $ case name of
"Simple" -> Simple
"Configure" -> Configure
"Custom" -> Custom
"Make" -> Make
_ -> UnknownBuildType name
instance Parsec TestType where
parsec = stdParse $ \ver name -> case name of
"exitcode-stdio" -> TestTypeExe ver
"detailed" -> TestTypeLib ver
_ -> TestTypeUnknown name ver
instance Parsec BenchmarkType where
parsec = stdParse $ \ver name -> case name of
"exitcode-stdio" -> BenchmarkTypeExe ver
_ -> BenchmarkTypeUnknown name ver
instance Parsec OS where
parsec = classifyOS Compat <$> parsecIdent
instance Parsec Arch where
parsec = classifyArch Strict <$> parsecIdent
instance Parsec CompilerFlavor where
parsec = classifyCompilerFlavor <$> component
where
component :: P.Stream s Identity Char => P.Parsec s [PWarning] String
component = do
cs <- P.munch1 isAlphaNum
if all isDigit cs then fail "all digits compiler name" else return cs
instance Parsec ModuleReexport where
parsec = do
mpkgname <- P.optionMaybe (P.try $ parsec <* P.char ':')
origname <- parsec
newname <- P.option origname $ P.try $ do
P.spaces
_ <- P.string "as"
P.spaces
parsec
return (ModuleReexport mpkgname origname newname)
-------------------------------------------------------------------------------
-- Utilities
-------------------------------------------------------------------------------
isIdent :: Char -> Bool
isIdent c = isAlphaNum c || c == '_' || c == '-'
parsecTestedWith :: P.Stream s Identity Char => P.Parsec s [PWarning] (CompilerFlavor, VersionRange)
parsecTestedWith = do
name <- lexemeParsec
ver <- parsec <|> pure anyVersion
return (name, ver)
parsecPkgconfigDependency :: P.Stream s Identity Char => P.Parsec s [PWarning] Dependency
parsecPkgconfigDependency = do
name <- P.munch1 (\c -> isAlphaNum c || c `elem` "+-._")
P.spaces
verRange <- parsec <|> pure anyVersion
pure $ Dependency (mkPackageName name) verRange
parsecBuildTool :: P.Stream s Identity Char => P.Parsec s [PWarning] Dependency
parsecBuildTool = do
name <- parsecMaybeQuoted nameP
P.spaces
verRange <- parsec <|> pure anyVersion
pure $ Dependency (mkPackageName name) verRange
where
nameP = intercalate "-" <$> P.sepBy1 component (P.char '-')
component = do
cs <- P.munch1 (\c -> isAlphaNum c || c == '+' || c == '_')
if all isDigit cs then fail "invalid component" else return cs
parsecToken :: P.Stream s Identity Char => P.Parsec s [PWarning] String
parsecToken = parsecHaskellString <|> (P.munch1 (\x -> not (isSpace x) && x /= ',') P.<?> "identifier" )
parsecToken' :: P.Stream s Identity Char => P.Parsec s [PWarning] String
parsecToken' = parsecHaskellString <|> (P.munch1 (not . isSpace) P.<?> "token")
parsecFilePath :: P.Stream s Identity Char => P.Parsec s [PWarning] String
parsecFilePath = parsecToken
stdParse
:: P.Stream s Identity Char
=> (Version -> String -> a)
-> P.Parsec s [PWarning] a
stdParse f = do
-- TODO: this backtracks
cs <- some $ P.try (component <* P.char '-')
ver <- parsec
let name = map toLower (intercalate "-" cs)
return $! f ver name
where
component = do
cs <- P.munch1 isAlphaNum
if all isDigit cs then fail "all digit component" else return cs
-- each component must contain an alphabetic character, to avoid
-- ambiguity in identifiers like foo-1 (the 1 is the version number).
parsecCommaList
:: P.Stream s Identity Char
=> P.Parsec s [PWarning] a
-> P.Parsec s [PWarning] [a]
parsecCommaList p = P.sepBy (p <* P.spaces) (P.char ',' *> P.spaces)
parsecOptCommaList
:: P.Stream s Identity Char
=> P.Parsec s [PWarning] a
-> P.Parsec s [PWarning] [a]
parsecOptCommaList p = P.sepBy (p <* P.spaces) (P.optional comma)
where
comma = P.char ',' *> P.spaces
-- | Content isn't unquoted
parsecQuoted
:: P.Stream s Identity Char
=> P.Parsec s [PWarning] a
-> P.Parsec s [PWarning] a
parsecQuoted = P.between (P.char '"') (P.char '"')
-- | @parsecMaybeQuoted p = 'parsecQuoted' p <|> p@.
parsecMaybeQuoted
:: P.Stream s Identity Char
=> P.Parsec s [PWarning] a
-> P.Parsec s [PWarning] a
parsecMaybeQuoted p = parsecQuoted p <|> p
parsecHaskellString :: P.Stream s Identity Char => P.Parsec s [PWarning] String
parsecHaskellString = Parsec.stringLiteral $ Parsec.makeTokenParser Parsec.emptyDef
{ Parsec.commentStart = "{-"
, Parsec.commentEnd = "-}"
, Parsec.commentLine = "--"
, Parsec.nestedComments = True
, Parsec.identStart = P.satisfy isAlphaNum
, Parsec.identLetter = P.satisfy isAlphaNum <|> P.oneOf "_'"
, Parsec.opStart = opl
, Parsec.opLetter = opl
, Parsec.reservedOpNames= []
, Parsec.reservedNames = []
, Parsec.caseSensitive = True
}
where
opl = P.oneOf ":!#$%&*+./<=>?@\\^|-~"
parsecIdent :: P.Stream s Identity Char => P.Parsec s [PWarning] String
parsecIdent = (:) <$> firstChar <*> rest
where
firstChar = P.satisfy isAlpha
rest = P.munch (\c -> isAlphaNum c || c == '_' || c == '-')
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Distribution.Parsec.ConfVar (parseConditionConfVar) where
import Distribution.Compat.Prelude
import Prelude ()
import Distribution.Compat.Parsec (integral)
import qualified Text.Parsec as P
--import qualified Text.Parsec.Pos as P
import qualified Text.Parsec.Error as P
import Distribution.Parsec.Class (Parsec (..))
import Distribution.Parsec.Types.Common
import Distribution.Parsec.Types.Field (SectionArg (..))
import Distribution.Parsec.Types.ParseResult
import Distribution.Simple.Utils (fromUTF8BS)
import Distribution.Types.GenericPackageDescription
(Condition (..), ConfVar (..))
import Distribution.Version
(mkVersion, anyVersion, earlierVersion,
intersectVersionRanges, laterVersion, noVersion,
orEarlierVersion, orLaterVersion, thisVersion,
unionVersionRanges, withinVersion)
-- | Parse @'Condition' 'ConfVar'@ from section arguments provided by parsec
-- based outline parser.
parseConditionConfVar :: [SectionArg Position] -> ParseResult (Condition ConfVar)
parseConditionConfVar args = do
-- Warnings!
args' <- preprocess args
case P.runParser (parser <* P.eof) () "<condition>" args' of
Right x -> pure x
Left err -> do
let ppos = P.errorPos err
let epos = Position (P.sourceLine ppos) (P.sourceColumn ppos)
let msg = P.showErrorMessages
"or" "unknown parse error" "expecting" "unexpected" "end of input"
(P.errorMessages err)
parseFailure epos msg
pure $ Lit True
-- This is a hack, as we have "broken" .cabal files on Hackage
preprocess :: [SectionArg Position] -> ParseResult [SectionArg Position]
preprocess (SecArgOther pos "&&!" : rest) = do
parseWarning pos PWTGluedOperators "Glued operators: &&!"
(\rest' -> SecArgOther pos "&&" : SecArgOther pos "!" : rest') <$> preprocess rest
preprocess (x : rest) =
(x: ) <$> preprocess rest
preprocess [] = pure []