Parsec.hs 17.2 KB
Newer Older
1
{-# LANGUAGE BangPatterns        #-}
2
{-# LANGUAGE CPP                 #-}
3
{-# LANGUAGE FlexibleContexts    #-}
4
{-# LANGUAGE GADTs               #-}
5
6
{-# LANGUAGE RankNTypes          #-}
{-# LANGUAGE ScopedTypeVariables #-}
7
module Distribution.Parsec (
Oleg Grenrus's avatar
Oleg Grenrus committed
8
    Parsec(..),
9
10
    ParsecParser (..),
    runParsecParser,
11
    runParsecParser',
Oleg Grenrus's avatar
Oleg Grenrus committed
12
    simpleParsec,
13
    simpleParsecBS,
14
15
    simpleParsec',
    simpleParsecW',
16
    lexemeParsec,
Oleg Grenrus's avatar
Oleg Grenrus committed
17
    eitherParsec,
18
    explicitEitherParsec,
19
    explicitEitherParsec',
20
    -- * CabalParsing and and diagnostics
21
    CabalParsing (..),
22
    -- ** Warnings
Oleg Grenrus's avatar
Oleg Grenrus committed
23
    PWarnType (..),
24
25
26
27
28
29
30
31
32
33
34
    PWarning (..),
    showPWarning,
    -- ** Errors
    PError (..),
    showPError,
    -- * Position
    Position (..),
    incPos,
    retPos,
    showPos,
    zeroPos,
Oleg Grenrus's avatar
Oleg Grenrus committed
35
36
37
38
39
40
41
    -- * Utilities
    parsecToken,
    parsecToken',
    parsecFilePath,
    parsecQuoted,
    parsecMaybeQuoted,
    parsecCommaList,
42
    parsecLeadingCommaList,
Oleg Grenrus's avatar
Oleg Grenrus committed
43
    parsecOptCommaList,
44
    parsecLeadingOptCommaList,
Oleg Grenrus's avatar
Oleg Grenrus committed
45
46
    parsecStandard,
    parsecUnqualComponentName,
Oleg Grenrus's avatar
Oleg Grenrus committed
47
48
    ) where

49
50
import Data.Char                           (digitToInt, intToDigit)
import Data.List                           (transpose)
51
52
import Distribution.CabalSpecVersion
import Distribution.Compat.Prelude
53
import Distribution.Parsec.Error           (PError (..), showPError)
54
55
import Data.ByteString (ByteString)
import Distribution.Parsec.FieldLineStream (FieldLineStream, fieldLineStreamFromString, fieldLineStreamFromBS)
56
57
58
import Distribution.Parsec.Position        (Position (..), incPos, retPos, showPos, zeroPos)
import Distribution.Parsec.Warning         (PWarnType (..), PWarning (..), showPWarning)
import Numeric                             (showIntAtBase)
59
60
61
import Prelude ()

import qualified Distribution.Compat.CharParsing as P
62
import qualified Distribution.Compat.DList       as DList
63
64
import qualified Distribution.Compat.MonadFail   as Fail
import qualified Text.Parsec                     as Parsec
Oleg Grenrus's avatar
Oleg Grenrus committed
65
66
67
68
69

-------------------------------------------------------------------------------
-- Class
-------------------------------------------------------------------------------

70
-- | Class for parsing with @parsec@. Mainly used for @.cabal@ file fields.
71
72
73
--
-- For parsing @.cabal@ like file structure, see "Distribution.Fields".
--
Oleg Grenrus's avatar
Oleg Grenrus committed
74
class Parsec a where
75
    parsec :: CabalParsing m => m a
Oleg Grenrus's avatar
Oleg Grenrus committed
76

Mikhail Glushenkov's avatar
Mikhail Glushenkov committed
77
-- | Parsing class which
78
79
80
81
82
--
-- * can report Cabal parser warnings.
--
-- * knows @cabal-version@ we work with
--
83
class (P.CharParsing m, MonadPlus m, Fail.MonadFail m) => CabalParsing m where
84
85
86
87
88
89
90
    parsecWarning :: PWarnType -> String -> m ()

    parsecHaskellString :: m String
    parsecHaskellString = stringLiteral

    askCabalSpecVersion :: m CabalSpecVersion

91
-- | 'parsec' /could/ consume trailing spaces, this function /will/ consume.
92
lexemeParsec :: (CabalParsing m, Parsec a) => m a
93
lexemeParsec = parsec <* P.spaces
Oleg Grenrus's avatar
Oleg Grenrus committed
94

95
newtype ParsecParser a = PP { unPP
96
    :: CabalSpecVersion -> Parsec.Parsec FieldLineStream [PWarning] a
97
98
    }

99
liftParsec :: Parsec.Parsec FieldLineStream [PWarning] a -> ParsecParser a
100
101
102
103
liftParsec p = PP $ \_ -> p

instance Functor ParsecParser where
    fmap f p = PP $ \v -> fmap f (unPP p v)
104
105
106
107
    {-# INLINE fmap #-}

    x <$ p = PP $ \v -> x <$ unPP p v
    {-# INLINE (<$) #-}
108
109
110

instance Applicative ParsecParser where
    pure = liftParsec . pure
111
    {-# INLINE pure #-}
112
113

    f <*> x = PP $ \v -> unPP f v <*> unPP x v
114
    {-# INLINE (<*>) #-}
115
    f  *> x = PP $ \v -> unPP f v  *> unPP x v
116
    {-# INLINE (*>) #-}
117
    f <*  x = PP $ \v -> unPP f v <*  unPP x v
118
    {-# INLINE (<*) #-}
119
120
121
122
123

instance Alternative ParsecParser where
    empty = liftParsec empty

    a <|> b = PP $ \v -> unPP a v <|> unPP b v
124
    {-# INLINE (<|>) #-}
125

126
127
128
129
130
131
    many p = PP $ \v -> many (unPP p v)
    {-# INLINE many #-}

    some p = PP $ \v -> some (unPP p v)
    {-# INLINE some #-}

132
133
134
135
instance Monad ParsecParser where
    return = pure

    m >>= k = PP $ \v -> unPP m v >>= \x -> unPP (k x) v
136
    {-# INLINE (>>=) #-}
137
    (>>) = (*>)
138
    {-# INLINE (>>) #-}
139

140
#if !(MIN_VERSION_base(4,13,0))
141
    fail = Fail.fail
142
#endif
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167

instance MonadPlus ParsecParser where
    mzero = empty
    mplus = (<|>)

instance Fail.MonadFail ParsecParser where
    fail = P.unexpected

instance P.Parsing ParsecParser where
    try p           = PP $ \v -> P.try (unPP p v)
    p <?> d         = PP $ \v -> unPP p v P.<?> d
    skipMany p      = PP $ \v -> P.skipMany (unPP p v)
    skipSome p      = PP $ \v -> P.skipSome (unPP p v)
    unexpected      = liftParsec . P.unexpected
    eof             = liftParsec P.eof
    notFollowedBy p = PP $ \v -> P.notFollowedBy (unPP p v)

instance P.CharParsing ParsecParser where
    satisfy   = liftParsec . P.satisfy
    char      = liftParsec . P.char
    notChar   = liftParsec . P.notChar
    anyChar   = liftParsec P.anyChar
    string    = liftParsec . P.string

instance CabalParsing ParsecParser where
168
169
170
171
    parsecWarning t w = liftParsec $ do
        spos <- Parsec.getPosition
        Parsec.modifyState
            (PWarning t (Position (Parsec.sourceLine spos) (Parsec.sourceColumn spos)) w :)
172
    askCabalSpecVersion = PP pure
Oleg Grenrus's avatar
Oleg Grenrus committed
173

Oleg Grenrus's avatar
Oleg Grenrus committed
174
175
176
-- | Parse a 'String' with 'lexemeParsec'.
simpleParsec :: Parsec a => String -> Maybe a
simpleParsec
177
178
179
    = either (const Nothing) Just
    . runParsecParser lexemeParsec "<simpleParsec>"
    . fieldLineStreamFromString
Oleg Grenrus's avatar
Oleg Grenrus committed
180

181
182
183
184
185
186
187
-- | Like 'simpleParsec' but for 'ByteString'
simpleParsecBS :: Parsec a => ByteString -> Maybe a
simpleParsecBS
    = either (const Nothing) Just
    . runParsecParser lexemeParsec "<simpleParsec>"
    . fieldLineStreamFromBS

188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
-- | Parse a 'String' with 'lexemeParsec' using specific 'CabalSpecVersion'.
--
-- @since 3.4.0.0
simpleParsec' :: Parsec a => CabalSpecVersion -> String -> Maybe a
simpleParsec' spec
    = either (const Nothing) Just
    . runParsecParser' spec lexemeParsec "<simpleParsec>"
    . fieldLineStreamFromString

-- | Parse a 'String' with 'lexemeParsec' using specific 'CabalSpecVersion'.
-- Fail if there are any warnings.
--
-- @since 3.4.0.0
simpleParsecW' :: Parsec a => CabalSpecVersion -> String -> Maybe a
simpleParsecW' spec
    = either (const Nothing) (\(x, ws) -> if null ws then Just x else Nothing)
    . runParsecParser' spec ((,) <$> lexemeParsec <*> liftParsec Parsec.getState) "<simpleParsec>"
    . fieldLineStreamFromString

Oleg Grenrus's avatar
Oleg Grenrus committed
207
208
-- | Parse a 'String' with 'lexemeParsec'.
eitherParsec :: Parsec a => String -> Either String a
209
210
211
212
213
eitherParsec = explicitEitherParsec parsec

-- | Parse a 'String' with given 'ParsecParser'. Trailing whitespace is accepted.
explicitEitherParsec :: ParsecParser a -> String -> Either String a
explicitEitherParsec parser
Oleg Grenrus's avatar
Oleg Grenrus committed
214
    = either (Left . show) Right
215
    . runParsecParser (parser <* P.spaces) "<eitherParsec>"
216
    . fieldLineStreamFromString
Oleg Grenrus's avatar
Oleg Grenrus committed
217

218
219
220
221
222
223
224
225
226
227
228
-- | Parse a 'String' with given 'ParsecParser' and 'CabalSpecVersion'. Trailing whitespace is accepted.
-- See 'explicitEitherParsec'.
--
-- @since 3.4.0.0
--
explicitEitherParsec' :: CabalSpecVersion -> ParsecParser a -> String -> Either String a
explicitEitherParsec' spec parser
    = either (Left . show) Right
    . runParsecParser' spec (parser <* P.spaces) "<eitherParsec>"
    . fieldLineStreamFromString

229
-- | Run 'ParsecParser' with 'cabalSpecLatest'.
230
runParsecParser :: ParsecParser a -> FilePath -> FieldLineStream -> Either Parsec.ParseError a
231
232
233
234
235
236
237
238
runParsecParser = runParsecParser' cabalSpecLatest

-- | Like 'runParsecParser' but lets specify 'CabalSpecVersion' used.
--
-- @since 3.0.0.0
--
runParsecParser' :: CabalSpecVersion -> ParsecParser a -> FilePath -> FieldLineStream -> Either Parsec.ParseError a
runParsecParser' v p n = Parsec.runParser (unPP p v <* P.eof) [] n
Oleg Grenrus's avatar
Oleg Grenrus committed
239

Oleg Grenrus's avatar
Oleg Grenrus committed
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
instance Parsec a => Parsec (Identity a) where
    parsec = Identity <$> parsec

instance Parsec Bool where
    parsec = P.munch1 isAlpha >>= postprocess
      where
        postprocess str
            |  str == "True"  = pure True
            |  str == "False" = pure False
            | lstr == "true"  = parsecWarning PWTBoolCase caseWarning *> pure True
            | lstr == "false" = parsecWarning PWTBoolCase caseWarning *> pure False
            | otherwise       = fail $ "Not a boolean: " ++ str
          where
            lstr = map toLower str
            caseWarning =
                "Boolean values are case sensitive, use 'True' or 'False'."

Oleg Grenrus's avatar
Oleg Grenrus committed
257
-- | @[^ ,]@
258
parsecToken :: CabalParsing m => m String
259
parsecToken = parsecHaskellString <|> ((P.munch1 (\x -> not (isSpace x) && x /= ',')  P.<?> "identifier" ) >>= checkNotDoubleDash)
Oleg Grenrus's avatar
Oleg Grenrus committed
260

Oleg Grenrus's avatar
Oleg Grenrus committed
261
-- | @[^ ]@
262
parsecToken' :: CabalParsing m => m String
263
264
265
266
267
268
269
270
271
272
273
parsecToken' = parsecHaskellString <|> ((P.munch1 (not . isSpace) P.<?> "token") >>= checkNotDoubleDash)

checkNotDoubleDash ::  CabalParsing m => String -> m String
checkNotDoubleDash s = do
    when (s == "--") $ parsecWarning PWTDoubleDash $ unwords
        [ "Double-dash token found."
        , "Note: there are no end-of-line comments in .cabal files, only whole line comments."
        , "Use \"--\" (quoted double dash) to silence this warning, if you actually want -- token"
        ]

    return s
Oleg Grenrus's avatar
Oleg Grenrus committed
274

275
parsecFilePath :: CabalParsing m => m FilePath
Oleg Grenrus's avatar
Oleg Grenrus committed
276
277
parsecFilePath = parsecToken

278
-- | Parse a benchmark/test-suite types.
279
parsecStandard :: (CabalParsing m, Parsec ver) => (ver -> String -> a) -> m a
Oleg Grenrus's avatar
Oleg Grenrus committed
280
parsecStandard f = do
Oleg Grenrus's avatar
Oleg Grenrus committed
281
282
283
284
285
286
287
288
289
290
291
    cs   <- some $ P.try (component <* P.char '-')
    ver  <- parsec
    let name = map toLower (intercalate "-" cs)
    return $! f ver name
  where
    component = do
      cs <- P.munch1 isAlphaNum
      if all isDigit cs then fail "all digit component" else return cs
      -- each component must contain an alphabetic character, to avoid
      -- ambiguity in identifiers like foo-1 (the 1 is the version number).

292
293
parsecCommaList :: CabalParsing m => m a -> m [a]
parsecCommaList p = P.sepBy (p <* P.spaces) (P.char ',' *> P.spaces P.<?> "comma")
Oleg Grenrus's avatar
Oleg Grenrus committed
294

295
296
297
298
299
300
301
-- | Like 'parsecCommaList' but accept leading or trailing comma.
--
-- @
-- p (comma p)*  -- p `sepBy` comma
-- (comma p)*    -- leading comma
-- (p comma)*    -- trailing comma
-- @
302
parsecLeadingCommaList :: CabalParsing m => m a -> m [a]
303
parsecLeadingCommaList p = do
304
    c <- P.optional comma
305
    case c of
Dale Wijnand's avatar
Dale Wijnand committed
306
307
        Nothing -> toList <$> P.sepEndByNonEmpty lp comma <|> pure []
        Just _  -> toList <$> P.sepByNonEmpty lp comma
308
309
  where
    lp = p <* P.spaces
310
311
312
    comma = P.char ',' *> P.spaces P.<?> "comma"

parsecOptCommaList :: CabalParsing m => m a -> m [a]
Oleg Grenrus's avatar
Oleg Grenrus committed
313
314
parsecOptCommaList p = P.sepBy (p <* P.spaces) (P.optional comma)
  where
315
    comma = P.char ',' *> P.spaces
Oleg Grenrus's avatar
Oleg Grenrus committed
316

317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
-- | Like 'parsecOptCommaList' but
--
-- * require all or none commas
-- * accept leading or trailing comma.
--
-- @
-- p (comma p)*  -- p `sepBy` comma
-- (comma p)*    -- leading comma
-- (p comma)*    -- trailing comma
-- p*            -- no commas: many p
-- @
--
-- @since 3.0.0.0
--
parsecLeadingOptCommaList :: CabalParsing m => m a -> m [a]
parsecLeadingOptCommaList p = do
    c <- P.optional comma
    case c of
        Nothing -> sepEndBy1Start <|> pure []
Dale Wijnand's avatar
Dale Wijnand committed
336
        Just _  -> toList <$> P.sepByNonEmpty lp comma
337
338
339
340
341
342
343
344
345
346
347
  where
    lp = p <* P.spaces
    comma = P.char ',' *> P.spaces P.<?> "comma"

    sepEndBy1Start = do
        x <- lp
        c <- P.optional comma
        case c of
            Nothing -> (x :) <$> many lp
            Just _  -> (x :) <$> P.sepEndBy lp comma

Oleg Grenrus's avatar
Oleg Grenrus committed
348
-- | Content isn't unquoted
349
parsecQuoted :: CabalParsing m => m a -> m a
Oleg Grenrus's avatar
Oleg Grenrus committed
350
351
352
parsecQuoted = P.between (P.char '"') (P.char '"')

-- | @parsecMaybeQuoted p = 'parsecQuoted' p <|> p@.
353
parsecMaybeQuoted :: CabalParsing m => m a -> m a
Oleg Grenrus's avatar
Oleg Grenrus committed
354
355
parsecMaybeQuoted p = parsecQuoted p <|> p

356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
parsecUnqualComponentName :: forall m. CabalParsing m => m String
parsecUnqualComponentName = state0 DList.empty where
    --
    -- using @kleene@ package we can easily see that
    -- we need only two states to recognize
    -- unqual-component-name
    --
    -- Compare with declarative
    -- 'Distribution.FieldGrammar.Described.reUnqualComponent'.
    --
    -- @
    -- import Kleene
    -- import Kleene.Internal.Pretty
    -- import Algebra.Lattice
    -- import Data.Char
    --
    -- import qualified Data.RangeSet.Map as RSet
    --
    -- main = do
    --     -- this is an approximation, to get an idea.
    --     let component :: RE Char
    --         component = star alphaNum <> alpha <> star alphaNum
    --
    --         alphaNum = alpha \/ num
    --         alpha    = unions $ map char ['a'..'z']
    --         num      = unions $ map char ['0'..'9']
    --
    --         re :: RE Char
    --         re = component <> star (char '-' <> component)
    --
    --     putPretty re
    --     putPretty $ fromTM re
    -- @

    state0 :: DList.DList Char -> m String
    state0 acc = do
        c <- ch -- <|> fail ("Invalid component, after " ++ DList.toList acc)
        case () of
            _ | isDigit c    -> state0 (DList.snoc acc c)
              | isAlphaNum c -> state1 (DList.snoc acc c)
              | c == '-'     -> fail ("Empty component, after " ++ DList.toList acc)
              | otherwise    -> fail ("Internal error, after " ++ DList.toList acc)

    state1 :: DList.DList Char -> m String
    state1 acc = state1' acc `alt` return (DList.toList acc)

    state1' :: DList.DList Char -> m String
    state1' acc = do
        c <- ch
        case () of
            _ | isAlphaNum c -> state1 (DList.snoc acc c)
              | c == '-'     -> state0 (DList.snoc acc c)
              | otherwise    -> fail ("Internal error, after " ++ DList.toList acc)

    ch :: m Char
    !ch = P.satisfy (\c -> isAlphaNum c || c == '-')

    alt :: m String -> m String -> m String
    !alt = (<|>)
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475

stringLiteral :: forall m. P.CharParsing m => m String
stringLiteral = lit where
    lit :: m String
    lit = foldr (maybe id (:)) ""
        <$> P.between (P.char '"') (P.char '"' P.<?> "end of string") (many stringChar)
        P.<?> "string"

    stringChar :: m (Maybe Char)
    stringChar = Just <$> stringLetter
         <|> stringEscape
         P.<?> "string character"

    stringLetter :: m Char
    stringLetter = P.satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))

    stringEscape :: m (Maybe Char)
    stringEscape = P.char '\\' *> esc where
        esc :: m (Maybe Char)
        esc = Nothing <$ escapeGap
            <|> Nothing <$ escapeEmpty
            <|> Just <$> escapeCode

    escapeEmpty, escapeGap :: m Char
    escapeEmpty = P.char '&'
    escapeGap = P.skipSpaces1 *> (P.char '\\' P.<?> "end of string gap")

escapeCode :: forall m. P.CharParsing m => m Char
escapeCode = (charEsc <|> charNum <|> charAscii <|> charControl) P.<?> "escape code"
  where
  charControl, charNum :: m Char
  charControl = (\c -> toEnum (fromEnum c - fromEnum '@')) <$> (P.char '^' *> (P.upper <|> P.char '@'))
  charNum = toEnum <$> num
    where
      num :: m Int
      num = bounded 10 maxchar
        <|> (P.char 'o' *> bounded 8 maxchar)
        <|> (P.char 'x' *> bounded 16 maxchar)
      maxchar = fromEnum (maxBound :: Char)

  bounded :: Int -> Int -> m Int
  bounded base bnd = foldl' (\x d -> base * x + digitToInt d) 0
                 <$> bounded' (take base thedigits) (map digitToInt $ showIntAtBase base intToDigit bnd "")
    where
      thedigits :: [m Char]
      thedigits = map P.char ['0'..'9'] ++ map P.oneOf (transpose [['A'..'F'],['a'..'f']])

      toomuch :: m a
      toomuch = P.unexpected "out-of-range numeric escape sequence"

      bounded', bounded'' :: [m Char] -> [Int] -> m [Char]
      bounded' dps@(zero:_) bds = P.skipSome zero *> ([] <$ P.notFollowedBy (P.choice dps) <|> bounded'' dps bds)
                              <|> bounded'' dps bds
      bounded' []           _   = error "bounded called with base 0"
      bounded'' dps []         = [] <$ P.notFollowedBy (P.choice dps) <|> toomuch
      bounded'' dps (bd : bds) = let anyd :: m Char
                                     anyd = P.choice dps

                                     nomore :: m ()
                                     nomore = P.notFollowedBy anyd <|> toomuch

476
477
478
                                     (low, ex, high) = case splitAt bd dps of
                                        (low', ex' : high') -> (low', ex', high')
                                        (_, _)              -> error "escapeCode: Logic error"
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
                                  in ((:) <$> P.choice low <*> atMost (length bds) anyd) <* nomore
                                     <|> ((:) <$> ex <*> ([] <$ nomore <|> bounded'' dps bds))
                                     <|> if not (null bds)
                                            then (:) <$> P.choice high <*> atMost (length bds - 1) anyd <* nomore
                                            else empty
      atMost n p | n <= 0    = pure []
                 | otherwise = ((:) <$> p <*> atMost (n - 1) p) <|> pure []

  charEsc :: m Char
  charEsc = P.choice $ parseEsc <$> escMap

  parseEsc (c,code) = code <$ P.char c
  escMap = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"

  charAscii :: m Char
  charAscii = P.choice $ parseAscii <$> asciiMap

  parseAscii (asc,code) = P.try $ code <$ P.string asc
  asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
  ascii2codes, ascii3codes :: [String]
  ascii2codes = [ "BS","HT","LF","VT","FF","CR","SO"
                , "SI","EM","FS","GS","RS","US","SP"]
  ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK"
                ,"BEL","DLE","DC1","DC2","DC3","DC4","NAK"
                ,"SYN","ETB","CAN","SUB","ESC","DEL"]
  ascii2, ascii3 :: String
  ascii2 = "\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP"
  ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"