Commit 803fc5db authored by Alan Zimmerman's avatar Alan Zimmerman Committed by Austin Seipp

Add API Annotations

Summary:
The final design and discussion is captured at
https://ghc.haskell.org/trac/ghc/wiki/GhcAstAnnotations

This is a proof of concept implementation of a completely
separate annotation structure, populated in the parser,and tied to the
AST by means of a virtual "node-key" comprising the surrounding
SrcSpan and a value derived from the specific constructor used for the
node.

The key parts of the design are the following.

== The Annotations ==

In `hsSyn/ApiAnnotation.hs`

```lang=haskell
type ApiAnns = (Map.Map ApiAnnKey SrcSpan, Map.Map SrcSpan [Located Token])

type ApiAnnKey = (SrcSpan,AnnKeywordId)

-- ---------------------------------------------------------------------

-- | Retrieve an annotation based on the @SrcSpan@ of the annotated AST
-- element, and the known type of the annotation.
getAnnotation :: ApiAnns -> SrcSpan -> AnnKeywordId -> Maybe SrcSpan
getAnnotation (anns,_) span ann = Map.lookup (span,ann) anns

-- |Retrieve the comments allocated to the current @SrcSpan@
getAnnotationComments :: ApiAnns -> SrcSpan -> [Located Token]
getAnnotationComments (_,anns) span =
  case Map.lookup span anns of
    Just cs -> cs
    Nothing -> []

-- | Note: in general the names of these are taken from the
-- corresponding token, unless otherwise noted
data AnnKeywordId
         = AnnAs
         | AnnBang
         | AnnClass
         | AnnClose -- ^ } or ] or ) or #) etc
         | AnnComma
         | AnnDarrow
         | AnnData
         | AnnDcolon
         ....
```

== Capturing in the lexer/parser ==

The annotations are captured in the lexer / parser by extending PState to include a field

In `parser/Lexer.x`

```lang=haskell
data PState = PState {
        ....
        annotations :: [(ApiAnnKey,SrcSpan)]
        -- Annotations giving the locations of 'noise' tokens in the
        -- source, so that users of the GHC API can do source to
        -- source conversions.
     }
```

The lexer exposes a helper function to add an annotation

```lang=haskell
addAnnotation :: SrcSpan -> Ann -> SrcSpan -> P ()
addAnnotation l a v = P $ \s -> POk s {
  annotations = ((AK l a), v) : annotations s
  } ()

```

The parser also has some helper functions of the form

```lang=haskell
type MaybeAnn = Maybe (SrcSpan -> P ())

gl = getLoc
gj x = Just (gl x)

ams :: Located a -> [MaybeAnn] -> P (Located a)
ams a@(L l _) bs = (mapM_ (\a -> a l) $ catMaybes bs) >> return a
```

This allows annotations to be captured in the parser by means of

```
ctypedoc :: { LHsType RdrName }
        : 'forall' tv_bndrs '.' ctypedoc {% hintExplicitForall (getLoc $1) >>
                                            ams (LL $ mkExplicitHsForAllTy $2 (noLoc []) $4)
                                                [mj AnnForall $1,mj AnnDot $3] }
        | context '=>' ctypedoc         {% ams (LL $ mkQualifiedHsForAllTy   $1 $3)
                                               [mj AnnDarrow $2] }
        | ipvar '::' type               {% ams (LL (HsIParamTy (unLoc $1) $3))
                                               [mj AnnDcolon $2] }
        | typedoc                       { $1 }
```

== Parse result ==

```lang-haskell
data HsParsedModule = HsParsedModule {
    hpm_module    :: Located (HsModule RdrName),
    hpm_src_files :: [FilePath],
       -- ^ extra source files (e.g. from #includes).  The lexer collects
       -- these from '# <file> <line>' pragmas, which the C preprocessor
       -- leaves behind.  These files and their timestamps are stored in
       -- the .hi file, so that we can force recompilation if any of
       -- them change (#3589)
    hpm_annotations :: ApiAnns
  }

-- | The result of successful parsing.
data ParsedModule =
  ParsedModule { pm_mod_summary   :: ModSummary
               , pm_parsed_source :: ParsedSource
               , pm_extra_src_files :: [FilePath]
               , pm_annotations :: ApiAnns }
```

This diff depends on D426

Test Plan: sh ./validate

Reviewers: austin, simonpj, Mikolaj

Reviewed By: simonpj, Mikolaj

Subscribers: Mikolaj, goldfire, thomie, carter

Differential Revision: https://phabricator.haskell.org/D438

GHC Trac Issues: #9628
parent 7927658e
......@@ -246,6 +246,9 @@ Note that (Foo a) might not be an instance of Ord.
\begin{code}
-- | A data constructor
--
-- - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnOpen',
-- 'ApiAnnotation.AnnClose','ApiAnnotation.AnnComma'
data DataCon
= MkData {
dcName :: Name, -- This is the name of the *source data con*
......
......@@ -331,6 +331,7 @@ Library
OptCoercion
Parser
RdrHsSyn
ApiAnnotation
ForeignCall
PrelInfo
PrelNames
......
......@@ -537,6 +537,7 @@ compiler_stage2_dll0_MODULES = \
InstEnv \
Kind \
Lexeme \
ApiAnnotation \
ListSetOps \
Literal \
LoadIface \
......@@ -599,6 +600,7 @@ ifeq "$(GhcWithInterpreter)" "YES"
# These files are reacheable from DynFlags
# only by GHCi-enabled code (see #9552)
compiler_stage2_dll0_MODULES += \
ApiAnnotation \
Bitmap \
BlockId \
ByteCodeAsm \
......
......@@ -119,6 +119,13 @@ data HsBindLR idL idR
-- But note that the form @f :: a->a = ...@
-- parses as a pattern binding, just like
-- @(f :: a -> a) = ... @
--
-- 'ApiAnnotation.AnnKeywordId's
--
-- - 'ApiAnnotation.AnnFunId', attached to each element of fun_matches
--
-- - 'ApiAnnotation.AnnEqual','ApiAnnotation.AnnWhere',
-- 'ApiAnnotation.AnnOpen','ApiAnnotation.AnnClose',
FunBind {
fun_id :: Located idL,
......@@ -129,10 +136,12 @@ data HsBindLR idL idR
fun_co_fn :: HsWrapper, -- ^ Coercion from the type of the MatchGroup to the type of
-- the Id. Example:
--
-- @
-- f :: Int -> forall a. a -> a
-- f x y = y
-- @
--
-- Then the MatchGroup will have type (Int -> a' -> a')
-- (with a free type variable a'). The coercion will take
-- a CoreExpr of this type and convert it to a CoreExpr of
......@@ -150,6 +159,10 @@ data HsBindLR idL idR
-- | The pattern is never a simple variable;
-- That case is done by FunBind
--
-- - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnBang',
-- 'ApiAnnotation.AnnEqual','ApiAnnotation.AnnWhere',
-- 'ApiAnnotation.AnnOpen','ApiAnnotation.AnnClose',
| PatBind {
pat_lhs :: LPat idL,
pat_rhs :: GRHSs idR (LHsExpr idR),
......@@ -183,6 +196,9 @@ data HsBindLR idL idR
}
| PatSynBind (PatSynBind idL idR)
-- ^ - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnPattern',
-- 'ApiAnnotation.AnnLarrow','ApiAnnotation.AnnWhere'
-- 'ApiAnnotation.AnnOpen','ApiAnnotation.AnnClose'
deriving (Typeable)
deriving instance (DataId idL, DataId idR)
......@@ -525,12 +541,16 @@ isEmptyIPBinds :: HsIPBinds id -> Bool
isEmptyIPBinds (IPBinds is ds) = null is && isEmptyTcEvBinds ds
type LIPBind id = Located (IPBind id)
-- ^ May have 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnSemi' when in a
-- list
-- | Implicit parameter bindings.
--
-- - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnEqual'
{- These bindings start off as (Left "x") in the parser and stay
that way until after type-checking when they are replaced with
(Right d), where "d" is the name of the dictionary holding the
evidene for the implicit parameter. -}
evidence for the implicit parameter. -}
data IPBind id
= IPBind (Either HsIPName id) (LHsExpr id)
deriving (Typeable)
......@@ -566,6 +586,9 @@ type LSig name = Located (Sig name)
data Sig name
= -- | An ordinary type signature
-- @f :: Num a => a -> a@
--
-- - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnDcolon',
-- 'ApiAnnotation.AnnComma'
TypeSig [Located name] (LHsType name)
-- | A pattern synonym type signature
......@@ -587,11 +610,15 @@ data Sig name
-- the desired Id itself, replete with its name, type
-- and IdDetails. Otherwise it's just like a type
-- signature: there should be an accompanying binding
--
-- - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnDefault',
-- 'ApiAnnotation.AnnDcolon','ApiAnnotation.AnnDotdot'
| IdSig Id
-- | An ordinary fixity declaration
--
-- > infixl *** 8
-- > infixl 8 ***
--
| FixSig (FixitySig name)
......@@ -599,6 +626,10 @@ data Sig name
--
-- > {#- INLINE f #-}
--
-- - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnOpen',
-- 'ApiAnnotation.AnnClose','ApiAnnotation.AnnOpen',
-- 'ApiAnnotation.AnnVal','ApiAnnotation.AnnTilde',
-- 'ApiAnnotation.AnnClose'
| InlineSig (Located name) -- Function name
InlinePragma -- Never defaultInlinePragma
......@@ -606,6 +637,10 @@ data Sig name
--
-- > {-# SPECIALISE f :: Int -> Int #-}
--
-- - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnOpen',
-- 'ApiAnnotation.AnnOpen','ApiAnnotation.AnnTilde',
-- 'ApiAnnotation.AnnVal','ApiAnnotation.AnnClose',
-- 'ApiAnnotation.AnnDcolon','ApiAnnotation.AnnClose',
| SpecSig (Located name) -- Specialise a function or datatype ...
[LHsType name] -- ... to these types
InlinePragma -- The pragma on SPECIALISE_INLINE form.
......@@ -618,11 +653,18 @@ data Sig name
--
-- (Class tys); should be a specialisation of the
-- current instance declaration
--
-- - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnOpen',
-- 'ApiAnnotation.AnnInstance','ApiAnnotation.AnnClose'
| SpecInstSig (LHsType name)
-- | A minimal complete definition pragma
--
-- > {-# MINIMAL a | (b, c | (d | e)) #-}
--
-- - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnOpen',
-- 'ApiAnnotation.AnnVbar','ApiAnnotation.AnnComma',
-- 'ApiAnnotation.AnnClose'
| MinimalSig (BooleanFormula (Located name))
deriving (Typeable)
......
This diff is collapsed.
This diff is collapsed.
......@@ -30,6 +30,10 @@ import Data.Data
One per \tr{import} declaration in a module.
\begin{code}
type LImportDecl name = Located (ImportDecl name)
-- ^ When in a list this may have
--
-- - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnSemi'
--
-- | A single Haskell @import@ declaration.
data ImportDecl name
......@@ -42,8 +46,23 @@ data ImportDecl name
ideclImplicit :: Bool, -- ^ True => implicit import (of Prelude)
ideclAs :: Maybe ModuleName, -- ^ as Module
ideclHiding :: Maybe (Bool, Located [LIE name])
}
-- ^ (True => hiding, names)
} deriving (Data, Typeable)
--
-- 'ApiAnnotation.AnnKeywordId's
--
-- - 'ApiAnnotation.AnnImport'
--
-- - 'ApiAnnotation.AnnOpen', 'ApiAnnotation.AnnClose' for ideclSource
--
-- - 'ApiAnnotation.AnnSafe','ApiAnnotation.AnnQualified',
-- 'ApiAnnotation.AnnPackageName','ApiAnnotation.AnnAs',
--
-- - 'ApiAnnotation.AnnHiding','ApiAnnotation.AnnOpen',
-- 'ApiAnnotation.AnnClose' attached
-- to location in ideclHiding
deriving (Data, Typeable)
simpleImportDecl :: ModuleName -> ImportDecl name
simpleImportDecl mn = ImportDecl {
......@@ -102,15 +121,34 @@ instance (OutputableBndr name, HasOccName name) => Outputable (ImportDecl name)
\begin{code}
type LIE name = Located (IE name)
-- ^ When in a list this may have
--
-- - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnComma'
--
-- | Imported or exported entity.
data IE name
= IEVar (Located name)
-- ^ - 'ApiAnnotation.AnnKeywordId's : 'ApiAnnotation.AnnPattern',
-- 'ApiAnnotation.AnnType'
| IEThingAbs name -- ^ Class/Type (can't tell)
-- - 'ApiAnnotation.AnnKeywordId's : 'ApiAnnotation.AnnPattern',
-- 'ApiAnnotation.AnnType'
| IEThingAll (Located name) -- ^ Class/Type plus all methods/constructors
--
-- - 'ApiAnnotation.AnnKeywordId's : 'ApiAnnotation.AnnOpen',
-- 'ApiAnnotation.AnnDotdot','ApiAnnotation.AnnClose',
-- 'ApiAnnotation.AnnType'
| IEThingWith (Located name) [Located name]
-- ^ Class/Type plus some methods/constructors
-- - 'ApiAnnotation.AnnKeywordId's : 'ApiAnnotation.AnnOpen',
-- 'ApiAnnotation.AnnClose',
-- 'ApiAnnotation.AnnComma',
-- 'ApiAnnotation.AnnType'
| IEModuleContents (Located ModuleName) -- ^ (Export Only)
--
-- - 'ApiAnnotation.AnnKeywordId's : 'ApiAnnotation.AnnModule'
| IEGroup Int HsDocString -- ^ Doc section heading
| IEDoc HsDocString -- ^ Some documentation
| IEDocNamed String -- ^ Reference to named doc
......
......@@ -61,6 +61,7 @@ type OutPat id = LPat id -- No 'In' constructors
type LPat id = Located (Pat id)
-- | - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnBang'
data Pat id
= ------------ Simple patterns ---------------
WildPat (PostTc id Type) -- Wild card
......@@ -217,6 +218,7 @@ data HsRecFields id arg -- A bunch of record fields
-- and the remainder being 'filled in' implicitly
type LHsRecField id arg = Located (HsRecField id arg)
-- | - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnEqual',
data HsRecField id arg = HsRecField {
hsRecFieldId :: Located id,
hsRecFieldArg :: arg, -- Filled in by renamer
......
......@@ -72,6 +72,10 @@ data HsModule name
--
-- - @Just [...]@: as you would expect...
--
--
-- - 'ApiAnnotation.AnnKeywordId's : 'ApiAnnotation.AnnOpen'
-- ,'ApiAnnotation.AnnClose'
--
hsmodImports :: [LImportDecl name],
-- ^ We snaffle interesting stuff out of the imported interfaces early
-- on, adding that info to TyDecls/etc; so this list is often empty,
......@@ -80,9 +84,26 @@ data HsModule name
-- ^ Type, class, value, and interface signature decls
hsmodDeprecMessage :: Maybe (Located WarningTxt),
-- ^ reason\/explanation for warning/deprecation of this module
--
-- - 'ApiAnnotation.AnnKeywordId's : 'ApiAnnotation.AnnOpen'
-- ,'ApiAnnotation.AnnClose'
--
hsmodHaddockModHeader :: Maybe LHsDocString
-- ^ Haddock module info and description, unparsed
} deriving (Typeable)
--
-- - 'ApiAnnotation.AnnKeywordId's : 'ApiAnnotation.AnnOpen'
-- ,'ApiAnnotation.AnnClose'
--
}
-- ^ 'ApiAnnotation.AnnKeywordId's
--
-- - 'ApiAnnotation.AnnModule','ApiAnnotation.AnnWhere'
--
-- - 'ApiAnnotation.AnnOpen','ApiAnnotation.AnnSemi',
-- 'ApiAnnotation.AnnClose' for explicit braces and semi around
-- hsmodImports,hsmodDecls if this style is used.
--
deriving (Typeable)
deriving instance (DataId name) => Data (HsModule name)
\end{code}
......
......@@ -136,6 +136,8 @@ type LHsContext name = Located (HsContext name)
type HsContext name = [LHsType name]
type LHsType name = Located (HsType name)
-- ^ May have 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnComma' when
-- in a list
type HsKind name = HsType name
type LHsKind name = Located (HsKind name)
......@@ -199,6 +201,9 @@ data HsTyVarBndr name
| KindedTyVar
name
(LHsKind name) -- The user-supplied kind signature
-- ^
-- - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnOpen',
-- 'ApiAnnotation.AnnDcolon', 'ApiAnnotation.AnnClose'
deriving (Typeable)
deriving instance (DataId name) => Data (HsTyVarBndr name)
......@@ -211,6 +216,10 @@ isHsKindedTyVar (KindedTyVar {}) = True
hsTvbAllKinded :: LHsTyVarBndrs name -> Bool
hsTvbAllKinded = all (isHsKindedTyVar . unLoc) . hsQTvBndrs
-- | - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnDcolon',
-- 'ApiAnnotation.AnnTilde','ApiAnnotation.AnnRarrow',
-- 'ApiAnnotation.AnnOpen','ApiAnnotation.AnnClose',
-- 'ApiAnnotation.AnnComma'
data HsType name
= HsForAllTy HsExplicitFlag -- Renamer leaves this flag unchanged, to record the way
-- the user wrote it originally, so that the printer can
......@@ -218,7 +227,8 @@ data HsType name
(LHsTyVarBndrs name)
(LHsContext name)
(LHsType name)
-- ^ - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnForall',
-- 'ApiAnnotation.AnnDot','ApiAnnotation.AnnDarrow'
| HsTyVar name -- Type variable, type constructor, or data constructor
-- see Note [Promotions (HsTyVar)]
......@@ -399,10 +409,13 @@ data HsTupleSort = HsUnboxedTuple
data HsExplicitFlag = Qualified | Implicit | Explicit deriving (Data, Typeable)
type LConDeclField name = Located (ConDeclField name)
-- ^ May have 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnComma' when
-- in a list
data ConDeclField name -- Record fields have Haddoc docs on them
= ConDeclField { cd_fld_names :: [Located name],
cd_fld_type :: LBangType name,
cd_fld_doc :: Maybe LHsDocString }
-- ^ - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnDcolon'
deriving (Typeable)
deriving instance (DataId name) => Data (ConDeclField name)
......
......@@ -130,10 +130,10 @@ mkSimpleMatch pats rhs
(pat:_) -> combineSrcSpans (getLoc pat) (getLoc rhs)
unguardedGRHSs :: Located (body id) -> GRHSs id (Located (body id))
unguardedGRHSs rhs = GRHSs (unguardedRHS rhs) emptyLocalBinds
unguardedGRHSs rhs@(L loc _) = GRHSs (unguardedRHS loc rhs) emptyLocalBinds
unguardedRHS :: Located (body id) -> [LGRHS id (Located (body id))]
unguardedRHS rhs@(L loc _) = [L loc (GRHS [] rhs)]
unguardedRHS :: SrcSpan -> Located (body id) -> [LGRHS id (Located (body id))]
unguardedRHS loc rhs = [L loc (GRHS [] rhs)]
mkMatchGroup :: Origin -> [LMatch RdrName (Located (body RdrName))]
-> MatchGroup RdrName (Located (body RdrName))
......@@ -570,7 +570,7 @@ mk_easy_FunBind loc fun pats expr
mkMatch :: [LPat id] -> LHsExpr id -> HsLocalBinds id -> LMatch id (LHsExpr id)
mkMatch pats expr binds
= noLoc (Match (map paren pats) Nothing
(GRHSs (unguardedRHS expr) binds))
(GRHSs (unguardedRHS noSrcSpan expr) binds))
where
paren lp@(L l p) | hsPatNeedsParens p = L l (ParPat lp)
| otherwise = lp
......
......@@ -243,6 +243,10 @@ module GHC (
-- * Pure interface to the parser
parser,
-- * API Annotations
ApiAnns,AnnKeywordId(..),AnnotationComment(..),
getAnnotation, getAnnotationComments,
-- * Miscellaneous
--sessionHscEnv,
cyclicModuleErr,
......@@ -313,6 +317,7 @@ import Maybes ( expectJust )
import FastString
import qualified Parser
import Lexer
import ApiAnnotation
import System.Directory ( doesFileExist )
import Data.Maybe
......@@ -716,7 +721,9 @@ class TypecheckedMod m => DesugaredMod m where
data ParsedModule =
ParsedModule { pm_mod_summary :: ModSummary
, pm_parsed_source :: ParsedSource
, pm_extra_src_files :: [FilePath] }
, pm_extra_src_files :: [FilePath]
, pm_annotations :: ApiAnns }
-- See Note [Api annotations] in ApiAnnotation.hs
instance ParsedMod ParsedModule where
modSummary m = pm_mod_summary m
......@@ -805,7 +812,9 @@ parseModule ms = do
hsc_env <- getSession
let hsc_env_tmp = hsc_env { hsc_dflags = ms_hspp_opts ms }
hpm <- liftIO $ hscParse hsc_env_tmp ms
return (ParsedModule ms (hpm_module hpm) (hpm_src_files hpm))
return (ParsedModule ms (hpm_module hpm) (hpm_src_files hpm)
(hpm_annotations hpm))
-- See Note [Api annotations] in ApiAnnotation.hs
-- | Typecheck and rename a parsed module.
--
......@@ -818,7 +827,8 @@ typecheckModule pmod = do
(tc_gbl_env, rn_info)
<- liftIO $ hscTypecheckRename hsc_env_tmp ms $
HsParsedModule { hpm_module = parsedSource pmod,
hpm_src_files = pm_extra_src_files pmod }
hpm_src_files = pm_extra_src_files pmod,
hpm_annotations = pm_annotations pmod }
details <- liftIO $ makeSimpleDetails hsc_env_tmp tc_gbl_env
safe <- liftIO $ finalSafeMode (ms_hspp_opts ms) tc_gbl_env
return $
......
......@@ -163,7 +163,7 @@ lazyGetToks dflags filename handle = do
lazyLexBuf :: Handle -> PState -> Bool -> Int -> IO [Located Token]
lazyLexBuf handle state eof size = do
case unP (lexer return) state of
case unP (lexer False return) state of
POk state' t -> do
-- pprTrace "lazyLexBuf" (text (show (buffer state'))) (return ())
if atEnd (buffer state') && not eof
......@@ -197,7 +197,7 @@ getToks dflags filename buf = lexAll (pragState dflags buf loc)
where
loc = mkRealSrcLoc (mkFastString filename) 1 1
lexAll state = case unP (lexer return) state of
lexAll state = case unP (lexer False return) state of
POk _ t@(L _ ITeof) -> [t]
POk state' t -> t : lexAll state'
_ -> [L (RealSrcSpan (last_loc state)) ITeof]
......
......@@ -164,6 +164,7 @@ import Data.Maybe
import Data.IORef
import System.FilePath as FilePath
import System.Directory
import qualified Data.Map as Map
#include "HsVersions.h"
......@@ -372,7 +373,11 @@ hscParse' mod_summary = do
return HsParsedModule {
hpm_module = rdr_module,
hpm_src_files = srcs2
hpm_src_files = srcs2,
hpm_annotations
= (Map.fromListWith (++) $ annotations pst,
Map.fromList $ ((noSrcSpan,comment_q pst)
:(annotations_comments pst)))
}
-- XXX: should this really be a Maybe X? Check under which circumstances this
......
......@@ -145,6 +145,7 @@ import Id
import IdInfo ( IdDetails(..) )
import Type
import ApiAnnotation ( ApiAnns )
import Annotations ( Annotation, AnnEnv, mkAnnEnv, plusAnnEnv )
import Class
import TyCon
......@@ -2604,12 +2605,14 @@ instance Binary IfaceTrustInfo where
\begin{code}
data HsParsedModule = HsParsedModule {
hpm_module :: Located (HsModule RdrName),
hpm_src_files :: [FilePath]
hpm_src_files :: [FilePath],
-- ^ extra source files (e.g. from #includes). The lexer collects
-- these from '# <file> <line>' pragmas, which the C preprocessor
-- leaves behind. These files and their timestamps are stored in
-- the .hi file, so that we can force recompilation if any of
-- them change (#3589)
hpm_annotations :: ApiAnns
-- See note [Api annotations] in ApiAnnotation.hs
}
\end{code}
......
{-# LANGUAGE DeriveDataTypeable #-}
module ApiAnnotation (
getAnnotation,
getAnnotationComments,
ApiAnns,
ApiAnnKey,
AnnKeywordId(..),
AnnotationComment(..),
LRdrName -- Exists for haddocks only
) where
import RdrName
import Outputable
import SrcLoc
import qualified Data.Map as Map
import Data.Data
{- Note [Api annotations]
~~~~~~~~~~~~~~~~~~~~~~
In order to do source to source conversions using the GHC API, the
locations of all elements of the original source needs to be tracked.
The includes keywords such as 'let' / 'in' / 'do' etc as well as
punctuation such as commas and braces, and also comments.
These are captured in a structure separate from the parse tree, and
returned in the pm_annotations field of the ParsedModule type.
The non-comment annotations are stored indexed to the SrcSpan of the
AST element containing them, together with a AnnKeywordId value
identifying the specific keyword being captured.
> type ApiAnnKey = (SrcSpan,AnnKeywordId)
>
> Map.Map ApiAnnKey SrcSpan
So
> let X = 1 in 2 *x
would result in the AST element
L span (HsLet (binds for x = 1) (2 * x))
and the annotations
(span,AnnLet) having the location of the 'let' keyword
(span,AnnIn) having the location of the 'in' keyword
The comments are indexed to the SrcSpan of the lowest AST element
enclosing them
> Map.Map SrcSpan [Located AnnotationComment]
So the full ApiAnns type is
> type ApiAnns = ( Map.Map ApiAnnKey SrcSpan
> , Map.Map SrcSpan [Located AnnotationComment])
This is done in the lexer / parser as follows.
The PState variable in the lexer has the following variables added
> annotations :: [(ApiAnnKey,SrcSpan)],
> comment_q :: [Located Token],
> annotations_comments :: [(SrcSpan,[Located AnnotationComment])]
The first and last store the values that end up in the ApiAnns value
at the end via Map.fromList
The comment_q captures comments as they are seen in the token stream,
so that when they are ready to be allocated via the parser they are
available.
The parser interacts with the lexer using the function
> addAnnotation :: SrcSpan -> AnnKeywordId -> SrcSpan -> P ()
which takes the AST element SrcSpan, the annotation keyword and the
target SrcSpan.
This adds the annotation to the `annotations` field of `PState` and
transfers any comments in `comment_q` to the `annotations_comments`
field.
Parser
------
The parser implements a number of helper types and methods for the
capture of annotations
> type AddAnn = (SrcSpan -> P ())
>
> mj :: AnnKeywordId -> Located e -> (SrcSpan -> P ())
> mj a l = (\s -> addAnnotation s a (gl l))
AddAnn represents the addition of an annotation a to a provided
SrcSpan, and `mj` constructs an AddAnn value.
> ams :: Located a -> [AddAnn] -> P (Located a)
> ams a@(L l _) bs = (mapM_ (\a -> a l) bs) >> return a
So the production in Parser.y for the HsLet AST element is
| 'let' binds 'in' exp {% ams (sLL $1 $> $ HsLet (snd $ unLoc $2) $4)
(mj AnnLet $1:mj AnnIn $3
:(fst $ unLoc $2)) }
This adds an AnnLet annotation for 'let', an AnnIn for 'in', as well
as any annotations that may arise in the binds. This will include open
and closing braces if they are used to delimit the let expressions.
-}
-- ---------------------------------------------------------------------
type ApiAnns = ( Map.Map ApiAnnKey [SrcSpan]
, Map.Map SrcSpan [Located AnnotationComment])
type ApiAnnKey = (SrcSpan,AnnKeywordId)
-- | Retrieve a list of annotation 'SrcSpan's based on the 'SrcSpan'
-- of the annotated AST element, and the known type of the annotation.
getAnnotation :: ApiAnns -> SrcSpan -> AnnKeywordId -> [SrcSpan]
getAnnotation (anns,_) span ann
= case Map.lookup (span,ann) anns of
Nothing -> []
Just ss -> ss
-- |Retrieve the comments allocated to the current 'SrcSpan'
getAnnotationComments :: ApiAnns -> SrcSpan -> [Located AnnotationComment]
getAnnotationComments (_,anns) span =
case Map.lookup span anns of
Just cs -> cs
Nothing -> []
-- --------------------------------------------------------------------
-- | Note: in general the names of these are taken from the
-- corresponding token, unless otherwise noted
-- See note [Api annotations] above for details of the usage
data AnnKeywordId
= AnnAs
| AnnAt
| AnnBang -- ^ '!'
| AnnBy
| AnnCase -- ^ case or lambda case
| AnnClass
| AnnClose -- ^ '}' or ']' or ')' or '#)' etc
| AnnColon
| AnnComma
| AnnDarrow -- ^ '=>'
| AnnData
| AnnDcolon -- ^ '::'
| AnnDefault
| AnnDeriving
| AnnDo
| AnnDot -- ^ '.'
| AnnDotdot -- ^ '..'
| AnnElse
| AnnEqual
| AnnExport
| AnnFamily
| AnnForall
| AnnForeign
| AnnFunId -- ^ for function name in matches where there are
-- multiple equations for the function.
| AnnGroup
| AnnHeader -- ^ for CType
| AnnHiding
| AnnIf
| AnnImport
| AnnIn
| AnnInstance
| AnnLam
| AnnLarrow -- ^ '<-'
| AnnLet
| AnnMdo
| AnnMinus -- ^ '-'
| AnnModule
| AnnNewtype
| AnnOf
| AnnOpen -- ^ '{' or '[' or '(' or '(#' etc
| AnnPackageName
| AnnPattern
| AnnProc
| AnnQualified
| AnnRarrow -- ^ '->'
| AnnRec
| AnnRole
| AnnSafe
| AnnSemi -- ^ ';'
| AnnThen
| AnnTilde -- ^ '~'
| AnnTildehsh -- ^ '~#'
| AnnType
| AnnUsing
| AnnVal -- ^ e.g. INTEGER
| AnnVbar -- ^ '|'
| AnnWhere
| Annlarrowtail -- ^ '-<'
| Annrarrowtail -- ^ '->'
| AnnLarrowtail -- ^ '-<<'
| AnnRarrowtail -- ^ '>>-'
| AnnEofPos
deriving (Eq,Ord,Data,Typeable,Show)
instance Outputable AnnKeywordId where
ppr x = text (show x)
-- ---------------------------------------------------------------------
data AnnotationComment =
-- Documentation annotations
AnnDocCommentNext String -- ^ something beginning '-- |'
| AnnDocCommentPrev String -- ^ something beginning '-- ^'
| AnnDocCommentNamed String -- ^ something beginning '-- $'
| AnnDocSection Int String -- ^ a section heading
| AnnDocOptions String -- ^ doc options (prune, ignore-exports, etc)
| AnnDocOptionsOld String -- ^ doc options declared "-- # ..."-style
| AnnLineComment String -- ^ comment starting by "--"
| AnnBlockComment String -- ^ comment in {- -}
deriving (Eq,Ord,Data,Typeable,Show)
-- Note: these are based on the Token versions, but the Token type is
-- defined in Lexer.x and bringing it in here would create a loop
-- | - 'ApiAnnotation.AnnKeywordId' : 'ApiAnnotation.AnnOpen',
-- 'ApiAnnotation.AnnClose','ApiAnnotation.AnnComma',
-- 'ApiAnnotation.AnnRarrow','ApiAnnotation.AnnTildehsh',
-- 'ApiAnnotation.AnnTilde'
-- - May have 'ApiAnnotation.AnnComma' when in a list
type LRdrName = Located RdrName
......@@ -43,6 +43,7 @@
{
-- XXX The above flags turn off warnings in the generated code:
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# OPTIONS_GHC -fno-warn-unused-matches #-}
{-# OPTIONS_GHC -fno-warn-unused-binds #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
......@@ -71,7 +72,8 @@ module Lexer (
patternSynonymsEnabled,
sccProfilingOn, hpcEnabled,
addWarning,
lexTokenStream
lexTokenStream,
addAnnotation