Commit 63489d40 authored by Isaac Dupree's avatar Isaac Dupree

remove Haddock-lexing/parsing/renaming from GHC

parent 8a25c54e
......@@ -324,8 +324,6 @@ Library
SysTools
TidyPgm
Ctype
HaddockLex
HaddockParse
HaddockUtils
LexCore
Lexer
......
......@@ -101,7 +101,7 @@ data HsDecl id
| AnnD (AnnDecl id)
| RuleD (RuleDecl id)
| SpliceD (SpliceDecl id)
| DocD (DocDecl id)
| DocD (DocDecl)
-- NB: all top-level fixity decls are contained EITHER
......@@ -136,7 +136,7 @@ data HsGroup id
hs_annds :: [LAnnDecl id],
hs_ruleds :: [LRuleDecl id],
hs_docs :: [LDocDecl id]
hs_docs :: [LDocDecl]
}
emptyGroup, emptyRdrGroup, emptyRnGroup :: HsGroup a
......@@ -476,7 +476,7 @@ data TyClDecl name
-- only 'TyFamily' and
-- 'TySynonym'; the
-- latter for defaults
tcdDocs :: [LDocDecl name] -- ^ Haddock docs
tcdDocs :: [LDocDecl] -- ^ Haddock docs
}
data NewOrData
......@@ -716,7 +716,7 @@ data ConDecl name
, con_res :: ResType name
-- ^ Result type of the constructor
, con_doc :: Maybe (LHsDoc name)
, con_doc :: Maybe LHsDocString
-- ^ A possible Haddock comment.
, con_old_rec :: Bool
......@@ -1000,19 +1000,19 @@ instance OutputableBndr name => Outputable (RuleBndr name) where
\begin{code}
type LDocDecl name = Located (DocDecl name)
type LDocDecl = Located (DocDecl)
data DocDecl name
= DocCommentNext (HsDoc name)
| DocCommentPrev (HsDoc name)
| DocCommentNamed String (HsDoc name)
| DocGroup Int (HsDoc name)
data DocDecl
= DocCommentNext HsDocString
| DocCommentPrev HsDocString
| DocCommentNamed String HsDocString
| DocGroup Int HsDocString
-- Okay, I need to reconstruct the document comments, but for now:
instance Outputable (DocDecl name) where
instance Outputable DocDecl where
ppr _ = text "<document comment>"
docDeclDoc :: DocDecl name -> HsDoc name
docDeclDoc :: DocDecl -> HsDocString
docDeclDoc (DocCommentNext d) = d
docDeclDoc (DocCommentPrev d) = d
docDeclDoc (DocCommentNamed _ d) = d
......
module HsDoc (
HsDoc(..),
LHsDoc,
docAppend,
docParagraph,
HsDocString(..),
LHsDocString,
ppr_mbDoc
) where
......@@ -10,87 +8,17 @@ module HsDoc (
import Outputable
import SrcLoc
import FastString
import Data.Char (isSpace)
data HsDoc id
= DocEmpty
| DocAppend (HsDoc id) (HsDoc id)
| DocString String
| DocParagraph (HsDoc id)
| DocIdentifier [id]
| DocModule String
| DocEmphasis (HsDoc id)
| DocMonospaced (HsDoc id)
| DocUnorderedList [HsDoc id]
| DocOrderedList [HsDoc id]
| DocDefList [(HsDoc id, HsDoc id)]
| DocCodeBlock (HsDoc id)
| DocURL String
| DocPic String
| DocAName String
newtype HsDocString = HsDocString FastString
deriving (Eq, Show)
type LHsDoc a = Located (HsDoc a)
type LHsDocString = Located HsDocString
instance Outputable (HsDoc a) where
instance Outputable HsDocString where
ppr _ = text "<document comment>"
ppr_mbDoc :: Maybe (LHsDoc a) -> SDoc
ppr_mbDoc :: Maybe LHsDocString -> SDoc
ppr_mbDoc (Just doc) = ppr doc
ppr_mbDoc Nothing = empty
-- used to make parsing easier; we group the list items later
docAppend :: HsDoc id -> HsDoc id -> HsDoc id
docAppend (DocUnorderedList ds1) (DocUnorderedList ds2)
= DocUnorderedList (ds1++ds2)
docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d)
= DocAppend (DocUnorderedList (ds1++ds2)) d
docAppend (DocOrderedList ds1) (DocOrderedList ds2)
= DocOrderedList (ds1++ds2)
docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d)
= DocAppend (DocOrderedList (ds1++ds2)) d
docAppend (DocDefList ds1) (DocDefList ds2)
= DocDefList (ds1++ds2)
docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d)
= DocAppend (DocDefList (ds1++ds2)) d
docAppend DocEmpty d = d
docAppend d DocEmpty = d
docAppend d1 d2
= DocAppend d1 d2
-- again to make parsing easier - we spot a paragraph whose only item
-- is a DocMonospaced and make it into a DocCodeBlock
docParagraph :: HsDoc id -> HsDoc id
docParagraph (DocMonospaced p)
= DocCodeBlock (docCodeBlock p)
docParagraph (DocAppend (DocString s1) (DocMonospaced p))
| all isSpace s1
= DocCodeBlock (docCodeBlock p)
docParagraph (DocAppend (DocString s1)
(DocAppend (DocMonospaced p) (DocString s2)))
| all isSpace s1 && all isSpace s2
= DocCodeBlock (docCodeBlock p)
docParagraph (DocAppend (DocMonospaced p) (DocString s2))
| all isSpace s2
= DocCodeBlock (docCodeBlock p)
docParagraph p
= DocParagraph p
-- Drop trailing whitespace from @..@ code blocks. Otherwise this:
--
-- -- @
-- -- foo
-- -- @
--
-- turns into (DocCodeBlock "\nfoo\n ") which when rendered in HTML
-- gives an extra vertical space after the code block. The single space
-- on the final line seems to trigger the extra vertical space.
--
docCodeBlock :: HsDoc id -> HsDoc id
docCodeBlock (DocString s)
= DocString (reverse $ dropWhile (`elem` " \t") $ reverse s)
docCodeBlock (DocAppend l r)
= DocAppend l (docCodeBlock r)
docCodeBlock d = d
......@@ -16,7 +16,7 @@ HsImpExp: Abstract syntax: imports, exports, interfaces
module HsImpExp where
import Module ( ModuleName )
import HsDoc ( HsDoc )
import HsDoc ( HsDocString )
import Outputable
import FastString
......@@ -88,8 +88,8 @@ data IE name
| IEThingAll name -- ^ Class/Type plus all methods/constructors
| IEThingWith name [name] -- ^ Class/Type plus some methods/constructors
| IEModuleContents ModuleName -- ^ (Export Only)
| IEGroup Int (HsDoc name) -- ^ Doc section heading
| IEDoc (HsDoc name) -- ^ Some documentation
| IEGroup Int HsDocString -- ^ Doc section heading
| IEDoc HsDocString -- ^ Some documentation
| IEDocNamed String -- ^ Reference to named doc
\end{code}
......
......@@ -22,9 +22,6 @@ module HsSyn (
Fixity,
HsModule(..), HsExtCore(..),
HaddockModInfo(..),
emptyHaddockModInfo,
) where
-- friends:
......@@ -71,26 +68,10 @@ data HsModule name
-- ^ Type, class, value, and interface signature decls
hsmodDeprecMessage :: Maybe WarningTxt,
-- ^ reason\/explanation for warning/deprecation of this module
hsmodHaddockModInfo :: HaddockModInfo name,
-- ^ Haddock module info
hsmodHaddockModDescr :: Maybe (HsDoc name)
-- ^ Haddock module description
hsmodHaddockModHeader :: Maybe LHsDocString
-- ^ Haddock module info and description, unparsed
}
data HaddockModInfo name = HaddockModInfo {
hmi_description :: Maybe (HsDoc name),
hmi_portability :: Maybe String,
hmi_stability :: Maybe String,
hmi_maintainer :: Maybe String
}
emptyHaddockModInfo :: HaddockModInfo a
emptyHaddockModInfo = HaddockModInfo {
hmi_description = Nothing,
hmi_portability = Nothing,
hmi_stability = Nothing,
hmi_maintainer = Nothing
}
data HsExtCore name -- Read from Foo.hcr
= HsExtCore
......@@ -108,10 +89,10 @@ instance Outputable Char where
instance (OutputableBndr name)
=> Outputable (HsModule name) where
ppr (HsModule Nothing _ imports decls _ _ mbDoc)
ppr (HsModule Nothing _ imports decls _ mbDoc)
= pp_mb mbDoc $$ pp_nonnull imports $$ pp_nonnull decls
ppr (HsModule (Just name) exports imports decls deprec _ mbDoc)
ppr (HsModule (Just name) exports imports decls deprec mbDoc)
= vcat [
pp_mb mbDoc,
case exports of
......
......@@ -157,7 +157,7 @@ data HsType name
| HsSpliceTy (HsSplice name)
| HsDocTy (LHsType name) (LHsDoc name) -- A documented type
| HsDocTy (LHsType name) LHsDocString -- A documented type
| HsBangTy HsBang (LHsType name) -- Bang-style type annotations
| HsRecTy [ConDeclField name] -- Only in data type declarations
......@@ -169,7 +169,7 @@ data HsExplicitForAll = Explicit | Implicit
data ConDeclField name -- Record fields have Haddoc docs on them
= ConDeclField { cd_fld_name :: Located name,
cd_fld_type :: LBangType name,
cd_fld_doc :: Maybe (LHsDoc name) }
cd_fld_doc :: Maybe LHsDocString }
-----------------------
......
......@@ -58,9 +58,6 @@ module GHC (
compileCoreToObj,
getModSummary,
-- * Parsing Haddock comments
parseHaddockComment,
-- * Inspecting the module structure of the program
ModuleGraph, ModSummary(..), ms_mod_name, ModLocation(..),
getModuleGraph,
......@@ -300,8 +297,6 @@ import StringBuffer ( StringBuffer, hGetStringBuffer, nextChar )
import Outputable
import BasicTypes
import Maybes ( expectJust, mapCatMaybes )
import HaddockParse
import HaddockLex ( tokenise )
import FastString
import Lexer
......@@ -625,15 +620,6 @@ setGlobalTypeScope ids
= modifySession $ \hscEnv ->
hscEnv{ hsc_global_type_env = extendTypeEnvWithIds emptyTypeEnv ids }
-- -----------------------------------------------------------------------------
-- Parsing Haddock comments
parseHaddockComment :: String -> Either String (HsDoc RdrName)
parseHaddockComment string =
case parseHaddockParagraphs (tokenise string) of
MyLeft x -> Left x
MyRight x -> Right x
-- -----------------------------------------------------------------------------
-- Loading the program
......@@ -1035,7 +1021,7 @@ instance DesugaredMod DesugaredModule where
type ParsedSource = Located (HsModule RdrName)
type RenamedSource = (HsGroup Name, [LImportDecl Name], Maybe [LIE Name],
Maybe (HsDoc Name), HaddockModInfo Name)
Maybe LHsDocString)
type TypecheckedSource = LHsBinds Id
-- NOTE:
......
......@@ -68,7 +68,7 @@ getImports dflags buf filename source_filename = do
then liftIO $ throwIO $ mkSrcErr errs
else
case rdr_module of
L _ (HsModule mb_mod _ imps _ _ _ _) ->
L _ (HsModule mb_mod _ imps _ _ _) ->
let
main_loc = mkSrcLoc (mkFastString source_filename) 1 0
mod = mb_mod `orElse` L (srcLocSpan main_loc) mAIN_NAME
......
......@@ -216,7 +216,7 @@ hscTypecheck mod_summary rdr_module = do
-- exception/signal an error.
type RenamedStuff =
(Maybe (HsGroup Name, [LImportDecl Name], Maybe [LIE Name],
Maybe (HsDoc Name), HaddockModInfo Name))
Maybe LHsDocString))
-- | Rename and typecheck a module, additionally returning the renamed syntax
hscTypecheckRename ::
......@@ -233,9 +233,8 @@ hscTypecheckRename mod_summary rdr_module = do
rn_info = do { decl <- tcg_rn_decls tc_result
; let imports = tcg_rn_imports tc_result
exports = tcg_rn_exports tc_result
doc = tcg_doc tc_result
hmi = tcg_hmi tc_result
; return (decl,imports,exports,doc,hmi) }
doc_hdr = tcg_doc_hdr tc_result
; return (decl,imports,exports,doc_hdr) }
return (tc_result, rn_info)
......
......@@ -26,7 +26,7 @@ import Data.Char
\begin{code}
ppSourceStats :: Bool -> Located (HsModule RdrName) -> SDoc
ppSourceStats short (L _ (HsModule _ exports imports ldecls _ _ _))
ppSourceStats short (L _ (HsModule _ exports imports ldecls _ _))
= (if short then hcat else vcat)
(map pp_val
[("ExportAll ", export_all), -- 1 if no export list
......
module HaddockLex ( Token(..), tokenise ) where
import RdrName
tokenise :: String -> [Token]
data Token
= TokPara
| TokNumber
| TokBullet
| TokDefStart
| TokDefEnd
| TokSpecial Char
| TokIdent [RdrName]
| TokString String
| TokURL String
| TokPic String
| TokEmphasis String
| TokAName String
| TokBirdTrack String
--
-- Haddock - A Haskell Documentation Tool
--
-- (c) Simon Marlow 2002
--
-- This file was modified and integrated into GHC by David Waern 2006
--
{
{-# OPTIONS -Wwarn -w #-}
-- The above warning supression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and fix
-- any warnings in the module. See
-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details
module HaddockLex (
Token(..),
tokenise
) where
import Lexer hiding (Token)
import Parser ( parseIdentifier )
import StringBuffer
import RdrName
import SrcLoc
import DynFlags
import Data.Char
import Numeric
import System.IO.Unsafe
}
$ws = $white # \n
$digit = [0-9]
$hexdigit = [0-9a-fA-F]
$special = [\"\@]
$alphanum = [A-Za-z0-9]
$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~]
:-
-- beginning of a paragraph
<0,para> {
$ws* \n ;
$ws* \> { begin birdtrack }
$ws* [\*\-] { token TokBullet `andBegin` string }
$ws* \[ { token TokDefStart `andBegin` def }
$ws* \( $digit+ \) { token TokNumber `andBegin` string }
$ws* { begin string }
}
-- beginning of a line
<line> {
$ws* \> { begin birdtrack }
$ws* \n { token TokPara `andBegin` para }
-- Here, we really want to be able to say
-- $ws* (\n | <eof>) { token TokPara `andBegin` para}
-- because otherwise a trailing line of whitespace will result in
-- a spurious TokString at the end of a docstring. We don't have <eof>,
-- though (NOW I realise what it was for :-). To get around this, we always
-- append \n to the end of a docstring.
() { begin string }
}
<birdtrack> .* \n? { strtokenNL TokBirdTrack `andBegin` line }
<string,def> {
$special { strtoken $ \s -> TokSpecial (head s) }
\<\<.*\>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) }
\<.*\> { strtoken $ \s -> TokURL (init (tail s)) }
\#.*\# { strtoken $ \s -> TokAName (init (tail s)) }
\/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) }
[\'\`] $ident+ [\'\`] { ident }
\\ . { strtoken (TokString . tail) }
"&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] }
"&#" [xX] $hexdigit+ \; { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] }
-- allow special characters through if they don't fit one of the previous
-- patterns.
[\/\'\`\<\#\&\\] { strtoken TokString }
[^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line }
[^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString }
}
<def> {
\] { token TokDefEnd `andBegin` string }
}
-- ']' doesn't have any special meaning outside of the [...] at the beginning
-- of a definition paragraph.
<string> {
\] { strtoken TokString }
}
{
data Token
= TokPara
| TokNumber
| TokBullet
| TokDefStart
| TokDefEnd
| TokSpecial Char
| TokIdent [RdrName]
| TokString String
| TokURL String
| TokPic String
| TokEmphasis String
| TokAName String
| TokBirdTrack String
-- deriving Show
-- -----------------------------------------------------------------------------
-- Alex support stuff
type StartCode = Int
type Action = String -> StartCode -> (StartCode -> [Token]) -> [Token]
type AlexInput = (Char,String)
alexGetChar (_, []) = Nothing
alexGetChar (_, c:cs) = Just (c, (c,cs))
alexInputPrevChar (c,_) = c
tokenise :: String -> [Token]
tokenise str = let toks = go ('\n', eofHack str) para in {-trace (show toks)-} toks
where go inp@(_,str) sc =
case alexScan inp sc of
AlexEOF -> []
AlexError _ -> error "lexical error"
AlexSkip inp' _ -> go inp' sc
AlexToken inp' len act -> act (take len str) sc (\sc -> go inp' sc)
-- NB. we add a final \n to the string, (see comment in the beginning of line
-- production above).
eofHack str = str++"\n"
andBegin :: Action -> StartCode -> Action
andBegin act new_sc = \str _ cont -> act str new_sc cont
token :: Token -> Action
token t = \_ sc cont -> t : cont sc
strtoken, strtokenNL :: (String -> Token) -> Action
strtoken t = \str sc cont -> t str : cont sc
strtokenNL t = \str sc cont -> t (filter (/= '\r') str) : cont sc
-- ^ We only want LF line endings in our internal doc string format, so we
-- filter out all CRs.
begin :: StartCode -> Action
begin sc = \_ _ cont -> cont sc
-- -----------------------------------------------------------------------------
-- Lex a string as a Haskell identifier
ident :: Action
ident str sc cont =
case strToHsQNames id of
Just names -> TokIdent names : cont sc
Nothing -> TokString str : cont sc
where id = init (tail str)
strToHsQNames :: String -> Maybe [RdrName]
strToHsQNames str0 =
let buffer = unsafePerformIO (stringToStringBuffer str0)
pstate = mkPState buffer noSrcLoc defaultDynFlags
result = unP parseIdentifier pstate
in case result of
POk _ name -> Just [unLoc name]
_ -> Nothing
}
{
{-# OPTIONS -Wwarn -w #-}
-- The above warning supression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and fix
-- any warnings in the module. See
-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details
module HaddockParse (
parseHaddockParagraphs,
parseHaddockString,
EitherString(..)
) where
import {-# SOURCE #-} HaddockLex
import HsSyn
import RdrName
}
%expect 0
%tokentype { Token }
%token '/' { TokSpecial '/' }
'@' { TokSpecial '@' }
'[' { TokDefStart }
']' { TokDefEnd }
DQUO { TokSpecial '\"' }
URL { TokURL $$ }
PIC { TokPic $$ }
ANAME { TokAName $$ }
'/../' { TokEmphasis $$ }
'-' { TokBullet }
'(n)' { TokNumber }
'>..' { TokBirdTrack $$ }
IDENT { TokIdent $$ }
PARA { TokPara }
STRING { TokString $$ }
%monad { EitherString }
%name parseHaddockParagraphs doc
%name parseHaddockString seq
%%
doc :: { HsDoc RdrName }
: apara PARA doc { docAppend $1 $3 }
| PARA doc { $2 }
| apara { $1 }
| {- empty -} { DocEmpty }
apara :: { HsDoc RdrName }
: ulpara { DocUnorderedList [$1] }
| olpara { DocOrderedList [$1] }
| defpara { DocDefList [$1] }
| para { $1 }
ulpara :: { HsDoc RdrName }
: '-' para { $2 }
olpara :: { HsDoc RdrName }
: '(n)' para { $2 }
defpara :: { (HsDoc RdrName, HsDoc RdrName) }
: '[' seq ']' seq { ($2, $4) }
para :: { HsDoc RdrName }
: seq { docParagraph $1 }
| codepara { DocCodeBlock $1 }
codepara :: { HsDoc RdrName }
: '>..' codepara { docAppend (DocString $1) $2 }
| '>..' { DocString $1 }
seq :: { HsDoc RdrName }
: elem seq { docAppend $1 $2 }
| elem { $1 }
elem :: { HsDoc RdrName }
: elem1 { $1 }
| '@' seq1 '@' { DocMonospaced $2 }
seq1 :: { HsDoc RdrName }
: PARA seq1 { docAppend (DocString "\n") $2 }
| elem1 seq1 { docAppend $1 $2 }
| elem1 { $1 }
elem1 :: { HsDoc RdrName }
: STRING { DocString $1 }
| '/../' { DocEmphasis (DocString $1) }
| URL { DocURL $1 }
| PIC { DocPic $1 }
| ANAME { DocAName $1 }
| IDENT { DocIdentifier $1 }
| DQUO strings DQUO { DocModule $2 }
strings :: { String }
: STRING { $1 }
| STRING strings { $1 ++ $2 }
{
happyError :: [Token] -> EitherString a
happyError toks = MyLeft ("parse error in doc string")
-- We don't want to make an instance for Either String,
-- since every user of the GHC API would get that instance
-- But why use non-Haskell98 instances when MyEither String
-- is the only MyEither we're intending to use anyway? --Isaac Dupree
--data MyEither a b = MyLeft a | MyRight b
data EitherString b = MyLeft String | MyRight b
instance Monad EitherString where
return = MyRight
MyLeft l >>= _ = MyLeft l
MyRight r >>= k = k r
fail msg = MyLeft msg
}
......@@ -2,168 +2,33 @@
module HaddockUtils where
import