Commit 19e80b9a authored by Vladislav Zavialov's avatar Vladislav Zavialov Committed by Ben Gamari

Accumulate Haddock comments in P (#17544, #17561, #8944)

Haddock comments are, first and foremost, comments. It's very annoying
to incorporate them into the grammar. We can take advantage of an
important property: adding a Haddock comment does not change the parse
tree in any way other than wrapping some nodes in HsDocTy and the like
(and if it does, that's a bug).

This patch implements the following:

* Accumulate Haddock comments with their locations in the P monad.
  This is handled in the lexer.

* After parsing, do a pass over the AST to associate Haddock comments
  with AST nodes using location info.

* Report the leftover comments to the user as a warning (-Winvalid-haddock).
parent 58235d46
Pipeline #22527 failed with stages
in 881 minutes and 34 seconds
......@@ -695,6 +695,7 @@ summariseRequirement pn mod_name = do
ms_textual_imps = extra_sig_imports,
ms_parsed_mod = Just (HsParsedModule {
hpm_module = L loc (HsModule {
hsmodLayout = NoLayoutInfo,
hsmodName = Just (L loc mod_name),
hsmodExports = Nothing,
hsmodImports = [],
......
......@@ -496,6 +496,7 @@ data WarningFlag =
| Opt_WarnMissingSafeHaskellMode -- Since 8.10
| Opt_WarnCompatUnqualifiedImports -- Since 8.10
| Opt_WarnDerivingDefaults
| Opt_WarnInvalidHaddock -- Since 8.12
deriving (Eq, Show, Enum)
-- | Used when outputting warnings: if a reason is given, it is
......
......@@ -3450,7 +3450,8 @@ wWarningFlagsDeps = [
flagSpec "prepositive-qualified-module"
Opt_WarnPrepositiveQualifiedModule,
flagSpec "unused-packages" Opt_WarnUnusedPackages,
flagSpec "compat-unqualified-imports" Opt_WarnCompatUnqualifiedImports
flagSpec "compat-unqualified-imports" Opt_WarnCompatUnqualifiedImports,
flagSpec "invalid-haddock" Opt_WarnInvalidHaddock
]
-- | These @-\<blah\>@ flags can all be reversed with @-no-\<blah\>@
......
......@@ -63,6 +63,9 @@ import Data.Data hiding ( Fixity )
-- All we actually declare here is the top-level structure for a module.
data HsModule
= HsModule {
hsmodLayout :: LayoutInfo,
-- ^ Layout info for the module.
-- For incomplete modules (e.g. the output of parseHeader), it is NoLayoutInfo.
hsmodName :: Maybe (Located ModuleName),
-- ^ @Nothing@: \"module X where\" is omitted (in which case the next
-- field is Nothing too)
......@@ -116,11 +119,11 @@ deriving instance Data HsModule
instance Outputable HsModule where
ppr (HsModule Nothing _ imports decls _ mbDoc)
ppr (HsModule _ Nothing _ imports decls _ mbDoc)
= pp_mb mbDoc $$ pp_nonnull imports
$$ pp_nonnull decls
ppr (HsModule (Just name) exports imports decls deprec mbDoc)
ppr (HsModule _ (Just name) exports imports decls deprec mbDoc)
= vcat [
pp_mb mbDoc,
case exports of
......
......@@ -92,6 +92,7 @@ module GHC.Hs.Decls (
HsGroup(..), emptyRdrGroup, emptyRnGroup, appendGroups, hsGroupInstDecls,
hsGroupTopLevelFixitySigs,
partitionBindsAndSigs,
) where
-- friends:
......@@ -219,6 +220,38 @@ Template Haskell `Dec`. If there are any duplicate signatures between the two
fields, this will result in an error (#17608).
-}
-- | Partition a list of HsDecls into function/pattern bindings, signatures,
-- type family declarations, type family instances, and documentation comments.
--
-- Panics when given a declaration that cannot be put into any of the output
-- groups.
--
-- The primary use of this function is to implement
-- 'GHC.Parser.PostProcess.cvBindsAndSigs'.
partitionBindsAndSigs
:: [LHsDecl GhcPs]
-> (LHsBinds GhcPs, [LSig GhcPs], [LFamilyDecl GhcPs],
[LTyFamInstDecl GhcPs], [LDataFamInstDecl GhcPs], [LDocDecl])
partitionBindsAndSigs = go
where
go [] = (emptyBag, [], [], [], [], [])
go ((L l decl) : ds) =
let (bs, ss, ts, tfis, dfis, docs) = go ds in
case decl of
ValD _ b
-> (L l b `consBag` bs, ss, ts, tfis, dfis, docs)
SigD _ s
-> (bs, L l s : ss, ts, tfis, dfis, docs)
TyClD _ (FamDecl _ t)
-> (bs, ss, L l t : ts, tfis, dfis, docs)
InstD _ (TyFamInstD { tfid_inst = tfi })
-> (bs, ss, ts, L l tfi : tfis, dfis, docs)
InstD _ (DataFamInstD { dfid_inst = dfi })
-> (bs, ss, ts, tfis, L l dfi : dfis, docs)
DocD _ d
-> (bs, ss, ts, tfis, dfis, L l d : docs)
_ -> pprPanic "partitionBindsAndSigs" (ppr decl)
-- | Haskell Group
--
-- A 'HsDecl' is categorised into a 'HsGroup' before being
......@@ -643,10 +676,29 @@ type instance XDataDecl GhcPs = NoExtField
type instance XDataDecl GhcRn = DataDeclRn
type instance XDataDecl GhcTc = DataDeclRn
type instance XClassDecl GhcPs = NoExtField
type instance XClassDecl GhcPs = LayoutInfo -- See Note [Class LayoutInfo]
type instance XClassDecl GhcRn = NameSet -- FVs
type instance XClassDecl GhcTc = NameSet -- FVs
{- Note [Class LayoutInfo]
~~~~~~~~~~~~~~~~~~~~~~~~~~
The LayoutInfo is used to associate Haddock comments with parts of the declaration.
Compare the following examples:
class C a where
f :: a -> Int
-- ^ comment on f
class C a where
f :: a -> Int
-- ^ comment on C
Notice how "comment on f" and "comment on C" differ only by indentation level.
Thus we have to record the indentation level of the class declarations.
See also Note [Adding Haddock comments to the syntax tree] in GHC.Parser.PostProcess.Haddock
-}
type instance XXTyClDecl (GhcPass _) = NoExtCon
-- Simple classifiers for TyClDecl
......
......@@ -7,6 +7,7 @@ module GHC.Hs.Doc
, LHsDocString
, mkHsDocString
, mkHsDocStringUtf8ByteString
, isEmptyDocString
, unpackHDS
, hsDocStringToByteString
, ppr_mbDoc
......@@ -64,6 +65,9 @@ instance Binary HsDocString where
instance Outputable HsDocString where
ppr = doubleQuotes . text . unpackHDS
isEmptyDocString :: HsDocString -> Bool
isEmptyDocString (HsDocString bs) = BS.null bs
mkHsDocString :: String -> HsDocString
mkHsDocString s =
inlinePerformIO $ do
......
......@@ -22,7 +22,7 @@ import Data.Char
-- | Source Statistics
ppSourceStats :: Bool -> Located HsModule -> SDoc
ppSourceStats short (L _ (HsModule _ exports imports ldecls _ _))
ppSourceStats short (L _ (HsModule{ hsmodExports = exports, hsmodImports = imports, hsmodDecls = ldecls }))
= (if short then hcat else vcat)
(map pp_val
[("ExportAll ", export_all), -- 1 if no export list
......
This diff is collapsed.
......@@ -66,7 +66,8 @@ module GHC.Parser.Lexer (
lexTokenStream,
AddAnn(..),mkParensApiAnn,
addAnnsAt,
commentToAnnotation
commentToAnnotation,
HdkComment(..),
) where
import GHC.Prelude
......@@ -97,6 +98,8 @@ import GHC.Utils.Outputable
import GHC.Data.StringBuffer
import GHC.Data.FastString
import GHC.Types.Unique.FM
import GHC.Data.Maybe
import GHC.Data.OrdList
import GHC.Utils.Misc ( readRational, readHexRational )
-- compiler/main
......@@ -109,6 +112,7 @@ import GHC.Unit
import GHC.Types.Basic ( InlineSpec(..), RuleMatchInfo(..),
IntegralLit(..), FractionalLit(..),
SourceText(..) )
import GHC.Hs.Doc
-- compiler/parser
import GHC.Parser.CharClass
......@@ -363,10 +367,8 @@ $tab { warnTab }
-- Haddock comments
<0,option_prags> {
"-- " $docsym / { ifExtension HaddockBit } { multiline_doc_comment }
"{-" \ ? $docsym / { ifExtension HaddockBit } { nested_doc_comment }
}
"-- " $docsym / { ifExtension HaddockBit } { multiline_doc_comment }
"{-" \ ? $docsym / { ifExtension HaddockBit } { nested_doc_comment }
-- "special" symbols
......@@ -1271,11 +1273,8 @@ nested_comment cont span buf len = do
go (reverse $ lexemeToString buf len) (1::Int) input
where
go commentAcc 0 input = do
setInput input
b <- getBit RawTokenStreamBit
if b
then docCommentEnd input commentAcc ITblockComment buf span
else cont
let finalizeComment str = (Nothing, ITblockComment str)
commentEnd cont input commentAcc finalizeComment buf span
go commentAcc n input = case alexGetChar' input of
Nothing -> errBrace input (psRealSpan span)
Just ('-',input) -> case alexGetChar' input of
......@@ -1365,24 +1364,37 @@ return control to parseNestedPragma by returning the ITcomment_line_prag token.
See #314 for more background on the bug this fixes.
-}
withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (PsLocated Token))
withLexedDocType :: (AlexInput -> (String -> (HdkComment, Token)) -> Bool -> P (PsLocated Token))
-> P (PsLocated Token)
withLexedDocType lexDocComment = do
input@(AI _ buf) <- getInput
case prevChar buf ' ' of
-- The `Bool` argument to lexDocComment signals whether or not the next
-- line of input might also belong to this doc comment.
'|' -> lexDocComment input ITdocCommentNext True
'^' -> lexDocComment input ITdocCommentPrev True
'$' -> lexDocComment input ITdocCommentNamed True
'|' -> lexDocComment input mkHdkCommentNext True
'^' -> lexDocComment input mkHdkCommentPrev True
'$' -> lexDocComment input mkHdkCommentNamed True
'*' -> lexDocSection 1 input
_ -> panic "withLexedDocType: Bad doc type"
where
lexDocSection n input = case alexGetChar' input of
Just ('*', input) -> lexDocSection (n+1) input
Just (_, _) -> lexDocComment input (ITdocSection n) False
Just (_, _) -> lexDocComment input (mkHdkCommentSection n) False
Nothing -> do setInput input; lexToken -- eof reached, lex it normally
mkHdkCommentNext, mkHdkCommentPrev :: String -> (HdkComment, Token)
mkHdkCommentNext str = (HdkCommentNext (mkHsDocString str), ITdocCommentNext str)
mkHdkCommentPrev str = (HdkCommentPrev (mkHsDocString str), ITdocCommentPrev str)
mkHdkCommentNamed :: String -> (HdkComment, Token)
mkHdkCommentNamed str =
let (name, rest) = break isSpace str
in (HdkCommentNamed name (mkHsDocString rest), ITdocCommentNamed str)
mkHdkCommentSection :: Int -> String -> (HdkComment, Token)
mkHdkCommentSection n str =
(HdkCommentSection n (mkHsDocString str), ITdocSection n str)
-- RULES pragmas turn on the forall and '.' keywords, and we turn them
-- off again at the end of the pragma.
rulePrag :: Action
......@@ -1425,17 +1437,34 @@ endPrag span _buf _len = do
-- it writes the wrong token length to the parser state. This function is
-- called afterwards, so it can just update the state.
docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
PsSpan -> P (PsLocated Token)
docCommentEnd input commentAcc docType buf span = do
commentEnd :: P (PsLocated Token)
-> AlexInput
-> String
-> (String -> (Maybe HdkComment, Token))
-> StringBuffer
-> PsSpan
-> P (PsLocated Token)
commentEnd cont input commentAcc finalizeComment buf span = do
setInput input
let (AI loc nextBuf) = input
comment = reverse commentAcc
span' = mkPsSpan (psSpanStart span) loc
last_len = byteDiff buf nextBuf
span `seq` setLastToken span' last_len
return (L span' (docType comment))
let (m_hdk_comment, hdk_token) = finalizeComment comment
whenIsJust m_hdk_comment $ \hdk_comment ->
P $ \s -> POk (s {hdk_comments = hdk_comments s `snocOL` L span' hdk_comment}) ()
b <- getBit RawTokenStreamBit
if b then return (L span' hdk_token)
else cont
docCommentEnd :: AlexInput -> String -> (String -> (HdkComment, Token)) -> StringBuffer ->
PsSpan -> P (PsLocated Token)
docCommentEnd input commentAcc docType buf span = do
let finalizeComment str =
let (hdk_comment, token) = docType str
in (Just hdk_comment, token)
commentEnd lexToken input commentAcc finalizeComment buf span
errBrace :: AlexInput -> RealSrcSpan -> P a
errBrace (AI end _) span = failLocMsgP (realSrcSpanStart span) (psRealLoc end) "unterminated `{-'"
......@@ -2170,6 +2199,15 @@ data ParserFlags = ParserFlags {
, pExtsBitmap :: !ExtsBitmap -- ^ bitmap of permitted extensions
}
-- | Haddock comment as produced by the lexer. These are accumulated in
-- 'PState' and then processed in "GHC.Parser.PostProcess.Haddock".
data HdkComment
= HdkCommentNext HsDocString
| HdkCommentPrev HsDocString
| HdkCommentNamed String HsDocString
| HdkCommentSection Int HsDocString
deriving Show
data PState = PState {
buffer :: StringBuffer,
options :: ParserFlags,
......@@ -2211,7 +2249,13 @@ data PState = PState {
annotations :: [(ApiAnnKey,[RealSrcSpan])],
eof_pos :: Maybe RealSrcSpan,
comment_q :: [RealLocated AnnotationComment],
annotations_comments :: [(RealSrcSpan,[RealLocated AnnotationComment])]
annotations_comments :: [(RealSrcSpan,[RealLocated AnnotationComment])],
-- Haddock comments accumulated in ascending order of their location
-- (BufPos). We use OrdList to get O(1) snoc.
--
-- See Note [Adding Haddock comments to the syntax tree] in GHC.Parser.PostProcess.Haddock
hdk_comments :: OrdList (PsLocated HdkComment)
}
-- last_loc and last_len are used when generating error messages,
-- and in pushCurrentContext only. Sigh, if only Happy passed the
......@@ -2698,7 +2742,8 @@ mkPStatePure options buf loc =
annotations = [],
eof_pos = Nothing,
comment_q = [],
annotations_comments = []
annotations_comments = [],
hdk_comments = nilOL
}
where init_loc = PsLoc loc (BufPos 0)
......@@ -2917,10 +2962,6 @@ lexer queueComments cont = do
(L span tok) <- lexTokenFun
--trace ("token: " ++ show tok) $ do
if (queueComments && isDocComment tok)
then queueComment (L (psRealSpan span) tok)
else return ()
if (queueComments && isComment tok)
then queueComment (L (psRealSpan span) tok) >> lexer queueComments cont
else cont (L (mkSrcSpanPs span) tok)
......@@ -3372,13 +3413,10 @@ commentToAnnotation _ = panic "commentToAnnotation"
isComment :: Token -> Bool
isComment (ITlineComment _) = True
isComment (ITblockComment _) = True
isComment (ITdocCommentNext _) = True
isComment (ITdocCommentPrev _) = True
isComment (ITdocCommentNamed _) = True
isComment (ITdocSection _ _) = True
isComment (ITdocOptions _) = True
isComment _ = False
isDocComment :: Token -> Bool
isDocComment (ITdocCommentNext _) = True
isDocComment (ITdocCommentPrev _) = True
isDocComment (ITdocCommentNamed _) = True
isDocComment (ITdocSection _ _) = True
isDocComment (ITdocOptions _) = True
isDocComment _ = False
}
This diff is collapsed.
This diff is collapsed.
......@@ -206,7 +206,7 @@ tcRnModuleTcRnM :: HscEnv
tcRnModuleTcRnM hsc_env mod_sum
(HsParsedModule {
hpm_module =
(L loc (HsModule maybe_mod export_ies
(L loc (HsModule _ maybe_mod export_ies
import_decls local_decls mod_deprec
maybe_doc_hdr)),
hpm_src_files = src_files
......
......@@ -270,7 +270,7 @@ cvtDec (ClassD ctxt cl tvs fds decs)
<+> text "are not allowed:")
$$ (Outputable.ppr adts'))
; returnJustL $ TyClD noExtField $
ClassDecl { tcdCExt = noExtField
ClassDecl { tcdCExt = NoLayoutInfo
, tcdCtxt = cxt', tcdLName = tc', tcdTyVars = tvs'
, tcdFixity = Prefix
, tcdFDs = fds', tcdSigs = Hs.mkClassOpSigs sigs'
......
......@@ -69,7 +69,9 @@ module GHC.Types.SrcLoc (
-- * StringBuffer locations
BufPos(..),
getBufPos,
BufSpan(..),
getBufSpan,
-- * Located
Located,
......@@ -88,10 +90,11 @@ module GHC.Types.SrcLoc (
mapLoc,
-- ** Combining and comparing Located values
eqLocated, cmpLocated, combineLocs, addCLoc,
eqLocated, cmpLocated, cmpBufSpan,
combineLocs, addCLoc,
leftmost_smallest, leftmost_largest, rightmost_smallest,
spans, isSubspanOf, isRealSubspanOf, sortLocated,
sortRealLocated,
spans, isSubspanOf, isRealSubspanOf,
sortLocated, sortRealLocated,
lookupSrcLoc, lookupSrcSpan,
liftL,
......@@ -106,6 +109,10 @@ module GHC.Types.SrcLoc (
psSpanEnd,
mkSrcSpanPs,
-- * Layout information
LayoutInfo(..),
leftmostColumn
) where
import GHC.Prelude
......@@ -122,6 +129,7 @@ import Data.Data
import Data.List (sortBy, intercalate)
import Data.Function (on)
import qualified Data.Map as Map
import qualified Data.Semigroup
{-
************************************************************************
......@@ -143,13 +151,77 @@ data RealSrcLoc
{-# UNPACK #-} !Int -- column number, begins at 1
deriving (Eq, Ord)
-- | 0-based index identifying the raw location in the StringBuffer.
-- | 0-based offset identifying the raw location in the 'StringBuffer'.
--
-- The lexer increments the 'BufPos' every time a character (UTF-8 code point)
-- is read from the input buffer. As UTF-8 is a variable-length encoding and
-- 'StringBuffer' needs a byte offset for indexing, a 'BufPos' cannot be used
-- for indexing.
--
-- The parser guarantees that 'BufPos' are monotonic. See #17632. This means
-- that syntactic constructs that appear later in the 'StringBuffer' are guaranteed to
-- have a higher 'BufPos'. Constrast that with 'RealSrcLoc', which does *not* make the
-- analogous guarantee about higher line/column numbers.
--
-- This is due to #line and {-# LINE ... #-} pragmas that can arbitrarily
-- modify 'RealSrcLoc'. Notice how 'setSrcLoc' and 'resetAlrLastLoc' in
-- "GHC.Parser.Lexer" update 'PsLoc', modifying 'RealSrcLoc' but preserving
-- 'BufPos'.
--
-- Monotonicity makes 'BufPos' useful to determine the order in which syntactic
-- elements appear in the source. Consider this example (haddockA041 in the test suite):
--
-- haddockA041.hs
-- {-# LANGUAGE CPP #-}
-- -- | Module header documentation
-- module Comments_and_CPP_include where
-- #include "IncludeMe.hs"
--
-- IncludeMe.hs:
-- -- | Comment on T
-- data T = MkT -- ^ Comment on MkT
--
-- After the C preprocessor runs, the 'StringBuffer' will contain a program that
-- looks like this (unimportant lines at the beginning removed):
--
-- # 1 "haddockA041.hs"
-- {-# LANGUAGE CPP #-}
-- -- | Module header documentation
-- module Comments_and_CPP_include where
-- # 1 "IncludeMe.hs" 1
-- -- | Comment on T
-- data T = MkT -- ^ Comment on MkT
-- # 7 "haddockA041.hs" 2
--
-- The line pragmas inserted by CPP make the error messages more informative.
-- The downside is that we can't use RealSrcLoc to determine the ordering of
-- syntactic elements.
--
-- With RealSrcLoc, we have the following location information recorded in the AST:
-- * The module name is located at haddockA041.hs:3:8-31
-- * The Haddock comment "Comment on T" is located at IncludeMe:1:1-17
-- * The data declaration is located at IncludeMe.hs:2:1-32
--
-- Unlike 'RealSrcLoc', it is not affected by #line and {-# LINE ... #-}
-- pragmas. In particular, notice how 'setSrcLoc' and 'resetAlrLastLoc' in
-- "GHC.Parser.Lexer" update 'PsLoc' preserving 'BufPos'.
-- Is the Haddock comment located between the module name and the data
-- declaration? This is impossible to tell because the locations are not
-- comparable; they even refer to different files.
--
-- The parser guarantees that 'BufPos' are monotonic. See #17632.
-- On the other hand, with 'BufPos', we have the following location information:
-- * The module name is located at 846-870
-- * The Haddock comment "Comment on T" is located at 898-915
-- * The data declaration is located at 916-928
--
-- Aside: if you're wondering why the numbers are so high, try running
-- @ghc -E haddockA041.hs@
-- and see the extra fluff that CPP inserts at the start of the file.
--
-- For error messages, 'BufPos' is not useful at all. On the other hand, this is
-- exactly what we need to determine the order of syntactic elements:
-- 870 < 898, therefore the Haddock comment appears *after* the module name.
-- 915 < 916, therefore the Haddock comment appears *before* the data declaration.
--
-- We use 'BufPos' in in GHC.Parser.PostProcess.Haddock to associate Haddock
-- comments with parts of the AST using location information (#17544).
newtype BufPos = BufPos { bufPos :: Int }
deriving (Eq, Ord, Show)
......@@ -173,6 +245,10 @@ mkSrcLoc x line col = RealSrcLoc (mkRealSrcLoc x line col) Nothing
mkRealSrcLoc :: FastString -> Int -> Int -> RealSrcLoc
mkRealSrcLoc x line col = SrcLoc x line col
getBufPos :: SrcLoc -> Maybe BufPos
getBufPos (RealSrcLoc _ mbpos) = mbpos
getBufPos (UnhelpfulLoc _) = Nothing
-- | Built-in "bad" 'SrcLoc' values for particular locations
noSrcLoc, generatedSrcLoc, interactiveSrcLoc :: SrcLoc
noSrcLoc = UnhelpfulLoc (fsLit "<no location info>")
......@@ -298,6 +374,10 @@ data BufSpan =
BufSpan { bufSpanStart, bufSpanEnd :: {-# UNPACK #-} !BufPos }
deriving (Eq, Ord, Show)
instance Semigroup BufSpan where
BufSpan start1 end1 <> BufSpan start2 end2 =
BufSpan (min start1 start2) (max end1 end2)
-- | Source Span
--
-- A 'SrcSpan' identifies either a specific portion of a text file
......@@ -352,6 +432,10 @@ instance ToJson RealSrcSpan where
instance NFData SrcSpan where
rnf x = x `seq` ()
getBufSpan :: SrcSpan -> Maybe BufSpan
getBufSpan (RealSrcSpan _ mbspan) = mbspan
getBufSpan (UnhelpfulSpan _) = Nothing
-- | Built-in "bad" 'SrcSpan's for common sources of location uncertainty
noSrcSpan, generatedSrcSpan, wiredInSrcSpan, interactiveSrcSpan :: SrcSpan
noSrcSpan = UnhelpfulSpan UnhelpfulNoLocationInfo
......@@ -674,6 +758,17 @@ eqLocated a b = unLoc a == unLoc b
cmpLocated :: Ord a => GenLocated l a -> GenLocated l a -> Ordering
cmpLocated a b = unLoc a `compare` unLoc b
-- | Compare the 'BufSpan' of two located things.
--
-- Precondition: both operands have an associated 'BufSpan'.
cmpBufSpan :: HasDebugCallStack => Located a -> Located a -> Ordering
cmpBufSpan (L l1 _) (L l2 _)
| Just a <- getBufSpan l1
, Just b <- getBufSpan l2
= compare a b
| otherwise = panic "cmpBufSpan: no BufSpan"
instance (Outputable l, Outputable e) => Outputable (GenLocated l e) where
ppr (L l e) = -- TODO: We can't do this since Located was refactored into
-- GenLocated:
......@@ -768,3 +863,33 @@ psSpanEnd (PsSpan r b) = PsLoc (realSrcSpanEnd r) (bufSpanEnd b)
mkSrcSpanPs :: PsSpan -> SrcSpan
mkSrcSpanPs (PsSpan r b) = RealSrcSpan r (Just b)
-- | Layout information for declarations.
data LayoutInfo =
-- | Explicit braces written by the user.
--
-- @
-- class C a where { foo :: a; bar :: a }
-- @
ExplicitBraces
|
-- | Virtual braces inserted by the layout algorithm.
--
-- @
-- class C a where
-- foo :: a
-- bar :: a
-- @
VirtualBraces
!Int -- ^ Layout column (indentation level, begins at 1)
|
-- | Empty or compiler-generated blocks do not have layout information
-- associated with them.
NoLayoutInfo
deriving (Eq, Ord, Show, Data)
-- | Indentation level is 1-indexed, so the leftmost column is 1.
leftmostColumn :: Int
leftmostColumn = 1
......@@ -5,6 +5,7 @@
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
......@@ -49,9 +50,13 @@ module GHC.Utils.Misc (
chunkList,
changeLast,
mapLastM,
whenNonEmpty,
mergeListsBy,
isSortedBy,
-- * Tuples
fstOf3, sndOf3, thdOf3,
firstM, first3M, secondM,
......@@ -601,10 +606,65 @@ changeLast [] _ = panic "changeLast"
changeLast [_] x = [x]
changeLast (x:xs) x' = x : changeLast xs x'
-- | Apply an effectful function to the last list element.
-- Assumes a non-empty list (panics otherwise).
mapLastM :: Functor f => (a -> f a) -> [a] -> f [a]
mapLastM _ [] = panic "mapLastM: empty list"
mapLastM f [x] = (\x' -> [x']) <$> f x
mapLastM f (x:xs) = (x:) <$> mapLastM f xs
whenNonEmpty :: Applicative m => [a] -> (NonEmpty a -> m ()) -> m ()
whenNonEmpty [] _ = pure ()
whenNonEmpty (x:xs) f = f (x :| xs)
-- | Merge an unsorted list of sorted lists, for example:
--
-- > mergeListsBy compare [ [2,5,15], [1,10,100] ] = [1,2,5,10,15,100]
--
-- \( O(n \log{} k) \)
mergeListsBy :: forall a. (a -> a -> Ordering) -> [[a]] -> [a]
mergeListsBy cmp lists | debugIsOn, not (all sorted lists) =
-- When debugging is on, we check that the input lists are sorted.
panic "mergeListsBy: input lists must be sorted"
where sorted = isSortedBy cmp
mergeListsBy cmp all_lists = merge_lists all_lists
where
-- Implements "Iterative 2-Way merge" described at
-- https://en.wikipedia.org/wiki/K-way_merge_algorithm
-- Merge two sorted lists into one in O(n).
merge2 :: [a] -> [a] -> [a]
merge2 [] ys = ys
merge2 xs [] = xs
merge2 (x:xs) (y:ys) =
case cmp x y of
GT -> y : merge2 (x:xs) ys
_ -> x : merge2 xs (y:ys)
-- Merge the first list with the second, the third with the fourth, and so
-- on. The output has half as much lists as the input.
merge_neighbours :: [[a]] -> [[a]]
merge_neighbours [] = []
merge_neighbours [xs] = [xs]
merge_neighbours (xs : ys : lists) =
merge2 xs ys : merge_neighbours lists
-- Since 'merge_neighbours' halves the amount of lists in each iteration,
-- we perform O(log k) iteration. Each iteration is O(n). The total running
-- time is therefore O(n log k).
merge_lists :: [[a]] -> [a]
merge_lists lists =
case merge_neighbours lists of
[] -> []
[xs] -> xs
lists' -> merge_lists lists'
isSortedBy :: (a -> a -> Ordering) -> [a] -> Bool
isSortedBy cmp = sorted
where
sorted [] = True
sorted [_] = True
sorted (x:y:xs) = cmp x y /= GT && sorted (y:xs)
{-
************************************************************************
* *
......
......@@ -290,6 +290,46 @@ Arrow notation
within 0.5 -< x
... |)
Haddock
~~~~~~~
- Parsing is now more robust to insufficiently indented Haddock comments::
class C a where
f :: a -> a
-- ^ This comment used to trigger a parse error
g :: a -> a
- :ghc-flag:`-Winvalid-haddock` is a new warning that reports discarded Haddock
comments that cannot be associated with AST elements::
myValue =
-- | Invalid (discarded) comment in an expression
2 + 2
- When faced with several comments for a data constructor or a data constructor
field, Haddock now picks the first one instead of the last one. The
extraneous comment is reported as invalid when :ghc-flag:`-Winvalid-haddock`
is enabled::
data T
-- | First comment
= MkT
-- ^ Second comment (rejected)
- Haddock is now more relaxed about the placement of comments in types relative