From 968410ef256f49be0a3ba5de087f1ec776c3f395 Mon Sep 17 00:00:00 2001 From: Duncan Coutts <duncan@haskell.org> Date: Sun, 30 Dec 2007 01:25:30 +0000 Subject: [PATCH] Refactor unlit code to improve error and line pragma handling And to make it more like the original code that classified lines separately from checking and transforming them. --- Distribution/Simple/Hugs.hs | 10 +- Distribution/Simple/PreProcess.hs | 2 +- Distribution/Simple/PreProcess/Unlit.hs | 141 +++++++++++++++--------- 3 files changed, 94 insertions(+), 59 deletions(-) diff --git a/Distribution/Simple/Hugs.hs b/Distribution/Simple/Hugs.hs index a83fd411c5..5bfccb0151 100644 --- a/Distribution/Simple/Hugs.hs +++ b/Distribution/Simple/Hugs.hs @@ -249,7 +249,9 @@ build pkg_descr lbi verbosity = do readHaskellFile :: FilePath -> IO String readHaskellFile file = do text <- readFile file - return $ if ".lhs" `isSuffixOf` file then unlit file text else text + if ".lhs" `isSuffixOf` file + then either return die (unlit file text) + else return text -- ------------------------------------------------------------ -- * options in source files @@ -265,11 +267,13 @@ getOptionsFromSource ) getOptionsFromSource file = do text <- readFile file + text' <- if ".lhs" `isSuffixOf` file + then either return die (unlit file text) + else return text return $ foldr appendOptions ([],[],[]) $ map getOptions $ takeWhileJust $ map getPragma $ filter textLine $ map (dropWhile isSpace) $ lines $ - stripComments True $ - if ".lhs" `isSuffixOf` file then unlit file text else text + stripComments True text' where textLine [] = False textLine ('#':_) = False textLine _ = True diff --git a/Distribution/Simple/PreProcess.hs b/Distribution/Simple/PreProcess.hs index 425e2f3cc5..b0fbdb7d26 100644 --- a/Distribution/Simple/PreProcess.hs +++ b/Distribution/Simple/PreProcess.hs @@ -268,7 +268,7 @@ ppUnlit = platformIndependent = True, runPreProcessor = mkSimplePreProcessor $ \inFile outFile _verbosity -> do contents <- readFile inFile - writeFile outFile (unlit inFile contents) + either (writeFile outFile) die (unlit inFile contents) } ppCpp :: BuildInfo -> LocalBuildInfo -> PreProcessor diff --git a/Distribution/Simple/PreProcess/Unlit.hs b/Distribution/Simple/PreProcess/Unlit.hs index bf56adb046..c947e839de 100644 --- a/Distribution/Simple/PreProcess/Unlit.hs +++ b/Distribution/Simple/PreProcess/Unlit.hs @@ -10,26 +10,70 @@ -- Remove the \"literal\" markups from a Haskell source file, including -- \"@>@\", \"@\\begin{code}@\", \"@\\end{code}@\", and \"@#@\" +-- This version is interesting because instead of striping comment lines, it +-- turns them into "-- " style comments. This allows using haddock markup +-- in literate scripts without having to use "> --" prefix. + module Distribution.Simple.PreProcess.Unlit (unlit,plain) where import Data.Char import Data.List +data Classified = BirdTrack String | Blank String | Ordinary String + | Line !Int String | CPP String + | BeginCode | EndCode + -- output only: + | Error String | Comment String + -- | No unliteration. plain :: String -> String -> String plain _ hs = hs +classify :: String -> Classified +classify ('>':s) = BirdTrack s +classify ('#':s) = case tokens s of + (line:file:_) | all isDigit line + && length file >= 2 + && head file == '"' + && last file == '"' + -> Line (read line) (tail (init file)) + _ -> CPP s + where tokens = unfoldr $ \str -> case lex str of + (t@(_:_), str'):_ -> Just (t, str') + _ -> Nothing +classify ('\\':s) + | s `isPrefixOf` "begin{code}" = BeginCode + | s `isPrefixOf` "end{code}" = EndCode +classify s | all isSpace s = Blank s +classify s = Ordinary s + +unclassify :: Classified -> String +unclassify (BirdTrack s) = ' ':s +unclassify (Blank s) = s +unclassify (Ordinary s) = s +unclassify (Line n file) = "# " ++ show n ++ " " ++ show file +unclassify (CPP s) = '#':s +unclassify (Comment s) = "-- " ++ s + -- | 'unlit' takes a filename (for error reports), and transforms the -- given string, to eliminate the literate comments from the program text. -unlit :: FilePath -> String -> String -unlit file lhs = (unlines . classify file) (inlines lhs) +unlit :: FilePath -> String -> Either String String +unlit file = either (Left . unlines + . map unclassify) + Right + . checkErrors + . reclassify + . map classify + . inlines - -isBirdTrack = isPrefixOf ">" -isCpp = isPrefixOf "#" -isCodeStart = isPrefixOf "\\begin{code}" -isCodeEnd = isPrefixOf "\\end{code}" -isEmptyLine = all isSpace + where checkErrors ls = case [ e | Error e <- ls ] of + [] -> Left ls + (message:_) -> Right (f ++ ":" ++ show n ++ ": " ++ message) + where (f, n) = errorPos file 1 ls + errorPos f n [] = (f, n) + errorPos f n (Error _:_) = (f, n) + errorPos _ _ (Line n' f':ls) = errorPos f' n' ls + errorPos f n (_ :ls) = errorPos f (n+1) ls -- Here we model a state machine, with each state represented by -- a local function. We only have four states (well, five, @@ -40,57 +84,44 @@ isEmptyLine = all isSpace -- Each state represents the type of line that was last read -- i.e. are we in a comment section, or a latex-code section, -- or a bird-code section, etc? -classify :: String -> [String] -> [String] -classify file [] = [] -classify file lines = blank lines 1 -- begin in blank state +reclassify :: [Classified] -> [Classified] +reclassify = blank -- begin in blank state where - err n msg = error ("In file "++file++" at line "++show n++": "++msg++".") - dropBird _ ('>':' ':x) = x - dropBird _ ('>':x) = x - dropBird n _ = err n "expecting '>' at start of line" - mkComment x = "-- " ++ x - - transition classification [] _ _ = [classification] - -- First case guarantee that xs is never null, - -- so state functions can assume that too. - transition classification xs n state = classification : state xs (n+1) - - latex (x:xs) n - | isCodeEnd x = transition "" xs n comment - | isCodeStart x = err n "\\begin{code} in code section" - | otherwise = transition x xs n latex - - blank (x:xs) n - | isCodeEnd x = err n "\\end{code} without \\begin{code}" - | isCodeStart x = transition "" xs n latex - | isCpp x = transition x xs n blank - | isBirdTrack x = transition (dropBird n x) xs n bird - | isEmptyLine x = transition "" xs n blank - | otherwise = transition (mkComment x) xs n comment + latex [] = [] + latex (EndCode :ls) = Blank "" : comment ls + latex (BeginCode :_ ) = [Error "\\begin{code} in code section"] + latex (BirdTrack l:ls) = Ordinary ('>':l) : bird ls + latex ( l:ls) = l : latex ls - bird (x:xs) n - | isCodeEnd x = err n "\\end{code} without \\begin{code}" - | isCodeStart x = transition "" xs n latex - | isCpp x = transition x xs n bird - | isBirdTrack x = transition (dropBird n x) xs n bird - | isEmptyLine x = transition "" xs n blank - | otherwise = err n "program line before comment line" + blank [] = [] + blank (EndCode :_ ) = [Error "\\end{code} without \\begin{code}"] + blank (BeginCode :ls) = Blank "" : latex ls + blank (BirdTrack l:ls) = BirdTrack l : bird ls + blank (Ordinary l:ls) = Comment l : comment ls + blank ( l:ls) = l : blank ls - comment (x:xs) n - | isCodeEnd x = err n "\\end{code} without \\begin{code}" - | isCodeStart x = transition "" xs n latex - | isCpp x = transition x xs n comment - | isBirdTrack x = err n "comment line before program line" - -- special case here: a truly empty line will terminate - -- a comment section (and send us into the "blank" state) - -- but a line containing whitespace will be treated as a - -- comment (prefixed with "-- "), unless it is followed by - -- a program line, in which case it is really blank. - | null x = transition "" xs n blank - | isEmptyLine x && null xs = transition (mkComment x) xs n comment - | isEmptyLine x && isBirdTrack (head xs) = transition "" xs n blank - | otherwise = transition (mkComment x) xs n comment + bird [] = [] + bird (EndCode :_ ) = [Error "\\end{code} without \\begin{code}"] + bird (BeginCode :ls) = Blank "" : latex ls + bird (Blank l :ls) = Blank l : blank ls + bird (Ordinary _:_ ) = [Error "program line before comment line"] + bird ( l:ls) = l : bird ls + comment [] = [] + comment (EndCode :_ ) = [Error "\\end{code} without \\begin{code}"] + comment (BeginCode :ls) = Blank "" : latex ls + comment (CPP l :ls) = CPP l : comment ls + comment (BirdTrack _:_ ) = [Error "comment line before program line"] + -- special case here: a truly empty line will terminate + -- a comment section (and send us into the "blank" state) + comment (Blank "" :ls) = Blank "" : blank ls + -- but a line containing whitespace will be treated as a + -- comment (prefixed with "-- "), unless it is followed by + -- a program line, in which case it is really blank. + comment (Blank l:ls@(BirdTrack _:_)) = Blank l : blank ls + comment (Blank l:ls) = Comment l : comment ls + comment (Line n f :ls) = Line n f : comment ls + comment (Ordinary l:ls) = Comment l : comment ls -- Re-implementation of 'lines', for better efficiency (but decreased laziness). -- Also, importantly, accepts non-standard DOS and Mac line ending characters. -- GitLab