Commit c197fe60 authored by Simon Marlow's avatar Simon Marlow

FIX #3079, dodgy parsing of LANGUAGE pragmas

I ended up rewriting this horrible bit of code, using (yikes) lazy I/O
to slurp in the source file a chunk at a time.  The old code tried to
read the file a chunk at a time, but failed with LANGUAGE pragmas
because the parser for LANGUAGE has state and the state wasn't being
saved between chunks.  We're still closing the Handle eagerly, so
there shouldn't be any problems here.
parent c5e9e310
...@@ -23,8 +23,7 @@ import FastString ...@@ -23,8 +23,7 @@ import FastString
import HsSyn ( ImportDecl(..), HsModule(..) ) import HsSyn ( ImportDecl(..), HsModule(..) )
import Module ( ModuleName, moduleName ) import Module ( ModuleName, moduleName )
import PrelNames ( gHC_PRIM, mAIN_NAME ) import PrelNames ( gHC_PRIM, mAIN_NAME )
import StringBuffer ( StringBuffer(..), hGetStringBufferBlock import StringBuffer
, appendStringBuffers )
import SrcLoc import SrcLoc
import DynFlags import DynFlags
import ErrUtils import ErrUtils
...@@ -38,6 +37,7 @@ import MonadUtils ( MonadIO ) ...@@ -38,6 +37,7 @@ import MonadUtils ( MonadIO )
import Exception import Exception
import Control.Monad import Control.Monad
import System.IO import System.IO
import System.IO.Unsafe
import Data.List import Data.List
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
...@@ -93,21 +93,57 @@ getOptionsFromFile dflags filename ...@@ -93,21 +93,57 @@ getOptionsFromFile dflags filename
= Exception.bracket = Exception.bracket
(openBinaryFile filename ReadMode) (openBinaryFile filename ReadMode)
(hClose) (hClose)
(\handle -> (\handle -> do
do buf <- hGetStringBufferBlock handle blockSize opts <- fmap getOptions' $ lazyGetToks dflags filename handle
loop handle buf) seqList opts $ return opts)
where blockSize = 1024
loop handle buf blockSize :: Int
| len buf == 0 = return [] -- blockSize = 17 -- for testing :-)
| otherwise blockSize = 1024
= case getOptions' dflags buf filename of
(Nothing, opts) -> return opts lazyGetToks :: DynFlags -> FilePath -> Handle -> IO [Located Token]
(Just buf', opts) -> do nextBlock <- hGetStringBufferBlock handle blockSize lazyGetToks dflags filename handle = do
newBuf <- appendStringBuffers buf' nextBlock buf <- hGetStringBufferBlock handle blockSize
if len newBuf == len buf unsafeInterleaveIO $ lazyLexBuf handle (pragState dflags buf loc) False
then return opts where
else do opts' <- loop handle newBuf loc = mkSrcLoc (mkFastString filename) 1 0
return (opts++opts')
lazyLexBuf :: Handle -> PState -> Bool -> IO [Located Token]
lazyLexBuf handle state eof = do
case unP (lexer return) state of
POk state' t -> do
-- pprTrace "lazyLexBuf" (text (show (buffer state'))) (return ())
if atEnd (buffer state') && not eof
-- if this token reached the end of the buffer, and we haven't
-- necessarily read up to the end of the file, then the token might
-- be truncated, so read some more of the file and lex it again.
then getMore handle state
else case t of
L _ ITeof -> return [t]
_other -> do rest <- lazyLexBuf handle state' eof
return (t : rest)
_ | not eof -> getMore handle state
| otherwise -> return []
getMore :: Handle -> PState -> IO [Located Token]
getMore handle state = do
-- pprTrace "getMore" (text (show (buffer state))) (return ())
nextbuf <- hGetStringBufferBlock handle blockSize
if (len nextbuf == 0) then lazyLexBuf handle state True else do
newbuf <- appendStringBuffers (buffer state) nextbuf
unsafeInterleaveIO $ lazyLexBuf handle state{buffer=newbuf} False
getToks :: DynFlags -> FilePath -> StringBuffer -> [Located Token]
getToks dflags filename buf = lexAll (pragState dflags buf loc)
where
loc = mkSrcLoc (mkFastString filename) 1 0
lexAll state = case unP (lexer return) state of
POk _ t@(L _ ITeof) -> [t]
POk state' t -> t : lexAll state'
_ -> [L (last_loc state) ITeof]
-- | Parse OPTIONS and LANGUAGE pragmas of the source file. -- | Parse OPTIONS and LANGUAGE pragmas of the source file.
-- --
...@@ -117,76 +153,54 @@ getOptions :: DynFlags ...@@ -117,76 +153,54 @@ getOptions :: DynFlags
-> FilePath -- ^ Source filename. Used for location info. -> FilePath -- ^ Source filename. Used for location info.
-> [Located String] -- ^ Parsed options. -> [Located String] -- ^ Parsed options.
getOptions dflags buf filename getOptions dflags buf filename
= case getOptions' dflags buf filename of = getOptions' (getToks dflags filename buf)
(_,opts) -> opts
-- The token parser is written manually because Happy can't -- The token parser is written manually because Happy can't
-- return a partial result when it encounters a lexer error. -- return a partial result when it encounters a lexer error.
-- We want to extract options before the buffer is passed through -- We want to extract options before the buffer is passed through
-- CPP, so we can't use the same trick as 'getImports'. -- CPP, so we can't use the same trick as 'getImports'.
getOptions' :: DynFlags getOptions' :: [Located Token] -- Input buffer
-> StringBuffer -- Input buffer -> [Located String] -- Options.
-> FilePath -- Source file. Used for msgs only. getOptions' toks
-> ( Maybe StringBuffer -- Just => we can use more input = parseToks toks
, [Located String] -- Options. where
) getToken (L _loc tok) = tok
getOptions' dflags buf filename getLoc (L loc _tok) = loc
= parseToks (lexAll (pragState dflags buf loc))
where loc = mkSrcLoc (mkFastString filename) 1 0
getToken (_buf,L _loc tok) = tok
getLoc (_buf,L loc _tok) = loc
getBuf (buf,_tok) = buf
combine opts (flag, opts') = (flag, opts++opts')
add opt (flag, opts) = (flag, opt:opts)
parseToks (open:close:xs) parseToks (open:close:xs)
| IToptions_prag str <- getToken open | IToptions_prag str <- getToken open
, ITclose_prag <- getToken close , ITclose_prag <- getToken close
= map (L (getLoc open)) (words str) `combine` = map (L (getLoc open)) (words str) ++
parseToks xs parseToks xs
parseToks (open:close:xs) parseToks (open:close:xs)
| ITinclude_prag str <- getToken open | ITinclude_prag str <- getToken open
, ITclose_prag <- getToken close , ITclose_prag <- getToken close
= map (L (getLoc open)) ["-#include",removeSpaces str] `combine` = map (L (getLoc open)) ["-#include",removeSpaces str] ++
parseToks xs parseToks xs
parseToks (open:close:xs) parseToks (open:close:xs)
| ITdocOptions str <- getToken open | ITdocOptions str <- getToken open
, ITclose_prag <- getToken close , ITclose_prag <- getToken close
= map (L (getLoc open)) ["-haddock-opts", removeSpaces str] = map (L (getLoc open)) ["-haddock-opts", removeSpaces str]
`combine` parseToks xs ++ parseToks xs
parseToks (open:xs) parseToks (open:xs)
| ITdocOptionsOld str <- getToken open | ITdocOptionsOld str <- getToken open
= map (L (getLoc open)) ["-haddock-opts", removeSpaces str] = map (L (getLoc open)) ["-haddock-opts", removeSpaces str]
`combine` parseToks xs ++ parseToks xs
parseToks (open:xs) parseToks (open:xs)
| ITlanguage_prag <- getToken open | ITlanguage_prag <- getToken open
= parseLanguage xs = parseLanguage xs
-- The last token before EOF could have been truncated. parseToks _ = []
-- We ignore it to be on the safe side. parseLanguage (L loc (ITconid fs):rest)
parseToks [tok,eof] = checkExtension (L loc fs) :
| ITeof <- getToken eof
= (Just (getBuf tok),[])
parseToks (eof:_)
| ITeof <- getToken eof
= (Just (getBuf eof),[])
parseToks _ = (Nothing,[])
parseLanguage ((_buf,L loc (ITconid fs)):rest)
= checkExtension (L loc fs) `add`
case rest of case rest of
(_,L _loc ITcomma):more -> parseLanguage more (L _loc ITcomma):more -> parseLanguage more
(_,L _loc ITclose_prag):more -> parseToks more (L _loc ITclose_prag):more -> parseToks more
(_,L loc _):_ -> languagePragParseError loc (L loc _):_ -> languagePragParseError loc
[] -> panic "getOptions'.parseLanguage(1) went past eof token" [] -> panic "getOptions'.parseLanguage(1) went past eof token"
parseLanguage (tok:_) parseLanguage (tok:_)
= languagePragParseError (getLoc tok) = languagePragParseError (getLoc tok)
parseLanguage [] parseLanguage []
= panic "getOptions'.parseLanguage(2) went past eof token" = panic "getOptions'.parseLanguage(2) went past eof token"
lexToken t = return t
lexAll state = case unP (lexer lexToken) state of
POk _ t@(L _ ITeof) -> [(buffer state,t)]
POk state' t -> (buffer state,t):lexAll state'
_ -> [(buffer state,L (last_loc state) ITeof)]
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment