diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 114a523c2af906a235db7ede84891f6a7403809f..ec9bca88e98e4bb248ea50f4c5c26781835263ef 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -296,6 +296,7 @@ data DynFlag
    | Opt_KeepSFiles
    | Opt_KeepRawSFiles
    | Opt_KeepTmpFiles
+   | Opt_KeepRawTokenStream
 
    deriving (Eq, Show)
 
diff --git a/compiler/main/GHC.hs b/compiler/main/GHC.hs
index 472f587e56dd2e063710dc1ac1d81c190de44575..766ed011f29e4a2c99d706d8711989dc883fcedb 100644
--- a/compiler/main/GHC.hs
+++ b/compiler/main/GHC.hs
@@ -198,6 +198,11 @@ module GHC (
 	-- * Exceptions
 	GhcException(..), showGhcException,
 
+        -- * Token stream manipulations
+        Token,
+        getTokenStream, getRichTokenStream,
+        showRichTokenStream, addSourceToTokens,
+
 	-- * Miscellaneous
 	--sessionHscEnv,
 	cyclicModuleErr,
@@ -269,13 +274,14 @@ import Bag		( unitBag, listToBag, emptyBag, isEmptyBag )
 import ErrUtils
 import MonadUtils
 import Util
-import StringBuffer	( StringBuffer, hGetStringBuffer )
+import StringBuffer	( StringBuffer, hGetStringBuffer, nextChar )
 import Outputable
 import BasicTypes
 import Maybes		( expectJust, mapCatMaybes )
 import HaddockParse
 import HaddockLex       ( tokenise )
 import FastString
+import Lexer
 
 import Control.Concurrent
 import System.Directory ( getModificationTime, doesFileExist,
@@ -1717,7 +1723,7 @@ topSortModuleGraph
 --
 --   True:	eliminate the hi-boot nodes, and instead pretend
 --		the a source-import of Foo is an import of Foo
---		The resulting graph has no hi-boot nodes, but can by cyclic
+--		The resulting graph has no hi-boot nodes, but can be cyclic
 
 topSortModuleGraph drop_hs_boot_nodes summaries mb_root_mod
   = map (fmap summaryNodeSummary) $ stronglyConnCompG initial_graph
@@ -2440,12 +2446,85 @@ pprParenSymName a = parenSymOcc (getOccName a) (ppr (getName a))
 -- :browse will use either lm_toplev or inspect lm_interface, depending
 -- on whether the module is interpreted or not.
 
--- This is for reconstructing refactored source code
--- Calls the lexer repeatedly.
--- ToDo: add comment tokens to token stream
-getTokenStream :: Session -> Module -> IO [Located Token]
 #endif
 
+-- Extract the filename, stringbuffer content and dynflags associated with a module
+--
+-- XXX: Explain pre-conditions
+getModuleSourceAndFlags :: GhcMonad m => Module -> m (String, StringBuffer, DynFlags)
+getModuleSourceAndFlags mod = do
+  m <- getModSummary (moduleName mod)
+  case ml_hs_file $ ms_location m of
+    Nothing -> throw $ mkApiErr (text "No source available for module " <+> ppr mod)
+    Just sourceFile -> do
+        source <- liftIO $ hGetStringBuffer sourceFile
+        return (sourceFile, source, ms_hspp_opts m)
+
+
+-- | Return module source as token stream, including comments.
+--
+-- The module must be in the module graph and its source must be available.
+-- Throws a 'HscTypes.SourceError' on parse error.
+getTokenStream :: GhcMonad m => Module -> m [Located Token]
+getTokenStream mod = do
+  (sourceFile, source, flags) <- getModuleSourceAndFlags mod
+  let startLoc = mkSrcLoc (mkFastString sourceFile) 0 0
+  case lexTokenStream source startLoc flags of
+    POk _ ts  -> return ts
+    PFailed span err -> throw $ mkSrcErr (unitBag $ mkPlainErrMsg span err)
+
+-- | Give even more information on the source than 'getTokenStream'.
+-- This function allows reconstructing the source completely with
+-- 'showRichTokenStream'.
+getRichTokenStream :: GhcMonad m => Module -> m [(Located Token, String)]
+getRichTokenStream mod = do
+  (sourceFile, source, flags) <- getModuleSourceAndFlags mod
+  let startLoc = mkSrcLoc (mkFastString sourceFile) 0 0
+  case lexTokenStream source startLoc flags of
+    POk _ ts -> return $ addSourceToTokens startLoc source ts
+    PFailed span err -> throw $ mkSrcErr (unitBag $ mkPlainErrMsg span err)
+
+-- | Given a source location and a StringBuffer corresponding to this
+-- location, return a rich token stream with the source associated to the
+-- tokens.
+addSourceToTokens :: SrcLoc -> StringBuffer -> [Located Token]
+                  -> [(Located Token, String)]
+addSourceToTokens _ _ [] = []
+addSourceToTokens loc buf (t@(L span _) : ts)
+    | not (isGoodSrcSpan span) = (t,"") : addSourceToTokens loc buf ts
+    | otherwise = (t,str) : addSourceToTokens newLoc newBuf ts
+    where
+      (newLoc, newBuf, str) = go "" loc buf
+      start = srcSpanStart span
+      end = srcSpanEnd span
+      go acc loc buf | loc < start = go acc nLoc nBuf
+                     | start <= loc && loc < end = go (ch:acc) nLoc nBuf
+                     | otherwise = (loc, buf, reverse acc)
+          where (ch, nBuf) = nextChar buf
+                nLoc = advanceSrcLoc loc ch
+
+
+-- | Take a rich token stream such as produced from 'getRichTokenStream' and
+-- return source code almost identical to the original code (except for
+-- insignificant whitespace).
+showRichTokenStream :: [(Located Token, String)] -> String
+showRichTokenStream ts = go startLoc ts ""
+    where sourceFile = srcSpanFile (getLoc . fst . head $ ts)
+          startLoc = mkSrcLoc sourceFile 0 0
+          go _ [] = id
+          go loc ((L span _, str):ts)
+              | not (isGoodSrcSpan span) = go loc ts
+              | locLine == tokLine = ((replicate (tokCol - locCol) ' ') ++)
+                                     . (str ++)
+                                     . go tokEnd ts
+              | otherwise = ((replicate (tokLine - locLine) '\n') ++)
+                            . ((replicate tokCol ' ') ++)
+                            . (str ++)
+                            . go tokEnd ts
+              where (locLine, locCol) = (srcLocLine loc, srcLocCol loc)
+                    (tokLine, tokCol) = (srcSpanStartLine span, srcSpanStartCol span)
+                    tokEnd = srcSpanEnd span
+
 -- -----------------------------------------------------------------------------
 -- Interactive evaluation
 
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 66f4fe51366d36d60087e99ef2b32eba25c2f8cb..613848ade970d5c77e824f9c10b21b83779c8961 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -41,7 +41,8 @@ module Lexer (
    popContext, pushCurrentContext, setLastToken, setSrcLoc,
    getLexState, popLexState, pushLexState,
    extension, standaloneDerivingEnabled, bangPatEnabled,
-   addWarning
+   addWarning,
+   lexTokenStream
   ) where
 
 import Bag
@@ -148,12 +149,12 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 -- space followed by a Haddock comment symbol (docsym) (in which case we'd
 -- have a Haddock comment). The rules then munch the rest of the line.
 
-"-- " ~[$docsym \#] .* ;
-"--" [^$symbol : \ ] .* ;
+"-- " ~[$docsym \#] .* { lineCommentToken }
+"--" [^$symbol : \ ] .* { lineCommentToken }
 
 -- Next, match Haddock comments if no -haddock flag
 
-"-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } ;
+"-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } { lineCommentToken }
 
 -- Now, when we've matched comments that begin with 2 dashes and continue
 -- with a different character, we need to match comments that begin with three
@@ -161,17 +162,17 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 -- make sure that the first non-dash character isn't a symbol, and munch the
 -- rest of the line.
 
-"---"\-* [^$symbol :] .* ;
+"---"\-* [^$symbol :] .* { lineCommentToken }
 
 -- Since the previous rules all match dashes followed by at least one
 -- character, we also need to match a whole line filled with just dashes.
 
-"--"\-* / { atEOL } ;
+"--"\-* / { atEOL } { lineCommentToken }
 
 -- We need this rule since none of the other single line comment rules
 -- actually match this case.
 
-"-- " / { atEOL } ;
+"-- " / { atEOL } { lineCommentToken }
 
 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 -- blank lines) until we find a non-whitespace character, then do layout
@@ -277,7 +278,7 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 }
 
 <0> {
-  "-- #" .* ;
+  "-- #" .* { lineCommentToken }
 }
 
 <0,option_prags> {
@@ -575,6 +576,8 @@ data Token
   | ITdocSection      Int String -- a section heading
   | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
   | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
+  | ITlineComment     String     -- comment starting with "--"
+  | ITblockComment    String     -- comment in {- -}
 
 #ifdef DEBUG
   deriving Show -- debugging
@@ -802,6 +805,11 @@ multiline_doc_comment span buf _len = withLexedDocType (worker "")
             | otherwise -> input
           Nothing -> input
 
+lineCommentToken :: Action
+lineCommentToken span buf len = do
+  b <- extension rawTokenStreamEnabled
+  if b then strtoken ITlineComment span buf len else lexToken
+
 {-
   nested comments require traversing by hand, they can't be parsed
   using regular expressions.
@@ -809,20 +817,24 @@ multiline_doc_comment span buf _len = withLexedDocType (worker "")
 nested_comment :: P (Located Token) -> Action
 nested_comment cont span _str _len = do
   input <- getInput
-  go (1::Int) input
+  go "" (1::Int) input
   where
-    go 0 input = do setInput input; cont
-    go n input = case alexGetChar input of
+    go commentAcc 0 input = do setInput input
+                               b <- extension rawTokenStreamEnabled
+                               if b
+                                 then docCommentEnd input commentAcc ITblockComment _str span
+                                 else cont
+    go commentAcc n input = case alexGetChar input of
       Nothing -> errBrace input span
       Just ('-',input) -> case alexGetChar input of
         Nothing  -> errBrace input span
-        Just ('\125',input) -> go (n-1) input
-        Just (_,_)          -> go n input
+        Just ('\125',input) -> go commentAcc (n-1) input
+        Just (_,_)          -> go ('-':commentAcc) n input
       Just ('\123',input) -> case alexGetChar input of
         Nothing  -> errBrace input span
-        Just ('-',input) -> go (n+1) input
-        Just (_,_)       -> go n input
-      Just (_,input) -> go n input
+        Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
+        Just (_,_)       -> go ('\123':commentAcc) n input
+      Just (c,input) -> go (c:commentAcc) n input
 
 nested_doc_comment :: Action
 nested_doc_comment span buf _len = withLexedDocType (go "")
@@ -1596,6 +1608,7 @@ standaloneDerivingBit = 16 -- standalone instance deriving declarations
 transformComprehensionsBit = 17
 qqBit	   = 18 -- enable quasiquoting
 inRulePragBit = 19
+rawTokenStreamBit = 20 -- producing a token stream with all comments included
 
 genericsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
 always           _     = True
@@ -1618,6 +1631,7 @@ standaloneDerivingEnabled flags = testBit flags standaloneDerivingBit
 transformComprehensionsEnabled flags = testBit flags transformComprehensionsBit
 qqEnabled        flags = testBit flags qqBit
 inRulePrag       flags = testBit flags inRulePragBit
+rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
 
 -- PState for parsing options pragmas
 --
@@ -1679,7 +1693,8 @@ mkPState buf loc flags  =
 	       .|. unicodeSyntaxBit `setBitIf` dopt Opt_UnicodeSyntax flags
 	       .|. unboxedTuplesBit `setBitIf` dopt Opt_UnboxedTuples flags
 	       .|. standaloneDerivingBit `setBitIf` dopt Opt_StandaloneDeriving flags
-           .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags
+               .|. transformComprehensionsBit `setBitIf` dopt Opt_TransformListComp flags
+               .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
       --
       setBitIf :: Int -> Bool -> Int
       b `setBitIf` cond | cond      = bit b
@@ -1795,4 +1810,13 @@ reportLexError loc1 loc2 buf str
   if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
     then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
     else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
+
+lexTokenStream :: StringBuffer -> SrcLoc -> DynFlags -> ParseResult [Located Token]
+lexTokenStream buf loc dflags = unP go initState
+    where initState = mkPState buf loc (dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream)
+          go = do
+            ltok <- lexer return
+            case ltok of
+              L _ ITeof -> return []
+              _ -> liftM (ltok:) go
 }