From 4b297979d25740d31241a9000e36068db112545a Mon Sep 17 00:00:00 2001
From: Matthew Pickering <matthewtpickering@gmail.com>
Date: Wed, 18 Nov 2020 11:36:07 +0000
Subject: [PATCH] Add -finfo-table-map which maps info tables to source
 positions

This new flag embeds a lookup table from the address of an info table
to information about that info table.

The main interface for consulting the map is the `lookupIPE` C function

> InfoProvEnt * lookupIPE(StgInfoTable *info)

The `InfoProvEnt` has the following structure:

> typedef struct InfoProv_{
>     char * table_name;
>     char * closure_desc;
>     char * ty_desc;
>     char * label;
>     char * module;
>     char * srcloc;
> } InfoProv;
>
> typedef struct InfoProvEnt_ {
>     StgInfoTable * info;
>     InfoProv prov;
>     struct InfoProvEnt_ *link;
> } InfoProvEnt;

The source positions are approximated in a similar way to the source
positions for DWARF debugging information. They are only approximate but
in our experience provide a good enough hint about where the problem
might be. It is therefore recommended to use this flag in conjunction
with `-g<n>` for more accurate locations.

The lookup table is also emitted into the eventlog when it is available
as it is intended to be used with the `-hi` profiling mode.

Using this flag will significantly increase the size of the resulting
object file but only by a factor of 2-3x in our experience.
---
 compiler/GHC/Cmm/CLabel.hs                    |  56 ++++++-
 compiler/GHC/Cmm/Parser.y                     |  15 +-
 compiler/GHC/CoreToStg.hs                     |  17 ++-
 compiler/GHC/Driver/CodeOutput.hs             | 125 +++++++++++++++-
 compiler/GHC/Driver/Flags.hs                  |   2 +
 compiler/GHC/Driver/Hooks.hs                  |   6 +-
 compiler/GHC/Driver/Main.hs                   |  66 +++++----
 compiler/GHC/Driver/Pipeline.hs               |   4 +-
 compiler/GHC/Driver/Session.hs                |   2 +
 compiler/GHC/HsToCore/Coverage.hs             |   6 +-
 compiler/GHC/Stg/Debug.hs                     | 140 ++++++++++++++++++
 compiler/GHC/StgToCmm.hs                      |  49 ++++--
 compiler/GHC/StgToCmm/Closure.hs              |   1 +
 compiler/GHC/StgToCmm/Monad.hs                |  13 +-
 compiler/GHC/StgToCmm/Prof.hs                 |  53 +++++++
 compiler/GHC/StgToCmm/Types.hs                |   3 +
 compiler/GHC/StgToCmm/Utils.hs                |  36 ++++-
 compiler/GHC/Types/IPE.hs                     |  24 +++
 compiler/GHC/Types/SrcLoc.hs                  |   5 +
 compiler/GHC/Types/Unique/Map.hs              |   2 +-
 compiler/GHC/Unit/Module/Name.hs              |   2 +-
 compiler/ghc.cabal.in                         |   2 +
 docs/users_guide/debug-info.rst               |  58 ++++++++
 docs/users_guide/eventlog-formats.rst         |  18 +++
 docs/users_guide/profiling.rst                |   5 +-
 includes/Rts.h                                |   1 +
 includes/rts/EventLogFormat.h                 |   1 +
 includes/rts/IPE.h                            |  35 +++++
 rts/IPE.c                                     |  81 ++++++++++
 rts/IPE.h                                     |  18 +++
 rts/RtsStartup.c                              |   2 +
 rts/RtsSymbols.c                              |   3 +
 rts/Trace.c                                   |  13 ++
 rts/Trace.h                                   |   9 ++
 rts/eventlog/EventLog.c                       |  34 +++++
 rts/eventlog/EventLog.h                       |   8 +
 rts/rts.cabal.in                              |   2 +
 .../parser/should_run/CountAstDeps.stdout     |   4 +-
 .../parser/should_run/CountParserDeps.stdout  |   4 +-
 .../tests/regalloc/regalloc_unit_tests.hs     |   6 +-
 40 files changed, 852 insertions(+), 79 deletions(-)
 create mode 100644 compiler/GHC/Stg/Debug.hs
 create mode 100644 compiler/GHC/Types/IPE.hs
 create mode 100644 includes/rts/IPE.h
 create mode 100644 rts/IPE.c
 create mode 100644 rts/IPE.h

diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs
index 4b30bc8cf1e9..1afb97dcd84e 100644
--- a/compiler/GHC/Cmm/CLabel.hs
+++ b/compiler/GHC/Cmm/CLabel.hs
@@ -83,6 +83,9 @@ module GHC.Cmm.CLabel (
         mkForeignLabel,
         mkCCLabel,
         mkCCSLabel,
+        mkIPELabel,
+        InfoProvEnt(..),
+
         mkDynamicLinkerLabel,
         mkPicBaseLabel,
         mkDeadStripPreventer,
@@ -148,6 +151,7 @@ import GHC.Types.Unique.Set
 import GHC.Utils.Misc
 import GHC.Core.Ppr ( {- instances -} )
 import GHC.CmmToAsm.Config
+import GHC.Types.SrcLoc
 
 -- -----------------------------------------------------------------------------
 -- The CLabel type
@@ -251,6 +255,7 @@ data CLabel
 
   | CC_Label  CostCentre
   | CCS_Label CostCentreStack
+  | IPE_Label InfoProvEnt
 
 
   -- | These labels are generated and used inside the NCG only.
@@ -342,6 +347,8 @@ instance Ord CLabel where
     compare a1 a2
   compare (CCS_Label a1) (CCS_Label a2) =
     compare a1 a2
+  compare (IPE_Label a1) (IPE_Label a2) =
+    compare a1 a2
   compare (DynamicLinkerLabel a1 b1) (DynamicLinkerLabel a2 b2) =
     compare a1 a2 `thenCmp`
     compare b1 b2
@@ -384,6 +391,8 @@ instance Ord CLabel where
   compare _ HpcTicksLabel{} = GT
   compare SRTLabel{} _ = LT
   compare _ SRTLabel{} = GT
+  compare (IPE_Label {}) _ = LT
+  compare  _ (IPE_Label{}) = GT
 
 -- | Record where a foreign label is stored.
 data ForeignLabelSource
@@ -416,7 +425,7 @@ pprDebugCLabel platform lbl = pprCLabel platform AsmStyle lbl <> parens extra
    where
       extra = case lbl of
          IdLabel _ _ info
-            -> text "IdLabel" <> whenPprDebug (text ":" <> text (show info))
+            -> text "IdLabel" <> whenPprDebug (text ":" <> ppr info)
 
          CmmLabel pkg _ext _name _info
             -> text "CmmLabel" <+> ppr pkg
@@ -452,7 +461,25 @@ data IdLabelInfo
                         -- instead of a closure entry-point.
                         -- See Note [Proc-point local block entry-point].
 
-  deriving (Eq, Ord, Show)
+  deriving (Eq, Ord)
+
+instance Outputable IdLabelInfo where
+  ppr Closure    = text "Closure"
+  ppr InfoTable  = text "InfoTable"
+  ppr Entry      = text "Entry"
+  ppr Slow       = text "Slow"
+
+  ppr LocalInfoTable  = text "LocalInfoTable"
+  ppr LocalEntry      = text "LocalEntry"
+
+  ppr RednCounts      = text "RednCounts"
+  ppr ConEntry        = text "ConEntry"
+  ppr ConInfoTable    = text "ConInfoTable"
+--  ppr (ConEntry mn) = text "ConEntry" <+> ppr mn
+--  ppr (ConInfoTable mn) = text "ConInfoTable" <+> ppr mn
+  ppr ClosureTable = text "ClosureTable"
+  ppr Bytes        = text "Bytes"
+  ppr BlockInfoTable  = text "BlockInfoTable"
 
 
 data RtsLabelInfo
@@ -710,11 +737,28 @@ foreignLabelStdcallInfo _lbl = Nothing
 mkBitmapLabel   :: Unique -> CLabel
 mkBitmapLabel   uniq            = LargeBitmapLabel uniq
 
+-- | Info Table Provenance Entry
+-- See Note [Mapping Info Tables to Source Positions]
+data InfoProvEnt = InfoProvEnt
+                               { infoTablePtr :: !CLabel
+                               -- Address of the info table
+                               , infoProvEntClosureType :: !Int
+                               -- The closure type of the info table (from ClosureMacros.h)
+                               , infoTableType :: !String
+                               -- The rendered Haskell type of the closure the table represents
+                               , infoProvModule :: !Module
+                               -- Origin module
+                               , infoTableProv :: !(Maybe (RealSrcSpan, String)) }
+                               -- Position and information about the info table
+                               deriving (Eq, Ord)
+
 -- Constructing Cost Center Labels
 mkCCLabel  :: CostCentre      -> CLabel
 mkCCSLabel :: CostCentreStack -> CLabel
+mkIPELabel :: InfoProvEnt -> CLabel
 mkCCLabel           cc          = CC_Label cc
 mkCCSLabel          ccs         = CCS_Label ccs
+mkIPELabel          ipe         = IPE_Label ipe
 
 mkRtsApFastLabel :: FastString -> CLabel
 mkRtsApFastLabel str = RtsLabel (RtsApFast (NonDetFastString str))
@@ -863,6 +907,7 @@ needsCDecl (CmmLabel pkgId (NeedExternDecl external) _ _)
 needsCDecl l@(ForeignLabel{})           = not (isMathFun l)
 needsCDecl (CC_Label _)                 = True
 needsCDecl (CCS_Label _)                = True
+needsCDecl (IPE_Label {})               = True
 needsCDecl (HpcTicksLabel _)            = True
 needsCDecl (DynamicLinkerLabel {})      = panic "needsCDecl DynamicLinkerLabel"
 needsCDecl PicBaseLabel                 = panic "needsCDecl PicBaseLabel"
@@ -985,6 +1030,7 @@ externallyVisibleCLabel (ForeignLabel{})        = True
 externallyVisibleCLabel (IdLabel name _ info)   = isExternalName name && externallyVisibleIdLabel info
 externallyVisibleCLabel (CC_Label _)            = True
 externallyVisibleCLabel (CCS_Label _)           = True
+externallyVisibleCLabel (IPE_Label {})          = True
 externallyVisibleCLabel (DynamicLinkerLabel _ _)  = False
 externallyVisibleCLabel (HpcTicksLabel _)       = True
 externallyVisibleCLabel (LargeBitmapLabel _)    = False
@@ -1044,6 +1090,7 @@ labelType (AsmTempDerivedLabel _ _)             = panic "labelType(AsmTempDerive
 labelType (StringLitLabel _)                    = DataLabel
 labelType (CC_Label _)                          = DataLabel
 labelType (CCS_Label _)                         = DataLabel
+labelType (IPE_Label {})                        = DataLabel
 labelType (DynamicLinkerLabel _ _)              = DataLabel -- Is this right?
 labelType PicBaseLabel                          = DataLabel
 labelType (DeadStripPreventer _)                = DataLabel
@@ -1057,7 +1104,7 @@ idInfoLabelType info =
     LocalInfoTable -> DataLabel
     BlockInfoTable -> DataLabel
     Closure       -> GcPtrLabel
-    ConInfoTable  -> DataLabel
+    ConInfoTable {} -> DataLabel
     ClosureTable  -> DataLabel
     RednCounts    -> DataLabel
     Bytes         -> DataLabel
@@ -1132,6 +1179,7 @@ labelDynamic config lbl =
 
    -- CCS_Label always contains a CostCentre defined in the current module
    CCS_Label _ -> False
+   IPE_Label {} -> True
 
    HpcTicksLabel m ->
      externalDynamicRefs && this_mod /= m
@@ -1356,6 +1404,8 @@ pprCLabel platform sty lbl =
 
    CC_Label cc   -> maybe_underscore $ ppr cc
    CCS_Label ccs -> maybe_underscore $ ppr ccs
+   IPE_Label (InfoProvEnt l _ _ m _) -> maybe_underscore $ (pprCode CStyle (pdoc platform l) <> text "_" <> ppr m <> text "_ipe")
+
 
    CmmLabel _ _ fs CmmCode     -> maybe_underscore $ ftext fs
    CmmLabel _ _ fs CmmData     -> maybe_underscore $ ftext fs
diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y
index c04c9b82ca63..92e981a84189 100644
--- a/compiler/GHC/Cmm/Parser.y
+++ b/compiler/GHC/Cmm/Parser.y
@@ -221,6 +221,7 @@ import GHC.StgToCmm.Expr
 import GHC.StgToCmm.Closure
 import GHC.StgToCmm.Layout     hiding (ArgRep(..))
 import GHC.StgToCmm.Ticky
+import GHC.StgToCmm.Prof
 import GHC.StgToCmm.Bind  ( emitBlackHoleCode, emitUpdateFrame )
 
 import GHC.Core           ( Tickish(SourceNote) )
@@ -1448,8 +1449,9 @@ initEnv profile = listToUFM [
   ]
   where platform = profilePlatform profile
 
-parseCmmFile :: DynFlags -> HomeUnit -> FilePath -> IO (Bag PsWarning, Bag PsError, Maybe CmmGroup)
-parseCmmFile dflags home_unit filename = do
+
+parseCmmFile :: DynFlags -> Module -> HomeUnit -> FilePath -> IO (Bag PsWarning, Bag PsError, Maybe (CmmGroup, [InfoProvEnt]))
+parseCmmFile dflags this_mod home_unit filename = do
   buf <- hGetStringBuffer filename
   let
         init_loc = mkRealSrcLoc (mkFastString filename) 1 1
@@ -1463,8 +1465,13 @@ parseCmmFile dflags home_unit filename = do
         return (warnings, errors, Nothing)
     POk pst code -> do
         st <- initC
-        let fcode = getCmm $ unEC code "global" (initEnv (targetProfile dflags)) [] >> return ()
-            (cmm,_) = runC dflags no_module st fcode
+        let fcode = do
+              ((), cmm) <- getCmm $ unEC code "global" (initEnv (targetProfile dflags)) [] >> return ()
+              let used_info = map (cmmInfoTableToInfoProvEnt this_mod)
+                                              (mapMaybe topInfoTable cmm)
+              ((), cmm2) <- getCmm $ mapM_ emitInfoTableProv used_info
+              return (cmm ++ cmm2, used_info)
+            (cmm, _) = runC dflags no_module st fcode
             (warnings,errors) = getMessages pst
         if not (isEmptyBag errors)
          then return (warnings, errors, Nothing)
diff --git a/compiler/GHC/CoreToStg.hs b/compiler/GHC/CoreToStg.hs
index 5a53cc933fa1..2f9e3816ef94 100644
--- a/compiler/GHC/CoreToStg.hs
+++ b/compiler/GHC/CoreToStg.hs
@@ -22,6 +22,7 @@ import GHC.Core.Utils   ( exprType, findDefault, isJoinBind
                         , exprIsTickedString_maybe )
 import GHC.Core.Opt.Arity   ( manifestArity )
 import GHC.Stg.Syntax
+import GHC.Stg.Debug
 
 import GHC.Core.Type
 import GHC.Types.RepType
@@ -46,6 +47,7 @@ import GHC.Driver.Session
 import GHC.Platform.Ways
 import GHC.Driver.Ppr
 import GHC.Types.ForeignCall
+import GHC.Types.IPE
 import GHC.Types.Demand    ( isUsedOnceDmd )
 import GHC.Builtin.PrimOps ( PrimCall(..) )
 import GHC.Types.SrcLoc    ( mkGeneralSrcSpan )
@@ -226,14 +228,21 @@ import qualified Data.Set as Set
 -- Setting variable info: top-level, binds, RHSs
 -- --------------------------------------------------------------
 
-coreToStg :: DynFlags -> Module -> CoreProgram
-          -> ([StgTopBinding], CollectedCCs)
-coreToStg dflags this_mod pgm
-  = (pgm', final_ccs)
+
+coreToStg :: DynFlags -> Module -> ModLocation -> CoreProgram
+          -> ([StgTopBinding], InfoTableProvMap, CollectedCCs)
+coreToStg dflags this_mod ml pgm
+  = (pgm'', denv, final_ccs)
   where
     (_, (local_ccs, local_cc_stacks), pgm')
       = coreTopBindsToStg dflags this_mod emptyVarEnv emptyCollectedCCs pgm
 
+    -- See Note [Mapping Info Tables to Source Positions]
+    (!pgm'', !denv) =
+        if gopt Opt_InfoTableMap dflags
+          then collectDebugInformation dflags ml pgm'
+          else (pgm', emptyInfoTableProvMap)
+
     prof = WayProf `Set.member` ways dflags
 
     final_ccs
diff --git a/compiler/GHC/Driver/CodeOutput.hs b/compiler/GHC/Driver/CodeOutput.hs
index 134ee2f96062..f6b9e9738cc7 100644
--- a/compiler/GHC/Driver/CodeOutput.hs
+++ b/compiler/GHC/Driver/CodeOutput.hs
@@ -10,6 +10,7 @@ module GHC.Driver.CodeOutput
    ( codeOutput
    , outputForeignStubs
    , profilingInitCode
+   , ipInitCode
    )
 where
 
@@ -37,6 +38,7 @@ import qualified GHC.Data.Stream as Stream
 
 import GHC.SysTools.FileCleanup
 
+
 import GHC.Utils.Error
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
@@ -70,7 +72,7 @@ codeOutput :: Logger
            -> Module
            -> FilePath
            -> ModLocation
-           -> ForeignStubs
+           -> (a -> ForeignStubs)
            -> [(ForeignSrcLang, FilePath)]
            -- ^ additional files to be compiled with the C compiler
            -> [UnitId]
@@ -80,7 +82,7 @@ codeOutput :: Logger
                   [(ForeignSrcLang, FilePath)]{-foreign_fps-},
                   a)
 
-codeOutput logger dflags unit_state this_mod filenm location foreign_stubs foreign_fps pkg_deps
+codeOutput logger dflags unit_state this_mod filenm location genForeignStubs foreign_fps pkg_deps
   cmm_stream
   =
     do  {
@@ -107,7 +109,6 @@ codeOutput logger dflags unit_state this_mod filenm location foreign_stubs forei
                 ; return cmm
                 }
 
-        ; stubs_exist <- outputForeignStubs logger dflags unit_state this_mod location foreign_stubs
         ; a <- case backend dflags of
                  NCG         -> outputAsm logger dflags this_mod location filenm
                                           linted_cmm_stream
@@ -115,6 +116,8 @@ codeOutput logger dflags unit_state this_mod filenm location foreign_stubs forei
                  LLVM        -> outputLlvm logger dflags filenm linted_cmm_stream
                  Interpreter -> panic "codeOutput: Interpreter"
                  NoBackend   -> panic "codeOutput: NoBackend"
+        ; let stubs = genForeignStubs a
+        ; stubs_exist <- outputForeignStubs logger dflags unit_state this_mod location stubs
         ; return (filenm, stubs_exist, foreign_fps, a)
         }
 
@@ -225,9 +228,14 @@ outputForeignStubs logger dflags unit_state mod location stubs
 
         -- we need the #includes from the rts package for the stub files
         let rts_includes =
-               let rts_pkg = unsafeLookupUnitId unit_state rtsUnitId in
-               concatMap mk_include (unitIncludes rts_pkg)
-            mk_include i = "#include \"" ++ ST.unpack i ++ "\"\n"
+               let mrts_pkg = lookupUnitId unit_state rtsUnitId
+                   mk_include i = "#include \"" ++ ST.unpack i ++ "\"\n"
+               in case mrts_pkg of
+                    Just rts_pkg -> concatMap mk_include (unitIncludes rts_pkg)
+                    -- This case only happens when compiling foreign stub for the rts
+                    -- library itself. The only time we do this at the moment is for
+                    -- IPE information for the RTS info tables
+                    Nothing -> ""
 
             -- wrapper code mentions the ffi_arg type, which comes from ffi.h
             ffi_includes
@@ -314,3 +322,108 @@ profilingInitCode platform this_mod (local_CCs, singleton_CCSs)
                          | cc <- ccs
                          ] ++ [text "NULL"])
       <> semi
+
+-- | Generate code to initialise info pointer origin
+-- See note [Mapping Info Tables to Source Positions]
+ipInitCode :: DynFlags -> Module -> [InfoProvEnt] -> SDoc
+ipInitCode dflags this_mod ents
+ = if not (gopt Opt_InfoTableMap dflags)
+    then empty
+    else withPprStyle (PprCode CStyle) $ vcat
+    $  map emit_ipe_decl ents
+    ++ [emit_ipe_list ents]
+    ++ [ text "static void ip_init_" <> ppr this_mod
+            <> text "(void) __attribute__((constructor));"
+       , text "static void ip_init_" <> ppr this_mod <> text "(void)"
+       , braces (vcat
+                 [ text "registerInfoProvList" <> parens local_ipe_list_label <> semi
+                 ])
+       ]
+ where
+   platform = targetPlatform dflags
+   emit_ipe_decl ipe =
+       text "extern InfoProvEnt" <+> ipe_lbl <> text "[];"
+     where ipe_lbl = pprCLabel platform CStyle (mkIPELabel ipe)
+   local_ipe_list_label = text "local_ipe_" <> ppr this_mod
+   emit_ipe_list ipes =
+      text "static InfoProvEnt *" <> local_ipe_list_label <> text "[] ="
+      <+> braces (vcat $ [ pprCLabel platform CStyle (mkIPELabel ipe) <> comma
+                         | ipe <- ipes
+                         ] ++ [text "NULL"])
+      <> semi
+
+
+{-
+Note [Mapping Info Tables to Source Positions]
+
+This note describes what the `-finfo-table-map` flag achieves.
+
+When debugging memory issues it is very useful to be able to map a specific closure
+to a position in the source. The prime example is being able to map a THUNK to
+a specific place in the source program, the mapping is usually quite precise because
+a fresh info table is created for each distinct THUNK.
+
+There are three parts to the implementation
+
+1. In CoreToStg, the SourceNote information is used in order to give a source location to
+some specific closures.
+2. In StgToCmm, the actually used info tables are recorded.
+3. During code generation, a mapping from the info table to the statically
+determined location is emitted which can then be queried at runtime by
+various tools.
+
+-- Giving Source Locations to Closures
+
+At the moment thunk and constructor closures are added to the map. This information
+is collected in the `InfoTableProvMap` which provides a mapping from:
+
+1. Data constructors to a list of where they are used.
+2. `Name`s and where they originate from.
+
+During the CoreToStg phase, this map is populated whenever something is turned into
+a StgRhsClosure or an StgConApp. The current source position is recorded
+depending on the location indicated by the surrounding SourceNote.
+
+The functions which add information to the map are `recordStgIdPosition` and
+`incDc`.
+
+When the -fdistinct-constructor-tables` flag is turned on then every
+usage of a data constructor gets its own distinct info table. This is orchestrated
+in `coreToStgExpr` where an incrementing number is used to distinguish each
+occurrence of a data constructor.
+
+-- StgToCmm
+
+The info tables which are actually used in the generated program are recorded during the
+conversion from STG to Cmm. The used info tables are recorded in the `emitProc` function.
+All the used info tables are recorded in the `cgs_used_info` field. This step
+is necessary because when the information about names is collected in the previous
+phase it's unpredictable about which names will end up needing info tables. If
+you don't record which ones are actually used then you end up generating code
+which references info tables which don't exist.
+
+-- Code Generation
+
+The output of these two phases is combined together during code generation.
+A C stub is generated which
+creates the static map from info table pointer to the information about where that
+info table was created from. This is created by `ipInitCode` in the same manner as a
+C stub is generated for cost centres.
+
+This information can be consumed in two ways.
+
+1. The complete mapping is emitted into the eventlog so that external tools such
+as eventlog2html can use the information with the heap profile by info table mode.
+2. The `lookupIPE` function can be used via the `whereFrom#` primop to introspect
+information about a closure in a running Haskell program.
+
+Note [Distinct Info Tables for Constructors]
+
+In the old times, each usage of a data constructor used the same info table.
+This made it impossible to distinguish which actual usuage of a data constructor was
+contributing primarily to the allocation in a program. Using the `-fdistinct-info-tables` flag you
+can cause code generation to generate a distinct info table for each usage of
+a constructor. Then, when inspecting the heap you can see precisely which usage of a constructor
+was responsible for each allocation.
+
+-}
diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs
index 728b6159a699..955b6fabd162 100644
--- a/compiler/GHC/Driver/Flags.hs
+++ b/compiler/GHC/Driver/Flags.hs
@@ -132,6 +132,8 @@ data GeneralFlag
    | Opt_FastLlvm                       -- hidden flag
    | Opt_NoTypeableBinds
 
+   | Opt_InfoTableMap
+
    | Opt_WarnIsError                    -- -Werror; makes warnings fatal
    | Opt_ShowWarnGroups                 -- Show the group a warning belongs to
    | Opt_HideSourcePaths                -- Hide module source/object paths
diff --git a/compiler/GHC/Driver/Hooks.hs b/compiler/GHC/Driver/Hooks.hs
index cb21072bd6a4..4cf62412b558 100644
--- a/compiler/GHC/Driver/Hooks.hs
+++ b/compiler/GHC/Driver/Hooks.hs
@@ -46,6 +46,7 @@ import GHC.Types.Id
 import GHC.Types.SrcLoc
 import GHC.Types.Basic
 import GHC.Types.CostCentre
+import GHC.Types.IPE
 import GHC.Types.Meta
 import GHC.Types.HpcInfo
 
@@ -70,6 +71,7 @@ import GHC.Data.Bag
 
 import qualified Data.Kind
 import System.Process
+import GHC.Utils.Outputable ( SDoc )
 
 {-
 ************************************************************************
@@ -143,8 +145,8 @@ data Hooks = Hooks
   , getValueSafelyHook     :: !(Maybe (HscEnv -> Name -> Type
                                                           -> IO (Maybe HValue)))
   , createIservProcessHook :: !(Maybe (CreateProcess -> IO ProcessHandle))
-  , stgToCmmHook           :: !(Maybe (DynFlags -> Module -> [TyCon] -> CollectedCCs
-                                 -> [CgStgTopBinding] -> HpcInfo -> Stream IO CmmGroup ModuleLFInfos))
+  , stgToCmmHook           :: !(Maybe (DynFlags -> Module -> InfoTableProvMap -> [TyCon] -> CollectedCCs
+                                 -> [CgStgTopBinding] -> HpcInfo -> Stream IO CmmGroup (SDoc, ModuleLFInfos)))
   , cmmToRawCmmHook        :: !(forall a . Maybe (DynFlags -> Maybe Module -> Stream IO CmmGroupSRTs a
                                  -> IO (Stream IO RawCmmGroup a)))
   }
diff --git a/compiler/GHC/Driver/Main.hs b/compiler/GHC/Driver/Main.hs
index f16685775bb2..fea51a7f96d9 100644
--- a/compiler/GHC/Driver/Main.hs
+++ b/compiler/GHC/Driver/Main.hs
@@ -1,6 +1,8 @@
 {-# LANGUAGE CPP                      #-}
 {-# LANGUAGE BangPatterns             #-}
 {-# LANGUAGE NondecreasingIndentation #-}
+{-# LANGUAGE LambdaCase               #-}
+{-# LANGUAGE ViewPatterns             #-}
 {-# OPTIONS_GHC -fprof-auto-top #-}
 
 -------------------------------------------------------------------------------
@@ -186,6 +188,7 @@ import GHC.Types.Var.Env       ( emptyTidyEnv )
 import GHC.Types.Error
 import GHC.Types.Fixity.Env
 import GHC.Types.CostCentre
+import GHC.Types.IPE
 import GHC.Types.Unique.Supply
 import GHC.Types.SourceFile
 import GHC.Types.SrcLoc
@@ -1536,19 +1539,18 @@ hscGenHardCode hsc_env cgguts location output_filename = do
                        corePrepPgm hsc_env this_mod location
                                    core_binds data_tycons
         -----------------  Convert to STG ------------------
-        (stg_binds, (caf_ccs, caf_cc_stacks))
+        (stg_binds, denv, (caf_ccs, caf_cc_stacks))
             <- {-# SCC "CoreToStg" #-}
-               myCoreToStg logger dflags this_mod prepd_binds
+               myCoreToStg logger dflags this_mod location prepd_binds
 
-        let cost_centre_info = (S.toList local_ccs ++ caf_ccs, caf_cc_stacks)
+        let cost_centre_info =
+              (S.toList local_ccs ++ caf_ccs, caf_cc_stacks)
             platform = targetPlatform dflags
             prof_init
-               | sccProfilingEnabled dflags = profilingInitCode platform this_mod cost_centre_info
-               | otherwise = empty
-            foreign_stubs = foreign_stubs0 `appendStubC` prof_init
+              | sccProfilingEnabled dflags = profilingInitCode platform this_mod cost_centre_info
+              | otherwise = empty
 
         ------------------  Code generation ------------------
-
         -- The back-end is streamed: each top-level function goes
         -- from Stg all the way to asm before dealing with the next
         -- top-level function, so showPass isn't very useful here.
@@ -1558,7 +1560,7 @@ hscGenHardCode hsc_env cgguts location output_filename = do
                    (text "CodeGen"<+>brackets (ppr this_mod))
                    (const ()) $ do
             cmms <- {-# SCC "StgToCmm" #-}
-                            doCodeGen hsc_env this_mod data_tycons
+                            doCodeGen hsc_env this_mod denv data_tycons
                                 cost_centre_info
                                 stg_binds hpc_info
 
@@ -1574,6 +1576,8 @@ hscGenHardCode hsc_env cgguts location output_filename = do
                   return a
                 rawcmms1 = Stream.mapM dump rawcmms0
 
+            let foreign_stubs st = foreign_stubs0 `appendStubC` prof_init `appendStubC` (cgIPEStub st)
+
             (output_filename, (_stub_h_exists, stub_c_exists), foreign_fps, cg_infos)
                 <- {-# SCC "codeOutput" #-}
                   codeOutput logger dflags (hsc_units hsc_env) this_mod output_filename location
@@ -1615,24 +1619,24 @@ hscInteractive hsc_env cgguts location = do
 
 ------------------------------
 
-hscCompileCmmFile :: HscEnv -> FilePath -> FilePath -> IO ()
+hscCompileCmmFile :: HscEnv -> FilePath -> FilePath -> IO (Maybe FilePath)
 hscCompileCmmFile hsc_env filename output_filename = runHsc hsc_env $ do
     let dflags   = hsc_dflags hsc_env
     let logger   = hsc_logger hsc_env
     let hooks    = hsc_hooks hsc_env
         home_unit = hsc_home_unit hsc_env
         platform  = targetPlatform dflags
-    cmm <- ioMsgMaybe
+        -- Make up a module name to give the NCG. We can't pass bottom here
+        -- lest we reproduce #11784.
+        mod_name = mkModuleName $ "Cmm$" ++ FilePath.takeFileName filename
+        cmm_mod = mkHomeModule home_unit mod_name
+    (cmm, ents) <- ioMsgMaybe
                $ do
                   (warns,errs,cmm) <- withTiming logger dflags (text "ParseCmm"<+>brackets (text filename)) (\_ -> ())
-                                       $ parseCmmFile dflags home_unit filename
+                                       $ parseCmmFile dflags cmm_mod home_unit filename
                   return (mkMessages (fmap pprWarning warns `unionBags` fmap pprError errs), cmm)
     liftIO $ do
         dumpIfSet_dyn logger dflags Opt_D_dump_cmm_verbose_by_proc "Parsed Cmm" FormatCMM (pdoc platform cmm)
-        let -- Make up a module name to give the NCG. We can't pass bottom here
-            -- lest we reproduce #11784.
-            mod_name = mkModuleName $ "Cmm$" ++ FilePath.takeFileName filename
-            cmm_mod = mkHomeModule home_unit mod_name
 
         -- Compile decls in Cmm files one decl at a time, to avoid re-ordering
         -- them in SRT analysis.
@@ -1651,9 +1655,14 @@ hscCompileCmmFile hsc_env filename output_filename = runHsc hsc_env $ do
           Nothing -> cmmToRawCmm logger dflags         (Stream.yield cmmgroup)
           Just h  -> h                  dflags Nothing (Stream.yield cmmgroup)
 
-        _ <- codeOutput logger dflags (hsc_units hsc_env) cmm_mod output_filename no_loc NoStubs [] []
+        let foreign_stubs _ =
+              let ip_init = ipInitCode dflags cmm_mod ents
+              in NoStubs `appendStubC` ip_init
+
+        (_output_filename, (_stub_h_exists, stub_c_exists), _foreign_fps, _caf_infos)
+          <- codeOutput logger dflags (hsc_units hsc_env) cmm_mod output_filename no_loc foreign_stubs [] []
              rawCmms
-        return ()
+        return stub_c_exists
   where
     no_loc = ModLocation{ ml_hs_file  = Just filename,
                           ml_hi_file  = panic "hscCompileCmmFile: no hi file",
@@ -1680,7 +1689,7 @@ This reduces residency towards the end of the CodeGen phase significantly
 (5-10%).
 -}
 
-doCodeGen   :: HscEnv -> Module -> [TyCon]
+doCodeGen   :: HscEnv -> Module -> InfoTableProvMap -> [TyCon]
             -> CollectedCCs
             -> [StgTopBinding]
             -> HpcInfo
@@ -1688,7 +1697,7 @@ doCodeGen   :: HscEnv -> Module -> [TyCon]
          -- Note we produce a 'Stream' of CmmGroups, so that the
          -- backend can be run incrementally.  Otherwise it generates all
          -- the C-- up front, which has a significant space cost.
-doCodeGen hsc_env this_mod data_tycons
+doCodeGen hsc_env this_mod denv data_tycons
               cost_centre_info stg_binds hpc_info = do
     let dflags = hsc_dflags hsc_env
     let logger = hsc_logger hsc_env
@@ -1703,10 +1712,10 @@ doCodeGen hsc_env this_mod data_tycons
                         Nothing -> StgToCmm.codeGen logger
                         Just h  -> h
 
-    let cmm_stream :: Stream IO CmmGroup ModuleLFInfos
+    let cmm_stream :: Stream IO CmmGroup (SDoc, ModuleLFInfos)
         -- See Note [Forcing of stg_binds]
         cmm_stream = stg_binds_w_fvs `seqList` {-# SCC "StgToCmm" #-}
-            stg_to_cmm dflags this_mod data_tycons cost_centre_info stg_binds_w_fvs hpc_info
+            stg_to_cmm dflags this_mod denv data_tycons cost_centre_info stg_binds_w_fvs hpc_info
 
         -- codegen consumes a stream of CmmGroup, and produces a new
         -- stream of CmmGroup (not necessarily synchronised: one
@@ -1723,12 +1732,12 @@ doCodeGen hsc_env this_mod data_tycons
 
         pipeline_stream :: Stream IO CmmGroupSRTs CgInfos
         pipeline_stream = do
-          (non_cafs, lf_infos) <-
+          (non_cafs, (used_info, lf_infos)) <-
             {-# SCC "cmmPipeline" #-}
             Stream.mapAccumL_ (cmmPipeline hsc_env) (emptySRT this_mod) ppr_stream1
               <&> first (srtMapNonCAFs . moduleSRTMap)
 
-          return CgInfos{ cgNonCafs = non_cafs, cgLFInfos = lf_infos }
+          return CgInfos{ cgNonCafs = non_cafs, cgLFInfos = lf_infos, cgIPEStub = used_info }
 
         dump2 a = do
           unless (null a) $
@@ -1737,19 +1746,20 @@ doCodeGen hsc_env this_mod data_tycons
 
     return (Stream.mapM dump2 pipeline_stream)
 
-myCoreToStg :: Logger -> DynFlags -> Module -> CoreProgram
+myCoreToStg :: Logger -> DynFlags -> Module -> ModLocation -> CoreProgram
             -> IO ( [StgTopBinding] -- output program
+                  , InfoTableProvMap
                   , CollectedCCs )  -- CAF cost centre info (declared and used)
-myCoreToStg logger dflags this_mod prepd_binds = do
-    let (stg_binds, cost_centre_info)
+myCoreToStg logger dflags this_mod ml prepd_binds = do
+    let (stg_binds, denv, cost_centre_info)
          = {-# SCC "Core2Stg" #-}
-           coreToStg dflags this_mod prepd_binds
+           coreToStg dflags this_mod ml prepd_binds
 
     stg_binds2
         <- {-# SCC "Stg2Stg" #-}
            stg2stg logger dflags this_mod stg_binds
 
-    return (stg_binds2, cost_centre_info)
+    return (stg_binds2, denv, cost_centre_info)
 
 
 {- **********************************************************************
diff --git a/compiler/GHC/Driver/Pipeline.hs b/compiler/GHC/Driver/Pipeline.hs
index df54f35e04e4..e0367d08d448 100644
--- a/compiler/GHC/Driver/Pipeline.hs
+++ b/compiler/GHC/Driver/Pipeline.hs
@@ -1394,7 +1394,9 @@ runPhase (RealPhase Cmm) input_fn = do
        let next_phase = hscPostBackendPhase HsSrcFile (backend dflags)
        output_fn <- phaseOutputFilename next_phase
        PipeState{hsc_env} <- getPipeState
-       liftIO $ hscCompileCmmFile hsc_env input_fn output_fn
+       mstub <- liftIO $ hscCompileCmmFile hsc_env input_fn output_fn
+       stub_o <- liftIO (mapM (compileStub hsc_env) mstub)
+       setForeignOs (maybeToList stub_o)
        return (RealPhase next_phase, output_fn)
 
 -----------------------------------------------------------------------------
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index f7424f500375..98c46427e691 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -2776,6 +2776,8 @@ dynamic_flags_deps = [
   , make_ord_flag defGhcFlag "fprof-callers"
          (HasArg setCallerCcFilters)
 
+  , make_ord_flag defGhcFlag "finfo-table-map"
+      (NoArg (setGeneralFlag Opt_InfoTableMap))
         ------ Compiler flags -----------------------------------------------
 
   , make_ord_flag defGhcFlag "fasm"             (NoArg (setObjBackend NCG))
diff --git a/compiler/GHC/HsToCore/Coverage.hs b/compiler/GHC/HsToCore/Coverage.hs
index df7d00071bbc..8180696700aa 100644
--- a/compiler/GHC/HsToCore/Coverage.hs
+++ b/compiler/GHC/HsToCore/Coverage.hs
@@ -1036,13 +1036,17 @@ data TickTransEnv = TTE { fileName     :: FastString
 data TickishType = ProfNotes | HpcTicks | Breakpoints | SourceNotes
                  deriving (Eq)
 
+sourceNotesEnabled :: DynFlags -> Bool
+sourceNotesEnabled dflags =
+  (debugLevel dflags > 0) || (gopt Opt_InfoTableMap dflags)
+
 coveragePasses :: DynFlags -> [TickishType]
 coveragePasses dflags =
     ifa (breakpointsEnabled dflags)          Breakpoints $
     ifa (gopt Opt_Hpc dflags)                HpcTicks $
     ifa (sccProfilingEnabled dflags &&
          profAuto dflags /= NoProfAuto)      ProfNotes $
-    ifa (debugLevel dflags > 0)              SourceNotes []
+    ifa (sourceNotesEnabled dflags)          SourceNotes []
   where ifa f x xs | f         = x:xs
                    | otherwise = xs
 
diff --git a/compiler/GHC/Stg/Debug.hs b/compiler/GHC/Stg/Debug.hs
new file mode 100644
index 000000000000..e6e85f7db71f
--- /dev/null
+++ b/compiler/GHC/Stg/Debug.hs
@@ -0,0 +1,140 @@
+{-# LANGUAGE TupleSections #-}
+-- This module contains functions which implement
+-- the -finfo-table-map and -fdistinct-constructor-tables flags
+module GHC.Stg.Debug(collectDebugInformation) where
+
+
+import GHC.Prelude
+
+import GHC.Core
+import GHC.Stg.Syntax
+
+import GHC.Types.Id
+import GHC.Core.DataCon
+import GHC.Types.IPE
+import GHC.Unit.Module
+import GHC.Types.Name   ( getName, getOccName, occNameString, nameSrcSpan)
+import GHC.Data.FastString
+import GHC.Driver.Session
+import GHC.Driver.Ppr
+
+import Control.Monad (when)
+import Control.Monad.Trans.Reader
+import Control.Monad.Trans.State
+import Control.Monad.Trans.Class
+import GHC.Types.Unique.Map
+import GHC.Types.SrcLoc
+import Control.Applicative
+
+data SpanWithLabel = SpanWithLabel RealSrcSpan String
+
+data R = R { rDynFlags :: DynFlags, rModLocation :: ModLocation, rSpan :: Maybe SpanWithLabel }
+
+type M a = ReaderT R (State InfoTableProvMap) a
+
+withSpan :: (RealSrcSpan, String) -> M a -> M a
+withSpan (new_s, new_l) act = local maybe_replace act
+  where
+    maybe_replace r@R{ rModLocation = cur_mod, rSpan = Just (SpanWithLabel old_s _old_l) }
+      -- prefer spans from the current module
+      | Just (unpackFS $ srcSpanFile old_s) == ml_hs_file cur_mod
+      , Just (unpackFS $ srcSpanFile new_s) /= ml_hs_file cur_mod
+      = r
+    maybe_replace r
+      = r { rSpan = Just (SpanWithLabel new_s new_l) }
+
+collectDebugInformation :: DynFlags -> ModLocation -> [StgTopBinding] -> ([StgTopBinding], InfoTableProvMap)
+collectDebugInformation dflags ml bs =
+    runState (runReaderT (mapM collectTop bs) (R dflags ml Nothing)) emptyInfoTableProvMap
+
+collectTop :: StgTopBinding -> M StgTopBinding
+collectTop (StgTopLifted t) = StgTopLifted <$> collectStgBind t
+collectTop tb = return tb
+
+collectStgBind :: StgBinding -> M StgBinding
+collectStgBind  (StgNonRec bndr rhs) = do
+    rhs' <- collectStgRhs bndr rhs
+    return (StgNonRec bndr rhs')
+collectStgBind (StgRec pairs) = do
+    es <- mapM (\(b, e) -> (b,) <$> collectStgRhs b e) pairs
+    return (StgRec es)
+
+collectStgRhs :: Id -> StgRhs -> M StgRhs
+collectStgRhs bndr (StgRhsClosure ext cc us bs e)= do
+  e' <- collectExpr e
+  recordInfo bndr e'
+  return $ StgRhsClosure ext cc us bs e'
+collectStgRhs _bndr (StgRhsCon cc dc args) = do
+  --n' <- incDc dc ticks
+  return (StgRhsCon cc dc args)
+
+
+recordInfo :: Id -> StgExpr -> M ()
+recordInfo bndr new_rhs = do
+  modLoc <- asks rModLocation
+  let
+    thisFile = maybe nilFS mkFastString $ ml_hs_file modLoc
+    -- A span from the ticks surrounding the new_rhs
+    best_span = quickSourcePos thisFile new_rhs
+    -- A back-up span if the bndr had a source position, many do not (think internally generated ids)
+    bndr_span = (\s -> SpanWithLabel s (occNameString (getOccName bndr)))
+                  <$> srcSpanToRealSrcSpan (nameSrcSpan (getName bndr))
+  recordStgIdPosition bndr best_span bndr_span
+
+collectExpr :: StgExpr -> M StgExpr
+collectExpr = go
+  where
+    go (StgApp occ as) = return $ StgApp occ as
+    go (StgLit lit) = return $ StgLit lit
+    go (StgConApp dc as tys) = do
+--      n' <- incDc dc []
+      return (StgConApp dc as tys)
+    go (StgOpApp op as ty) = return (StgOpApp op as ty)
+    go (StgCase scrut bndr ty alts) =
+      StgCase <$> collectExpr scrut <*> pure bndr <*> pure ty <*> mapM collectAlt alts
+    go (StgLet ext bind body) = do
+        bind' <- collectStgBind bind
+        body' <- go body
+        return (StgLet ext bind' body')
+    go (StgLetNoEscape ext bind body) = do
+        bind' <- collectStgBind bind
+        body' <- go body
+        return (StgLetNoEscape ext bind' body')
+
+    go (StgTick tick e) = do
+       let k = case tick of
+                SourceNote ss fp -> withSpan (ss, fp)
+                _ -> id
+       e' <- k (go e)
+       return (StgTick tick e')
+
+collectAlt :: StgAlt -> M StgAlt
+collectAlt (ac, bs, e) = (ac, bs, ) <$> collectExpr e
+
+-- | Try to find the best source position surrounding a 'StgExpr'. The
+-- heuristic strips ticks from the current expression until it finds one which
+-- is from the module currently being compiled. This is the same method that
+-- the DWARF information uses to give locations to info tables.
+--
+-- It is usually a better alternative than using the 'RealSrcSpan' which is carefully
+-- propagated downwards by 'withSpan'. It's "quick" because it works only using immediate context rather
+-- than looking at the parent context like 'withSpan'
+quickSourcePos :: FastString -> StgExpr -> Maybe SpanWithLabel
+quickSourcePos cur_mod (StgTick (SourceNote ss m) e)
+  | srcSpanFile ss == cur_mod = Just (SpanWithLabel ss m)
+  | otherwise = quickSourcePos cur_mod e
+quickSourcePos _ _ = Nothing
+
+recordStgIdPosition :: Id -> Maybe SpanWithLabel -> Maybe SpanWithLabel -> M ()
+recordStgIdPosition id best_span ss = do
+  dflags <- asks rDynFlags
+  when (gopt Opt_InfoTableMap dflags) $ do
+    let tyString = showPpr dflags (idType id)
+    cc <- asks rSpan
+    --Useful for debugging why a certain Id gets given a certain span
+    --pprTraceM "recordStgIdPosition" (ppr id $$ ppr cc $$ ppr best_span $$ ppr ss)
+    case best_span <|> cc <|> ss of
+      Nothing -> return ()
+      Just (SpanWithLabel rss d) ->
+        lift $ modify (\env -> env { provClosure = addToUniqMap (provClosure env) (idName id) (idType id, rss, d)})
+
diff --git a/compiler/GHC/StgToCmm.hs b/compiler/GHC/StgToCmm.hs
index 3d1f962267d5..f89f465d12e8 100644
--- a/compiler/GHC/StgToCmm.hs
+++ b/compiler/GHC/StgToCmm.hs
@@ -19,7 +19,7 @@ import GHC.Prelude as Prelude
 import GHC.Driver.Backend
 import GHC.Driver.Session
 
-import GHC.StgToCmm.Prof (initCostCentres, ldvEnter)
+import GHC.StgToCmm.Prof (initInfoTableProv, initCostCentres, ldvEnter)
 import GHC.StgToCmm.Monad
 import GHC.StgToCmm.Env
 import GHC.StgToCmm.Bind
@@ -39,6 +39,7 @@ import GHC.Cmm.Graph
 import GHC.Stg.Syntax
 
 import GHC.Types.CostCentre
+import GHC.Types.IPE
 import GHC.Types.HpcInfo
 import GHC.Types.Id
 import GHC.Types.Id.Info
@@ -64,41 +65,56 @@ import GHC.SysTools.FileCleanup
 import GHC.Data.Stream
 import GHC.Data.OrdList
 
-import Data.IORef
 import Control.Monad (when,void)
 import GHC.Utils.Misc
 import System.IO.Unsafe
 import qualified Data.ByteString as BS
+import Data.Maybe
+import Data.IORef
+
+data CodeGenState = CodeGenState { codegen_used_info :: !(OrdList CmmInfoTable)
+                                 , codegen_state :: !CgState }
+
 
 codeGen :: Logger
         -> DynFlags
         -> Module
+        -> InfoTableProvMap
         -> [TyCon]
         -> CollectedCCs                -- (Local/global) cost-centres needing declaring/registering.
         -> [CgStgTopBinding]           -- Bindings to convert
         -> HpcInfo
-        -> Stream IO CmmGroup ModuleLFInfos
-                                       -- Output as a stream, so codegen can
+        -> Stream IO CmmGroup (SDoc, ModuleLFInfos)       -- Output as a stream, so codegen can
                                        -- be interleaved with output
 
-codeGen logger dflags this_mod data_tycons
+codeGen logger dflags this_mod ip_map@(InfoTableProvMap _) data_tycons
         cost_centre_info stg_binds hpc_info
   = do  {     -- cg: run the code generator, and yield the resulting CmmGroup
               -- Using an IORef to store the state is a bit crude, but otherwise
-              -- we would need to add a state monad layer.
-        ; cgref <- liftIO $ newIORef =<< initC
-        ; let cg :: FCode () -> Stream IO CmmGroup ()
+              -- we would need to add a state monad layer which regresses
+              -- allocations by 0.5-2%.
+        ; cgref <- liftIO $ initC >>= \s -> newIORef (CodeGenState mempty s)
+        ; let cg :: FCode a -> Stream IO CmmGroup a
               cg fcode = do
-                cmm <- liftIO . withTimingSilent logger dflags (text "STG -> Cmm") (`seq` ()) $ do
-                         st <- readIORef cgref
+                (a, cmm) <- liftIO . withTimingSilent logger dflags (text "STG -> Cmm") (`seq` ()) $ do
+                         CodeGenState ts st <- readIORef cgref
                          let (a,st') = runC dflags this_mod st (getCmm fcode)
 
                          -- NB. stub-out cgs_tops and cgs_stmts.  This fixes
                          -- a big space leak.  DO NOT REMOVE!
-                         writeIORef cgref $! st'{ cgs_tops = nilOL,
-                                                  cgs_stmts = mkNop }
+                         -- This is observed by the #3294 test
+                         let !used_info
+                                | gopt Opt_InfoTableMap dflags = toOL (mapMaybe topInfoTable (snd a)) `mappend` ts
+                                | otherwise = mempty
+                         writeIORef cgref $!
+                                    CodeGenState used_info
+                                      (st'{ cgs_tops = nilOL,
+                                            cgs_stmts = mkNop
+                                          })
+
                          return a
                 yield cmm
+                return a
 
                -- Note [codegen-split-init] the cmm_init block must come
                -- FIRST.  This is because when -split-objs is on we need to
@@ -107,7 +123,6 @@ codeGen logger dflags this_mod data_tycons
         ; cg (mkModuleInit cost_centre_info this_mod hpc_info)
 
         ; mapM_ (cg . cgTopBinding logger dflags) stg_binds
-
                 -- Put datatype_stuff after code_stuff, because the
                 -- datatype closure table (for enumeration types) to
                 -- (say) PrelBase_True_closure, which is defined in
@@ -121,7 +136,11 @@ codeGen logger dflags this_mod data_tycons
 
         ; mapM_ do_tycon data_tycons
 
-        ; cg_id_infos <- cgs_binds <$> liftIO (readIORef cgref)
+        ; final_state <- liftIO (readIORef cgref)
+        ; let cg_id_infos = cgs_binds . codegen_state $ final_state
+              used_info = fromOL . codegen_used_info $ final_state
+
+        ; !foreign_stub <- cg (initInfoTableProv used_info ip_map this_mod)
 
           -- See Note [Conveying CAF-info and LFInfo between modules] in
           -- GHC.StgToCmm.Types
@@ -136,7 +155,7 @@ codeGen logger dflags this_mod data_tycons
                 | otherwise
                 = mkNameEnv (Prelude.map extractInfo (eltsUFM cg_id_infos))
 
-        ; return generatedInfo
+        ; return (foreign_stub, generatedInfo)
         }
 
 ---------------------------------------------------------------
diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs
index ddd8a8a988e5..5c9b904896a2 100644
--- a/compiler/GHC/StgToCmm/Closure.hs
+++ b/compiler/GHC/StgToCmm/Closure.hs
@@ -97,6 +97,7 @@ import GHC.Types.Basic
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
 import GHC.Utils.Misc
+import GHC.Unit.Module
 
 import Data.Coerce (coerce)
 import qualified Data.ByteString.Char8 as BS8
diff --git a/compiler/GHC/StgToCmm/Monad.hs b/compiler/GHC/StgToCmm/Monad.hs
index 3066609d7e26..163f7a2a8a8a 100644
--- a/compiler/GHC/StgToCmm/Monad.hs
+++ b/compiler/GHC/StgToCmm/Monad.hs
@@ -59,9 +59,8 @@ module GHC.StgToCmm.Monad (
         -- more localised access to monad state
         CgIdInfo(..),
         getBinds, setBinds,
-
         -- out of general friendliness, we also export ...
-        CgInfoDownwards(..), CgState(..)        -- non-abstract
+        CgInfoDownwards(..), CgState(..) -- non-abstract
     ) where
 
 import GHC.Prelude hiding( sequence, succ )
@@ -335,6 +334,9 @@ data CgState
      cgs_hp_usg  :: HeapUsage,
 
      cgs_uniqs :: UniqSupply }
+-- If you are wondering why you have to be careful forcing CgState then
+-- the reason is the knot-tying in 'getHeapUsage'. This problem is tracked
+-- in #19245
 
 data HeapUsage   -- See Note [Virtual and real heap pointers]
   = HeapUsage {
@@ -400,7 +402,6 @@ s1 `addCodeBlocksFrom` s2
   = s1 { cgs_stmts = cgs_stmts s1 CmmGraph.<*> cgs_stmts s2,
          cgs_tops  = cgs_tops  s1 `appOL` cgs_tops  s2 }
 
-
 -- The heap high water mark is the larger of virtHp and hwHp.  The latter is
 -- only records the high water marks of forked-off branches, so to find the
 -- heap high water mark you have to take the max of virtHp and hwHp.  Remember,
@@ -828,15 +829,15 @@ emitProc mb_info lbl live blocks offset do_layout
         ; state <- getState
         ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } }
 
-getCmm :: FCode () -> FCode CmmGroup
+getCmm :: FCode a -> FCode (a, CmmGroup)
 -- Get all the CmmTops (there should be no stmts)
 -- Return a single Cmm which may be split from other Cmms by
 -- object splitting (at a later stage)
 getCmm code
   = do  { state1 <- getState
-        ; ((), state2) <- withState code (state1 { cgs_tops  = nilOL })
+        ; (a, state2) <- withState code (state1 { cgs_tops  = nilOL })
         ; setState $ state2 { cgs_tops = cgs_tops state1 }
-        ; return (fromOL (cgs_tops state2)) }
+        ; return (a, fromOL (cgs_tops state2)) }
 
 
 mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> FCode CmmAGraph
diff --git a/compiler/GHC/StgToCmm/Prof.hs b/compiler/GHC/StgToCmm/Prof.hs
index 473e240a54ac..451d38ec4ce2 100644
--- a/compiler/GHC/StgToCmm/Prof.hs
+++ b/compiler/GHC/StgToCmm/Prof.hs
@@ -10,6 +10,9 @@ module GHC.StgToCmm.Prof (
         initCostCentres, ccType, ccsType,
         mkCCostCentre, mkCCostCentreStack,
 
+        -- infoTablePRov
+        initInfoTableProv, emitInfoTableProv,
+
         -- Cost-centre Profiling
         dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
         enterCostCentreThunk, enterCostCentreFun,
@@ -41,10 +44,13 @@ import GHC.Cmm.Utils
 import GHC.Cmm.CLabel
 
 import GHC.Types.CostCentre
+import GHC.Types.IPE
 import GHC.Data.FastString
 import GHC.Unit.Module as Module
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
+import GHC.Driver.CodeOutput ( ipInitCode )
+
 
 import Control.Monad
 import Data.Char (ord)
@@ -269,6 +275,53 @@ sizeof_ccs_words platform
   where
    (ws,ms) = pc_SIZEOF_CostCentreStack (platformConstants platform) `divMod` platformWordSizeInBytes platform
 
+
+initInfoTableProv ::  [CmmInfoTable] -> InfoTableProvMap -> Module -> FCode SDoc
+-- Emit the declarations
+initInfoTableProv infos itmap this_mod
+  = do
+       dflags <- getDynFlags
+       let ents = convertInfoProvMap dflags infos this_mod itmap
+       --pprTraceM "UsedInfo" (ppr (length infos))
+       --pprTraceM "initInfoTable" (ppr (length ents))
+       -- Output the actual IPE data
+       mapM_ emitInfoTableProv ents
+       -- Create the C stub which initialises the IPE_LIST
+       return (ipInitCode dflags this_mod ents)
+
+--- Info Table Prov stuff
+emitInfoTableProv :: InfoProvEnt  -> FCode ()
+emitInfoTableProv ip = do
+  { dflags <- getDynFlags
+  ; let mod = infoProvModule ip
+  ; let (src, label) = maybe ("", "") (\(s, l) -> (showPpr dflags s, l)) (infoTableProv ip)
+  ; platform <- getPlatform
+                        -- NB. bytesFS: we want the UTF-8 bytes here (#5559)
+  ; label <- newByteStringCLit (bytesFS $ mkFastString label)
+  ; modl  <- newByteStringCLit (bytesFS $ moduleNameFS
+                                        $ moduleName
+                                        $ mod)
+
+  ; ty_string  <- newByteStringCLit (bytesFS (mkFastString (infoTableType ip)))
+  ; loc <- newByteStringCLit $ bytesFS $ mkFastString $ src
+           -- XXX going via FastString to get UTF-8 encoding is silly
+  ; table_name <- newByteStringCLit $ bytesFS $ mkFastString $
+                    showPpr dflags (pprCLabel platform CStyle (infoTablePtr ip))
+
+  ; closure_type <- newByteStringCLit $ bytesFS $ mkFastString $
+                    showPpr dflags (text $ show $ infoProvEntClosureType ip)
+  ; let
+     lits = [ CmmLabel (infoTablePtr ip), -- Info table pointer
+              table_name,     -- char *table_name
+              closure_type,   -- char *closure_desc -- Filled in from the InfoTable
+              ty_string,      -- char *ty_string
+              label,          -- char *label,
+              modl,           -- char *module,
+              loc,            -- char *srcloc,
+              zero platform   -- struct _InfoProvEnt *link
+            ]
+  ; emitDataLits (mkIPELabel ip) lits
+  }
 -- ---------------------------------------------------------------------------
 -- Set the current cost centre stack
 
diff --git a/compiler/GHC/StgToCmm/Types.hs b/compiler/GHC/StgToCmm/Types.hs
index e59792cb57fc..944ff4b072d3 100644
--- a/compiler/GHC/StgToCmm/Types.hs
+++ b/compiler/GHC/StgToCmm/Types.hs
@@ -20,6 +20,7 @@ import GHC.Types.Name.Env
 import GHC.Types.Name.Set
 import GHC.Utils.Outputable
 
+
 {-
 Note [Conveying CAF-info and LFInfo between modules]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -86,6 +87,8 @@ data CgInfos = CgInfos
       -- either not exported of CAFFY.
   , cgLFInfos :: !ModuleLFInfos
       -- ^ LambdaFormInfos of exported closures in the current module.
+  , cgIPEStub :: !SDoc
+      -- ^ The C stub which is used for IPE information
   }
 
 --------------------------------------------------------------------------------
diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs
index a900de3677c7..bc10eaf4ead9 100644
--- a/compiler/GHC/StgToCmm/Utils.hs
+++ b/compiler/GHC/StgToCmm/Utils.hs
@@ -44,6 +44,8 @@ module GHC.StgToCmm.Utils (
         whenUpdRemSetEnabled,
         emitUpdRemSetPush,
         emitUpdRemSetPushThunk,
+
+        convertInfoProvMap, cmmInfoTableToInfoProvEnt
   ) where
 
 #include "HsVersions.h"
@@ -79,6 +81,7 @@ import GHC.Utils.Outputable
 import GHC.Utils.Panic
 import GHC.Types.RepType
 import GHC.Types.CostCentre
+import GHC.Types.IPE
 
 import Data.ByteString (ByteString)
 import qualified Data.ByteString.Char8 as BS8
@@ -86,7 +89,9 @@ import qualified Data.Map as M
 import Data.Char
 import Data.List (sortBy)
 import Data.Ord
-
+import GHC.Types.Unique.Map
+import Data.Maybe
+import GHC.Driver.Ppr
 
 -------------------------------------------------------------------------
 --
@@ -631,3 +636,32 @@ emitUpdRemSetPushThunk ptr =
       [(CmmReg (CmmGlobal BaseReg), AddrHint),
        (ptr, AddrHint)]
       False
+
+-- | A bare bones InfoProvEnt for things which don't have a good source location
+cmmInfoTableToInfoProvEnt :: Module -> CmmInfoTable -> InfoProvEnt
+cmmInfoTableToInfoProvEnt this_mod cmit =
+    let cl = cit_lbl cmit
+        cn  = rtsClosureType (cit_rep cmit)
+    in InfoProvEnt cl cn "" this_mod Nothing
+
+-- | Convert source information collected about identifiers in 'GHC.STG.Debug'
+-- to entries suitable for placing into the info table provenenance table.
+convertInfoProvMap :: DynFlags -> [CmmInfoTable] -> Module -> InfoTableProvMap -> [InfoProvEnt]
+convertInfoProvMap dflags defns this_mod (InfoTableProvMap denv) =
+  map (\cmit ->
+    let cl = cit_lbl cmit
+        cn  = rtsClosureType (cit_rep cmit)
+
+        tyString :: Outputable a => a -> String
+        tyString t = showPpr dflags t
+
+        lookupClosureMap :: Maybe InfoProvEnt
+        lookupClosureMap = case hasHaskellName cl >>= lookupUniqMap denv of
+                                Just (ty, ss, l) -> Just (InfoProvEnt cl cn (tyString ty) this_mod (Just (ss, l)))
+                                Nothing -> Nothing
+
+        -- This catches things like prim closure types and anything else which doesn't have a
+        -- source location
+        simpleFallback = cmmInfoTableToInfoProvEnt this_mod cmit
+
+    in fromMaybe simpleFallback lookupClosureMap) defns
diff --git a/compiler/GHC/Types/IPE.hs b/compiler/GHC/Types/IPE.hs
new file mode 100644
index 000000000000..78c929c4dbe3
--- /dev/null
+++ b/compiler/GHC/Types/IPE.hs
@@ -0,0 +1,24 @@
+module GHC.Types.IPE(ClosureMap, InfoTableProvMap(..)
+                    , emptyInfoTableProvMap) where
+
+import GHC.Prelude
+
+import GHC.Types.Name
+import GHC.Types.SrcLoc
+
+import GHC.Types.Unique.Map
+import GHC.Core.Type
+
+-- | A map from a 'Name' to the best approximate source position that
+-- name arose from.
+type ClosureMap = UniqMap Name  -- The binding
+                          (Type, RealSrcSpan, String)
+                          -- The best approximate source position.
+                          -- (rendered type, source position, source note
+                          -- label)
+
+data InfoTableProvMap = InfoTableProvMap
+                          { provClosure :: ClosureMap }
+
+emptyInfoTableProvMap :: InfoTableProvMap
+emptyInfoTableProvMap = InfoTableProvMap emptyUniqMap
diff --git a/compiler/GHC/Types/SrcLoc.hs b/compiler/GHC/Types/SrcLoc.hs
index 7dd7b297e341..1ec91017caa1 100644
--- a/compiler/GHC/Types/SrcLoc.hs
+++ b/compiler/GHC/Types/SrcLoc.hs
@@ -51,6 +51,7 @@ module GHC.Types.SrcLoc (
         pprUserRealSpan, pprUnhelpfulSpanReason,
         pprUserSpan,
         unhelpfulSpanFS,
+        srcSpanToRealSrcSpan,
 
         -- ** Unsafely deconstructing SrcSpan
         -- These are dubious exports, because they crash on some inputs
@@ -616,6 +617,10 @@ srcSpanFileName_maybe :: SrcSpan -> Maybe FastString
 srcSpanFileName_maybe (RealSrcSpan s _) = Just (srcSpanFile s)
 srcSpanFileName_maybe (UnhelpfulSpan _) = Nothing
 
+srcSpanToRealSrcSpan :: SrcSpan -> Maybe RealSrcSpan
+srcSpanToRealSrcSpan (RealSrcSpan ss _) = Just ss
+srcSpanToRealSrcSpan _ = Nothing
+
 {-
 ************************************************************************
 *                                                                      *
diff --git a/compiler/GHC/Types/Unique/Map.hs b/compiler/GHC/Types/Unique/Map.hs
index 667d5806d0dc..d31043353df3 100644
--- a/compiler/GHC/Types/Unique/Map.hs
+++ b/compiler/GHC/Types/Unique/Map.hs
@@ -10,7 +10,7 @@
 --
 -- Key preservation is right-biased.
 module GHC.Types.Unique.Map (
-    UniqMap,
+    UniqMap(..),
     emptyUniqMap,
     isNullUniqMap,
     unitUniqMap,
diff --git a/compiler/GHC/Unit/Module/Name.hs b/compiler/GHC/Unit/Module/Name.hs
index 76c40f6a8777..cc5e430bd6dc 100644
--- a/compiler/GHC/Unit/Module/Name.hs
+++ b/compiler/GHC/Unit/Module/Name.hs
@@ -30,7 +30,7 @@ import Text.ParserCombinators.ReadP (ReadP)
 import Data.Char (isAlphaNum)
 
 -- | A ModuleName is essentially a simple string, e.g. @Data.List@.
-newtype ModuleName = ModuleName FastString
+newtype ModuleName = ModuleName FastString deriving Show
 
 instance Uniquable ModuleName where
   getUnique (ModuleName nm) = getUnique nm
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index e0d2f48aa683..8e479fa1980b 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -524,6 +524,7 @@ Library
         GHC.Settings.Constants
         GHC.Settings.IO
         GHC.Stg.CSE
+        GHC.Stg.Debug
         GHC.Stg.DepAnal
         GHC.Stg.FVs
         GHC.Stg.Lift
@@ -635,6 +636,7 @@ Library
         GHC.Types.ForeignStubs
         GHC.Types.HpcInfo
         GHC.Types.Id
+        GHC.Types.IPE
         GHC.Types.Id.Info
         GHC.Types.Id.Make
         GHC.Types.Literal
diff --git a/docs/users_guide/debug-info.rst b/docs/users_guide/debug-info.rst
index e18f0287a4cb..ce640691a21f 100644
--- a/docs/users_guide/debug-info.rst
+++ b/docs/users_guide/debug-info.rst
@@ -335,3 +335,61 @@ Further Reading
 For more information about the debug information produced by GHC see
 Peter Wortmann's PhD thesis, `*Profiling Optimized Haskell: Causal
 Analysis and Implementation* <http://etheses.whiterose.ac.uk/8321/>`__.
+
+
+Direct Mapping
+--------------
+
+In addition to the DWARF debug information, which can be used by many
+standard tools, there is also a GHC specific way to map info table pointers
+to a source location. This lookup table is generated by using the ``-finfo-table-map`` flag.
+
+
+.. ghc-flag:: -finfo-table-map
+    :shortdesc: Embed a lookup table in the generated binary which
+                maps the address of an info table to the source position
+                the closure originated from.
+    :type: dynamic
+    :category: debugging
+
+    :since: 9.2
+
+    This flag enables the generation of a table which maps the address of
+    an info table to an approximate source position of where that
+    info table statically originated from. If you
+    also want more precise information about constructor info tables then you
+    should also use :ghc-flag:`-fdistinct-constructor-tables`.
+
+    This flag will increase the binary size by quite a lot, depending on how
+    big your project is. For compiling a project the size of GHC the overhead was
+    about 200 megabytes.
+
+.. ghc-flag:: -fdistinct-constructor-tables
+    :shortdesc: Generate a fresh info table for each usage
+                of a data constructor.
+    :type: dynamic
+    :category: debugging
+
+    :since: 9.2
+
+    For every usage of a data constructor in the source program
+    a new info table will be created. This is useful for debugging
+    as if each usage has a unique info table then the info table map
+    and profiling modes can distinguish the allocation sites of
+    a data constructor.
+
+
+
+Querying the Info Table Map
+---------------------------
+
+If it is generated then the info table map can be used
+in two ways.
+
+1. The ``whereFrom`` function can be used to determine the source
+   position which we think a specific closure was created.
+2. The complete mapping is also dumped into the eventlog.
+
+If you are using gdb then you can use the ``lookupIPE`` function
+directly in order to find any information which is known
+about the info table for a specific closure.
diff --git a/docs/users_guide/eventlog-formats.rst b/docs/users_guide/eventlog-formats.rst
index e0f4430a3f3c..9ccd6bb2cf09 100644
--- a/docs/users_guide/eventlog-formats.rst
+++ b/docs/users_guide/eventlog-formats.rst
@@ -598,6 +598,24 @@ A variable-length packet produced once for each cost centre,
 
      * bit 0: is the cost-centre a CAF?
 
+Info Table Provenance definitions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A message which describes an approximate source position for
+info tables. See :ghc-flag:`-finfo-table-map` for more information.
+
+.. event-type:: IPE
+
+   :tag: 169
+   :length: fixed
+   :field Word64: info table address
+   :field String: table name
+   :field String: closure type
+   :field String: type
+   :field String: source position label
+   :field String: source position module
+   :field String: source position location
+
 
 Sample event types
 ^^^^^^^^^^^^^^^^^^
diff --git a/docs/users_guide/profiling.rst b/docs/users_guide/profiling.rst
index 75569f64091c..097d7d0de35a 100644
--- a/docs/users_guide/profiling.rst
+++ b/docs/users_guide/profiling.rst
@@ -802,8 +802,9 @@ following RTS options select which break-down to use:
 
 .. rts-flag:: -hi
 
-    Break down the graph by the address of the info table of a closure. This
-    profiling mode is intended to be used with :ghc-flag:`-finfo-table-map`.
+    Break down the graph by the address of the info table of a closure. For this
+    to produce useful output the program must have been compiled with
+    :ghc-flag:`-finfo-table-map`.
 
 .. rts-flag:: -l
     :noindex:
diff --git a/includes/Rts.h b/includes/Rts.h
index 50a3f665de7a..0f96ba2eca68 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -242,6 +242,7 @@ void _assertFail(const char *filename, unsigned int linenum)
 #include "rts/PrimFloat.h"
 #include "rts/Main.h"
 #include "rts/Profiling.h"
+#include "rts/IPE.h"
 #include "rts/StaticPtrTable.h"
 #include "rts/Libdw.h"
 #include "rts/LibdwPool.h"
diff --git a/includes/rts/EventLogFormat.h b/includes/rts/EventLogFormat.h
index 4b50adfe5bf7..b80a9d3a94c3 100644
--- a/includes/rts/EventLogFormat.h
+++ b/includes/rts/EventLogFormat.h
@@ -142,6 +142,7 @@
 #define EVENT_HEAP_BIO_PROF_SAMPLE_BEGIN   166
 #define EVENT_PROF_SAMPLE_COST_CENTRE      167
 #define EVENT_PROF_BEGIN                   168
+#define EVENT_IPE                          169
 
 #define EVENT_USER_BINARY_MSG              181
 
diff --git a/includes/rts/IPE.h b/includes/rts/IPE.h
new file mode 100644
index 000000000000..81a6d553d024
--- /dev/null
+++ b/includes/rts/IPE.h
@@ -0,0 +1,35 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2017-2018
+ *
+ * IPE API
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+
+typedef struct InfoProv_{
+    char * table_name;
+    char * closure_desc;
+    char * ty_desc;
+    char * label;
+    char * module;
+    char * srcloc;
+} InfoProv;
+
+typedef struct InfoProvEnt_ {
+    StgInfoTable * info;
+    InfoProv prov;
+    struct InfoProvEnt_ *link;
+} InfoProvEnt;
+
+extern InfoProvEnt * RTS_VAR(IPE_LIST);               // registered IP list
+
+void registerInfoProvList(InfoProvEnt **cc_list);
+InfoProvEnt * lookupIPE(StgInfoTable *info);
diff --git a/rts/IPE.c b/rts/IPE.c
new file mode 100644
index 000000000000..d881682e7dab
--- /dev/null
+++ b/rts/IPE.c
@@ -0,0 +1,81 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2000
+ *
+ * Support for mapping info table pointers to source locations
+ *
+ * ---------------------------------------------------------------------------*/
+
+
+#include "PosixSource.h"
+#include "Rts.h"
+
+#include "RtsUtils.h"
+#include "Profiling.h"
+#include "Arena.h"
+#include "IPE.h"
+#include "Printer.h"
+#include "Capability.h"
+
+#include <fs_rts.h>
+#include <string.h>
+
+
+#if defined(TRACING)
+#include "Trace.h"
+#endif
+
+InfoProvEnt *IPE_LIST = NULL;
+
+void dumpIPEToEventLog(void)
+{
+#if defined(TRACING)
+    InfoProvEnt *ip, *next;
+    for (ip = IPE_LIST; ip != NULL; ip = next) {
+        next = ip->link;
+        traceIPE(ip->info, ip->prov.table_name, ip->prov.closure_desc, ip->prov.ty_desc
+                , ip->prov.label, ip->prov.module, ip->prov.srcloc);
+    }
+#endif
+    return;
+}
+
+
+/* -----------------------------------------------------------------------------
+   Registering IPEs
+
+   Registering a IPE consists of linking it onto the list of registered IPEs
+
+   IPEs are registered at startup by a C constructor function
+   generated by the compiler (ProfInit.hs) in the _stub.c file for each module.
+ -------------------------------------------------------------------------- */
+
+static void
+registerInfoProvEnt(InfoProvEnt *ipe)
+{
+        ASSERT(ipe->link == NULL);
+        ipe->link = IPE_LIST;
+        IPE_LIST = ipe;
+}
+
+void registerInfoProvList(InfoProvEnt **ent_list)
+{
+    for (InfoProvEnt **i = ent_list; *i != NULL; i++) {
+        registerInfoProvEnt(*i);
+    }
+}
+
+
+// MP: TODO: This should not be a linear search, need to improve
+// the IPE_LIST structure
+InfoProvEnt * lookupIPE(StgInfoTable *info)
+{
+    InfoProvEnt *ip, *next;
+    for (ip = IPE_LIST; ip != NULL; ip = next) {
+        if (ip->info == info) {
+            return ip;
+        }
+        next = ip->link;
+    }
+    return NULL;
+}
diff --git a/rts/IPE.h b/rts/IPE.h
new file mode 100644
index 000000000000..48b4c62f0020
--- /dev/null
+++ b/rts/IPE.h
@@ -0,0 +1,18 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Support for IPE
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <stdio.h>
+#include "Rts.h"
+
+#include "BeginPrivate.h"
+
+void dumpIPEToEventLog(void);
+
+#include "EndPrivate.h"
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
index bd8e5d57334c..5cad851b8095 100644
--- a/rts/RtsStartup.c
+++ b/rts/RtsStartup.c
@@ -32,6 +32,7 @@
 #include "StaticPtrTable.h"
 #include "Hash.h"
 #include "Profiling.h"
+#include "IPE.h"
 #include "ProfHeap.h"
 #include "Timer.h"
 #include "Globals.h"
@@ -369,6 +370,7 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
 #if defined(PROFILING)
     initProfiling();
 #endif
+    dumpIPEToEventLog();
     initHeapProfiling();
 
     /* start the virtual timer 'subsystem'. */
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index 989b878b5643..f4c15e113b14 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -536,6 +536,7 @@
 #define RTS_PROF_SYMBOLS                        \
       SymI_HasProto(CCS_DONT_CARE)              \
       SymI_HasProto(CC_LIST)                    \
+      SymI_HasProto(IPE_LIST)                    \
       SymI_HasProto(stg_restore_cccs_info)      \
       SymI_HasProto(enterFunCCS)                \
       SymI_HasProto(pushCostCentre)             \
@@ -1001,6 +1002,8 @@
       SymI_HasProto(cas)                                                \
       SymI_HasProto(_assertFail)                                        \
       SymI_HasProto(keepCAFs)                                           \
+      SymI_HasProto(registerInfoProvList)                               \
+      SymI_HasProto(lookupIPE)                                          \
       RTS_USER_SIGNALS_SYMBOLS                                          \
       RTS_INTCHAR_SYMBOLS
 
diff --git a/rts/Trace.c b/rts/Trace.c
index 2f1e3f9c90a5..765617839e56 100644
--- a/rts/Trace.c
+++ b/rts/Trace.c
@@ -638,6 +638,19 @@ void traceHeapProfSampleString(StgWord8 profile_id,
     }
 }
 
+void traceIPE(StgInfoTable * info,
+              const char *table_name,
+              const char *closure_desc,
+              const char *ty_desc,
+              const char *label,
+              const char *module,
+              const char *srcloc )
+{
+    if (eventlog_enabled) {
+        postIPE((W_) INFO_PTR_TO_STRUCT(info), table_name, closure_desc, ty_desc, label, module, srcloc);
+    }
+}
+
 #if defined(PROFILING)
 void traceHeapProfCostCentre(StgWord32 ccID,
                              const char *label,
diff --git a/rts/Trace.h b/rts/Trace.h
index 08b42fe9bda8..f9d677d063bb 100644
--- a/rts/Trace.h
+++ b/rts/Trace.h
@@ -319,6 +319,14 @@ void traceConcSweepEnd(void);
 void traceConcUpdRemSetFlush(Capability *cap);
 void traceNonmovingHeapCensus(uint32_t log_blk_size,
                               const struct NonmovingAllocCensus *census);
+
+void traceIPE(StgInfoTable *info,
+               const char *table_name,
+               const char *closure_desc,
+               const char *ty_desc,
+               const char *label,
+               const char *module,
+               const char *srcloc );
 void flushTrace(void);
 
 #else /* !TRACING */
@@ -353,6 +361,7 @@ void flushTrace(void);
 #define traceTaskDelete_(taskID) /* nothing */
 #define traceHeapProfBegin(profile_id) /* nothing */
 #define traceHeapProfCostCentre(ccID, label, module, srcloc, is_caf) /* nothing */
+#define traceIPE(info, table_name, closure_desc, ty_desc, label, module, srcloc) /* nothing */
 #define traceHeapProfSampleBegin(era) /* nothing */
 #define traceHeapBioProfSampleBegin(era, time) /* nothing */
 #define traceHeapProfSampleEnd(era) /* nothing */
diff --git a/rts/eventlog/EventLog.c b/rts/eventlog/EventLog.c
index c1eda283f574..0a1ed09f6fda 100644
--- a/rts/eventlog/EventLog.c
+++ b/rts/eventlog/EventLog.c
@@ -165,6 +165,7 @@ char *EventDesc[] = {
   [EVENT_HACK_BUG_T9003]      = "Empty event for bug #9003",
   [EVENT_HEAP_PROF_BEGIN]     = "Start of heap profile",
   [EVENT_HEAP_PROF_COST_CENTRE]   = "Cost center definition",
+  [EVENT_IPE]                     = "Info Table Source Position",
   [EVENT_HEAP_PROF_SAMPLE_BEGIN]  = "Start of heap profile sample",
   [EVENT_HEAP_BIO_PROF_SAMPLE_BEGIN]  = "Start of heap profile (biographical) sample",
   [EVENT_HEAP_PROF_SAMPLE_END]    = "End of heap profile sample",
@@ -497,6 +498,9 @@ init_event_types(void)
         case EVENT_HEAP_PROF_COST_CENTRE:
             eventTypes[t].size = EVENT_SIZE_DYNAMIC;
             break;
+        case EVENT_IPE:
+            eventTypes[t].size = EVENT_SIZE_DYNAMIC;
+            break;
 
         case EVENT_HEAP_PROF_SAMPLE_BEGIN:
             eventTypes[t].size = 8;
@@ -1640,6 +1644,36 @@ void postTickyCounterSamples(StgEntCounter *counters)
     RELEASE_LOCK(&eventBufMutex);
 }
 #endif /* TICKY_TICKY */
+void postIPE(StgWord64 info,
+             const char *table_name,
+             const char *closure_desc,
+             const char *ty_desc,
+             const char *label,
+             const char *module,
+             const char *srcloc)
+{
+    ACQUIRE_LOCK(&eventBufMutex);
+    StgWord table_name_len = strlen(table_name);
+    StgWord closure_desc_len = strlen(closure_desc);
+    StgWord ty_desc_len = strlen(ty_desc);
+    StgWord label_len = strlen(label);
+    StgWord module_len = strlen(module);
+    StgWord srcloc_len = strlen(srcloc);
+    // 8 for the info word
+    // 6 for the number of strings in the payload as postString adds 1 to the length
+    StgWord len = 8+table_name_len+closure_desc_len+ty_desc_len+label_len+module_len+srcloc_len+6;
+    ensureRoomForVariableEvent(&eventBuf, len);
+    postEventHeader(&eventBuf, EVENT_IPE);
+    postPayloadSize(&eventBuf, len);
+    postWord64(&eventBuf, info);
+    postString(&eventBuf, table_name);
+    postString(&eventBuf, closure_desc);
+    postString(&eventBuf, ty_desc);
+    postString(&eventBuf, label);
+    postString(&eventBuf, module);
+    postString(&eventBuf, srcloc);
+    RELEASE_LOCK(&eventBufMutex);
+}
 
 void printAndClearEventBuf (EventsBuf *ebuf)
 {
diff --git a/rts/eventlog/EventLog.h b/rts/eventlog/EventLog.h
index a412b491bbf5..b0675db14df2 100644
--- a/rts/eventlog/EventLog.h
+++ b/rts/eventlog/EventLog.h
@@ -171,6 +171,14 @@ void postProfSampleCostCentre(Capability *cap,
 void postProfBegin(void);
 #endif /* PROFILING */
 
+void postIPE(StgWord64 info,
+             const char *table_name,
+             const char *closure_desc,
+             const char *ty_desc,
+             const char *label,
+             const char *module,
+             const char *srcloc);
+
 void postConcUpdRemSetFlush(Capability *cap);
 void postConcMarkEnd(StgWord32 marked_obj_count);
 void postNonmovingHeapCensus(int log_blk_size,
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
index 108b4d6b9bcd..427c78c40f52 100644
--- a/rts/rts.cabal.in
+++ b/rts/rts.cabal.in
@@ -166,6 +166,7 @@ library
                       rts/Parallel.h
                       rts/PrimFloat.h
                       rts/Profiling.h
+                      rts/IPE.h
                       rts/Signals.h
                       rts/SpinLock.h
                       rts/StableName.h
@@ -448,6 +449,7 @@ library
                ProfilerReport.c
                ProfilerReportJson.c
                Profiling.c
+               IPE.c
                Proftimer.c
                RaiseAsync.c
                RetainerProfile.c
diff --git a/testsuite/tests/parser/should_run/CountAstDeps.stdout b/testsuite/tests/parser/should_run/CountAstDeps.stdout
index 84819595a652..a825c2bac7e6 100644
--- a/testsuite/tests/parser/should_run/CountAstDeps.stdout
+++ b/testsuite/tests/parser/should_run/CountAstDeps.stdout
@@ -1,4 +1,4 @@
-Found 239 Language.Haskell.Syntax module dependencies
+Found 241 Language.Haskell.Syntax module dependencies
 GHC.Builtin.Names
 GHC.Builtin.PrimOps
 GHC.Builtin.Types
@@ -156,6 +156,7 @@ GHC.Types.Fixity.Env
 GHC.Types.ForeignCall
 GHC.Types.ForeignStubs
 GHC.Types.HpcInfo
+GHC.Types.IPE
 GHC.Types.Id
 GHC.Types.Id.Info
 GHC.Types.Id.Make
@@ -181,6 +182,7 @@ GHC.Types.Unique
 GHC.Types.Unique.DFM
 GHC.Types.Unique.DSet
 GHC.Types.Unique.FM
+GHC.Types.Unique.Map
 GHC.Types.Unique.Set
 GHC.Types.Unique.Supply
 GHC.Types.Var
diff --git a/testsuite/tests/parser/should_run/CountParserDeps.stdout b/testsuite/tests/parser/should_run/CountParserDeps.stdout
index a7fe9c604ed2..e27ba9384630 100644
--- a/testsuite/tests/parser/should_run/CountParserDeps.stdout
+++ b/testsuite/tests/parser/should_run/CountParserDeps.stdout
@@ -1,4 +1,4 @@
-Found 247 GHC.Parser module dependencies
+Found 249 GHC.Parser module dependencies
 GHC.Builtin.Names
 GHC.Builtin.PrimOps
 GHC.Builtin.Types
@@ -164,6 +164,7 @@ GHC.Types.Fixity.Env
 GHC.Types.ForeignCall
 GHC.Types.ForeignStubs
 GHC.Types.HpcInfo
+GHC.Types.IPE
 GHC.Types.Id
 GHC.Types.Id.Info
 GHC.Types.Id.Make
@@ -189,6 +190,7 @@ GHC.Types.Unique
 GHC.Types.Unique.DFM
 GHC.Types.Unique.DSet
 GHC.Types.Unique.FM
+GHC.Types.Unique.Map
 GHC.Types.Unique.Set
 GHC.Types.Unique.Supply
 GHC.Types.Var
diff --git a/testsuite/tests/regalloc/regalloc_unit_tests.hs b/testsuite/tests/regalloc/regalloc_unit_tests.hs
index afc6fa0fca88..cc4dcf7f9bc9 100644
--- a/testsuite/tests/regalloc/regalloc_unit_tests.hs
+++ b/testsuite/tests/regalloc/regalloc_unit_tests.hs
@@ -46,6 +46,7 @@ import GHC.Driver.Errors
 import GHC.Utils.Error
 import GHC.Utils.Outputable
 import GHC.Types.Basic
+import GHC.Unit.Home
 
 import GHC.Data.Stream as Stream (collect, yield)
 
@@ -114,7 +115,8 @@ compileCmmForRegAllocStats logger dflags' cmmFile ncgImplF us = do
     hscEnv <- newHscEnv dflags
 
     -- parse the cmm file and output any warnings or errors
-    (warnings, errors, parsedCmm) <- parseCmmFile dflags (hsc_home_unit hscEnv) cmmFile
+    let fake_mod = mkHomeModule (hsc_home_unit hscEnv) (mkModuleName "fake")
+    (warnings, errors, parsedCmm) <- parseCmmFile dflags fake_mod (hsc_home_unit hscEnv) cmmFile
     let warningMsgs = fmap pprWarning warnings
         errorMsgs   = fmap pprError errors
 
@@ -122,7 +124,7 @@ compileCmmForRegAllocStats logger dflags' cmmFile ncgImplF us = do
     mapM_ (printBagOfErrors logger dflags) [warningMsgs, errorMsgs]
 
     let initTopSRT = emptySRT thisMod
-    cmmGroup <- fmap snd $ cmmPipeline hscEnv initTopSRT $ fromJust parsedCmm
+    cmmGroup <- fmap snd $ cmmPipeline hscEnv initTopSRT $ fst $ fromJust parsedCmm
 
     rawCmms <- cmmToRawCmm logger dflags (Stream.yield cmmGroup)
 
-- 
GitLab