From b0293f78cb6acf2540389e22bdda420d0ab874da Mon Sep 17 00:00:00 2001
From: Matthew Pickering <matthewtpickering@gmail.com>
Date: Fri, 8 Dec 2023 14:11:22 +0000
Subject: [PATCH] rts: eras profiling mode

The eras profiling mode is useful for tracking the life-time of
closures. When a closure is written, the current era is recorded in the
profiling header. This records the era in which the closure was created.

* Enable with -he
* User mode: Use functions ghc-experimental module GHC.Profiling.Eras to modify the era
* Automatically: --automatic-era-increment, increases the user era on major
  collections
* The first era is era 1
* -he<era> can be used with other profiling modes to select a specific
  era

If you just want to record the era but not to perform heap profiling you
can use `-he --no-automatic-heap-samples`.

https://well-typed.com/blog/2024/01/ghc-eras-profiling/

Fixes #24332
---
 compiler/GHC/Cmm/Parser.y                     |  2 +-
 compiler/GHC/StgToCmm/Prof.hs                 | 87 +++++++++++++------
 docs/users_guide/profiling.rst                | 23 +++++
 .../ghc-experimental/ghc-experimental.cabal   |  3 +-
 libraries/ghc-experimental/src/Dummy.hs       | 10 ---
 .../src/GHC/Profiling/Eras.hs                 | 20 +++++
 libraries/ghc-internal/src/GHC/Profiling.hs   |  1 +
 rts/CloneStack.c                              |  1 +
 rts/Continuation.c                            |  1 +
 rts/Heap.c                                    |  1 +
 rts/Interpreter.c                             |  1 +
 rts/LdvProfile.h                              |  4 +-
 rts/Messages.c                                |  1 +
 rts/PrimOps.cmm                               | 16 ++--
 rts/ProfHeap.c                                | 58 +++++++++----
 rts/ProfHeap.h                                |  5 --
 rts/Profiling.c                               |  1 +
 rts/RaiseAsync.c                              |  1 +
 rts/RtsAPI.c                                  |  1 +
 rts/RtsFlags.c                                | 45 ++++++++++
 rts/RtsFlags.h                                |  5 ++
 rts/RtsSymbols.c                              |  6 +-
 rts/STM.c                                     |  1 +
 rts/Schedule.c                                |  1 +
 rts/ThreadLabels.c                            |  1 +
 rts/Threads.c                                 |  1 +
 rts/Updates.h                                 |  8 +-
 rts/Weak.c                                    |  1 +
 rts/eventlog/EventLog.c                       |  2 +
 rts/include/rts/EventLogFormat.h              |  3 +-
 rts/include/rts/Flags.h                       |  3 +
 rts/include/rts/prof/CCS.h                    |  1 +
 rts/include/rts/prof/Heap.h                   |  3 +
 rts/include/rts/prof/LDV.h                    |  9 +-
 rts/include/rts/storage/ClosureMacros.h       | 27 +++++-
 rts/include/rts/storage/Closures.h            |  1 +
 rts/include/stg/MiscClosures.h                |  1 +
 rts/sm/CNF.c                                  |  1 +
 rts/sm/Evac.c                                 | 11 ++-
 rts/sm/Storage.c                              |  1 +
 .../ghc-experimental-exports.stdout           |  6 ++
 41 files changed, 297 insertions(+), 78 deletions(-)
 delete mode 100644 libraries/ghc-experimental/src/Dummy.hs
 create mode 100644 libraries/ghc-experimental/src/GHC/Profiling/Eras.hs

diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y
index 97f07d3695a8..07847af27427 100644
--- a/compiler/GHC/Cmm/Parser.y
+++ b/compiler/GHC/Cmm/Parser.y
@@ -1273,7 +1273,7 @@ stmtMacros = listToUFM [
   ( fsLit "POP_ARG_REGS",          \[live_regs] -> emitPopArgRegs live_regs ),
 
   ( fsLit "LDV_ENTER",             \[e] -> ldvEnter e ),
-  ( fsLit "LDV_RECORD_CREATE",     \[e] -> ldvRecordCreate e ),
+  ( fsLit "PROF_HEADER_CREATE",     \[e] -> profHeaderCreate e ),
 
   ( fsLit "PUSH_UPD_FRAME",        \[sp,e] -> emitPushUpdateFrame sp e ),
   ( fsLit "SET_HDR",               \[ptr,info,ccs] ->
diff --git a/compiler/GHC/StgToCmm/Prof.hs b/compiler/GHC/StgToCmm/Prof.hs
index bc01198b7c8e..cea5991300ee 100644
--- a/compiler/GHC/StgToCmm/Prof.hs
+++ b/compiler/GHC/StgToCmm/Prof.hs
@@ -23,7 +23,7 @@ module GHC.StgToCmm.Prof (
         saveCurrentCostCentre, restoreCurrentCostCentre,
 
         -- Lag/drag/void stuff
-        ldvEnter, ldvEnterClosure, ldvRecordCreate
+        ldvEnter, ldvEnterClosure, profHeaderCreate
   ) where
 
 import GHC.Prelude
@@ -88,14 +88,14 @@ costCentreFrom platform cl = CmmLoad (cmmOffsetB platform cl (pc_OFFSET_StgHeade
 -- | The profiling header words in a static closure
 staticProfHdr :: Profile -> CostCentreStack -> [CmmLit]
 staticProfHdr profile ccs
-  | profileIsProfiling profile = [mkCCostCentreStack ccs, staticLdvInit platform]
+  | profileIsProfiling profile = [mkCCostCentreStack ccs, staticProfHeaderInit platform]
   | otherwise                  = []
   where platform = profilePlatform profile
 
 -- | Profiling header words in a dynamic closure
 dynProfHdr :: Profile -> CmmExpr -> [CmmExpr]
 dynProfHdr profile ccs
-  | profileIsProfiling profile = [ccs, dynLdvInit (profilePlatform profile)]
+  | profileIsProfiling profile = [ccs, dynProfInit (profilePlatform profile)]
   | otherwise                  = []
 
 -- | Initialise the profiling field of an update frame
@@ -322,33 +322,62 @@ bumpSccCount platform ccs
 
 -----------------------------------------------------------------------------
 --
---                Lag/drag/void stuff
+--                Profiling header stuff
 --
 -----------------------------------------------------------------------------
 
---
--- Initial value for the LDV field in a static closure
---
-staticLdvInit :: Platform -> CmmLit
-staticLdvInit = zeroCLit
 
---
--- Initial value of the LDV field in a dynamic closure
---
+-- Header initialisation for static objects happens to coicincide for the
+-- three uses of the header
+--  * LDV profiling = 0 (era = 0, LDV_STATE_CREATE)
+--  * Eras profiling = 0 (user_era = 0, ignored by profiler)
+--  * Retainer profiling = 0
+
+staticProfHeaderInit :: Platform -> CmmLit
+staticProfHeaderInit plat = zeroCLit plat
+
+
+-- Dynamic initialisation
+
+dynErasInit :: Platform -> CmmExpr
+dynErasInit platform = loadUserEra platform
+
 dynLdvInit :: Platform -> CmmExpr
-dynLdvInit platform =     -- (era << LDV_SHIFT) | LDV_STATE_CREATE
+dynLdvInit platform =
+-- (era << LDV_SHIFT) | LDV_STATE_CREATE
   CmmMachOp (mo_wordOr platform) [
       CmmMachOp (mo_wordShl platform) [loadEra platform, mkIntExpr platform (pc_LDV_SHIFT (platformConstants platform))],
       CmmLit (mkWordCLit platform (pc_ILDV_STATE_CREATE (platformConstants platform)))
   ]
 
---
--- Initialise the LDV word of a new closure
---
-ldvRecordCreate :: CmmExpr -> FCode ()
-ldvRecordCreate closure = do
+
+-- | If LDV profiling the user_era = 0
+-- , if eras profiling then (ldv)era = 0, so we can initialise correctly by OR the two expressions.
+dynProfInit :: Platform -> CmmExpr
+dynProfInit platform = CmmMachOp (mo_wordOr platform) [(dynLdvInit platform), dynErasInit platform]
+
+
+-- |  Initialise the profiling word of a new dynamic closure
+-- * When LDV profiling is enabled (era > 0) - Initialise to the LDV word
+-- * When eras profiling is enabled (user_era > 0) - Initialise to current user_era
+profHeaderCreate :: CmmExpr -> FCode ()
+profHeaderCreate closure = do
   platform <- getPlatform
-  emit $ mkStore (ldvWord platform closure) (dynLdvInit platform)
+  let prof_header_wd = profHeaderWord platform closure
+
+  let check_ldv = mkCmmIfThenElse (CmmMachOp (mo_wordUGt platform) [loadEra platform, CmmLit (zeroCLit platform)])
+  let check_eras = mkCmmIfThenElse (CmmMachOp (mo_wordUGt platform) [loadUserEra platform, CmmLit (zeroCLit platform)])
+  -- Case 2: user_era > 0, eras profiling is enabled
+  check_1 <- check_eras (mkStore prof_header_wd (dynErasInit platform)) mkNop
+  -- Case 1: era > 0, LDV profiling is enabled
+  check_2 <- check_ldv (mkStore prof_header_wd (dynLdvInit platform)) check_1
+  emit check_2
+
+
+
+
+
+
 
 --
 -- | Called when a closure is entered, marks the closure as having
@@ -368,27 +397,35 @@ ldvEnter cl_ptr = do
     platform <- getPlatform
     let constants = platformConstants platform
         -- don't forget to subtract node's tag
-        ldv_wd = ldvWord platform cl_ptr
+        ldv_wd = profHeaderWord platform cl_ptr
         new_ldv_wd = cmmOrWord platform
                         (cmmAndWord platform (cmmLoadBWord platform ldv_wd)
                                              (CmmLit (mkWordCLit platform (pc_ILDV_CREATE_MASK constants))))
                         (cmmOrWord platform (loadEra platform) (CmmLit (mkWordCLit platform (pc_ILDV_STATE_USE constants))))
-    ifProfiling $
-         -- if (era > 0) {
+    ifProfiling $ do
+       -- if (era > 0) {
          --    LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |
          --                era | LDV_STATE_USE }
         emit =<< mkCmmIfThenElse (CmmMachOp (mo_wordUGt platform) [loadEra platform, CmmLit (zeroCLit platform)])
                      (mkStore ldv_wd new_ldv_wd)
                      mkNop
 
+
+
 loadEra :: Platform -> CmmExpr
 loadEra platform = CmmMachOp (MO_UU_Conv (cIntWidth platform) (wordWidth platform))
     [CmmLoad (mkLblExpr (mkRtsCmmDataLabel (fsLit "era")))
              (cInt platform)
              NaturallyAligned]
 
+loadUserEra :: Platform -> CmmExpr
+loadUserEra platform = CmmLoad (mkLblExpr (mkRtsCmmDataLabel (fsLit "user_era")))
+             (bWord platform)
+             NaturallyAligned
+
 -- | Takes the address of a closure, and returns
--- the address of the LDV word in the closure
-ldvWord :: Platform -> CmmExpr -> CmmExpr
-ldvWord platform closure_ptr
+-- the address of the prof header word in the closure (this is used to store LDV info,
+-- retainer profiling info and eras profiling info).
+profHeaderWord :: Platform -> CmmExpr -> CmmExpr
+profHeaderWord platform closure_ptr
     = cmmOffsetB platform closure_ptr (pc_OFFSET_StgHeader_ldvw (platformConstants platform))
diff --git a/docs/users_guide/profiling.rst b/docs/users_guide/profiling.rst
index 85c59fb2f126..03812db8b405 100644
--- a/docs/users_guide/profiling.rst
+++ b/docs/users_guide/profiling.rst
@@ -898,6 +898,17 @@ following RTS options select which break-down to use:
     which have function type or unknown/polymorphic type, the string will
     represent an approximation to the actual type.
 
+.. rts-flag:: -he
+
+    :since: 9.10.1
+
+    *Requires* :ghc-flag:`-prof`. Break down the graph by era.
+
+    Each closure is tagged with the era in which it is created. Eras start at 1
+    and can be set in your program to domain specific values using functions from
+    ``GHC.Profiling.Eras`` or incremented automatically by the
+    :rts-flag:`--automatic-era-increment`.
+
 .. rts-flag:: -hr
 
     *Requires* :ghc-flag:`-prof`. Break down the graph by retainer set. Retainer
@@ -965,6 +976,11 @@ follows:
 
     Restrict the profile to closures with the specified types.
 
+.. rts-flag:: -he ⟨era⟩
+    :noindex:
+
+    Restrict the profile to the specified era.
+
 .. rts-flag:: -hr ⟨cc⟩
     :noindex:
 
@@ -1017,6 +1033,13 @@ There are three more options which relate to heap profiling:
     option is enabled, it's expected that the user will manually start time
     profiling or request specific samples using functions from ``GHC.Profiling``.
 
+.. rts-flag:: --automatic-era-increment
+
+    :since: 9.10.1
+
+    Increment the era by 1 on each major garbage collection. This is used
+    in conjunction with :rts-flag:`-he`.
+
 .. rts-flag:: --null-eventlog-writer
 
     :since: 9.2.2
diff --git a/libraries/ghc-experimental/ghc-experimental.cabal b/libraries/ghc-experimental/ghc-experimental.cabal
index 7c7cf61d5036..2ba118a71c8f 100644
--- a/libraries/ghc-experimental/ghc-experimental.cabal
+++ b/libraries/ghc-experimental/ghc-experimental.cabal
@@ -22,8 +22,7 @@ common warnings
 
 library
     import:           warnings
-    exposed-modules:
-    other-modules:    Dummy
+    exposed-modules:  GHC.Profiling.Eras
     other-extensions:
     build-depends:    base ^>=4.19,
                       ghc-internal >= 0.1 && < 0.2
diff --git a/libraries/ghc-experimental/src/Dummy.hs b/libraries/ghc-experimental/src/Dummy.hs
deleted file mode 100644
index 5ae990e20d9f..000000000000
--- a/libraries/ghc-experimental/src/Dummy.hs
+++ /dev/null
@@ -1,10 +0,0 @@
-{-# LANGUAGE NoImplicitPrelude #-}
-
--- | This module merely serves as a placeholder since
--- Haskell packages must contain at least one module.
--- This can be dropped once a real module has been introduced to
--- @ghc-experimental@.
-module Dummy () where
-
--- for build system ordering
-import GHC.Base ()
diff --git a/libraries/ghc-experimental/src/GHC/Profiling/Eras.hs b/libraries/ghc-experimental/src/GHC/Profiling/Eras.hs
new file mode 100644
index 000000000000..508ae2f1994b
--- /dev/null
+++ b/libraries/ghc-experimental/src/GHC/Profiling/Eras.hs
@@ -0,0 +1,20 @@
+{-# LANGUAGE Trustworthy #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- | TODO move this module into ghc-internals
+module GHC.Profiling.Eras ( setUserEra
+                     , getUserEra
+                     , incrementUserEra
+                     ) where
+
+import GHC.Base
+
+-- | Set the heap profiling era, setting the era to 0 will stop closures being
+-- counted.
+foreign import ccall setUserEra :: Word -> IO ()
+
+-- | Query the profiling era
+foreign import ccall getUserEra :: IO Word
+
+-- | Increment the era by a given amount, and return the new era.
+foreign import ccall incrementUserEra :: Word -> IO Word
diff --git a/libraries/ghc-internal/src/GHC/Profiling.hs b/libraries/ghc-internal/src/GHC/Profiling.hs
index 9b8ff06fd21c..b7bfa911f94a 100644
--- a/libraries/ghc-internal/src/GHC/Profiling.hs
+++ b/libraries/ghc-internal/src/GHC/Profiling.hs
@@ -50,3 +50,4 @@ foreign import ccall startHeapProfTimer :: IO ()
 --
 -- @since 4.16.0.0
 foreign import ccall stopHeapProfTimer :: IO ()
+
diff --git a/rts/CloneStack.c b/rts/CloneStack.c
index 3ec96aa19d06..fa0c97f8454a 100644
--- a/rts/CloneStack.c
+++ b/rts/CloneStack.c
@@ -7,6 +7,7 @@
  *---------------------------------------------------------------------------*/
 
 #include "Rts.h"
+#include "RtsFlags.h"
 #include "rts/Messages.h"
 #include "Messages.h"
 #include "rts/Types.h"
diff --git a/rts/Continuation.c b/rts/Continuation.c
index db687cb42148..455f327c2e66 100644
--- a/rts/Continuation.c
+++ b/rts/Continuation.c
@@ -8,6 +8,7 @@
 
 #include "rts/PosixSource.h"
 #include "Rts.h"
+#include "RtsFlags.h"
 
 #include "sm/Storage.h"
 #include "sm/Sanity.h"
diff --git a/rts/Heap.c b/rts/Heap.c
index 1ecf1b6e1456..85ff1e360bdb 100644
--- a/rts/Heap.c
+++ b/rts/Heap.c
@@ -9,6 +9,7 @@
 #include "Rts.h"
 #include "RtsAPI.h"
 #include "RtsUtils.h"
+#include "RtsFlags.h"
 
 #include "Capability.h"
 #include "Printer.h"
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index f032e577b9d3..fe1cdd24d247 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -8,6 +8,7 @@
 #include "rts/PosixSource.h"
 #include "Rts.h"
 #include "RtsAPI.h"
+#include "RtsFlags.h"
 #include "rts/Bytecodes.h"
 
 // internal headers
diff --git a/rts/LdvProfile.h b/rts/LdvProfile.h
index e746d6cfaa9f..43d8370bc0a3 100644
--- a/rts/LdvProfile.h
+++ b/rts/LdvProfile.h
@@ -33,6 +33,8 @@ RTS_PRIVATE void LdvCensusKillAll ( void );
 // Evacuated objects are no longer needed, so we just store its original size in
 // the LDV field.
 #define SET_EVACUAEE_FOR_LDV(c, size)   \
-    LDVW((c)) = (size)
+    if (doingLDVProfiling()){ \
+      LDVW((c)) = (size); \
+    }
 
 #endif /* PROFILING */
diff --git a/rts/Messages.c b/rts/Messages.c
index 730d81df9ce9..4a3c8767a9ee 100644
--- a/rts/Messages.c
+++ b/rts/Messages.c
@@ -7,6 +7,7 @@
  * --------------------------------------------------------------------------*/
 
 #include "Rts.h"
+#include "RtsFlags.h"
 #include "Messages.h"
 #include "Trace.h"
 #include "Capability.h"
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 5adcce01a002..2284b640d265 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -227,7 +227,7 @@ stg_isMutableByteArrayPinnedzh ( gcptr mba )
  * means we don't track their time of use and eventual destruction. We just
  * assume they get used.
  *
- * Thus it is not necessary to call LDV_RECORD_CREATE when resizing them as we
+ * Thus it is not necessary to call PROF_HEADER_CREATE when resizing them as we
  * used to as the LDV profiler will essentially ignore arrays anyways.
  */
 
@@ -240,7 +240,7 @@ stg_shrinkMutableByteArrayzh ( gcptr mba, W_ new_size )
    OVERWRITING_CLOSURE_MUTABLE(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) +
                                      ROUNDUP_BYTES_TO_WDS(new_size)));
    StgArrBytes_bytes(mba) = new_size;
-   // No need to call LDV_RECORD_CREATE. See Note [LDV profiling and resizing arrays]
+   // No need to call PROF_HEADER_CREATE. See Note [LDV profiling and resizing arrays]
 
    return ();
 }
@@ -264,7 +264,7 @@ stg_resizzeMutableByteArrayzh ( gcptr mba, W_ new_size )
       OVERWRITING_CLOSURE_MUTABLE(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) +
                                         new_size_wds));
       StgArrBytes_bytes(mba) = new_size;
-      // No need to call LDV_RECORD_CREATE. See Note [LDV profiling and resizing arrays]
+      // No need to call PROF_HEADER_CREATE. See Note [LDV profiling and resizing arrays]
 
       return (mba);
    } else {
@@ -308,7 +308,7 @@ again:
    OVERWRITING_CLOSURE_MUTABLE(mba, (BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) +
                                      new_size));
    StgSmallMutArrPtrs_ptrs(mba) = new_size;
-   // No need to call LDV_RECORD_CREATE. See Note [LDV profiling and resizing arrays]
+   // No need to call PROF_HEADER_CREATE. See Note [LDV profiling and resizing arrays]
 
    return ();
 }
@@ -758,14 +758,14 @@ stg_atomicModifyMutVar2zh ( gcptr mv, gcptr f )
     CCCS_ALLOC(THUNK_2_SIZE);
     z = Hp - THUNK_2_SIZE + WDS(1);
     SET_HDR(z, stg_ap_2_upd_info, CCCS);
-    LDV_RECORD_CREATE(z);
+    PROF_HEADER_CREATE(z);
     StgThunk_payload(z,0) = f;
 
     TICK_ALLOC_THUNK_1();
     CCCS_ALLOC(THUNK_1_SIZE);
     y = z - THUNK_1_SIZE;
     SET_HDR(y, stg_sel_0_upd_info, CCCS);
-    LDV_RECORD_CREATE(y);
+    PROF_HEADER_CREATE(y);
     StgThunk_payload(y,0) = z;
 
   retry:
@@ -814,7 +814,7 @@ stg_atomicModifyMutVarzuzh ( gcptr mv, gcptr f )
     CCCS_ALLOC(THUNK_SIZE);
     z = Hp - THUNK_SIZE + WDS(1);
     SET_HDR(z, stg_ap_2_upd_info, CCCS);
-    LDV_RECORD_CREATE(z);
+    PROF_HEADER_CREATE(z);
     StgThunk_payload(z,0) = f;
 
   retry:
@@ -949,7 +949,7 @@ stg_finalizzeWeakzh ( gcptr w )
     //
     unlockClosure(w, stg_DEAD_WEAK_info);
 
-    LDV_RECORD_CREATE(w);
+    PROF_HEADER_CREATE(w);
 
     if (list != stg_NO_FINALIZER_closure) {
       ccall runCFinalizers(list);
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c
index 288c5c8a5418..dad27da10223 100644
--- a/rts/ProfHeap.c
+++ b/rts/ProfHeap.c
@@ -142,6 +142,23 @@ restore_locale( void )
 unsigned int era;
 static uint32_t max_era;
 
+StgWord user_era;
+
+void
+setUserEra (StgWord w){
+  user_era = w;
+}
+
+StgWord
+getUserEra (void){
+  return user_era;
+}
+
+StgWord
+incrementUserEra (StgWord w){
+  return atomic_inc(&user_era, w);
+}
+
 inline void
 initLDVCtr( counter *ctr )
 {
@@ -180,6 +197,11 @@ closureIdentity( const StgClosure *p )
         return p->header.prof.ccs->cc->module;
     case HEAP_BY_DESCR:
         return GET_PROF_DESC(get_itbl(p));
+    case HEAP_BY_ERA:
+        // Static objects should have user_era = 0
+        // MP: If user_era == 0 then closureIdentity returns the NULL pointer, and
+        // the closure is not counted to the census
+        return (void *)p->header.prof.hp.era;
     case HEAP_BY_TYPE:
         return GET_PROF_TYPE(get_itbl(p));
     case HEAP_BY_RETAINER:
@@ -220,21 +242,6 @@ closureIdentity( const StgClosure *p )
 /* --------------------------------------------------------------------------
  * Profiling type predicates
  * ----------------------------------------------------------------------- */
-#if defined(PROFILING)
-STATIC_INLINE bool
-doingLDVProfiling( void )
-{
-    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
-            || RtsFlags.ProfFlags.bioSelector != NULL);
-}
-
-bool
-doingRetainerProfiling( void )
-{
-    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER
-            || RtsFlags.ProfFlags.retainerSelector != NULL);
-}
-#endif /* PROFILING */
 
 // Processes a closure 'c' being destroyed whose size is 'size'.
 // Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
@@ -356,6 +363,10 @@ freeEra(Census *census)
 static void
 nextEra( void )
 {
+    if (user_era > 0 && RtsFlags.ProfFlags.incrementUserEra){
+      user_era++;
+    }
+
 #if defined(PROFILING)
     if (doingLDVProfiling()) {
         era++;
@@ -483,6 +494,14 @@ initHeapProfiling(void)
 #endif
 #endif
 
+#if defined(PROFILING)
+    if (doingErasProfiling()){
+      user_era = 1;
+    }
+#else
+    user_era = 0;
+#endif
+
     // we only count eras if we're doing LDV profiling.  Otherwise era
     // is fixed at zero.
 #if defined(PROFILING)
@@ -703,6 +722,9 @@ closureSatisfiesConstraints( const StgClosure* p )
                                 RtsFlags.ProfFlags.typeSelector );
        if (!b) return false;
    }
+   if (RtsFlags.ProfFlags.eraSelector) {
+      return (p->header.prof.hp.era == RtsFlags.ProfFlags.eraSelector);
+   }
    if (RtsFlags.ProfFlags.retainerSelector) {
        RetainerSet *rs;
        uint32_t i;
@@ -939,6 +961,12 @@ dumpCensus( Census *census )
             traceHeapProfSampleCostCentre(0, (CostCentreStack *)ctr->identity,
                                           count * sizeof(W_));
             break;
+        case HEAP_BY_ERA:
+            fprintf(hp_file, "%lu", (StgWord)ctr->identity);
+            char str_era[100];
+            sprintf(str_era, "%lu", (StgWord)ctr->identity);
+            traceHeapProfSampleString(0, str_era, count * sizeof(W_));
+            break;
         case HEAP_BY_MOD:
         case HEAP_BY_DESCR:
         case HEAP_BY_TYPE:
diff --git a/rts/ProfHeap.h b/rts/ProfHeap.h
index f63433feceb6..2b473ac894be 100644
--- a/rts/ProfHeap.h
+++ b/rts/ProfHeap.h
@@ -16,9 +16,4 @@ void        endHeapProfiling   (void);
 void        freeHeapProfiling  (void);
 bool        strMatchesSelector (const char* str, const char* sel);
 
-#if defined(PROFILING)
-// doingRetainerProfiling: `-hr` or `-hr<cc> -h<x>`
-bool doingRetainerProfiling(void);
-#endif
-
 #include "EndPrivate.h"
diff --git a/rts/Profiling.c b/rts/Profiling.c
index 3e5b49d96fdc..a9a53679638d 100644
--- a/rts/Profiling.c
+++ b/rts/Profiling.c
@@ -10,6 +10,7 @@
 
 #include "rts/PosixSource.h"
 #include "Rts.h"
+#include "RtsFlags.h"
 
 #include "RtsUtils.h"
 #include "Profiling.h"
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index 6601a929c657..31208c2f6fb8 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -8,6 +8,7 @@
 
 #include "rts/PosixSource.h"
 #include "Rts.h"
+#include "RtsFlags.h"
 
 #include "sm/Storage.h"
 #include "Threads.h"
diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c
index 5221059b3145..e9c073db74c9 100644
--- a/rts/RtsAPI.c
+++ b/rts/RtsAPI.c
@@ -9,6 +9,7 @@
 #include "rts/PosixSource.h"
 #include "Rts.h"
 #include "RtsAPI.h"
+#include "RtsFlags.h"
 #include "HsFFI.h"
 
 #include "RtsUtils.h"
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index bf25bab9c7e9..d658a523efbb 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -217,6 +217,7 @@ void initRtsFlagsDefaults(void)
     RtsFlags.ProfFlags.heapProfileInterval = USToTime(100000); // 100ms
     RtsFlags.ProfFlags.startHeapProfileAtStartup = true;
     RtsFlags.ProfFlags.startTimeProfileAtStartup = true;
+    RtsFlags.ProfFlags.incrementUserEra = false;
 
 #if defined(PROFILING)
     RtsFlags.ProfFlags.showCCSOnException = false;
@@ -229,6 +230,7 @@ void initRtsFlagsDefaults(void)
     RtsFlags.ProfFlags.ccsSelector        = NULL;
     RtsFlags.ProfFlags.retainerSelector   = NULL;
     RtsFlags.ProfFlags.bioSelector        = NULL;
+    RtsFlags.ProfFlags.eraSelector        = 0;
 #endif
 
 #if defined(TRACING)
@@ -384,6 +386,7 @@ usage_text[] = {
 "                 d = closure description",
 "                 y = type description",
 "                 i = info table",
+"                 e = era",
 "                 r = retainer",
 "                 b = biography (LAG,DRAG,VOID,USE)",
 "  A subset of closures may be selected thusly:",
@@ -394,6 +397,7 @@ usage_text[] = {
 "    -hy<typ>...  closures with specified type descriptions",
 "    -hr<cc>...   closures with specified retainers",
 "    -hb<bio>...  closures with specified biographies (lag,drag,void,use)",
+"    -he<era>...  closures with specified era",
 "",
 "  -R<size>       Set the maximum retainer set size (default: 8)",
 "",
@@ -402,6 +406,8 @@ usage_text[] = {
 "",
 "  -xt            Include threads (TSOs) in a heap profile",
 "",
+"  --automatic-era-increment Increment the era on each major garbage collection",
+"",
 "  -xc      Show current cost centre stack on raising an exception",
 #else /* PROFILING */
 "  -h       Heap residency profile (output file <program>.hp)",
@@ -1161,6 +1167,13 @@ error = true;
                       RtsFlags.ProfFlags.startTimeProfileAtStartup = false;
                       break;
                   }
+
+                  else if (strequal("automatic-era-increment",
+                               &rts_argv[arg][2])) {
+                      OPTION_SAFE;
+                      RtsFlags.ProfFlags.incrementUserEra = true;
+                      break;
+                  }
                   else {
                       OPTION_SAFE;
                       errorBelch("unknown RTS option: %s",rts_argv[arg]);
@@ -2270,6 +2283,7 @@ static bool read_heap_profiling_flag(const char *arg)
     case 'r':
     case 'B':
     case 'b':
+    case 'e':
     case 'T':
         if (arg[2] != '\0' && arg[3] != '\0') {
             {
@@ -2315,6 +2329,10 @@ static bool read_heap_profiling_flag(const char *arg)
                 case 'b': // biography select
                     RtsFlags.ProfFlags.bioSelector = selector;
                     break;
+                case 'E':
+                case 'e': // era select
+                    RtsFlags.ProfFlags.eraSelector = strtoul(selector, (char **) NULL, 10);
+                    break;
                 default:
                     stgFree(selector);
                 }
@@ -2360,6 +2378,9 @@ static bool read_heap_profiling_flag(const char *arg)
         case 'T':
             RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CLOSURE_TYPE;
             break;
+        case 'e':
+            RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_ERA;
+            break;
         }
         break;
 
@@ -2698,3 +2719,27 @@ bool is_io_mng_native_p (void)
   return false;
 #endif
 }
+
+
+#if defined(PROFILING)
+bool
+doingLDVProfiling( void )
+{
+    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
+            || RtsFlags.ProfFlags.bioSelector != NULL);
+}
+
+bool
+doingRetainerProfiling( void )
+{
+    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER
+            || RtsFlags.ProfFlags.retainerSelector != NULL);
+}
+bool
+doingErasProfiling( void )
+{
+    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_ERA
+            || RtsFlags.ProfFlags.eraSelector != 0);
+}
+#endif /* PROFILING */
+
diff --git a/rts/RtsFlags.h b/rts/RtsFlags.h
index 6c81081d4da2..05a00af4e773 100644
--- a/rts/RtsFlags.h
+++ b/rts/RtsFlags.h
@@ -24,6 +24,11 @@ void initRtsFlagsDefaults (void);
 void setupRtsFlags        (int *argc, char *argv[], RtsConfig rtsConfig);
 void freeRtsArgs          (void);
 bool is_io_mng_native_p   (void);
+#if defined(PROFILING)
+bool doingLDVProfiling (void);
+bool doingRetainerProfiling(void);
+bool doingErasProfiling(void);
+#endif
 
 extern RtsConfig rtsConfig;
 
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index e49f19c500ef..baf93cc38f28 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -464,7 +464,8 @@ extern char **environ;
       SymI_HasProto(mkCostCentre)               \
       SymI_HasProto(registerCcList)             \
       SymI_HasProto(registerCcsList)            \
-      SymI_HasProto(era)
+      SymI_HasProto(era)                        \
+      SymI_HasProto(user_era)
 #else
 #define RTS_PROF_SYMBOLS /* empty */
 #endif
@@ -921,6 +922,9 @@ extern char **environ;
       SymI_HasProto(stopProfTimer)                                      \
       SymI_HasProto(startHeapProfTimer)                                 \
       SymI_HasProto(stopHeapProfTimer)                                  \
+      SymI_HasProto(setUserEra)                                         \
+      SymI_HasProto(incrementUserEra)                                   \
+      SymI_HasProto(getUserEra)                                         \
       SymI_HasProto(requestHeapCensus)                                  \
       SymI_HasProto(atomic_inc)                                         \
       SymI_HasProto(atomic_inc64)                                       \
diff --git a/rts/STM.c b/rts/STM.c
index dd136100b085..69b00fd12756 100644
--- a/rts/STM.c
+++ b/rts/STM.c
@@ -85,6 +85,7 @@
 
 #include "rts/PosixSource.h"
 #include "Rts.h"
+#include "RtsFlags.h"
 
 #include "RtsUtils.h"
 #include "Schedule.h"
diff --git a/rts/Schedule.c b/rts/Schedule.c
index be19aac18e5d..4f0dcf3e81ce 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -9,6 +9,7 @@
 #include "rts/PosixSource.h"
 #define KEEP_LOCKCLOSURE
 #include "Rts.h"
+#include "RtsFlags.h"
 
 #include "sm/Storage.h"
 #include "RtsUtils.h"
diff --git a/rts/ThreadLabels.c b/rts/ThreadLabels.c
index 55154f1de98e..4c7ab57b6287 100644
--- a/rts/ThreadLabels.c
+++ b/rts/ThreadLabels.c
@@ -12,6 +12,7 @@
 
 #include "ThreadLabels.h"
 #include "RtsUtils.h"
+#include "RtsFlags.h"
 #include "Hash.h"
 #include "Trace.h"
 
diff --git a/rts/Threads.c b/rts/Threads.c
index ef1f5c790af4..e61f9918eb61 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -8,6 +8,7 @@
 
 #include "rts/PosixSource.h"
 #include "Rts.h"
+#include "RtsFlags.h"
 
 #include "Capability.h"
 #include "Updates.h"
diff --git a/rts/Updates.h b/rts/Updates.h
index 58967c332131..9bdc9d72c0a1 100644
--- a/rts/Updates.h
+++ b/rts/Updates.h
@@ -10,8 +10,10 @@
 
 #if !defined(CMINUSMINUS)
 #include "BeginPrivate.h"
+#include "RtsFlags.h"
 #endif
 
+
 /* Note [Thunks, blackholes, and indirections]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * Consider the following STG binding:
@@ -418,10 +420,10 @@
    -------------------------------------------------------------------------- */
 
 /* LDV profiling:
- *   After all, we do *NOT* need to call LDV_RECORD_CREATE() for IND
+ *   After all, we do *NOT* need to call PROF_HEADER_CREATE() for IND
  *   closures because they are inherently used. But, it corrupts
  *   the invariants that every closure keeps its creation time in the profiling
- *  field. So, we call LDV_RECORD_CREATE().
+ *  field. So, we call PROF_HEADER_CREATE().
  */
 
 /*
@@ -462,7 +464,7 @@
     OVERWRITING_CLOSURE(p1);                                    \
     %release StgInd_indirectee(p1) = p2;                        \
     %release SET_INFO(p1, stg_BLACKHOLE_info);                  \
-    LDV_RECORD_CREATE(p1);                                      \
+    PROF_HEADER_CREATE(p1);                                     \
     and_then;
 
 #else /* !CMINUSMINUS */
diff --git a/rts/Weak.c b/rts/Weak.c
index 89096878e75c..b4994f392177 100644
--- a/rts/Weak.c
+++ b/rts/Weak.c
@@ -9,6 +9,7 @@
 #include "rts/PosixSource.h"
 #include "Rts.h"
 #include "RtsAPI.h"
+#include "RtsFlags.h"
 
 #include "RtsUtils.h"
 #include "Weak.h"
diff --git a/rts/eventlog/EventLog.c b/rts/eventlog/EventLog.c
index f50a56e60fc0..e90a347de859 100644
--- a/rts/eventlog/EventLog.c
+++ b/rts/eventlog/EventLog.c
@@ -1193,6 +1193,8 @@ static HeapProfBreakdown getHeapProfBreakdown(void)
         return HEAP_PROF_BREAKDOWN_CLOSURE_TYPE;
     case HEAP_BY_INFO_TABLE:
         return HEAP_PROF_BREAKDOWN_INFO_TABLE;
+    case HEAP_BY_ERA:
+        return HEAP_PROF_BREAKDOWN_ERA;
     default:
         barf("getHeapProfBreakdown: unknown heap profiling mode");
     }
diff --git a/rts/include/rts/EventLogFormat.h b/rts/include/rts/EventLogFormat.h
index 2eaeb0b582b7..7888d119b2f2 100644
--- a/rts/include/rts/EventLogFormat.h
+++ b/rts/include/rts/EventLogFormat.h
@@ -125,7 +125,8 @@ typedef enum {
     HEAP_PROF_BREAKDOWN_RETAINER,
     HEAP_PROF_BREAKDOWN_BIOGRAPHY,
     HEAP_PROF_BREAKDOWN_CLOSURE_TYPE,
-    HEAP_PROF_BREAKDOWN_INFO_TABLE
+    HEAP_PROF_BREAKDOWN_INFO_TABLE,
+    HEAP_PROF_BREAKDOWN_ERA
 } HeapProfBreakdown;
 
 #if !defined(EVENTLOG_CONSTANTS_ONLY)
diff --git a/rts/include/rts/Flags.h b/rts/include/rts/Flags.h
index 3861a1852d6c..f73f6978c7cf 100644
--- a/rts/include/rts/Flags.h
+++ b/rts/include/rts/Flags.h
@@ -144,11 +144,13 @@ typedef struct _PROFILING_FLAGS {
 
 # define HEAP_BY_CLOSURE_TYPE   8
 # define HEAP_BY_INFO_TABLE     9
+# define HEAP_BY_ERA            10
 
     Time        heapProfileInterval; /* time between samples */
     uint32_t    heapProfileIntervalTicks; /* ticks between samples (derived) */
     bool        startHeapProfileAtStartup; /* true if we start profiling from program startup */
     bool        startTimeProfileAtStartup; /* true if we start profiling from program startup */
+    bool        incrementUserEra;
 
 
     bool        showCCSOnException;
@@ -163,6 +165,7 @@ typedef struct _PROFILING_FLAGS {
     const char*         ccSelector;
     const char*         ccsSelector;
     const char*         retainerSelector;
+    StgWord             eraSelector;
     const char*         bioSelector;
 
 } PROFILING_FLAGS;
diff --git a/rts/include/rts/prof/CCS.h b/rts/include/rts/prof/CCS.h
index a155e1385ba2..65819d80151d 100644
--- a/rts/include/rts/prof/CCS.h
+++ b/rts/include/rts/prof/CCS.h
@@ -168,6 +168,7 @@ extern CostCentreStack CCS_IDLE[];       // capability is idle
 #endif /* IN_STG_CODE */
 
 extern unsigned int RTS_VAR(era);
+extern StgWord RTS_VAR(user_era);
 
 /* -----------------------------------------------------------------------------
  * Functions
diff --git a/rts/include/rts/prof/Heap.h b/rts/include/rts/prof/Heap.h
index 90700c809bd9..6c87d6b8b077 100644
--- a/rts/include/rts/prof/Heap.h
+++ b/rts/include/rts/prof/Heap.h
@@ -22,3 +22,6 @@
 void requestHeapCensus ( void );
 void startHeapProfTimer ( void );
 void stopHeapProfTimer ( void );
+void setUserEra ( StgWord w );
+StgWord getUserEra ( void );
+StgWord incrementUserEra ( StgWord w );
diff --git a/rts/include/rts/prof/LDV.h b/rts/include/rts/prof/LDV.h
index 73f77865379e..b8b4e7c8455f 100644
--- a/rts/include/rts/prof/LDV.h
+++ b/rts/include/rts/prof/LDV.h
@@ -18,6 +18,8 @@
 /* retrieves the LDV word from closure c */
 #define LDVW(c)                 (((StgClosure *)(c))->header.prof.hp.ldvw)
 
+#define ERAW(c)                 (((StgClosure *)(c))->header.prof.hp.era)
+
 /*
  * Stores the creation time for closure c.
  * This macro is called at the very moment of closure creation.
@@ -33,7 +35,12 @@
 #else
 
 #define LDV_RECORD_CREATE(c)   \
-  LDVW((c)) = ((StgWord)RTS_DEREF(era) << LDV_SHIFT) | LDV_STATE_CREATE
+  if (doingLDVProfiling()){ \
+    LDVW((c)) = ((StgWord)RTS_DEREF(era) << LDV_SHIFT) | LDV_STATE_CREATE; \
+  }
+
+#define ERA_RECORD_CREATE(c)   \
+  ERAW((c)) = (StgWord)RTS_DEREF(user_era);
 
 #endif
 
diff --git a/rts/include/rts/storage/ClosureMacros.h b/rts/include/rts/storage/ClosureMacros.h
index 1c9d050b97d1..dbed34d490d6 100644
--- a/rts/include/rts/storage/ClosureMacros.h
+++ b/rts/include/rts/storage/ClosureMacros.h
@@ -158,9 +158,29 @@ EXTERN_INLINE StgHalfWord GET_TAG(const StgClosure *con)
 
  [1]: Technically we should set 'rs' to `NULL | flip`.
  */
-#define SET_PROF_HDR(c,ccs_)            \
-        ((c)->header.prof.ccs = ccs_,   \
-        LDV_RECORD_CREATE((c)))
+/*
+  MP: Various other places use the check era > 0 to check whether LDV profiling
+  is enabled. The use of these predicates here is the reason for including RtsFlags.h in
+  a lot of places.
+
+  We could also check user_era > 0 for eras profiling, which would remove the need
+  for so many includes.
+*/
+#define SET_PROF_HDR(c, ccs_) \
+  { \
+  (c)->header.prof.ccs = ccs_; \
+  if (doingLDVProfiling()) { \
+    LDV_RECORD_CREATE((c)); \
+  } \
+\
+  if (doingRetainerProfiling()) { \
+    LDV_RECORD_CREATE((c)); \
+  }; \
+  if (doingErasProfiling()){ \
+    ERA_RECORD_CREATE((c)); \
+  }; \
+  }
+
 #else
 #define SET_PROF_HDR(c,ccs)
 #endif
@@ -185,6 +205,7 @@ EXTERN_INLINE StgHalfWord GET_TAG(const StgClosure *con)
 #define OVERWRITE_INFO(c, new_info)                             \
     OVERWRITING_CLOSURE((StgClosure *)(c));                     \
     SET_INFO_RELAXED((StgClosure *)(c), (new_info));                    \
+    /* MP: Should this be SET_PROF_HEADER?  */ \
     LDV_RECORD_CREATE(c);
 
 /* -----------------------------------------------------------------------------
diff --git a/rts/include/rts/storage/Closures.h b/rts/include/rts/storage/Closures.h
index 915dfc4f6fca..f0b8b346e26f 100644
--- a/rts/include/rts/storage/Closures.h
+++ b/rts/include/rts/storage/Closures.h
@@ -29,6 +29,7 @@ typedef struct {
   union {
     StgWord trav;             /* Heap traversal */
     StgWord ldvw;             /* Lag/Drag/Void Word */
+    StgWord era;              /* User-era */
   } hp;
     // Heap profiling header. This field is shared among the various heap
     // profiling modes. Currently it is used by ProfHeap.c for Lag/Drag/Void
diff --git a/rts/include/stg/MiscClosures.h b/rts/include/stg/MiscClosures.h
index c5a1681df606..ef303bd4e039 100644
--- a/rts/include/stg/MiscClosures.h
+++ b/rts/include/stg/MiscClosures.h
@@ -627,6 +627,7 @@ extern StgWord RTS_VAR(stable_name_table);
 
 // Profiling.c
 extern unsigned int RTS_VAR(era);
+extern StgWord RTS_VAR(user_era);
 extern unsigned int RTS_VAR(entering_PAP);
 extern StgWord      CCS_OVERHEAD[];
 extern StgWord      CCS_SYSTEM[];
diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c
index 1f40402c6347..50918d11cf99 100644
--- a/rts/sm/CNF.c
+++ b/rts/sm/CNF.c
@@ -13,6 +13,7 @@
 #include "rts/PosixSource.h"
 #include "Rts.h"
 #include "RtsUtils.h"
+#include "RtsFlags.h"
 
 #include "Capability.h"
 #include "GC.h"
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 4eba06692831..724ce6594a4d 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -16,6 +16,7 @@
 
 #include "rts/PosixSource.h"
 #include "Rts.h"
+#include "RtsFlags.h"
 
 #include "Evac.h"
 #include "Storage.h"
@@ -260,7 +261,9 @@ copy_tag(StgClosure **p, const StgInfoTable *info,
             // profiler when it encounters this closure in
             // processHeapClosureForDead.  So we reset the LDVW field
             // here.
-            LDVW(to) = 0;
+            if (doingLDVProfiling()){
+              LDVW(to) = 0;
+            }
 #endif
             return evacuate(p); // does the failed_to_evac stuff
         } else {
@@ -1213,9 +1216,13 @@ unchain_thunk_selectors(StgSelector *p, StgClosure *val)
             SET_INFO_RELEASE((StgClosure *)p, &stg_IND_info);
         }
 
+#if defined(PROFILING)
         // For the purposes of LDV profiling, we have created an
         // indirection.
-        LDV_RECORD_CREATE(p);
+        if (doingLDVProfiling()){
+          LDV_RECORD_CREATE(p);
+        }
+#endif
 
         p = prev;
     }
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 14a858e141ed..722d9dc19b29 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -25,6 +25,7 @@
 
 #include "rts/PosixSource.h"
 #include "Rts.h"
+#include "RtsFlags.h"
 
 #include "Storage.h"
 #include "GCThread.h"
diff --git a/testsuite/tests/interface-stability/ghc-experimental-exports.stdout b/testsuite/tests/interface-stability/ghc-experimental-exports.stdout
index 34480e4ef503..1a8abff937e1 100644
--- a/testsuite/tests/interface-stability/ghc-experimental-exports.stdout
+++ b/testsuite/tests/interface-stability/ghc-experimental-exports.stdout
@@ -1,3 +1,9 @@
 
+module GHC.Profiling.Eras where
+  -- Safety: Trustworthy
+  getUserEra :: GHC.Types.IO GHC.Types.Word
+  incrementUserEra :: GHC.Types.Word -> GHC.Types.IO GHC.Types.Word
+  setUserEra :: GHC.Types.Word -> GHC.Types.IO ()
+
 
 -- Instances:
-- 
GitLab