diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y index 97f07d3695a88b995ac0c16d4de6298a1297685d..07847af274277ac6215bb94fe78343421970d7fa 100644 --- a/compiler/GHC/Cmm/Parser.y +++ b/compiler/GHC/Cmm/Parser.y @@ -1273,7 +1273,7 @@ stmtMacros = listToUFM [ ( fsLit "POP_ARG_REGS", \[live_regs] -> emitPopArgRegs live_regs ), ( fsLit "LDV_ENTER", \[e] -> ldvEnter e ), - ( fsLit "LDV_RECORD_CREATE", \[e] -> ldvRecordCreate e ), + ( fsLit "PROF_HEADER_CREATE", \[e] -> profHeaderCreate e ), ( fsLit "PUSH_UPD_FRAME", \[sp,e] -> emitPushUpdateFrame sp e ), ( fsLit "SET_HDR", \[ptr,info,ccs] -> diff --git a/compiler/GHC/StgToCmm/Prof.hs b/compiler/GHC/StgToCmm/Prof.hs index bc01198b7c8ee599970d5e4c338ea9ce740d027b..cea5991300eef7b77ed1cd2fbbe2a43b355d86d4 100644 --- a/compiler/GHC/StgToCmm/Prof.hs +++ b/compiler/GHC/StgToCmm/Prof.hs @@ -23,7 +23,7 @@ module GHC.StgToCmm.Prof ( saveCurrentCostCentre, restoreCurrentCostCentre, -- Lag/drag/void stuff - ldvEnter, ldvEnterClosure, ldvRecordCreate + ldvEnter, ldvEnterClosure, profHeaderCreate ) where import GHC.Prelude @@ -88,14 +88,14 @@ costCentreFrom platform cl = CmmLoad (cmmOffsetB platform cl (pc_OFFSET_StgHeade -- | The profiling header words in a static closure staticProfHdr :: Profile -> CostCentreStack -> [CmmLit] staticProfHdr profile ccs - | profileIsProfiling profile = [mkCCostCentreStack ccs, staticLdvInit platform] + | profileIsProfiling profile = [mkCCostCentreStack ccs, staticProfHeaderInit platform] | otherwise = [] where platform = profilePlatform profile -- | Profiling header words in a dynamic closure dynProfHdr :: Profile -> CmmExpr -> [CmmExpr] dynProfHdr profile ccs - | profileIsProfiling profile = [ccs, dynLdvInit (profilePlatform profile)] + | profileIsProfiling profile = [ccs, dynProfInit (profilePlatform profile)] | otherwise = [] -- | Initialise the profiling field of an update frame @@ -322,33 +322,62 @@ bumpSccCount platform ccs ----------------------------------------------------------------------------- -- --- Lag/drag/void stuff +-- Profiling header stuff -- ----------------------------------------------------------------------------- --- --- Initial value for the LDV field in a static closure --- -staticLdvInit :: Platform -> CmmLit -staticLdvInit = zeroCLit --- --- Initial value of the LDV field in a dynamic closure --- +-- Header initialisation for static objects happens to coicincide for the +-- three uses of the header +-- * LDV profiling = 0 (era = 0, LDV_STATE_CREATE) +-- * Eras profiling = 0 (user_era = 0, ignored by profiler) +-- * Retainer profiling = 0 + +staticProfHeaderInit :: Platform -> CmmLit +staticProfHeaderInit plat = zeroCLit plat + + +-- Dynamic initialisation + +dynErasInit :: Platform -> CmmExpr +dynErasInit platform = loadUserEra platform + dynLdvInit :: Platform -> CmmExpr -dynLdvInit platform = -- (era << LDV_SHIFT) | LDV_STATE_CREATE +dynLdvInit platform = +-- (era << LDV_SHIFT) | LDV_STATE_CREATE CmmMachOp (mo_wordOr platform) [ CmmMachOp (mo_wordShl platform) [loadEra platform, mkIntExpr platform (pc_LDV_SHIFT (platformConstants platform))], CmmLit (mkWordCLit platform (pc_ILDV_STATE_CREATE (platformConstants platform))) ] --- --- Initialise the LDV word of a new closure --- -ldvRecordCreate :: CmmExpr -> FCode () -ldvRecordCreate closure = do + +-- | If LDV profiling the user_era = 0 +-- , if eras profiling then (ldv)era = 0, so we can initialise correctly by OR the two expressions. +dynProfInit :: Platform -> CmmExpr +dynProfInit platform = CmmMachOp (mo_wordOr platform) [(dynLdvInit platform), dynErasInit platform] + + +-- | Initialise the profiling word of a new dynamic closure +-- * When LDV profiling is enabled (era > 0) - Initialise to the LDV word +-- * When eras profiling is enabled (user_era > 0) - Initialise to current user_era +profHeaderCreate :: CmmExpr -> FCode () +profHeaderCreate closure = do platform <- getPlatform - emit $ mkStore (ldvWord platform closure) (dynLdvInit platform) + let prof_header_wd = profHeaderWord platform closure + + let check_ldv = mkCmmIfThenElse (CmmMachOp (mo_wordUGt platform) [loadEra platform, CmmLit (zeroCLit platform)]) + let check_eras = mkCmmIfThenElse (CmmMachOp (mo_wordUGt platform) [loadUserEra platform, CmmLit (zeroCLit platform)]) + -- Case 2: user_era > 0, eras profiling is enabled + check_1 <- check_eras (mkStore prof_header_wd (dynErasInit platform)) mkNop + -- Case 1: era > 0, LDV profiling is enabled + check_2 <- check_ldv (mkStore prof_header_wd (dynLdvInit platform)) check_1 + emit check_2 + + + + + + -- -- | Called when a closure is entered, marks the closure as having @@ -368,27 +397,35 @@ ldvEnter cl_ptr = do platform <- getPlatform let constants = platformConstants platform -- don't forget to subtract node's tag - ldv_wd = ldvWord platform cl_ptr + ldv_wd = profHeaderWord platform cl_ptr new_ldv_wd = cmmOrWord platform (cmmAndWord platform (cmmLoadBWord platform ldv_wd) (CmmLit (mkWordCLit platform (pc_ILDV_CREATE_MASK constants)))) (cmmOrWord platform (loadEra platform) (CmmLit (mkWordCLit platform (pc_ILDV_STATE_USE constants)))) - ifProfiling $ - -- if (era > 0) { + ifProfiling $ do + -- if (era > 0) { -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | -- era | LDV_STATE_USE } emit =<< mkCmmIfThenElse (CmmMachOp (mo_wordUGt platform) [loadEra platform, CmmLit (zeroCLit platform)]) (mkStore ldv_wd new_ldv_wd) mkNop + + loadEra :: Platform -> CmmExpr loadEra platform = CmmMachOp (MO_UU_Conv (cIntWidth platform) (wordWidth platform)) [CmmLoad (mkLblExpr (mkRtsCmmDataLabel (fsLit "era"))) (cInt platform) NaturallyAligned] +loadUserEra :: Platform -> CmmExpr +loadUserEra platform = CmmLoad (mkLblExpr (mkRtsCmmDataLabel (fsLit "user_era"))) + (bWord platform) + NaturallyAligned + -- | Takes the address of a closure, and returns --- the address of the LDV word in the closure -ldvWord :: Platform -> CmmExpr -> CmmExpr -ldvWord platform closure_ptr +-- the address of the prof header word in the closure (this is used to store LDV info, +-- retainer profiling info and eras profiling info). +profHeaderWord :: Platform -> CmmExpr -> CmmExpr +profHeaderWord platform closure_ptr = cmmOffsetB platform closure_ptr (pc_OFFSET_StgHeader_ldvw (platformConstants platform)) diff --git a/docs/users_guide/profiling.rst b/docs/users_guide/profiling.rst index 85c59fb2f12608b3b5fce7ca82aa2c8b8bc6644a..03812db8b405ee29986f5696911c9f92f121b6cc 100644 --- a/docs/users_guide/profiling.rst +++ b/docs/users_guide/profiling.rst @@ -898,6 +898,17 @@ following RTS options select which break-down to use: which have function type or unknown/polymorphic type, the string will represent an approximation to the actual type. +.. rts-flag:: -he + + :since: 9.10.1 + + *Requires* :ghc-flag:`-prof`. Break down the graph by era. + + Each closure is tagged with the era in which it is created. Eras start at 1 + and can be set in your program to domain specific values using functions from + ``GHC.Profiling.Eras`` or incremented automatically by the + :rts-flag:`--automatic-era-increment`. + .. rts-flag:: -hr *Requires* :ghc-flag:`-prof`. Break down the graph by retainer set. Retainer @@ -965,6 +976,11 @@ follows: Restrict the profile to closures with the specified types. +.. rts-flag:: -he ⟨era⟩ + :noindex: + + Restrict the profile to the specified era. + .. rts-flag:: -hr ⟨cc⟩ :noindex: @@ -1017,6 +1033,13 @@ There are three more options which relate to heap profiling: option is enabled, it's expected that the user will manually start time profiling or request specific samples using functions from ``GHC.Profiling``. +.. rts-flag:: --automatic-era-increment + + :since: 9.10.1 + + Increment the era by 1 on each major garbage collection. This is used + in conjunction with :rts-flag:`-he`. + .. rts-flag:: --null-eventlog-writer :since: 9.2.2 diff --git a/libraries/ghc-experimental/ghc-experimental.cabal b/libraries/ghc-experimental/ghc-experimental.cabal index 7c7cf61d50361f6545c4fb8e936eeeb5c918a591..2ba118a71c8fa97d7aed53b5656d0d4a99be4e5b 100644 --- a/libraries/ghc-experimental/ghc-experimental.cabal +++ b/libraries/ghc-experimental/ghc-experimental.cabal @@ -22,8 +22,7 @@ common warnings library import: warnings - exposed-modules: - other-modules: Dummy + exposed-modules: GHC.Profiling.Eras other-extensions: build-depends: base ^>=4.19, ghc-internal >= 0.1 && < 0.2 diff --git a/libraries/ghc-experimental/src/Dummy.hs b/libraries/ghc-experimental/src/Dummy.hs deleted file mode 100644 index 5ae990e20d9fe16a4aff1506a6b05eae3ad07f3d..0000000000000000000000000000000000000000 --- a/libraries/ghc-experimental/src/Dummy.hs +++ /dev/null @@ -1,10 +0,0 @@ -{-# LANGUAGE NoImplicitPrelude #-} - --- | This module merely serves as a placeholder since --- Haskell packages must contain at least one module. --- This can be dropped once a real module has been introduced to --- @ghc-experimental@. -module Dummy () where - --- for build system ordering -import GHC.Base () diff --git a/libraries/ghc-experimental/src/GHC/Profiling/Eras.hs b/libraries/ghc-experimental/src/GHC/Profiling/Eras.hs new file mode 100644 index 0000000000000000000000000000000000000000..508ae2f1994bf4e3f0c903b15862c8b6024c3de3 --- /dev/null +++ b/libraries/ghc-experimental/src/GHC/Profiling/Eras.hs @@ -0,0 +1,20 @@ +{-# LANGUAGE Trustworthy #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | TODO move this module into ghc-internals +module GHC.Profiling.Eras ( setUserEra + , getUserEra + , incrementUserEra + ) where + +import GHC.Base + +-- | Set the heap profiling era, setting the era to 0 will stop closures being +-- counted. +foreign import ccall setUserEra :: Word -> IO () + +-- | Query the profiling era +foreign import ccall getUserEra :: IO Word + +-- | Increment the era by a given amount, and return the new era. +foreign import ccall incrementUserEra :: Word -> IO Word diff --git a/libraries/ghc-internal/src/GHC/Profiling.hs b/libraries/ghc-internal/src/GHC/Profiling.hs index 9b8ff06fd21c302530e57cd6c337cfdc6319bb69..b7bfa911f94a5174c2d1a5d54f40da472e830ea5 100644 --- a/libraries/ghc-internal/src/GHC/Profiling.hs +++ b/libraries/ghc-internal/src/GHC/Profiling.hs @@ -50,3 +50,4 @@ foreign import ccall startHeapProfTimer :: IO () -- -- @since 4.16.0.0 foreign import ccall stopHeapProfTimer :: IO () + diff --git a/rts/CloneStack.c b/rts/CloneStack.c index 3ec96aa19d06c32e4561a258b61f134a647514dd..fa0c97f8454af4c9126b89f18b7c0c809929fc96 100644 --- a/rts/CloneStack.c +++ b/rts/CloneStack.c @@ -7,6 +7,7 @@ *---------------------------------------------------------------------------*/ #include "Rts.h" +#include "RtsFlags.h" #include "rts/Messages.h" #include "Messages.h" #include "rts/Types.h" diff --git a/rts/Continuation.c b/rts/Continuation.c index db687cb4214812062ca399ee640eddd859b51f99..455f327c2e6661aa84e725d6a9e72eea94233e9b 100644 --- a/rts/Continuation.c +++ b/rts/Continuation.c @@ -8,6 +8,7 @@ #include "rts/PosixSource.h" #include "Rts.h" +#include "RtsFlags.h" #include "sm/Storage.h" #include "sm/Sanity.h" diff --git a/rts/Heap.c b/rts/Heap.c index 1ecf1b6e1456af1daa35c9c0e5d1e3e45764a43c..85ff1e360bdb7f4a5953a67b41249b909a011c6f 100644 --- a/rts/Heap.c +++ b/rts/Heap.c @@ -9,6 +9,7 @@ #include "Rts.h" #include "RtsAPI.h" #include "RtsUtils.h" +#include "RtsFlags.h" #include "Capability.h" #include "Printer.h" diff --git a/rts/Interpreter.c b/rts/Interpreter.c index f032e577b9d3dd8f976d58b951f7c837047fcb9f..fe1cdd24d247663d5808f6ba8eea41cf21754735 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -8,6 +8,7 @@ #include "rts/PosixSource.h" #include "Rts.h" #include "RtsAPI.h" +#include "RtsFlags.h" #include "rts/Bytecodes.h" // internal headers diff --git a/rts/LdvProfile.h b/rts/LdvProfile.h index e746d6cfaa9fa1a6dfdf43870041d6a348ec9336..43d8370bc0a3433e622f91cd55876ca8043670d5 100644 --- a/rts/LdvProfile.h +++ b/rts/LdvProfile.h @@ -33,6 +33,8 @@ RTS_PRIVATE void LdvCensusKillAll ( void ); // Evacuated objects are no longer needed, so we just store its original size in // the LDV field. #define SET_EVACUAEE_FOR_LDV(c, size) \ - LDVW((c)) = (size) + if (doingLDVProfiling()){ \ + LDVW((c)) = (size); \ + } #endif /* PROFILING */ diff --git a/rts/Messages.c b/rts/Messages.c index 730d81df9ce916a0b992beb93b0fde3a435ee9ce..4a3c8767a9ee44d9e95e769e0a02ef28771a321e 100644 --- a/rts/Messages.c +++ b/rts/Messages.c @@ -7,6 +7,7 @@ * --------------------------------------------------------------------------*/ #include "Rts.h" +#include "RtsFlags.h" #include "Messages.h" #include "Trace.h" #include "Capability.h" diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 5adcce01a0020988e29357e401f65788b7d4c430..2284b640d2650f74d8da6062dba493040a6bf2b1 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -227,7 +227,7 @@ stg_isMutableByteArrayPinnedzh ( gcptr mba ) * means we don't track their time of use and eventual destruction. We just * assume they get used. * - * Thus it is not necessary to call LDV_RECORD_CREATE when resizing them as we + * Thus it is not necessary to call PROF_HEADER_CREATE when resizing them as we * used to as the LDV profiler will essentially ignore arrays anyways. */ @@ -240,7 +240,7 @@ stg_shrinkMutableByteArrayzh ( gcptr mba, W_ new_size ) OVERWRITING_CLOSURE_MUTABLE(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) + ROUNDUP_BYTES_TO_WDS(new_size))); StgArrBytes_bytes(mba) = new_size; - // No need to call LDV_RECORD_CREATE. See Note [LDV profiling and resizing arrays] + // No need to call PROF_HEADER_CREATE. See Note [LDV profiling and resizing arrays] return (); } @@ -264,7 +264,7 @@ stg_resizzeMutableByteArrayzh ( gcptr mba, W_ new_size ) OVERWRITING_CLOSURE_MUTABLE(mba, (BYTES_TO_WDS(SIZEOF_StgArrBytes) + new_size_wds)); StgArrBytes_bytes(mba) = new_size; - // No need to call LDV_RECORD_CREATE. See Note [LDV profiling and resizing arrays] + // No need to call PROF_HEADER_CREATE. See Note [LDV profiling and resizing arrays] return (mba); } else { @@ -308,7 +308,7 @@ again: OVERWRITING_CLOSURE_MUTABLE(mba, (BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + new_size)); StgSmallMutArrPtrs_ptrs(mba) = new_size; - // No need to call LDV_RECORD_CREATE. See Note [LDV profiling and resizing arrays] + // No need to call PROF_HEADER_CREATE. See Note [LDV profiling and resizing arrays] return (); } @@ -758,14 +758,14 @@ stg_atomicModifyMutVar2zh ( gcptr mv, gcptr f ) CCCS_ALLOC(THUNK_2_SIZE); z = Hp - THUNK_2_SIZE + WDS(1); SET_HDR(z, stg_ap_2_upd_info, CCCS); - LDV_RECORD_CREATE(z); + PROF_HEADER_CREATE(z); StgThunk_payload(z,0) = f; TICK_ALLOC_THUNK_1(); CCCS_ALLOC(THUNK_1_SIZE); y = z - THUNK_1_SIZE; SET_HDR(y, stg_sel_0_upd_info, CCCS); - LDV_RECORD_CREATE(y); + PROF_HEADER_CREATE(y); StgThunk_payload(y,0) = z; retry: @@ -814,7 +814,7 @@ stg_atomicModifyMutVarzuzh ( gcptr mv, gcptr f ) CCCS_ALLOC(THUNK_SIZE); z = Hp - THUNK_SIZE + WDS(1); SET_HDR(z, stg_ap_2_upd_info, CCCS); - LDV_RECORD_CREATE(z); + PROF_HEADER_CREATE(z); StgThunk_payload(z,0) = f; retry: @@ -949,7 +949,7 @@ stg_finalizzeWeakzh ( gcptr w ) // unlockClosure(w, stg_DEAD_WEAK_info); - LDV_RECORD_CREATE(w); + PROF_HEADER_CREATE(w); if (list != stg_NO_FINALIZER_closure) { ccall runCFinalizers(list); diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c index 288c5c8a54184aeaa751796d77023f04b9414f5a..dad27da102232383e1fa4037272ceb5e320c46ce 100644 --- a/rts/ProfHeap.c +++ b/rts/ProfHeap.c @@ -142,6 +142,23 @@ restore_locale( void ) unsigned int era; static uint32_t max_era; +StgWord user_era; + +void +setUserEra (StgWord w){ + user_era = w; +} + +StgWord +getUserEra (void){ + return user_era; +} + +StgWord +incrementUserEra (StgWord w){ + return atomic_inc(&user_era, w); +} + inline void initLDVCtr( counter *ctr ) { @@ -180,6 +197,11 @@ closureIdentity( const StgClosure *p ) return p->header.prof.ccs->cc->module; case HEAP_BY_DESCR: return GET_PROF_DESC(get_itbl(p)); + case HEAP_BY_ERA: + // Static objects should have user_era = 0 + // MP: If user_era == 0 then closureIdentity returns the NULL pointer, and + // the closure is not counted to the census + return (void *)p->header.prof.hp.era; case HEAP_BY_TYPE: return GET_PROF_TYPE(get_itbl(p)); case HEAP_BY_RETAINER: @@ -220,21 +242,6 @@ closureIdentity( const StgClosure *p ) /* -------------------------------------------------------------------------- * Profiling type predicates * ----------------------------------------------------------------------- */ -#if defined(PROFILING) -STATIC_INLINE bool -doingLDVProfiling( void ) -{ - return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV - || RtsFlags.ProfFlags.bioSelector != NULL); -} - -bool -doingRetainerProfiling( void ) -{ - return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER - || RtsFlags.ProfFlags.retainerSelector != NULL); -} -#endif /* PROFILING */ // Processes a closure 'c' being destroyed whose size is 'size'. // Make sure that LDV_recordDead() is not invoked on 'inherently used' closures @@ -356,6 +363,10 @@ freeEra(Census *census) static void nextEra( void ) { + if (user_era > 0 && RtsFlags.ProfFlags.incrementUserEra){ + user_era++; + } + #if defined(PROFILING) if (doingLDVProfiling()) { era++; @@ -483,6 +494,14 @@ initHeapProfiling(void) #endif #endif +#if defined(PROFILING) + if (doingErasProfiling()){ + user_era = 1; + } +#else + user_era = 0; +#endif + // we only count eras if we're doing LDV profiling. Otherwise era // is fixed at zero. #if defined(PROFILING) @@ -703,6 +722,9 @@ closureSatisfiesConstraints( const StgClosure* p ) RtsFlags.ProfFlags.typeSelector ); if (!b) return false; } + if (RtsFlags.ProfFlags.eraSelector) { + return (p->header.prof.hp.era == RtsFlags.ProfFlags.eraSelector); + } if (RtsFlags.ProfFlags.retainerSelector) { RetainerSet *rs; uint32_t i; @@ -939,6 +961,12 @@ dumpCensus( Census *census ) traceHeapProfSampleCostCentre(0, (CostCentreStack *)ctr->identity, count * sizeof(W_)); break; + case HEAP_BY_ERA: + fprintf(hp_file, "%lu", (StgWord)ctr->identity); + char str_era[100]; + sprintf(str_era, "%lu", (StgWord)ctr->identity); + traceHeapProfSampleString(0, str_era, count * sizeof(W_)); + break; case HEAP_BY_MOD: case HEAP_BY_DESCR: case HEAP_BY_TYPE: diff --git a/rts/ProfHeap.h b/rts/ProfHeap.h index f63433feceb68ca882c818b39779b088c9797bac..2b473ac894becafd556ef0a8afb0edc3a8b5cca0 100644 --- a/rts/ProfHeap.h +++ b/rts/ProfHeap.h @@ -16,9 +16,4 @@ void endHeapProfiling (void); void freeHeapProfiling (void); bool strMatchesSelector (const char* str, const char* sel); -#if defined(PROFILING) -// doingRetainerProfiling: `-hr` or `-hr<cc> -h<x>` -bool doingRetainerProfiling(void); -#endif - #include "EndPrivate.h" diff --git a/rts/Profiling.c b/rts/Profiling.c index 3e5b49d96fdc10ab52553597ad0edfcedb713dc3..a9a53679638de195045a3745f69b481769c8e6af 100644 --- a/rts/Profiling.c +++ b/rts/Profiling.c @@ -10,6 +10,7 @@ #include "rts/PosixSource.h" #include "Rts.h" +#include "RtsFlags.h" #include "RtsUtils.h" #include "Profiling.h" diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c index 6601a929c6575f4d60ff4b3c69d619635ce7c251..31208c2f6fb84f299e21703ae68990d46b312e43 100644 --- a/rts/RaiseAsync.c +++ b/rts/RaiseAsync.c @@ -8,6 +8,7 @@ #include "rts/PosixSource.h" #include "Rts.h" +#include "RtsFlags.h" #include "sm/Storage.h" #include "Threads.h" diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c index 5221059b31458c6c0a3fb23a7b34ac3ec522667e..e9c073db74c9cc171f7053e129a83ed254c73c16 100644 --- a/rts/RtsAPI.c +++ b/rts/RtsAPI.c @@ -9,6 +9,7 @@ #include "rts/PosixSource.h" #include "Rts.h" #include "RtsAPI.h" +#include "RtsFlags.h" #include "HsFFI.h" #include "RtsUtils.h" diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index bf25bab9c7e9d75c102f9728334ad0635b5a5d71..d658a523efbb8c27694f3a48f7088870a54b0a7f 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -217,6 +217,7 @@ void initRtsFlagsDefaults(void) RtsFlags.ProfFlags.heapProfileInterval = USToTime(100000); // 100ms RtsFlags.ProfFlags.startHeapProfileAtStartup = true; RtsFlags.ProfFlags.startTimeProfileAtStartup = true; + RtsFlags.ProfFlags.incrementUserEra = false; #if defined(PROFILING) RtsFlags.ProfFlags.showCCSOnException = false; @@ -229,6 +230,7 @@ void initRtsFlagsDefaults(void) RtsFlags.ProfFlags.ccsSelector = NULL; RtsFlags.ProfFlags.retainerSelector = NULL; RtsFlags.ProfFlags.bioSelector = NULL; + RtsFlags.ProfFlags.eraSelector = 0; #endif #if defined(TRACING) @@ -384,6 +386,7 @@ usage_text[] = { " d = closure description", " y = type description", " i = info table", +" e = era", " r = retainer", " b = biography (LAG,DRAG,VOID,USE)", " A subset of closures may be selected thusly:", @@ -394,6 +397,7 @@ usage_text[] = { " -hy<typ>... closures with specified type descriptions", " -hr<cc>... closures with specified retainers", " -hb<bio>... closures with specified biographies (lag,drag,void,use)", +" -he<era>... closures with specified era", "", " -R<size> Set the maximum retainer set size (default: 8)", "", @@ -402,6 +406,8 @@ usage_text[] = { "", " -xt Include threads (TSOs) in a heap profile", "", +" --automatic-era-increment Increment the era on each major garbage collection", +"", " -xc Show current cost centre stack on raising an exception", #else /* PROFILING */ " -h Heap residency profile (output file <program>.hp)", @@ -1161,6 +1167,13 @@ error = true; RtsFlags.ProfFlags.startTimeProfileAtStartup = false; break; } + + else if (strequal("automatic-era-increment", + &rts_argv[arg][2])) { + OPTION_SAFE; + RtsFlags.ProfFlags.incrementUserEra = true; + break; + } else { OPTION_SAFE; errorBelch("unknown RTS option: %s",rts_argv[arg]); @@ -2270,6 +2283,7 @@ static bool read_heap_profiling_flag(const char *arg) case 'r': case 'B': case 'b': + case 'e': case 'T': if (arg[2] != '\0' && arg[3] != '\0') { { @@ -2315,6 +2329,10 @@ static bool read_heap_profiling_flag(const char *arg) case 'b': // biography select RtsFlags.ProfFlags.bioSelector = selector; break; + case 'E': + case 'e': // era select + RtsFlags.ProfFlags.eraSelector = strtoul(selector, (char **) NULL, 10); + break; default: stgFree(selector); } @@ -2360,6 +2378,9 @@ static bool read_heap_profiling_flag(const char *arg) case 'T': RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CLOSURE_TYPE; break; + case 'e': + RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_ERA; + break; } break; @@ -2698,3 +2719,27 @@ bool is_io_mng_native_p (void) return false; #endif } + + +#if defined(PROFILING) +bool +doingLDVProfiling( void ) +{ + return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV + || RtsFlags.ProfFlags.bioSelector != NULL); +} + +bool +doingRetainerProfiling( void ) +{ + return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER + || RtsFlags.ProfFlags.retainerSelector != NULL); +} +bool +doingErasProfiling( void ) +{ + return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_ERA + || RtsFlags.ProfFlags.eraSelector != 0); +} +#endif /* PROFILING */ + diff --git a/rts/RtsFlags.h b/rts/RtsFlags.h index 6c81081d4da2a6044b37d55bacde789f77086f5a..05a00af4e773fa3b35f25c01a658e0c191fad22d 100644 --- a/rts/RtsFlags.h +++ b/rts/RtsFlags.h @@ -24,6 +24,11 @@ void initRtsFlagsDefaults (void); void setupRtsFlags (int *argc, char *argv[], RtsConfig rtsConfig); void freeRtsArgs (void); bool is_io_mng_native_p (void); +#if defined(PROFILING) +bool doingLDVProfiling (void); +bool doingRetainerProfiling(void); +bool doingErasProfiling(void); +#endif extern RtsConfig rtsConfig; diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index e49f19c500ef2cc859432eb0f638ddf6e095bc05..baf93cc38f28c5740a3e9fb0167b863e57f5f8bf 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -464,7 +464,8 @@ extern char **environ; SymI_HasProto(mkCostCentre) \ SymI_HasProto(registerCcList) \ SymI_HasProto(registerCcsList) \ - SymI_HasProto(era) + SymI_HasProto(era) \ + SymI_HasProto(user_era) #else #define RTS_PROF_SYMBOLS /* empty */ #endif @@ -921,6 +922,9 @@ extern char **environ; SymI_HasProto(stopProfTimer) \ SymI_HasProto(startHeapProfTimer) \ SymI_HasProto(stopHeapProfTimer) \ + SymI_HasProto(setUserEra) \ + SymI_HasProto(incrementUserEra) \ + SymI_HasProto(getUserEra) \ SymI_HasProto(requestHeapCensus) \ SymI_HasProto(atomic_inc) \ SymI_HasProto(atomic_inc64) \ diff --git a/rts/STM.c b/rts/STM.c index dd136100b085ea32d39a6ce4b66cc2fa488dc60e..69b00fd127568d33f0da7a8a9f7c140de1bc129f 100644 --- a/rts/STM.c +++ b/rts/STM.c @@ -85,6 +85,7 @@ #include "rts/PosixSource.h" #include "Rts.h" +#include "RtsFlags.h" #include "RtsUtils.h" #include "Schedule.h" diff --git a/rts/Schedule.c b/rts/Schedule.c index be19aac18e5d7cf637633d80b44728382e46ec5b..4f0dcf3e81ce8fbd2b34e5d2538631d3eece1a8a 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -9,6 +9,7 @@ #include "rts/PosixSource.h" #define KEEP_LOCKCLOSURE #include "Rts.h" +#include "RtsFlags.h" #include "sm/Storage.h" #include "RtsUtils.h" diff --git a/rts/ThreadLabels.c b/rts/ThreadLabels.c index 55154f1de98e0b052ad137d0410a4225ac694f0a..4c7ab57b628708f93e2c2776b3205417ae80eb53 100644 --- a/rts/ThreadLabels.c +++ b/rts/ThreadLabels.c @@ -12,6 +12,7 @@ #include "ThreadLabels.h" #include "RtsUtils.h" +#include "RtsFlags.h" #include "Hash.h" #include "Trace.h" diff --git a/rts/Threads.c b/rts/Threads.c index ef1f5c790af47378081f5abbf1a7c1bf6f812b15..e61f9918eb6130c55a5db31ac4135d88393d713b 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -8,6 +8,7 @@ #include "rts/PosixSource.h" #include "Rts.h" +#include "RtsFlags.h" #include "Capability.h" #include "Updates.h" diff --git a/rts/Updates.h b/rts/Updates.h index 58967c332131597ab932b043fd664b463ef5b315..9bdc9d72c0a18a15382bf2e063cfcd4f5d5cf78d 100644 --- a/rts/Updates.h +++ b/rts/Updates.h @@ -10,8 +10,10 @@ #if !defined(CMINUSMINUS) #include "BeginPrivate.h" +#include "RtsFlags.h" #endif + /* Note [Thunks, blackholes, and indirections] * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Consider the following STG binding: @@ -418,10 +420,10 @@ -------------------------------------------------------------------------- */ /* LDV profiling: - * After all, we do *NOT* need to call LDV_RECORD_CREATE() for IND + * After all, we do *NOT* need to call PROF_HEADER_CREATE() for IND * closures because they are inherently used. But, it corrupts * the invariants that every closure keeps its creation time in the profiling - * field. So, we call LDV_RECORD_CREATE(). + * field. So, we call PROF_HEADER_CREATE(). */ /* @@ -462,7 +464,7 @@ OVERWRITING_CLOSURE(p1); \ %release StgInd_indirectee(p1) = p2; \ %release SET_INFO(p1, stg_BLACKHOLE_info); \ - LDV_RECORD_CREATE(p1); \ + PROF_HEADER_CREATE(p1); \ and_then; #else /* !CMINUSMINUS */ diff --git a/rts/Weak.c b/rts/Weak.c index 89096878e75c67d25fe74125997a88767376411c..b4994f392177e0b9e29946f7edb226537bf54f93 100644 --- a/rts/Weak.c +++ b/rts/Weak.c @@ -9,6 +9,7 @@ #include "rts/PosixSource.h" #include "Rts.h" #include "RtsAPI.h" +#include "RtsFlags.h" #include "RtsUtils.h" #include "Weak.h" diff --git a/rts/eventlog/EventLog.c b/rts/eventlog/EventLog.c index f50a56e60fc06b092cca06c0fa873c90e80dfbca..e90a347de8593fe2ceab6525703f8ef12063e020 100644 --- a/rts/eventlog/EventLog.c +++ b/rts/eventlog/EventLog.c @@ -1193,6 +1193,8 @@ static HeapProfBreakdown getHeapProfBreakdown(void) return HEAP_PROF_BREAKDOWN_CLOSURE_TYPE; case HEAP_BY_INFO_TABLE: return HEAP_PROF_BREAKDOWN_INFO_TABLE; + case HEAP_BY_ERA: + return HEAP_PROF_BREAKDOWN_ERA; default: barf("getHeapProfBreakdown: unknown heap profiling mode"); } diff --git a/rts/include/rts/EventLogFormat.h b/rts/include/rts/EventLogFormat.h index 2eaeb0b582b797a2b95fe092ee660a2d2df76596..7888d119b2f2f30bcb26d3d5f209e6c9a67cee06 100644 --- a/rts/include/rts/EventLogFormat.h +++ b/rts/include/rts/EventLogFormat.h @@ -125,7 +125,8 @@ typedef enum { HEAP_PROF_BREAKDOWN_RETAINER, HEAP_PROF_BREAKDOWN_BIOGRAPHY, HEAP_PROF_BREAKDOWN_CLOSURE_TYPE, - HEAP_PROF_BREAKDOWN_INFO_TABLE + HEAP_PROF_BREAKDOWN_INFO_TABLE, + HEAP_PROF_BREAKDOWN_ERA } HeapProfBreakdown; #if !defined(EVENTLOG_CONSTANTS_ONLY) diff --git a/rts/include/rts/Flags.h b/rts/include/rts/Flags.h index 3861a1852d6c0c1892b73e27b6e66e171a2186ff..f73f6978c7cf0df4fb67915d2e7ad91b8991f496 100644 --- a/rts/include/rts/Flags.h +++ b/rts/include/rts/Flags.h @@ -144,11 +144,13 @@ typedef struct _PROFILING_FLAGS { # define HEAP_BY_CLOSURE_TYPE 8 # define HEAP_BY_INFO_TABLE 9 +# define HEAP_BY_ERA 10 Time heapProfileInterval; /* time between samples */ uint32_t heapProfileIntervalTicks; /* ticks between samples (derived) */ bool startHeapProfileAtStartup; /* true if we start profiling from program startup */ bool startTimeProfileAtStartup; /* true if we start profiling from program startup */ + bool incrementUserEra; bool showCCSOnException; @@ -163,6 +165,7 @@ typedef struct _PROFILING_FLAGS { const char* ccSelector; const char* ccsSelector; const char* retainerSelector; + StgWord eraSelector; const char* bioSelector; } PROFILING_FLAGS; diff --git a/rts/include/rts/prof/CCS.h b/rts/include/rts/prof/CCS.h index a155e1385ba2a12cca4e2b54a3eba4f2379ec401..65819d80151d31ec8648d46bfb0920ef9ec14378 100644 --- a/rts/include/rts/prof/CCS.h +++ b/rts/include/rts/prof/CCS.h @@ -168,6 +168,7 @@ extern CostCentreStack CCS_IDLE[]; // capability is idle #endif /* IN_STG_CODE */ extern unsigned int RTS_VAR(era); +extern StgWord RTS_VAR(user_era); /* ----------------------------------------------------------------------------- * Functions diff --git a/rts/include/rts/prof/Heap.h b/rts/include/rts/prof/Heap.h index 90700c809bd9f693ee6fbadf7ddc3821ea80ebe8..6c87d6b8b077536b77eb4e61c75222707366bc54 100644 --- a/rts/include/rts/prof/Heap.h +++ b/rts/include/rts/prof/Heap.h @@ -22,3 +22,6 @@ void requestHeapCensus ( void ); void startHeapProfTimer ( void ); void stopHeapProfTimer ( void ); +void setUserEra ( StgWord w ); +StgWord getUserEra ( void ); +StgWord incrementUserEra ( StgWord w ); diff --git a/rts/include/rts/prof/LDV.h b/rts/include/rts/prof/LDV.h index 73f77865379ec28b75b4f5143877b545375ddad9..b8b4e7c8455f8c728f6f22c67b44991b7c462787 100644 --- a/rts/include/rts/prof/LDV.h +++ b/rts/include/rts/prof/LDV.h @@ -18,6 +18,8 @@ /* retrieves the LDV word from closure c */ #define LDVW(c) (((StgClosure *)(c))->header.prof.hp.ldvw) +#define ERAW(c) (((StgClosure *)(c))->header.prof.hp.era) + /* * Stores the creation time for closure c. * This macro is called at the very moment of closure creation. @@ -33,7 +35,12 @@ #else #define LDV_RECORD_CREATE(c) \ - LDVW((c)) = ((StgWord)RTS_DEREF(era) << LDV_SHIFT) | LDV_STATE_CREATE + if (doingLDVProfiling()){ \ + LDVW((c)) = ((StgWord)RTS_DEREF(era) << LDV_SHIFT) | LDV_STATE_CREATE; \ + } + +#define ERA_RECORD_CREATE(c) \ + ERAW((c)) = (StgWord)RTS_DEREF(user_era); #endif diff --git a/rts/include/rts/storage/ClosureMacros.h b/rts/include/rts/storage/ClosureMacros.h index 1c9d050b97d175bca99e2b7f2b1b52a8d0d4375f..dbed34d490d64db3ad8bbfb460c6ecea44697643 100644 --- a/rts/include/rts/storage/ClosureMacros.h +++ b/rts/include/rts/storage/ClosureMacros.h @@ -158,9 +158,29 @@ EXTERN_INLINE StgHalfWord GET_TAG(const StgClosure *con) [1]: Technically we should set 'rs' to `NULL | flip`. */ -#define SET_PROF_HDR(c,ccs_) \ - ((c)->header.prof.ccs = ccs_, \ - LDV_RECORD_CREATE((c))) +/* + MP: Various other places use the check era > 0 to check whether LDV profiling + is enabled. The use of these predicates here is the reason for including RtsFlags.h in + a lot of places. + + We could also check user_era > 0 for eras profiling, which would remove the need + for so many includes. +*/ +#define SET_PROF_HDR(c, ccs_) \ + { \ + (c)->header.prof.ccs = ccs_; \ + if (doingLDVProfiling()) { \ + LDV_RECORD_CREATE((c)); \ + } \ +\ + if (doingRetainerProfiling()) { \ + LDV_RECORD_CREATE((c)); \ + }; \ + if (doingErasProfiling()){ \ + ERA_RECORD_CREATE((c)); \ + }; \ + } + #else #define SET_PROF_HDR(c,ccs) #endif @@ -185,6 +205,7 @@ EXTERN_INLINE StgHalfWord GET_TAG(const StgClosure *con) #define OVERWRITE_INFO(c, new_info) \ OVERWRITING_CLOSURE((StgClosure *)(c)); \ SET_INFO_RELAXED((StgClosure *)(c), (new_info)); \ + /* MP: Should this be SET_PROF_HEADER? */ \ LDV_RECORD_CREATE(c); /* ----------------------------------------------------------------------------- diff --git a/rts/include/rts/storage/Closures.h b/rts/include/rts/storage/Closures.h index 915dfc4f6fca30945abd87fef45d312b7d937078..f0b8b346e26f64e1ab1278f13e651371ee0b5832 100644 --- a/rts/include/rts/storage/Closures.h +++ b/rts/include/rts/storage/Closures.h @@ -29,6 +29,7 @@ typedef struct { union { StgWord trav; /* Heap traversal */ StgWord ldvw; /* Lag/Drag/Void Word */ + StgWord era; /* User-era */ } hp; // Heap profiling header. This field is shared among the various heap // profiling modes. Currently it is used by ProfHeap.c for Lag/Drag/Void diff --git a/rts/include/stg/MiscClosures.h b/rts/include/stg/MiscClosures.h index c5a1681df606ad3de05540cce356a800c1f7bd21..ef303bd4e0390a0e34a57c23c92ab1fdae32583b 100644 --- a/rts/include/stg/MiscClosures.h +++ b/rts/include/stg/MiscClosures.h @@ -627,6 +627,7 @@ extern StgWord RTS_VAR(stable_name_table); // Profiling.c extern unsigned int RTS_VAR(era); +extern StgWord RTS_VAR(user_era); extern unsigned int RTS_VAR(entering_PAP); extern StgWord CCS_OVERHEAD[]; extern StgWord CCS_SYSTEM[]; diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c index 1f40402c63471e24f2501b57c07a4ae6417fd1bd..50918d11cf99b9846287b9b88926d9b04a3530a2 100644 --- a/rts/sm/CNF.c +++ b/rts/sm/CNF.c @@ -13,6 +13,7 @@ #include "rts/PosixSource.h" #include "Rts.h" #include "RtsUtils.h" +#include "RtsFlags.h" #include "Capability.h" #include "GC.h" diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index 4eba066928314ecda90b644a1df502b60bfe27b5..724ce6594a4d5e273456f674fa887a83eda282d3 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -16,6 +16,7 @@ #include "rts/PosixSource.h" #include "Rts.h" +#include "RtsFlags.h" #include "Evac.h" #include "Storage.h" @@ -260,7 +261,9 @@ copy_tag(StgClosure **p, const StgInfoTable *info, // profiler when it encounters this closure in // processHeapClosureForDead. So we reset the LDVW field // here. - LDVW(to) = 0; + if (doingLDVProfiling()){ + LDVW(to) = 0; + } #endif return evacuate(p); // does the failed_to_evac stuff } else { @@ -1213,9 +1216,13 @@ unchain_thunk_selectors(StgSelector *p, StgClosure *val) SET_INFO_RELEASE((StgClosure *)p, &stg_IND_info); } +#if defined(PROFILING) // For the purposes of LDV profiling, we have created an // indirection. - LDV_RECORD_CREATE(p); + if (doingLDVProfiling()){ + LDV_RECORD_CREATE(p); + } +#endif p = prev; } diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 14a858e141ed6c14d575790e2ea5c4c871896d44..722d9dc19b29c9f16cb486b63eff7293c79cfd4b 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -25,6 +25,7 @@ #include "rts/PosixSource.h" #include "Rts.h" +#include "RtsFlags.h" #include "Storage.h" #include "GCThread.h" diff --git a/testsuite/tests/interface-stability/ghc-experimental-exports.stdout b/testsuite/tests/interface-stability/ghc-experimental-exports.stdout index 34480e4ef503682a683ca5022cdf38bef3e36492..1a8abff937e18e080eb94622d5ad873b1317fef0 100644 --- a/testsuite/tests/interface-stability/ghc-experimental-exports.stdout +++ b/testsuite/tests/interface-stability/ghc-experimental-exports.stdout @@ -1,3 +1,9 @@ +module GHC.Profiling.Eras where + -- Safety: Trustworthy + getUserEra :: GHC.Types.IO GHC.Types.Word + incrementUserEra :: GHC.Types.Word -> GHC.Types.IO GHC.Types.Word + setUserEra :: GHC.Types.Word -> GHC.Types.IO () + -- Instances: