Commit 50de6034 authored by Simon Marlow

Make profiling work with multiple capabilities (+RTS -N)

This means that both time and heap profiling work for parallel
programs.  Main internal changes:

  - CCCS is no longer a global variable; it is now another
    pseudo-register in the StgRegTable struct.  Thus every
    Capability has its own CCCS.

  - There is a new built-in CCS called "IDLE", which records ticks for
    Capabilities in the idle state.  If you profile a single-threaded
    program with +RTS -N2, you'll see about 50% of time in "IDLE".

  - There is appropriate locking in rts/Profiling.c to protect the
    shared cost-centre-stack data structures.

This patch does enough to get it working, but I have cut one big corner:
the cost-centre-stack data structure is still shared amongst all
Capabilities, which means that multiple Capabilities will race when
updating the "allocations" and "entries" fields of a CCS.  Not only
does this give unpredictable results, but it runs very slowly due to
cache line bouncing.

It is strongly recommended that you use -fno-prof-count-entries to
disable the "entries" count when profiling parallel programs. (I shall
add a note to this effect to the docs).
parent 1c2b8381
...@@ -343,7 +343,8 @@ data GlobalReg ...@@ -343,7 +343,8 @@ data GlobalReg
| SpLim -- Stack limit | SpLim -- Stack limit
| Hp -- Heap ptr; points to last occupied heap location. | Hp -- Heap ptr; points to last occupied heap location.
| HpLim -- Heap limit register | HpLim -- Heap limit register
| CurrentTSO -- pointer to current thread's TSO | CCCS -- Current cost-centre stack
| CurrentTSO -- pointer to current thread's TSO
| CurrentNursery -- pointer to allocation area | CurrentNursery -- pointer to allocation area
| HpAlloc -- allocation count for heap check failure | HpAlloc -- allocation count for heap check failure
...@@ -395,6 +396,7 @@ instance Ord GlobalReg where ...@@ -395,6 +396,7 @@ instance Ord GlobalReg where
compare SpLim SpLim = EQ compare SpLim SpLim = EQ
compare Hp Hp = EQ compare Hp Hp = EQ
compare HpLim HpLim = EQ compare HpLim HpLim = EQ
compare CCCS CCCS = EQ
compare CurrentTSO CurrentTSO = EQ compare CurrentTSO CurrentTSO = EQ
compare CurrentNursery CurrentNursery = EQ compare CurrentNursery CurrentNursery = EQ
compare HpAlloc HpAlloc = EQ compare HpAlloc HpAlloc = EQ
...@@ -419,6 +421,8 @@ instance Ord GlobalReg where ...@@ -419,6 +421,8 @@ instance Ord GlobalReg where
compare _ Hp = GT compare _ Hp = GT
compare HpLim _ = LT compare HpLim _ = LT
compare _ HpLim = GT compare _ HpLim = GT
compare CCCS _ = LT
compare _ CCCS = GT
compare CurrentTSO _ = LT compare CurrentTSO _ = LT
compare _ CurrentTSO = GT compare _ CurrentTSO = GT
compare CurrentNursery _ = LT compare CurrentNursery _ = LT
......
...@@ -106,8 +106,9 @@ $white_no_nl+ ; ...@@ -106,8 +106,9 @@ $white_no_nl+ ;
SpLim { global_reg SpLim } SpLim { global_reg SpLim }
Hp { global_reg Hp } Hp { global_reg Hp }
HpLim { global_reg HpLim } HpLim { global_reg HpLim }
CurrentTSO { global_reg CurrentTSO } CCCS { global_reg CCCS }
CurrentNursery { global_reg CurrentNursery } CurrentTSO { global_reg CurrentTSO }
CurrentNursery { global_reg CurrentNursery }
HpAlloc { global_reg HpAlloc } HpAlloc { global_reg HpAlloc }
BaseReg { global_reg BaseReg } BaseReg { global_reg BaseReg }
......
...@@ -775,8 +775,9 @@ isPtrGlobalReg Sp = True ...@@ -775,8 +775,9 @@ isPtrGlobalReg Sp = True
isPtrGlobalReg SpLim = True isPtrGlobalReg SpLim = True
isPtrGlobalReg Hp = True isPtrGlobalReg Hp = True
isPtrGlobalReg HpLim = True isPtrGlobalReg HpLim = True
isPtrGlobalReg CurrentTSO = True isPtrGlobalReg CCCS = True
isPtrGlobalReg CurrentNursery = True isPtrGlobalReg CurrentTSO = True
isPtrGlobalReg CurrentNursery = True
isPtrGlobalReg (VanillaReg _ VGcPtr) = True isPtrGlobalReg (VanillaReg _ VGcPtr) = True
isPtrGlobalReg _ = False isPtrGlobalReg _ = False
......
...@@ -271,6 +271,7 @@ pprGlobalReg gr ...@@ -271,6 +271,7 @@ pprGlobalReg gr
SpLim -> ptext (sLit "SpLim") SpLim -> ptext (sLit "SpLim")
Hp -> ptext (sLit "Hp") Hp -> ptext (sLit "Hp")
HpLim -> ptext (sLit "HpLim") HpLim -> ptext (sLit "HpLim")
CCCS -> ptext (sLit "CCCS")
CurrentTSO -> ptext (sLit "CurrentTSO") CurrentTSO -> ptext (sLit "CurrentTSO")
CurrentNursery -> ptext (sLit "CurrentNursery") CurrentNursery -> ptext (sLit "CurrentNursery")
HpAlloc -> ptext (sLit "HpAlloc") HpAlloc -> ptext (sLit "HpAlloc")
......
...@@ -670,6 +670,6 @@ restoreCurrentCostCentre Nothing _freeit = nopC ...@@ -670,6 +670,6 @@ restoreCurrentCostCentre Nothing _freeit = nopC
restoreCurrentCostCentre (Just slot) freeit restoreCurrentCostCentre (Just slot) freeit
= do { sp_rel <- getSpRelOffset slot = do { sp_rel <- getSpRelOffset slot
; whenC freeit (freeStackSlots [slot]) ; whenC freeit (freeStackSlots [slot])
; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel bWord)) } ; stmtC (storeCurCCS (CmmLoad sp_rel bWord)) }
\end{code} \end{code}
...@@ -316,9 +316,10 @@ mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do ...@@ -316,9 +316,10 @@ mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do
-- Do the business -- Do the business
; funWrapper cl_info reg_args reg_save_code $ do ; funWrapper cl_info reg_args reg_save_code $ do
{ tickyEnterFun cl_info { tickyEnterFun cl_info
; enterCostCentreFun cc $ ; enterCostCentreFun cc
CmmMachOp mo_wordSub [ CmmReg nodeReg (CmmMachOp mo_wordSub [ CmmReg nodeReg
, CmmLit (mkIntCLit (funTag cl_info)) ] , CmmLit (mkIntCLit (funTag cl_info)) ])
(node : map snd reg_args) -- live regs
; cgExpr body } ; cgExpr body }
} }
......
...@@ -240,8 +240,8 @@ emitLoadThreadState = do ...@@ -240,8 +240,8 @@ emitLoadThreadState = do
emitOpenNursery emitOpenNursery
-- and load the current cost centre stack from the TSO when profiling: -- and load the current cost centre stack from the TSO when profiling:
when opt_SccProfilingOn $ when opt_SccProfilingOn $
stmtC (CmmStore curCCSAddr stmtC $ storeCurCCS $
(CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord)) CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord
emitOpenNursery :: Code emitOpenNursery :: Code
emitOpenNursery = stmtsC [ emitOpenNursery = stmtsC [
......
...@@ -21,7 +21,7 @@ module CgProf ( ...@@ -21,7 +21,7 @@ module CgProf (
enterCostCentreThunk, enterCostCentreThunk,
enterCostCentreFun, enterCostCentreFun,
costCentreFrom, costCentreFrom,
curCCS, curCCSAddr, curCCS, storeCurCCS,
emitCostCentreDecl, emitCostCentreStackDecl, emitCostCentreDecl, emitCostCentreStackDecl,
emitSetCCC, emitSetCCC,
...@@ -66,11 +66,10 @@ import Control.Monad ...@@ -66,11 +66,10 @@ import Control.Monad
-- Expression representing the current cost centre stack -- Expression representing the current cost centre stack
curCCS :: CmmExpr curCCS :: CmmExpr
curCCS = CmmLoad curCCSAddr bWord curCCS = CmmReg (CmmGlobal CCCS)
-- Address of current CCS variable, for storing into storeCurCCS :: CmmExpr -> CmmStmt
curCCSAddr :: CmmExpr storeCurCCS e = CmmAssign (CmmGlobal CCCS) e
curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS")))
mkCCostCentre :: CostCentre -> CmmLit mkCCostCentre :: CostCentre -> CmmLit
mkCCostCentre cc = CmmLabel (mkCCLabel cc) mkCCostCentre cc = CmmLabel (mkCCLabel cc)
...@@ -135,14 +134,15 @@ profAlloc words ccs ...@@ -135,14 +134,15 @@ profAlloc words ccs
enterCostCentreThunk :: CmmExpr -> Code enterCostCentreThunk :: CmmExpr -> Code
enterCostCentreThunk closure = enterCostCentreThunk closure =
ifProfiling $ do ifProfiling $ do
stmtC $ CmmStore curCCSAddr (costCentreFrom closure) stmtC $ storeCurCCS (costCentreFrom closure)
enterCostCentreFun :: CostCentreStack -> CmmExpr -> Code enterCostCentreFun :: CostCentreStack -> CmmExpr -> [GlobalReg] -> Code
enterCostCentreFun ccs closure = enterCostCentreFun ccs closure vols =
ifProfiling $ do ifProfiling $ do
if isCurrentCCS ccs if isCurrentCCS ccs
then emitRtsCall rtsPackageId (fsLit "enterFunCCS") then emitRtsCallWithVols rtsPackageId (fsLit "enterFunCCS")
[CmmHinted (costCentreFrom closure) AddrHint] [CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint,
CmmHinted (costCentreFrom closure) AddrHint] vols
else return () -- top-level function, nothing to do else return () -- top-level function, nothing to do
ifProfiling :: Code -> Code ifProfiling :: Code -> Code
...@@ -226,7 +226,7 @@ emitSetCCC cc tick push ...@@ -226,7 +226,7 @@ emitSetCCC cc tick push
tmp <- newTemp bWord -- TODO FIXME NOW tmp <- newTemp bWord -- TODO FIXME NOW
pushCostCentre tmp curCCS cc pushCostCentre tmp curCCS cc
when tick $ stmtC (bumpSccCount (CmmReg (CmmLocal tmp))) when tick $ stmtC (bumpSccCount (CmmReg (CmmLocal tmp)))
when push $ stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp))) when push $ stmtC (storeCurCCS (CmmReg (CmmLocal tmp)))
pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> Code pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> Code
pushCostCentre result ccs cc pushCostCentre result ccs cc
......
...@@ -286,7 +286,7 @@ callerSaveVolatileRegs vols = (caller_save, caller_load) ...@@ -286,7 +286,7 @@ callerSaveVolatileRegs vols = (caller_save, caller_load)
caller_save = foldr ($!) [] (map callerSaveGlobalReg regs_to_save) caller_save = foldr ($!) [] (map callerSaveGlobalReg regs_to_save)
caller_load = foldr ($!) [] (map callerRestoreGlobalReg regs_to_save) caller_load = foldr ($!) [] (map callerRestoreGlobalReg regs_to_save)
system_regs = [Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery, system_regs = [Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery,
{-SparkHd,SparkTl,SparkBase,SparkLim,-}BaseReg ] {-SparkHd,SparkTl,SparkBase,SparkLim,-}BaseReg ]
regs_to_save = system_regs ++ vol_list regs_to_save = system_regs ++ vol_list
...@@ -384,6 +384,9 @@ callerSaves Hp = True ...@@ -384,6 +384,9 @@ callerSaves Hp = True
#ifdef CALLER_SAVES_HpLim #ifdef CALLER_SAVES_HpLim
callerSaves HpLim = True callerSaves HpLim = True
#endif #endif
#ifdef CALLER_SAVES_CCCS
callerSaves CCCS = True
#endif
#ifdef CALLER_SAVES_CurrentTSO #ifdef CALLER_SAVES_CurrentTSO
callerSaves CurrentTSO = True callerSaves CurrentTSO = True
#endif #endif
...@@ -423,6 +426,7 @@ baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1 ...@@ -423,6 +426,7 @@ baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1
baseRegOffset (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")") baseRegOffset (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")")
baseRegOffset Hp = oFFSET_StgRegTable_rHp baseRegOffset Hp = oFFSET_StgRegTable_rHp
baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim
baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS
baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery
baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc
......
...@@ -210,8 +210,8 @@ loadThreadState tso stack = do ...@@ -210,8 +210,8 @@ loadThreadState tso stack = do
openNursery, openNursery,
-- and load the current cost centre stack from the TSO when profiling: -- and load the current cost centre stack from the TSO when profiling:
if opt_SccProfilingOn then if opt_SccProfilingOn then
mkStore curCCSAddr storeCurCCS
(CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) ccsType) (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) ccsType)
else mkNop] else mkNop]
emitLoadThreadState :: LocalReg -> LocalReg -> FCode () emitLoadThreadState :: LocalReg -> LocalReg -> FCode ()
emitLoadThreadState tso stack = emit $ loadThreadState tso stack emitLoadThreadState tso stack = emit $ loadThreadState tso stack
......
...@@ -21,7 +21,7 @@ module StgCmmProf ( ...@@ -21,7 +21,7 @@ module StgCmmProf (
dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
enterCostCentreThunk, enterCostCentreThunk,
costCentreFrom, costCentreFrom,
curCCS, curCCSAddr, curCCS, storeCurCCS,
emitSetCCC, emitSetCCC,
saveCurrentCostCentre, restoreCurrentCostCentre, saveCurrentCostCentre, restoreCurrentCostCentre,
...@@ -73,11 +73,10 @@ ccType :: CmmType -- Type of a cost centre ...@@ -73,11 +73,10 @@ ccType :: CmmType -- Type of a cost centre
ccType = bWord ccType = bWord
curCCS :: CmmExpr curCCS :: CmmExpr
curCCS = CmmLoad curCCSAddr ccsType curCCS = CmmReg (CmmGlobal CCCS)
-- Address of current CCS variable, for storing into storeCurCCS :: CmmExpr -> CmmAGraph
curCCSAddr :: CmmExpr storeCurCCS e = mkAssign (CmmGlobal CCCS) e
curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS")))
mkCCostCentre :: CostCentre -> CmmLit mkCCostCentre :: CostCentre -> CmmLit
mkCCostCentre cc = CmmLabel (mkCCLabel cc) mkCCostCentre cc = CmmLabel (mkCCLabel cc)
...@@ -150,7 +149,7 @@ restoreCurrentCostCentre :: Maybe LocalReg -> FCode () ...@@ -150,7 +149,7 @@ restoreCurrentCostCentre :: Maybe LocalReg -> FCode ()
restoreCurrentCostCentre Nothing restoreCurrentCostCentre Nothing
= return () = return ()
restoreCurrentCostCentre (Just local_cc) restoreCurrentCostCentre (Just local_cc)
= emit (mkStore curCCSAddr (CmmReg (CmmLocal local_cc))) = emit (storeCurCCS (CmmReg (CmmLocal local_cc)))
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
...@@ -186,7 +185,7 @@ profAlloc words ccs ...@@ -186,7 +185,7 @@ profAlloc words ccs
enterCostCentreThunk :: CmmExpr -> FCode () enterCostCentreThunk :: CmmExpr -> FCode ()
enterCostCentreThunk closure = enterCostCentreThunk closure =
ifProfiling $ do ifProfiling $ do
emit $ mkStore curCCSAddr (costCentreFrom closure) emit $ storeCurCCS (costCentreFrom closure)
ifProfiling :: FCode () -> FCode () ifProfiling :: FCode () -> FCode ()
ifProfiling code ifProfiling code
...@@ -269,7 +268,7 @@ emitSetCCC cc tick push ...@@ -269,7 +268,7 @@ emitSetCCC cc tick push
tmp <- newTemp ccsType -- TODO FIXME NOW tmp <- newTemp ccsType -- TODO FIXME NOW
pushCostCentre tmp curCCS cc pushCostCentre tmp curCCS cc
when tick $ emit (bumpSccCount (CmmReg (CmmLocal tmp))) when tick $ emit (bumpSccCount (CmmReg (CmmLocal tmp)))
when push $ emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp))) when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp)))
pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode () pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
pushCostCentre result ccs cc pushCostCentre result ccs cc
......
...@@ -253,7 +253,7 @@ callerSaveVolatileRegs = (caller_save, caller_load) ...@@ -253,7 +253,7 @@ callerSaveVolatileRegs = (caller_save, caller_load)
caller_save = catAGraphs (map callerSaveGlobalReg regs_to_save) caller_save = catAGraphs (map callerSaveGlobalReg regs_to_save)
caller_load = catAGraphs (map callerRestoreGlobalReg regs_to_save) caller_load = catAGraphs (map callerRestoreGlobalReg regs_to_save)
system_regs = [ Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery system_regs = [ Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery
{- ,SparkHd,SparkTl,SparkBase,SparkLim -} {- ,SparkHd,SparkTl,SparkBase,SparkLim -}
, BaseReg ] , BaseReg ]
...@@ -366,6 +366,9 @@ callerSaves Hp = True ...@@ -366,6 +366,9 @@ callerSaves Hp = True
#ifdef CALLER_SAVES_HpLim #ifdef CALLER_SAVES_HpLim
callerSaves HpLim = True callerSaves HpLim = True
#endif #endif
#ifdef CALLER_SAVES_CCCS
callerSaves CCCS = True
#endif
#ifdef CALLER_SAVES_CurrentTSO #ifdef CALLER_SAVES_CurrentTSO
callerSaves CurrentTSO = True callerSaves CurrentTSO = True
#endif #endif
...@@ -385,7 +388,8 @@ baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim ...@@ -385,7 +388,8 @@ baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim
baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1 baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1
baseRegOffset Hp = oFFSET_StgRegTable_rHp baseRegOffset Hp = oFFSET_StgRegTable_rHp
baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim
baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS
baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery
baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc
baseRegOffset GCEnter1 = oFFSET_stgGCEnter1 baseRegOffset GCEnter1 = oFFSET_stgGCEnter1
......
...@@ -372,7 +372,7 @@ ...@@ -372,7 +372,7 @@
CCCS_ALLOC(bytes); CCCS_ALLOC(bytes);
/* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */ /* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */
#define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), W_[CCCS]) #define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS)
#define HP_CHK_GEN_TICKY(alloc,liveness,reentry) \ #define HP_CHK_GEN_TICKY(alloc,liveness,reentry) \
HP_CHK_GEN(alloc,liveness,reentry); \ HP_CHK_GEN(alloc,liveness,reentry); \
......
...@@ -37,6 +37,15 @@ typedef struct StgClosure_ *HaskellObj; ...@@ -37,6 +37,15 @@ typedef struct StgClosure_ *HaskellObj;
*/ */
typedef struct Capability_ Capability; typedef struct Capability_ Capability;
/*
* The public view of a Capability: we can be sure it starts with
* these two components (but it may have more private fields).
*/
typedef struct CapabilityPublic_ {
StgFunTable f;
StgRegTable r;
} CapabilityPublic;
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
RTS configuration settings, for passing to hs_init_ghc() RTS configuration settings, for passing to hs_init_ghc()
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
......
...@@ -222,6 +222,7 @@ main(int argc, char *argv[]) ...@@ -222,6 +222,7 @@ main(int argc, char *argv[])
field_offset(StgRegTable, rSpLim); field_offset(StgRegTable, rSpLim);
field_offset(StgRegTable, rHp); field_offset(StgRegTable, rHp);
field_offset(StgRegTable, rHpLim); field_offset(StgRegTable, rHpLim);
field_offset(StgRegTable, rCCCS);
field_offset(StgRegTable, rCurrentTSO); field_offset(StgRegTable, rCurrentTSO);
field_offset(StgRegTable, rCurrentNursery); field_offset(StgRegTable, rCurrentNursery);
field_offset(StgRegTable, rHpAlloc); field_offset(StgRegTable, rHpAlloc);
......
...@@ -114,8 +114,6 @@ typedef struct _IndexTable { ...@@ -114,8 +114,6 @@ typedef struct _IndexTable {
Pre-defined cost centres and cost centre stacks Pre-defined cost centres and cost centre stacks
-------------------------------------------------------------------------- */ -------------------------------------------------------------------------- */
extern CostCentreStack * RTS_VAR(CCCS); /* current CCS */
#if IN_STG_CODE #if IN_STG_CODE
extern StgWord CC_MAIN[]; extern StgWord CC_MAIN[];
...@@ -153,6 +151,9 @@ extern CostCentreStack CCS_DONT_CARE[]; // shouldn't ever get set ...@@ -153,6 +151,9 @@ extern CostCentreStack CCS_DONT_CARE[]; // shouldn't ever get set
extern CostCentre CC_PINNED[]; extern CostCentre CC_PINNED[];
extern CostCentreStack CCS_PINNED[]; // pinned memory extern CostCentreStack CCS_PINNED[]; // pinned memory
extern CostCentre CC_IDLE[];
extern CostCentreStack CCS_IDLE[]; // capability is idle
#endif /* IN_STG_CODE */ #endif /* IN_STG_CODE */
extern unsigned int RTS_VAR(CC_ID); // global ids extern unsigned int RTS_VAR(CC_ID); // global ids
...@@ -165,7 +166,7 @@ extern unsigned int RTS_VAR(era); ...@@ -165,7 +166,7 @@ extern unsigned int RTS_VAR(era);
* ---------------------------------------------------------------------------*/ * ---------------------------------------------------------------------------*/
CostCentreStack * pushCostCentre (CostCentreStack *, CostCentre *); CostCentreStack * pushCostCentre (CostCentreStack *, CostCentre *);
void enterFunCCS (CostCentreStack *); void enterFunCCS (StgRegTable *reg, CostCentreStack *);
/* ----------------------------------------------------------------------------- /* -----------------------------------------------------------------------------
Registering CCs and CCSs Registering CCs and CCSs
......
...@@ -488,7 +488,6 @@ extern StgWord RTS_VAR(stable_ptr_table); ...@@ -488,7 +488,6 @@ extern StgWord RTS_VAR(stable_ptr_table);
// Profiling.c // Profiling.c
extern unsigned int RTS_VAR(era); extern unsigned int RTS_VAR(era);
extern StgWord RTS_VAR(CCCS); /* current CCS */
extern unsigned int RTS_VAR(entering_PAP); extern unsigned int RTS_VAR(entering_PAP);
extern StgWord RTS_VAR(CC_LIST); /* registered CC list */ extern StgWord RTS_VAR(CC_LIST); /* registered CC list */
extern StgWord RTS_VAR(CCS_LIST); /* registered CCS list */ extern StgWord RTS_VAR(CCS_LIST); /* registered CCS list */
......
...@@ -80,6 +80,7 @@ typedef struct StgRegTable_ { ...@@ -80,6 +80,7 @@ typedef struct StgRegTable_ {
StgPtr rSpLim; StgPtr rSpLim;
StgPtr rHp; StgPtr rHp;
StgPtr rHpLim; StgPtr rHpLim;
struct _CostCentreStack * rCCCS; // current cost-centre-stack
struct StgTSO_ * rCurrentTSO; struct StgTSO_ * rCurrentTSO;
struct nursery_ * rNursery; struct nursery_ * rNursery;
struct bdescr_ * rCurrentNursery; /* Hp/HpLim point into this block */ struct bdescr_ * rCurrentNursery; /* Hp/HpLim point into this block */
......
...@@ -86,7 +86,7 @@ stg_PAP_apply ...@@ -86,7 +86,7 @@ stg_PAP_apply
TICK_ENT_PAP(); TICK_ENT_PAP();
LDV_ENTER(pap); LDV_ENTER(pap);
#ifdef PROFILING #ifdef PROFILING
foreign "C" enterFunCCS(StgHeader_ccs(pap)); foreign "C" enterFunCCS(BaseReg "ptr", StgHeader_ccs(pap) "ptr");
#endif #endif
// Reload the stack // Reload the stack
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
TICK_ALLOC_HEAP_NOCTR(BYTES_TO_WDS(size)); \ TICK_ALLOC_HEAP_NOCTR(BYTES_TO_WDS(size)); \
TICK_ALLOC_PAP(n+1 /* +1 for the FUN */, 0); \ TICK_ALLOC_PAP(n+1 /* +1 for the FUN */, 0); \
pap = Hp + WDS(1) - size; \ pap = Hp + WDS(1) - size; \
SET_HDR(pap, stg_PAP_info, W_[CCCS]); \ SET_HDR(pap, stg_PAP_info, CCCS); \
StgPAP_arity(pap) = HALF_W_(arity - m); \ StgPAP_arity(pap) = HALF_W_(arity - m); \
StgPAP_fun(pap) = R1; \ StgPAP_fun(pap) = R1; \
StgPAP_n_args(pap) = HALF_W_(n); \ StgPAP_n_args(pap) = HALF_W_(n); \
...@@ -52,7 +52,7 @@ ...@@ -52,7 +52,7 @@
TICK_ALLOC_HEAP_NOCTR(BYTES_TO_WDS(size)); \ TICK_ALLOC_HEAP_NOCTR(BYTES_TO_WDS(size)); \
TICK_ALLOC_PAP(n+1 /* +1 for the FUN */, 0); \ TICK_ALLOC_PAP(n+1 /* +1 for the FUN */, 0); \
new_pap = Hp + WDS(1) - size; \ new_pap = Hp + WDS(1) - size; \
SET_HDR(new_pap, stg_PAP_info, W_[CCCS]); \ SET_HDR(new_pap, stg_PAP_info, CCCS); \
StgPAP_arity(new_pap) = HALF_W_(arity - m); \ StgPAP_arity(new_pap) = HALF_W_(arity - m); \
W_ n_args; \ W_ n_args; \
n_args = TO_W_(StgPAP_n_args(pap)); \ n_args = TO_W_(StgPAP_n_args(pap)); \
...@@ -78,10 +78,10 @@ ...@@ -78,10 +78,10 @@
// Jump to target, saving CCCS and restoring it on return // Jump to target, saving CCCS and restoring it on return
#if defined(PROFILING) #if defined(PROFILING)
#define jump_SAVE_CCCS(target) \ #define jump_SAVE_CCCS(target) \
Sp(-1) = W_[CCCS]; \ Sp(-1) = CCCS; \
Sp(-2) = stg_restore_cccs_info; \ Sp(-2) = stg_restore_cccs_info; \
Sp_adj(-2); \ Sp_adj(-2); \
jump (target) jump (target)
#else #else
#define jump_SAVE_CCCS(target) jump (target) #define jump_SAVE_CCCS(target) jump (target)
......
...@@ -46,7 +46,7 @@ volatile StgWord waiting_for_gc = 0; ...@@ -46,7 +46,7 @@ volatile StgWord waiting_for_gc = 0;
/* Let foreign code get the current Capability -- assuming there is one! /* Let foreign code get the current Capability -- assuming there is one!
* This is useful for unsafe foreign calls because they are called with * This is useful for unsafe foreign calls because they are called with
* the current Capability held, but they are not passed it. For example, * the current Capability held, but they are not passed it. For example,
* see see the integer-gmp package which calls allocateLocal() in its * see see the integer-gmp package which calls allocate() in its
* stgAllocForGMP() function (which gets called by gmp functions). * stgAllocForGMP() function (which gets called by gmp functions).
* */ * */
Capability * rts_unsafeGetMyCapability (void) Capability * rts_unsafeGetMyCapability (void)
...@@ -265,6 +265,10 @@ initCapability( Capability *cap, nat i ) ...@@ -265,6 +265,10 @@ initCapability( Capability *cap, nat i )
cap->context_switch = 0; cap->context_switch = 0;
cap->pinned_object_block = NULL; cap->pinned_object_block = NULL;
#ifdef PROFILING
cap->r.rCCCS = CCS_SYSTEM;
#endif
traceCapsetAssignCap(CAPSET_OSPROCESS_DEFAULT, i); traceCapsetAssignCap(CAPSET_OSPROCESS_DEFAULT, i);
traceCapsetAssignCap(CAPSET_CLOCKDOMAIN_DEFAULT, i); traceCapsetAssignCap(CAPSET_CLOCKDOMAIN_DEFAULT, i);
#if defined(THREADED_RTS) #if defined(THREADED_RTS)
...@@ -453,6 +457,9 @@ releaseCapability_ (Capability* cap, ...@@ -453,6 +457,9 @@ releaseCapability_ (Capability* cap,
} }
} }
#ifdef PROFILING
cap->r.rCCCS = CCS_IDLE;
#endif
last_free_capability = cap; last_free_capability = cap;