Commit 7bb0447d authored by Simon Marlow's avatar Simon Marlow

Overhaul of infrastructure for profiling, coverage (HPC) and breakpoints

User visible changes
====================

Profilng
--------

Flags renamed (the old ones are still accepted for now):

  OLD            NEW
  ---------      ------------
  -auto-all      -fprof-auto
  -auto          -fprof-exported
  -caf-all       -fprof-cafs

New flags:

  -fprof-auto              Annotates all bindings (not just top-level
                           ones) with SCCs

  -fprof-top               Annotates just top-level bindings with SCCs

  -fprof-exported          Annotates just exported bindings with SCCs

  -fprof-no-count-entries  Do not maintain entry counts when profiling
                           (can make profiled code go faster; useful with
                           heap profiling where entry counts are not used)

Cost-centre stacks have a new semantics, which should in most cases
result in more useful and intuitive profiles.  If you find this not to
be the case, please let me know.  This is the area where I have been
experimenting most, and the current solution is probably not the
final version, however it does address all the outstanding bugs and
seems to be better than GHC 7.2.

Stack traces
------------

+RTS -xc now gives more information.  If the exception originates from
a CAF (as is common, because GHC tends to lift exceptions out to the
top-level), then the RTS walks up the stack and reports the stack in
the enclosing update frame(s).

Result: +RTS -xc is much more useful now - but you still have to
compile for profiling to get it.  I've played around a little with
adding 'head []' to GHC itself, and +RTS -xc does pinpoint the problem
quite accurately.

I plan to add more facilities for stack tracing (e.g. in GHCi) in the
future.

Coverage (HPC)
--------------

 * derived instances are now coloured yellow if they weren't used
 * likewise record field names
 * entry counts are more accurate (hpc --fun-entry-count)
 * tab width is now correct (markup was previously off in source with
   tabs)

Internal changes
================

In Core, the Note constructor has been replaced by

        Tick (Tickish b) (Expr b)

which is used to represent all the kinds of source annotation we
support: profiling SCCs, HPC ticks, and GHCi breakpoints.

Depending on the properties of the Tickish, different transformations
apply to Tick.  See CoreUtils.mkTick for details.

Tickets
=======

This commit closes the following tickets, test cases to follow:

  - Close #2552: not a bug, but the behaviour is now more intuitive
    (test is T2552)

  - Close #680 (test is T680)

  - Close #1531 (test is result001)

  - Close #949 (test is T949)

  - Close #2466: test case has bitrotted (doesn't compile against current
    version of vector-space package)
parent bd72eeb1
...@@ -54,8 +54,7 @@ module Id ( ...@@ -54,8 +54,7 @@ module Id (
isFCallId, isFCallId_maybe, isFCallId, isFCallId_maybe,
isDataConWorkId, isDataConWorkId_maybe, isDataConId_maybe, idDataCon, isDataConWorkId, isDataConWorkId_maybe, isDataConId_maybe, idDataCon,
isConLikeId, isBottomingId, idIsFrom, isConLikeId, isBottomingId, idIsFrom,
isTickBoxOp, isTickBoxOp_maybe, hasNoBinding,
hasNoBinding,
-- ** Evidence variables -- ** Evidence variables
DictId, isDictId, isEvVar, DictId, isDictId, isEvVar,
...@@ -426,20 +425,6 @@ isDeadBinder bndr | isId bndr = isDeadOcc (idOccInfo bndr) ...@@ -426,20 +425,6 @@ isDeadBinder bndr | isId bndr = isDeadOcc (idOccInfo bndr)
| otherwise = False -- TyVars count as not dead | otherwise = False -- TyVars count as not dead
\end{code} \end{code}
\begin{code}
isTickBoxOp :: Id -> Bool
isTickBoxOp id =
case Var.idDetails id of
TickBoxOpId _ -> True
_ -> False
isTickBoxOp_maybe :: Id -> Maybe TickBoxOp
isTickBoxOp_maybe id =
case Var.idDetails id of
TickBoxOpId tick -> Just tick
_ -> Nothing
\end{code}
%************************************************************************ %************************************************************************
%* * %* *
Evidence variables Evidence variables
......
...@@ -15,8 +15,7 @@ have a standard form, namely: ...@@ -15,8 +15,7 @@ have a standard form, namely:
module MkId ( module MkId (
mkDictFunId, mkDictFunTy, mkDictSelId, mkDictFunId, mkDictFunTy, mkDictSelId,
mkDataConIds, mkDataConIds, mkPrimOpId, mkFCallId,
mkPrimOpId, mkFCallId, mkTickBoxOpId, mkBreakPointOpId,
mkReboxingAlt, wrapNewTypeBody, unwrapNewTypeBody, mkReboxingAlt, wrapNewTypeBody, unwrapNewTypeBody,
wrapFamInstBody, unwrapFamInstScrut, wrapFamInstBody, unwrapFamInstScrut,
...@@ -65,7 +64,6 @@ import Pair ...@@ -65,7 +64,6 @@ import Pair
import Outputable import Outputable
import FastString import FastString
import ListSetOps import ListSetOps
import Module
\end{code} \end{code}
%************************************************************************ %************************************************************************
...@@ -766,30 +764,6 @@ mkFCallId uniq fcall ty ...@@ -766,30 +764,6 @@ mkFCallId uniq fcall ty
(arg_tys, _) = tcSplitFunTys tau (arg_tys, _) = tcSplitFunTys tau
arity = length arg_tys arity = length arg_tys
strict_sig = mkStrictSig (mkTopDmdType (replicate arity evalDmd) TopRes) strict_sig = mkStrictSig (mkTopDmdType (replicate arity evalDmd) TopRes)
-- Tick boxes and breakpoints are both represented as TickBoxOpIds,
-- except for the type:
--
-- a plain HPC tick box has type (State# RealWorld)
-- a breakpoint Id has type forall a.a
--
-- The breakpoint Id will be applied to a list of arbitrary free variables,
-- which is why it needs a polymorphic type.
mkTickBoxOpId :: Unique -> Module -> TickBoxId -> Id
mkTickBoxOpId uniq mod ix = mkTickBox' uniq mod ix realWorldStatePrimTy
mkBreakPointOpId :: Unique -> Module -> TickBoxId -> Id
mkBreakPointOpId uniq mod ix = mkTickBox' uniq mod ix ty
where ty = mkSigmaTy [openAlphaTyVar] [] openAlphaTy
mkTickBox' :: Unique -> Module -> TickBoxId -> Type -> Id
mkTickBox' uniq mod ix ty = mkGlobalId (TickBoxOpId tickbox) name ty info
where
tickbox = TickBox mod ix
occ_str = showSDoc (braces (ppr tickbox))
name = mkTickBoxOpName uniq occ_str
info = noCafIdInfo
\end{code} \end{code}
......
...@@ -40,9 +40,8 @@ module Name ( ...@@ -40,9 +40,8 @@ module Name (
mkSystemName, mkSystemNameAt, mkSystemName, mkSystemNameAt,
mkInternalName, mkDerivedInternalName, mkInternalName, mkDerivedInternalName,
mkSystemVarName, mkSysTvName, mkSystemVarName, mkSysTvName,
mkFCallName, mkFCallName,
mkTickBoxOpName, mkExternalName, mkWiredInName,
mkExternalName, mkWiredInName,
-- ** Manipulating and deconstructing 'Name's -- ** Manipulating and deconstructing 'Name's
nameUnique, setNameUnique, nameUnique, setNameUnique,
...@@ -254,8 +253,8 @@ mkInternalName uniq occ loc = Name { n_uniq = getKeyFastInt uniq ...@@ -254,8 +253,8 @@ mkInternalName uniq occ loc = Name { n_uniq = getKeyFastInt uniq
-- * the insides of the compiler don't care: they use the Unique -- * the insides of the compiler don't care: they use the Unique
-- * when printing for -ddump-xxx you can switch on -dppr-debug to get the -- * when printing for -ddump-xxx you can switch on -dppr-debug to get the
-- uniques if you get confused -- uniques if you get confused
-- * for interface files we tidyCore first, which puts the uniques -- * for interface files we tidyCore first, which makes
-- into the print name (see setNameVisibility below) -- the OccNames distinct when they need to be
mkDerivedInternalName :: (OccName -> OccName) -> Unique -> Name -> Name mkDerivedInternalName :: (OccName -> OccName) -> Unique -> Name -> Name
mkDerivedInternalName derive_occ uniq (Name { n_occ = occ, n_loc = loc }) mkDerivedInternalName derive_occ uniq (Name { n_occ = occ, n_loc = loc })
...@@ -291,15 +290,8 @@ mkSysTvName uniq fs = mkSystemName uniq (mkOccNameFS tvName fs) ...@@ -291,15 +290,8 @@ mkSysTvName uniq fs = mkSystemName uniq (mkOccNameFS tvName fs)
-- | Make a name for a foreign call -- | Make a name for a foreign call
mkFCallName :: Unique -> String -> Name mkFCallName :: Unique -> String -> Name
-- The encoded string completely describes the ccall mkFCallName uniq str = mkInternalName uniq (mkVarOcc str) noSrcSpan
mkFCallName uniq str = Name { n_uniq = getKeyFastInt uniq, n_sort = Internal, -- The encoded string completely describes the ccall
n_occ = mkVarOcc str, n_loc = noSrcSpan }
mkTickBoxOpName :: Unique -> String -> Name
mkTickBoxOpName uniq str
= Name { n_uniq = getKeyFastInt uniq, n_sort = Internal,
n_occ = mkVarOcc str, n_loc = noSrcSpan }
\end{code} \end{code}
\begin{code} \begin{code}
......
...@@ -798,7 +798,6 @@ stmtMacros :: UniqFM ([CmmExpr] -> Code) ...@@ -798,7 +798,6 @@ stmtMacros :: UniqFM ([CmmExpr] -> Code)
stmtMacros = listToUFM [ stmtMacros = listToUFM [
( fsLit "CCS_ALLOC", \[words,ccs] -> profAlloc words ccs ), ( fsLit "CCS_ALLOC", \[words,ccs] -> profAlloc words ccs ),
( fsLit "CLOSE_NURSERY", \[] -> emitCloseNursery ), ( fsLit "CLOSE_NURSERY", \[] -> emitCloseNursery ),
( fsLit "ENTER_CCS_PAP_CL", \[e] -> enterCostCentrePAP e ),
( fsLit "ENTER_CCS_THUNK", \[e] -> enterCostCentreThunk e ), ( fsLit "ENTER_CCS_THUNK", \[e] -> enterCostCentreThunk e ),
( fsLit "HP_CHK_GEN", \[words,liveness,reentry] -> ( fsLit "HP_CHK_GEN", \[words,liveness,reentry] ->
hpChkGen words liveness reentry ), hpChkGen words liveness reentry ),
......
...@@ -28,6 +28,7 @@ module CgCallConv ( ...@@ -28,6 +28,7 @@ module CgCallConv (
) where ) where
import CgMonad import CgMonad
import CgProf
import SMRep import SMRep
import OldCmm import OldCmm
...@@ -160,10 +161,16 @@ constructSlowCall amodes ...@@ -160,10 +161,16 @@ constructSlowCall amodes
-- fewer arguments than we currently have. -- fewer arguments than we currently have.
slowArgs :: [(CgRep,CmmExpr)] -> [(CgRep,CmmExpr)] slowArgs :: [(CgRep,CmmExpr)] -> [(CgRep,CmmExpr)]
slowArgs [] = [] slowArgs [] = []
slowArgs amodes = (NonPtrArg, mkLblExpr stg_ap_pat) : args ++ slowArgs rest slowArgs amodes
where (arg_pat, args, rest) = matchSlowPattern amodes | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest
stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat | otherwise = this_pat ++ slowArgs rest
where
(arg_pat, args, rest) = matchSlowPattern amodes
stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
this_pat = (NonPtrArg, mkLblExpr stg_ap_pat) : args
save_cccs = [(NonPtrArg, mkLblExpr save_cccs_lbl), (NonPtrArg, curCCS)]
save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
matchSlowPattern :: [(CgRep,CmmExpr)] matchSlowPattern :: [(CgRep,CmmExpr)]
-> (FastString, [(CgRep,CmmExpr)], [(CgRep,CmmExpr)]) -> (FastString, [(CgRep,CmmExpr)], [(CgRep,CmmExpr)])
matchSlowPattern amodes = (arg_pat, these, rest) matchSlowPattern amodes = (arg_pat, these, rest)
......
...@@ -108,7 +108,7 @@ cgStdRhsClosure ...@@ -108,7 +108,7 @@ cgStdRhsClosure
-> [StgArg] -- payload -> [StgArg] -- payload
-> FCode (Id, CgIdInfo) -> FCode (Id, CgIdInfo)
cgStdRhsClosure bndr cc _bndr_info _fvs args body lf_info payload cgStdRhsClosure bndr _cc _bndr_info _fvs _args _body lf_info payload
= do -- AHA! A STANDARD-FORM THUNK = do -- AHA! A STANDARD-FORM THUNK
{ -- LAY OUT THE OBJECT { -- LAY OUT THE OBJECT
amodes <- getArgAmodes payload amodes <- getArgAmodes payload
...@@ -122,10 +122,10 @@ cgStdRhsClosure bndr cc _bndr_info _fvs args body lf_info payload ...@@ -122,10 +122,10 @@ cgStdRhsClosure bndr cc _bndr_info _fvs args body lf_info payload
NoC_SRT -- No SRT for a std-form closure NoC_SRT -- No SRT for a std-form closure
descr descr
; (use_cc, blame_cc) <- chooseDynCostCentres cc args body -- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
-- BUILD THE OBJECT -- BUILD THE OBJECT
; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets ; heap_offset <- allocDynClosure closure_info curCCS curCCS amodes_w_offsets
-- RETURN -- RETURN
; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) } ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) }
...@@ -197,9 +197,9 @@ cgRhsClosure bndr cc bndr_info fvs upd_flag args body = do ...@@ -197,9 +197,9 @@ cgRhsClosure bndr cc bndr_info fvs upd_flag args body = do
; let ; let
to_amode (info, offset) = do { amode <- idInfoToAmode info to_amode (info, offset) = do { amode <- idInfoToAmode info
; return (amode, offset) } ; return (amode, offset) }
; (use_cc, blame_cc) <- chooseDynCostCentres cc args body -- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
; amodes_w_offsets <- mapFCs to_amode bind_details ; amodes_w_offsets <- mapFCs to_amode bind_details
; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets ; heap_offset <- allocDynClosure closure_info curCCS curCCS amodes_w_offsets
-- RETURN -- RETURN
; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) } ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) }
...@@ -239,16 +239,15 @@ So it should set up an update frame (if it is shared). ...@@ -239,16 +239,15 @@ So it should set up an update frame (if it is shared).
NB: Thunks cannot have a primitive type! NB: Thunks cannot have a primitive type!
\begin{code} \begin{code}
closureCodeBody _binder_info cl_info cc [{- No args i.e. thunk -}] body = do closureCodeBody _binder_info cl_info _cc [{- No args i.e. thunk -}] body = do
{ body_absC <- getCgStmts $ do { body_absC <- getCgStmts $ do
{ tickyEnterThunk cl_info { tickyEnterThunk cl_info
; ldvEnterClosure cl_info -- NB: Node always points when profiling ; ldvEnterClosure cl_info -- NB: Node always points when profiling
; thunkWrapper cl_info $ do ; thunkWrapper cl_info $ do
-- We only enter cc after setting up update so -- We only enter cc after setting up update so
-- that cc of enclosing scope will be recorded -- that cc of enclosing scope will be recorded
-- in update frame CAF/DICT functions will be -- in the update frame
-- subsumed by this enclosing cc { enterCostCentreThunk (CmmReg nodeReg)
{ enterCostCentre cl_info cc body
; cgExpr body } ; cgExpr body }
} }
...@@ -307,16 +306,14 @@ mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do ...@@ -307,16 +306,14 @@ mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do
; bindArgsToStack stk_args ; bindArgsToStack stk_args
; setRealAndVirtualSp sp_top ; setRealAndVirtualSp sp_top
-- Enter the cost-centre, if required -- Do the business
-- ToDo: It's not clear why this is outside the funWrapper,
-- but the tickyEnterFun is inside. Perhaps we can put
-- them together?
; enterCostCentre cl_info cc body
-- Do the business
; funWrapper cl_info reg_args reg_save_code $ do ; funWrapper cl_info reg_args reg_save_code $ do
{ tickyEnterFun cl_info { tickyEnterFun cl_info
; cgExpr body } ; enterCostCentreFun cc $
CmmMachOp mo_wordSub [ CmmReg nodeReg
, CmmLit (mkIntCLit (funTag cl_info)) ]
; cgExpr body }
} }
\end{code} \end{code}
......
...@@ -223,9 +223,9 @@ buildDynCon' _ binder ccs con args ...@@ -223,9 +223,9 @@ buildDynCon' _ binder ccs con args
where where
lf_info = mkConLFInfo con lf_info = mkConLFInfo con
use_cc -- cost-centre to stick in the object use_cc -- cost-centre to stick in the object
| currentOrSubsumedCCS ccs = curCCS | isCurrentCCS ccs = curCCS
| otherwise = CmmLit (mkCCostCentreStack ccs) | otherwise = panic "buildDynCon: non-current CCS not implemented"
blame_cc = use_cc -- cost-centre on which to blame the alloc (same) blame_cc = use_cc -- cost-centre on which to blame the alloc (same)
\end{code} \end{code}
......
...@@ -258,7 +258,7 @@ SCC expressions are treated specially. They set the current cost ...@@ -258,7 +258,7 @@ SCC expressions are treated specially. They set the current cost
centre. centre.
\begin{code} \begin{code}
cgExpr (StgSCC cc expr) = do emitSetCCC cc; cgExpr expr cgExpr (StgSCC cc tick push expr) = do emitSetCCC cc tick push; cgExpr expr
\end{code} \end{code}
%******************************************************** %********************************************************
......
...@@ -575,13 +575,7 @@ allocDynClosure cl_info use_cc _blame_cc amodes_with_offsets ...@@ -575,13 +575,7 @@ allocDynClosure cl_info use_cc _blame_cc amodes_with_offsets
-- SAY WHAT WE ARE ABOUT TO DO -- SAY WHAT WE ARE ABOUT TO DO
; profDynAlloc cl_info use_cc ; profDynAlloc cl_info use_cc
-- ToDo: This is almost certainly wrong ; tickyDynAlloc cl_info
-- We're ignoring blame_cc. But until we've
-- fixed the boxing hack in chooseDynCostCentres etc,
-- we're worried about making things worse by "fixing"
-- this part to use blame_cc!
; tickyDynAlloc cl_info
-- ALLOCATE THE OBJECT -- ALLOCATE THE OBJECT
; base <- getHpRelOffset info_offset ; base <- getHpRelOffset info_offset
......
...@@ -10,13 +10,13 @@ module CgProf ( ...@@ -10,13 +10,13 @@ module CgProf (
mkCCostCentre, mkCCostCentreStack, mkCCostCentre, mkCCostCentreStack,
-- Cost-centre Profiling -- Cost-centre Profiling
dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
enterCostCentre, enterCostCentrePAP, enterCostCentreThunk, enterCostCentreThunk,
chooseDynCostCentres, enterCostCentreFun,
costCentreFrom, costCentreFrom,
curCCS, curCCSAddr, curCCS, curCCSAddr,
emitCostCentreDecl, emitCostCentreStackDecl, emitCostCentreDecl, emitCostCentreStackDecl,
emitSetCCC, emitCCS, emitSetCCC,
-- Lag/drag/void stuff -- Lag/drag/void stuff
ldvEnter, ldvEnterClosure, ldvRecordCreate ldvEnter, ldvEnterClosure, ldvRecordCreate
...@@ -40,10 +40,8 @@ import OldCmm ...@@ -40,10 +40,8 @@ import OldCmm
import OldCmmUtils import OldCmmUtils
import CLabel import CLabel
import Id
import qualified Module import qualified Module
import CostCentre import CostCentre
import StgSyn
import StaticFlags import StaticFlags
import FastString import FastString
import Module import Module
...@@ -108,6 +106,9 @@ profDynAlloc cl_info ccs ...@@ -108,6 +106,9 @@ profDynAlloc cl_info ccs
-- | Record the allocation of a closure (size is given by a CmmExpr) -- | Record the allocation of a closure (size is given by a CmmExpr)
-- The size must be in words, because the allocation counter in a CCS counts -- The size must be in words, because the allocation counter in a CCS counts
-- in words. -- in words.
--
-- This API is used by the @CCS_ALLOC()@ macro in @.cmm@ code.
--
profAlloc :: CmmExpr -> CmmExpr -> Code profAlloc :: CmmExpr -> CmmExpr -> Code
profAlloc words ccs profAlloc words ccs
= ifProfiling $ = ifProfiling $
...@@ -121,160 +122,21 @@ profAlloc words ccs ...@@ -121,160 +122,21 @@ profAlloc words ccs
where where
alloc_rep = typeWidth REP_CostCentreStack_mem_alloc alloc_rep = typeWidth REP_CostCentreStack_mem_alloc
-- ----------------------------------------------------------------------
-- Setting the cost centre in a new closure
chooseDynCostCentres :: CostCentreStack
-> [Id] -- Args
-> StgExpr -- Body
-> FCode (CmmExpr, CmmExpr)
-- Called when alllcating a closure
-- Tells which cost centre to put in the object, and which
-- to blame the cost of allocation on
chooseDynCostCentres ccs args body = do
-- Cost-centre we record in the object
use_ccs <- emitCCS ccs
-- Cost-centre on whom we blame the allocation
let blame_ccs
| null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS)
| otherwise = use_ccs
return (use_ccs, blame_ccs)
-- Some CostCentreStacks are a sequence of pushes on top of CCCS.
-- These pushes must be performed before we can refer to the stack in
-- an expression.
emitCCS :: CostCentreStack -> FCode CmmExpr
emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's)
where
(cc's, ccs') = decomposeCCS ccs
push_em ccs [] = return ccs
push_em ccs (cc:rest) = do
tmp <- newTemp bWord -- TODO FIXME NOW
pushCostCentre tmp ccs cc
push_em (CmmReg (CmmLocal tmp)) rest
ccsExpr :: CostCentreStack -> CmmExpr
ccsExpr ccs
| isCurrentCCS ccs = curCCS
| otherwise = CmmLit (mkCCostCentreStack ccs)
isBox :: StgExpr -> Bool
-- If it's an utterly trivial RHS, then it must be
-- one introduced by boxHigherOrderArgs for profiling,
-- so we charge it to "OVERHEAD".
-- This looks like a GROSS HACK to me --SDM
isBox (StgApp _ []) = True
isBox _ = False
-- ----------------------------------------------------------------------- -- -----------------------------------------------------------------------
-- Setting the current cost centre on entry to a closure -- Setting the current cost centre on entry to a closure
-- For lexically scoped profiling we have to load the cost centre from
-- the closure entered, if the costs are not supposed to be inherited.
-- This is done immediately on entering the fast entry point.
-- Load current cost centre from closure, if not inherited.
-- Node is guaranteed to point to it, if profiling and not inherited.
enterCostCentre
:: ClosureInfo
-> CostCentreStack
-> StgExpr -- The RHS of the closure
-> Code
-- We used to have a special case for bindings of form
-- f = g True
-- where g has arity 2. The RHS is a thunk, but we don't
-- need to update it; and we want to subsume costs.
-- We don't have these sort of PAPs any more, so the special
-- case has gone away.
enterCostCentre closure_info ccs body
= ifProfiling $
ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs)
enter_cost_centre closure_info ccs body
enter_cost_centre :: ClosureInfo -> CostCentreStack -> StgExpr -> Code
enter_cost_centre closure_info ccs body
| isSubsumedCCS ccs
= ASSERT(isToplevClosure closure_info)
ASSERT(re_entrant)
enter_ccs_fsub
| isDerivedFromCurrentCCS ccs
= do {
if re_entrant && not is_box
then
enter_ccs_fun node_ccs
else
stmtC (CmmStore curCCSAddr node_ccs)
-- don't forget to bump the scc count. This closure might have been
-- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal
-- pass has turned into simply let x = e in ...x... and attached
-- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that
-- we don't lose the scc counter, bump it in the entry code for x.
-- ToDo: for a multi-push we should really bump the counter for
-- each of the intervening CCSs, not just the top one.
; when (not (isCurrentCCS ccs)) $
stmtC (bumpSccCount curCCS)
}
| isCafCCS ccs
= ASSERT(isToplevClosure closure_info)
ASSERT(not re_entrant)
do { -- This is just a special case of the isDerivedFromCurrentCCS
-- case above. We could delete this, but it's a micro
-- optimisation and saves a bit of code.
stmtC (CmmStore curCCSAddr enc_ccs)
; stmtC (bumpSccCount node_ccs)
}
| otherwise
= panic "enterCostCentre"
where
enc_ccs = CmmLit (mkCCostCentreStack ccs)
re_entrant = closureReEntrant closure_info
node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag))
is_box = isBox body
-- if this is a function, then node will be tagged; we must subract the tag
node_tag = funTag closure_info
-- set the current CCS when entering a PAP
enterCostCentrePAP :: CmmExpr -> Code
enterCostCentrePAP closure =
ifProfiling $ do
enter_ccs_fun (costCentreFrom closure)
enteringPAP 1
enterCostCentreThunk :: CmmExpr -> Code enterCostCentreThunk :: CmmExpr -> Code
enterCostCentreThunk closure = enterCostCentreThunk closure =
ifProfiling $ do ifProfiling $ do
stmtC $ CmmStore curCCSAddr (costCentreFrom closure) stmtC $ CmmStore curCCSAddr (costCentreFrom closure)
enter_ccs_fun :: CmmExpr -> Code enterCostCentreFun :: CostCentreStack -> CmmExpr -> Code
enter_ccs_fun stack = emitRtsCall rtsPackageId (fsLit "EnterFunCCS") [CmmHinted stack AddrHint] False enterCostCentreFun ccs closure =
-- ToDo: vols ifProfiling $ do
if isCurrentCCS ccs
enter_ccs_fsub :: Code then emitRtsCall rtsPackageId (fsLit "enterFunCCS")
enter_ccs_fsub = enteringPAP 0 [CmmHinted (costCentreFrom closure) AddrHint] False
else return () -- top-level function, nothing to do
-- When entering a PAP, EnterFunCCS is called by both the PAP entry
-- code and the function entry code; we don't want the function's
-- entry code to also update CCCS in the event that it was called via
-- a PAP, so we set the flag entering_PAP to indicate that we are
-- entering via a PAP.
enteringPAP :: Integer -> Code
enteringPAP n
= stmtC (CmmStore (CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "entering_PAP"))))
(CmmLit (CmmInt n cIntWidth)))
ifProfiling :: Code -> Code ifProfiling :: Code -> Code
ifProfiling code ifProfiling code
...@@ -286,7 +148,6 @@ ifProfilingL xs ...@@ -286,7 +148,6 @@ ifProfilingL xs
| opt_SccProfilingOn = xs | opt_SccProfilingOn = xs
| otherwise = [] | otherwise = []
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Initialising Cost Centres & CCSs -- Initialising Cost Centres & CCSs
...@@ -306,15 +167,15 @@ emitCostCentreDecl cc = do ...@@ -306,15 +167,15 @@ emitCostCentreDecl cc = do
modl, -- char *module, modl, -- char *module,
zero, -- StgWord time_ticks zero, -- StgWord time_ticks
zero64, -- StgWord64 mem_alloc