diff --git a/ghc/compiler/codeGen/CgExpr.lhs b/ghc/compiler/codeGen/CgExpr.lhs
index 2f4106404b16dfd878e65a63d5423fb7f733408a..6e02c259f03778541b0a5d8b246377dcb8563c19 100644
--- a/ghc/compiler/codeGen/CgExpr.lhs
+++ b/ghc/compiler/codeGen/CgExpr.lhs
@@ -1,7 +1,7 @@
 %
 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
 %
-% $Id: CgExpr.lhs,v 1.21 1999/03/22 16:57:11 simonm Exp $
+% $Id: CgExpr.lhs,v 1.22 1999/03/25 13:13:51 simonm Exp $
 %
 %********************************************************
 %*							*
@@ -36,7 +36,7 @@ import CgTailCall	( cgTailCall, performReturn, performPrimReturn,
 			)
 import ClosureInfo	( mkClosureLFInfo, mkSelectorLFInfo,
 			  mkApLFInfo, layOutDynCon )
-import CostCentre	( sccAbleCostCentre, isDictCC, isSccCountCostCentre )
+import CostCentre	( sccAbleCostCentre, isSccCountCostCentre )
 import Id		( idPrimRep, idType, Id )
 import VarSet
 import DataCon		( DataCon, dataConTyCon )
@@ -234,7 +234,7 @@ centre.
 cgExpr (StgSCC cc expr)
   = ASSERT(sccAbleCostCentre cc)
     costCentresC
-	(if isDictCC cc then SLIT("SET_DICT_CCC") else SLIT("SET_CCC"))
+	SLIT("SET_CCC")
 	[mkCCostCentre cc, mkIntCLit (if isSccCountCostCentre cc then 1 else 0)]
     `thenC`
     cgExpr expr
diff --git a/ghc/compiler/coreSyn/CoreUtils.lhs b/ghc/compiler/coreSyn/CoreUtils.lhs
index 38b8c70a0e7646fd86dc01cd47bbe62209110ca8..814426e185aa3c9f7db25daea5cad1b784f3fa7e 100644
--- a/ghc/compiler/coreSyn/CoreUtils.lhs
+++ b/ghc/compiler/coreSyn/CoreUtils.lhs
@@ -14,9 +14,7 @@ module CoreUtils (
 	cheapEqExpr,
 
 	substExpr, substId, substIds,
-	idSpecVars, idFreeVars,
-
-	squashableDictishCcExpr
+	idSpecVars, idFreeVars
     ) where
 
 #include "HsVersions.h"
@@ -38,7 +36,7 @@ import Id		( Id, idType, setIdType, idUnique, idAppIsBottom,
 			)
 import IdInfo		( arityLowerBound, InlinePragInfo(..) )
 import SpecEnv		( emptySpecEnv, specEnvToList, isEmptySpecEnv )
-import CostCentre	( isDictCC, CostCentre )
+import CostCentre	( CostCentre )
 import Const		( Con, conType )
 import Type		( Type, TyVarSubst, mkFunTy, mkForAllTy,
 			  splitFunTy_maybe, applyTys, tyVarsOfType, tyVarsOfTypes,
@@ -315,22 +313,6 @@ exprIsWHNF e@(App _ _)        = case collectArgs e of
 				  _	        -> False
 \end{code}
 
-I don't like this function but I'n not confidnt enough to change it.
-
-\begin{code}
-squashableDictishCcExpr :: CostCentre -> Expr b -> Bool
-squashableDictishCcExpr cc expr
-  | isDictCC cc = False		-- that was easy...
-  | otherwise   = squashable expr
-  where
-    squashable (Var _)      = True
-    squashable (Con  _ _)   = True -- I think so... WDP 94/09
-    squashable (App f a)
-      | isTypeArg a	    = squashable f
-    squashable other	    = False
-\end{code}
-
-
 @cheapEqExpr@ is a cheap equality test which bales out fast!
 	True  => definitely equal
 	False => may or may not be equal
diff --git a/ghc/compiler/deSugar/Desugar.lhs b/ghc/compiler/deSugar/Desugar.lhs
index 422dec02b17adc0267d57b0f376cee037bf8b559..4fc7be46e73bf17ee0f071469d0425b16c6ec344 100644
--- a/ghc/compiler/deSugar/Desugar.lhs
+++ b/ghc/compiler/deSugar/Desugar.lhs
@@ -13,7 +13,7 @@ import HsSyn		( MonoBinds )
 import TcHsSyn		( TypecheckedMonoBinds, TypecheckedForeignDecl )
 import CoreSyn
 import DsMonad
-import DsBinds		( dsMonoBinds )
+import DsBinds		( dsMonoBinds, AutoScc(..) )
 import DsForeign	( dsForeigns )
 import DsUtils
 import DsExpr		()	-- Forces DsExpr to be compiled; DsBinds only
@@ -42,7 +42,9 @@ deSugar us global_val_env mod_name all_binds fo_decls = do
 	beginPass "Desugar"
 	-- Do desugaring
 	let (core_prs, ds_warns1) = initDs us1 global_val_env module_and_group 
-				            (dsMonoBinds opt_SccProfilingOn all_binds [])
+				            (dsMonoBinds auto_scc all_binds [])
+	    auto_scc | opt_SccProfilingOn = TopLevel
+		     | otherwise          = NoSccs
             ds_binds' = [Rec core_prs]
 
     	    ((fi_binds, fe_binds, h_code, c_code), ds_warns2) = 
diff --git a/ghc/compiler/deSugar/DsBinds.lhs b/ghc/compiler/deSugar/DsBinds.lhs
index c0d1f772d57333c837b024f21f3d557b97bf16f9..f072048133a8cdbc379d8e64a84dee5b896bcde1 100644
--- a/ghc/compiler/deSugar/DsBinds.lhs
+++ b/ghc/compiler/deSugar/DsBinds.lhs
@@ -8,7 +8,7 @@ in that the @Rec@/@NonRec@/etc structure is thrown away (whereas at
 lower levels it is preserved with @let@/@letrec@s).
 
 \begin{code}
-module DsBinds ( dsMonoBinds ) where
+module DsBinds ( dsMonoBinds, AutoScc(..) ) where
 
 #include "HsVersions.h"
 
@@ -26,16 +26,18 @@ import Match		( matchWrapper )
 
 import BasicTypes       ( RecFlag(..) )
 import CmdLineOpts	( opt_SccProfilingOn, opt_AutoSccsOnAllToplevs, 
-			  opt_AutoSccsOnExportedToplevs
+			  opt_AutoSccsOnExportedToplevs, opt_AutoSccsOnDicts
 		        )
-import CostCentre	( mkAutoCC, IsCafCC(..), mkAllDictsCC )
+import CostCentre	( CostCentre, mkAutoCC, IsCafCC(..) )
 import Id		( idType, Id )
 import VarEnv
 import Name		( isExported )
-import Type		( mkTyVarTy, isDictTy, substTy
-			)
+import Type		( mkTyVarTy, isDictTy, substTy )
 import TysWiredIn	( voidTy )
 import Outputable
+
+import Maybe
+import IOExts (trace)
 \end{code}
 
 %************************************************************************
@@ -45,7 +47,7 @@ import Outputable
 %************************************************************************
 
 \begin{code}
-dsMonoBinds :: Bool		-- False => don't (auto-)annotate scc on toplevs.
+dsMonoBinds :: AutoScc			-- scc annotation policy (see below)
 	    -> TypecheckedMonoBinds
 	    -> [(Id,CoreExpr)]		-- Put this on the end (avoid quadratic append)
 	    -> DsM [(Id,CoreExpr)]	-- Result
@@ -76,33 +78,35 @@ dsMonoBinds auto_scc (FunMonoBind fun _ matches locn) rest
   where
     error_string = "function " ++ showSDoc (ppr fun)
 
-dsMonoBinds _ (PatMonoBind pat grhss locn) rest
+dsMonoBinds auto_scc (PatMonoBind pat grhss locn) rest
   = putSrcLocDs locn $
-    dsGuarded grhss			`thenDs` \ body_expr ->
-    mkSelectorBinds pat body_expr	`thenDs` \ sel_binds ->
+    dsGuarded grhss				`thenDs` \ body_expr ->
+    mkSelectorBinds pat body_expr		`thenDs` \ sel_binds ->
+    mapDs (addAutoScc auto_scc) sel_binds	`thenDs` \ sel_binds ->
     returnDs (sel_binds ++ rest)
 
-	-- Common special case: no type or dictionary abstraction
-dsMonoBinds auto_scc (AbsBinds [] [] exports binds) rest
-  = mapDs (addAutoScc auto_scc) [(global, Var local) | (_, global, local) <- exports] `thenDs` \ exports' ->
-    dsMonoBinds False binds (exports' ++ rest)
-
-	-- Another common case: one exported variable
+	-- Common case: one exported variable
 	-- All non-recursive bindings come through this way
-dsMonoBinds auto_scc (AbsBinds all_tyvars dicts [(tyvars, global, local)] binds) rest
+dsMonoBinds auto_scc (AbsBinds all_tyvars dicts exps@[(tyvars, global, local)] binds) rest
   = ASSERT( all (`elem` tyvars) all_tyvars )
-    dsMonoBinds False binds []			`thenDs` \ core_prs ->
+    dsMonoBinds (addSccs auto_scc exps) binds []	`thenDs` \ core_prs ->
     let 
 	-- Always treat the binds as recursive, because the typechecker
 	-- makes rather mixed-up dictionary bindings
 	core_binds = [Rec core_prs]
+	global' = (global, mkLams tyvars $ mkLams dicts $ 
+	                   mkLets core_binds (Var local))
     in
-    addAutoScc auto_scc (global, mkLams tyvars $ mkLams dicts $ 
-			         mkLets core_binds (Var local)) `thenDs` \ global' ->
+    
     returnDs (global' : rest)
 
+	-- Another Common special case: no type or dictionary abstraction
+dsMonoBinds auto_scc (AbsBinds [] [] exports binds) rest
+  = let exports' = [(global, Var local) | (_, global, local) <- exports] in
+    dsMonoBinds (addSccs auto_scc exports) binds (exports' ++ rest)
+
 dsMonoBinds auto_scc (AbsBinds all_tyvars dicts exports binds) rest
-  = dsMonoBinds False binds []			`thenDs` \ core_prs ->
+  = dsMonoBinds (addSccs auto_scc exports) binds []`thenDs` \ core_prs ->
     let 
 	core_binds = [Rec core_prs]
 
@@ -122,10 +126,9 @@ dsMonoBinds auto_scc (AbsBinds all_tyvars dicts exports binds) rest
 		-- some of the tyvars will be bound to voidTy
 	    newSysLocalsDs (map (substTy env) local_tys) 	`thenDs` \ locals' ->
 	    newSysLocalDs  (substTy env tup_ty)			`thenDs` \ tup_id ->
-	    addAutoScc auto_scc
-		       (global, mkLams tyvars $ mkLams dicts $
-		     	        mkTupleSelector locals' (locals' !! n) tup_id $
-		     	        mkApps (mkTyApps (Var poly_tup_id) ty_args) dict_args)
+	    returnDs (global, mkLams tyvars $ mkLams dicts $
+		              mkTupleSelector locals' (locals' !! n) tup_id $
+		              mkApps (mkTyApps (Var poly_tup_id) ty_args) dict_args)
 	  where
 	    mk_ty_arg all_tyvar | all_tyvar `elem` tyvars = mkTyVarTy all_tyvar
 				| otherwise		  = voidTy
@@ -145,16 +148,34 @@ dsMonoBinds auto_scc (AbsBinds all_tyvars dicts exports binds) rest
 %************************************************************************
 
 \begin{code}
-addAutoScc :: Bool		-- if needs be, decorate toplevs?
+data AutoScc
+ 	= TopLevel
+	| TopLevelAddSccs (Id -> Maybe Id)
+	| NoSccs
+
+addSccs :: AutoScc -> [(a,Id,Id)] -> AutoScc
+addSccs auto_scc@(TopLevelAddSccs _) exports = auto_scc
+addSccs NoSccs   exports = NoSccs
+addSccs TopLevel exports 
+  = TopLevelAddSccs (\id -> case [ exp | (_,exp,loc) <- exports, loc == id ] of
+				(exp:_)  | opt_AutoSccsOnAllToplevs || 
+					    (isExported exp && 
+					     opt_AutoSccsOnExportedToplevs)
+					-> Just exp
+				_ -> Nothing)
+
+addAutoScc :: AutoScc		-- if needs be, decorate toplevs?
 	   -> (Id, CoreExpr)
 	   -> DsM (Id, CoreExpr)
 
-addAutoScc auto_scc_candidate pair@(bndr, core_expr) 
- | auto_scc_candidate && worthSCC core_expr && 
-   (opt_AutoSccsOnAllToplevs || (isExported bndr && opt_AutoSccsOnExportedToplevs))
+addAutoScc (TopLevelAddSccs auto_scc_fn) pair@(bndr, core_expr) 
+ | do_auto_scc && worthSCC core_expr
      = getModuleAndGroupDs `thenDs` \ (mod,grp) ->
-       returnDs (bndr, Note (SCC (mkAutoCC bndr mod grp NotCafCC)) core_expr)
- | otherwise 
+       returnDs (bndr, Note (SCC (mkAutoCC top_bndr mod grp NotCafCC)) core_expr)
+ where do_auto_scc = isJust maybe_auto_scc
+       maybe_auto_scc = auto_scc_fn bndr
+       (Just top_bndr) = maybe_auto_scc
+addAutoScc _ pair
      = returnDs pair
 
 worthSCC (Note (SCC _) _) = False
@@ -165,15 +186,16 @@ worthSCC core_expr        = True
 If profiling and dealing with a dict binding, wrap the dict in "_scc_ DICT <dict>":
 
 \begin{code}
-addDictScc var rhs
-  | not ( opt_SccProfilingOn || opt_AutoSccsOnAllToplevs)
-	    -- the latter is so that -unprof-auto-scc-all adds dict sccs
+addDictScc var rhs = returnDs rhs
+
+{- DISABLED for now (need to somehow make up a name for the scc) -- SDM
+  | not ( opt_SccProfilingOn && opt_AutoSccsOnDicts)
     || not (isDictTy (idType var))
   = returnDs rhs				-- That's easy: do nothing
 
   | otherwise
   = getModuleAndGroupDs 	`thenDs` \ (mod, grp) ->
-
 	-- ToDo: do -dicts-all flag (mark dict things with individual CCs)
     returnDs (Note (SCC (mkAllDictsCC mod grp False)) rhs)
+-}
 \end{code}
diff --git a/ghc/compiler/deSugar/DsExpr.lhs b/ghc/compiler/deSugar/DsExpr.lhs
index 2b26091ee4944eb8b45919ab9e4f30b4da03d54d..de10fcd5ca3f64042edb6f59cd32b6375f45c30e 100644
--- a/ghc/compiler/deSugar/DsExpr.lhs
+++ b/ghc/compiler/deSugar/DsExpr.lhs
@@ -22,7 +22,7 @@ import TcHsSyn		( TypecheckedHsExpr, TypecheckedHsBinds,
 import CoreSyn
 
 import DsMonad
-import DsBinds		( dsMonoBinds )
+import DsBinds		( dsMonoBinds, AutoScc(..) )
 import DsGRHSs		( dsGuarded )
 import DsCCall		( dsCCall )
 import DsListComp	( dsListComp )
@@ -99,7 +99,7 @@ dsLet (MonoBind (AbsBinds [] [] binder_triples (PatMonoBind pat grhss loc)) sigs
 
 -- Ordinary case for bindings
 dsLet (MonoBind binds sigs is_rec) body
-  = dsMonoBinds False binds []  `thenDs` \ prs ->
+  = dsMonoBinds NoSccs binds []  `thenDs` \ prs ->
     case is_rec of
       Recursive    -> returnDs (Let (Rec prs) body)
       NonRecursive -> returnDs (foldr mk_let body prs)
diff --git a/ghc/compiler/main/CmdLineOpts.lhs b/ghc/compiler/main/CmdLineOpts.lhs
index 89c6fc8299e9c2d40e5f57da7cb6aeca1857817c..821882c2c30a12f53182e12338a80cc3e9bfbb29 100644
--- a/ghc/compiler/main/CmdLineOpts.lhs
+++ b/ghc/compiler/main/CmdLineOpts.lhs
@@ -20,6 +20,7 @@ module CmdLineOpts (
 	opt_AutoSccsOnAllToplevs,
 	opt_AutoSccsOnExportedToplevs,
 	opt_AutoSccsOnIndividualCafs,
+	opt_AutoSccsOnDicts,
 	opt_CompilingPrelude,
 	opt_D_dump_absC,
 	opt_D_dump_asm,
@@ -280,6 +281,7 @@ opt_AllowUndecidableInstances 	= lookUp  SLIT("-fallow-undecidable-instances")
 opt_AutoSccsOnAllToplevs	= lookUp  SLIT("-fauto-sccs-on-all-toplevs")
 opt_AutoSccsOnExportedToplevs	= lookUp  SLIT("-fauto-sccs-on-exported-toplevs")
 opt_AutoSccsOnIndividualCafs	= lookUp  SLIT("-fauto-sccs-on-individual-cafs")
+opt_AutoSccsOnDicts		= lookUp  SLIT("-fauto-sccs-on-dicts")
   {-
    It's a bit unfortunate to have to re-introduce this chap, but on Win32
    platforms we do need a way of distinguishing between the case when we're
diff --git a/ghc/compiler/profiling/CostCentre.lhs b/ghc/compiler/profiling/CostCentre.lhs
index 5c78dcce6cfe50849b4d2c767708bd6171b80f34..8aeba31447e74b82129ff42adfea9003226d8821 100644
--- a/ghc/compiler/profiling/CostCentre.lhs
+++ b/ghc/compiler/profiling/CostCentre.lhs
@@ -5,7 +5,7 @@
 
 \begin{code}
 module CostCentre (
-	CostCentre(..), CcName, IsDupdCC(..), IsCafCC(..), IsDictCC(..),
+	CostCentre(..), CcName, IsDupdCC(..), IsCafCC(..),
 		-- All abstract except to friend: ParseIface.y
 
 	CostCentreStack,
@@ -13,9 +13,9 @@ module CostCentre (
 	noCostCentre, noCCAttached,
 	noCCSAttached, isCurrentCCS,  isSubsumedCCS, currentOrSubsumedCCS,
 
-	mkUserCC, mkAutoCC, mkDictCC, mkAllCafsCC, mkAllDictsCC,
+	mkUserCC, mkAutoCC, mkAllCafsCC, 
 	mkSingletonCCS, cafifyCC, dupifyCC,
-	isCafCC, isDictCC, isDupdCC, isEmptyCC, isCafCCS,
+	isCafCC, isDupdCC, isEmptyCC, isCafCCS,
 	isSccCountCostCentre,
 	sccAbleCostCentre,
 	ccFromThisModule,
@@ -106,7 +106,6 @@ data CostCentre
 		cc_name :: CcName,		-- Name of the cost centre itself
 		cc_mod  :: Module,		-- Name of module defining this CC.
 		cc_grp  :: Group,	  	-- "Group" that this CC is in.
-		cc_is_dict :: IsDictCC,		-- see below
 		cc_is_dupd :: IsDupdCC,		-- see below
 		cc_is_caf  :: IsCafCC		-- see below
     }
@@ -119,19 +118,8 @@ data CostCentre
 			-- per-individual-CAF cost attribution.
     }
 
-  | AllDictsCC {
-		cc_mod  :: Module,		-- Name of module defining this CC.
-		cc_grp  :: Group,	  	-- "Group" that this CC is in.
-			-- Again, one "big" DICT cc per module, where all
-			-- DICT costs are attributed unless the user asked for
-			-- per-individual-DICT cost attribution.
-		cc_is_dupd :: IsDupdCC
-    }
-
 type CcName = EncodedFS
 
-data IsDictCC = DictCC | VanillaCC
-
 data IsDupdCC
   = OriginalCC	-- This says how the CC is *used*.  Saying that
   | DupdCC		-- it is DupdCC doesn't make it a different
@@ -187,9 +175,6 @@ isSubsumedCCS _		     		= False
 isCafCCS (SingletonCCS cc)		= isCafCC cc
 isCafCCS _				= False
 
-isDictCCS (SingletonCCS cc)		= isDictCC cc
-isDictCCS _				= False
-
 currentOrSubsumedCCS SubsumedCCS	= True
 currentOrSubsumedCCS CurrentCCS		= True
 currentOrSubsumedCCS _			= False
@@ -203,33 +188,24 @@ mkUserCC :: UserFS -> Module -> Group -> CostCentre
 mkUserCC cc_name module_name group_name
   = NormalCC { cc_name = encodeFS cc_name,
 	       cc_mod =  module_name, cc_grp = group_name,
-	       cc_is_dict = VanillaCC, cc_is_dupd = OriginalCC, cc_is_caf = NotCafCC {-might be changed-}
+	       cc_is_dupd = OriginalCC, cc_is_caf = NotCafCC {-might be changed-}
     }
 
-mkDictCC, mkAutoCC :: Id -> Module -> Group -> IsCafCC -> CostCentre
-
-mkDictCC id module_name group_name is_caf
-  = NormalCC { cc_name = occNameFS (getOccName id),
-	       cc_mod =  module_name, cc_grp = group_name,
-	       cc_is_dict = DictCC, cc_is_dupd = OriginalCC, cc_is_caf = is_caf
-    }
+mkAutoCC :: Id -> Module -> Group -> IsCafCC -> CostCentre
 
 mkAutoCC id module_name group_name is_caf
   = NormalCC { cc_name = occNameFS (getOccName id), 
 	       cc_mod =  module_name, cc_grp = group_name,
-	       cc_is_dict = VanillaCC, cc_is_dupd = OriginalCC, cc_is_caf = is_caf
+	       cc_is_dupd = OriginalCC, cc_is_caf = is_caf
     }
 
 mkAllCafsCC  m g	  = AllCafsCC  { cc_mod = m, cc_grp = g }
-mkAllDictsCC m g is_dupd  = AllDictsCC { cc_mod = m, cc_grp = g, 
-					 cc_is_dupd = if is_dupd then DupdCC else OriginalCC }
 
 mkSingletonCCS :: CostCentre -> CostCentreStack
 mkSingletonCCS cc = SingletonCCS cc
 
 cafifyCC, dupifyCC  :: CostCentre -> CostCentre
 
-cafifyCC cc@(AllDictsCC {}) = cc
 cafifyCC cc@(NormalCC {cc_is_caf = is_caf})
   = ASSERT(not_a_caf_already is_caf)
     cc {cc_is_caf = CafCC}
@@ -240,7 +216,7 @@ cafifyCC cc = pprPanic "cafifyCC" (ppr cc)
 
 dupifyCC cc = cc {cc_is_dupd = DupdCC}
 
-isEmptyCC, isCafCC, isDictCC, isDupdCC :: CostCentre -> Bool
+isEmptyCC, isCafCC, isDupdCC :: CostCentre -> Bool
 
 isEmptyCC (NoCostCentre)		= True
 isEmptyCC _				= False
@@ -249,11 +225,6 @@ isCafCC (AllCafsCC {})		         = True
 isCafCC (NormalCC {cc_is_caf = CafCC}) = True
 isCafCC _		                 = False
 
-isDictCC (AllDictsCC {})	          = True
-isDictCC (NormalCC {cc_is_dict = DictCC}) = True
-isDictCC _			          = False
-
-isDupdCC (AllDictsCC {cc_is_dupd = DupdCC}) = True
 isDupdCC (NormalCC   {cc_is_dupd = DupdCC}) = True
 isDupdCC _		                     = False
 
@@ -265,7 +236,6 @@ isSccCountCostCentre NoCostCentre  = panic "isSccCount:NoCostCentre"
 #endif
 isSccCountCostCentre cc | isCafCC cc  = False
                         | isDupdCC cc = False
-			| isDictCC cc = True
 			| otherwise   = True
 
 sccAbleCostCentre :: CostCentre -> Bool
@@ -291,7 +261,6 @@ instance Ord CostCentre where
 cmpCostCentre :: CostCentre -> CostCentre -> Ordering
 
 cmpCostCentre (AllCafsCC  {cc_mod = m1}) (AllCafsCC  {cc_mod = m2}) = m1 `compare` m2
-cmpCostCentre (AllDictsCC {cc_mod = m1}) (AllDictsCC {cc_mod = m2}) = m1 `compare` m2
 
 cmpCostCentre (NormalCC {cc_name = n1, cc_mod =  m1, cc_is_caf = c1}) 
 	      (NormalCC {cc_name = n2, cc_mod =  m2, cc_is_caf = c2}) 
@@ -308,7 +277,6 @@ cmpCostCentre other_1 other_2
   where
     tag_CC (NormalCC   {}) = (ILIT(1) :: FAST_INT)
     tag_CC (AllCafsCC  {}) = ILIT(2)
-    tag_CC (AllDictsCC {}) = ILIT(3)
 
 cmp_caf NotCafCC CafCC     = LT
 cmp_caf NotCafCC NotCafCC  = EQ
@@ -375,22 +343,16 @@ instance Outputable CostCentre where
 -- Printing in an interface file or in Core generally
 pprCostCentreCore (AllCafsCC {cc_mod = m, cc_grp = g})
   = text "__sccC" <+> braces (pprModule m <+> doubleQuotes (ptext g))
-pprCostCentreCore (AllDictsCC {cc_mod = m, cc_grp = g, cc_is_dupd = dup})
-  = text "__sccD" <+> braces (pprModule m <+> doubleQuotes (ptext g) <+> pp_dup dup)
 pprCostCentreCore (NormalCC {cc_name = n, cc_mod = m, cc_grp = g,
-			     cc_is_dict = dic, cc_is_caf = caf, cc_is_dupd = dup})
+			     cc_is_caf = caf, cc_is_dupd = dup})
   = text "__scc" <+> braces (hsep [
 	ptext n,
 	pprModule m,	
 	doubleQuotes (ptext g),
-	pp_dict dic,
 	pp_dup dup,
 	pp_caf caf
     ])
 
-pp_dict DictCC = text "__A"
-pp_dict other  = empty
-
 pp_dup DupdCC = char '!'
 pp_dup other   = empty
 
@@ -401,14 +363,12 @@ pp_caf other   = empty
 -- Printing as a C label
 ppCostCentreLbl (NoCostCentre)		  	     = text "CC_NONE"
 ppCostCentreLbl (AllCafsCC  {cc_mod = m}) 	     = text "CC_CAFs_"  <> pprModule m
-ppCostCentreLbl (AllDictsCC {cc_mod = m}) 	     = text "CC_DICTs_" <> pprModule m
 ppCostCentreLbl (NormalCC {cc_name = n, cc_mod = m}) = text "CC_" <> pprModule m <> ptext n
 
 -- This is the name to go in the user-displayed string, 
 -- recorded in the cost centre declaration
 costCentreUserName (NoCostCentre)  = "NO_CC"
 costCentreUserName (AllCafsCC {})  = "CAFs_in_..."
-costCentreUserName (AllDictsCC {}) = "DICTs_in_..."
 costCentreUserName (NormalCC {cc_name = name, cc_is_caf = is_caf})
   =  case is_caf of { CafCC -> "CAF:";   _ -> "" } ++ decode (_UNPK_ name)
 \end{code}
@@ -441,6 +401,5 @@ pprCostCentreDecl is_local cc
 
 ccSubsumed :: CostCentre -> FAST_STRING		-- subsumed value
 ccSubsumed cc | isCafCC  cc = SLIT("CC_IS_CAF")
-	      | isDictCC cc = SLIT("CC_IS_DICT")
 	      | otherwise   = SLIT("CC_IS_BORING")
 \end{code}
diff --git a/ghc/compiler/reader/Lex.lhs b/ghc/compiler/reader/Lex.lhs
index 6bed0a8a2b432db0a401326649786995f00a369a..74ab14a92aa6157f40806f9694e4d65e86c0323c 100644
--- a/ghc/compiler/reader/Lex.lhs
+++ b/ghc/compiler/reader/Lex.lhs
@@ -142,7 +142,6 @@ data IfaceToken
   | ITstrict ([Demand], Bool)
   | ITscc
   | ITsccAllCafs
-  | ITsccAllDicts
 
   | ITdotdot  			-- reserved symbols
   | ITdcolon
@@ -355,7 +354,6 @@ lex_demand cont buf =
 lex_scc cont buf =
  case currentChar# buf of
   'C'# -> cont ITsccAllCafs  (stepOverLexeme (stepOn buf))
-  'D'# -> cont ITsccAllDicts (stepOverLexeme (stepOn buf))
   other -> cont ITscc buf
 
 -----------
diff --git a/ghc/compiler/rename/ParseIface.y b/ghc/compiler/rename/ParseIface.y
index 8fc06310daaf5ce5905f8ff20249314e57bcf50b..4cf9211aae210780cc3d7314e078059641a1d3d4 100644
--- a/ghc/compiler/rename/ParseIface.y
+++ b/ghc/compiler/rename/ParseIface.y
@@ -12,7 +12,7 @@ import Const		( Literal(..), mkMachInt_safe )
 import BasicTypes	( Fixity(..), FixityDirection(..), 
 			  NewOrData(..), Version
 			)
-import CostCentre       ( CostCentre(..), IsDictCC(..), IsCafCC(..), IsDupdCC(..) )
+import CostCentre       ( CostCentre(..), IsCafCC(..), IsDupdCC(..) )
 import HsPragmas	( noDataPragmas, noClassPragmas )
 import Type		( Kind, mkArrowKind, boxedTypeKind, openTypeKind )
 import IdInfo           ( ArityInfo, exactArity )
@@ -92,7 +92,6 @@ import Ratio ( (%) )
  '__ccall'	{ ITccall $$ }
  '__scc' 	{ ITscc }
  '__sccC'       { ITsccAllCafs }
- '__sccD'       { ITsccAllDicts }
 
  '__A'		{ ITarity }
  '__P'		{ ITspecialise }
@@ -683,10 +682,9 @@ ccall_string	:: { FAST_STRING }
 ------------------------------------------------------------------------
 scc     :: { CostCentre }
         :  '__sccC' '{' mod_name STRING '}'                      { AllCafsCC $3 $4 }
-        |  '__sccD' '{' mod_name STRING cc_dup '}'               { AllDictsCC $3 $4 $5 }
-        |  '__scc' '(' cc_name mod_name STRING cc_dict cc_dup cc_caf '}'
+        |  '__scc' '(' cc_name mod_name STRING cc_dup cc_caf '}'
                              { NormalCC { cc_name = $3, cc_mod = $4, cc_grp = $5,
-                                          cc_is_dict = $6, cc_is_dupd = $7, cc_is_caf = $8 } }
+                                          cc_is_dupd = $6, cc_is_caf = $7 } }
 
 cc_name :: { EncodedFS }
         : CONID                 { $1 }
@@ -700,11 +698,6 @@ cc_caf  :: { IsCafCC }
         :                       { NotCafCC }
         | '__C'                 { CafCC }
 
-cc_dict :: { IsDictCC }
-        :                       { VanillaCC }
-        | '__A'                 { DictCC }
-
-
 -------------------------------------------------------------------
 
 src_loc :: { SrcLoc }
diff --git a/ghc/driver/ghc.lprl b/ghc/driver/ghc.lprl
index 9e1a61bbc43d6ad3d4c34d7268846e93da00a9a2..abcef324a30105f1fc4e98490e25e5b618df01a7 100644
--- a/ghc/driver/ghc.lprl
+++ b/ghc/driver/ghc.lprl
@@ -484,6 +484,7 @@ $PROFing = '';		# set to p or e if profiling
 $PROFgroup = '';	# set to group if an explicit -Ggroup specified
 $PROFauto = '';	        # set to relevant hsc flag if -auto or -auto-all
 $PROFcaf  = '';	        # set to relevant hsc flag if -caf-all
+$PROFdict = '';		# set to relevant hsc flag if -auto-dicts
 $PROFignore_scc = '';	# set to relevant parser flag if explicit sccs ignored
 $UNPROFscc_auto = '';	# set to relevant hsc flag if forcing auto sccs without profiling
 $TICKYing = '';    	# set to t if compiling for ticky-ticky profiling
@@ -1000,7 +1001,7 @@ sub setupBuildFlags {
    } else {
       push(@HsC_flags, $PROFauto) if $PROFauto;
       push(@HsC_flags, $PROFcaf)  if $PROFcaf;
-      #push(@HsC_flags, $PROFdict) if $PROFdict;
+      push(@HsC_flags, $PROFdict) if $PROFdict;
 
       $Oopt_FinalStgProfilingMassage = '-fmassage-stg-for-profiling';
 
@@ -2938,13 +2939,14 @@ arg: while($_ = $Args[0]) {
 
     /^-prof$/ && do { $PROFing = 'p'; next arg; }; # profiling -- details later!
 
-    /^-auto/ && do {
-		# generate auto SCCs on top level bindings
-		# -auto-all = all top level bindings
-		# -auto     = only top level exported bindings
-		$PROFauto = ( /-all/ )
-			    ? '-fauto-sccs-on-all-toplevs'
-			    : '-fauto-sccs-on-exported-toplevs';
+    /^-auto-dicts$/ && do {
+		$PROFdicts = '-fauto-sccs-on-dicts';
+		next arg; };
+    /^-auto-all$/ && do {
+		$PROFauto = '-fauto-sccs-on-all-toplevs';
+		next arg; };
+    /^-auto$/ && do {
+		$PROFauto = '-fauto-sccs-on-exported-toplevs';
 		next arg; };
 
     /^-caf-all/ && do { # generate individual CAF SCC annotations
diff --git a/ghc/includes/Profiling.h b/ghc/includes/Profiling.h
index 0c3ca360756d66fbc41cd94bc3cdb051e9dc2296..a29759e08be1a76d2c323aefe34215580818aa7f 100644
--- a/ghc/includes/Profiling.h
+++ b/ghc/includes/Profiling.h
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Profiling.h,v 1.3 1999/02/05 16:02:26 simonm Exp $
+ * $Id: Profiling.h,v 1.4 1999/03/25 13:14:03 simonm Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -29,9 +29,8 @@
 /* Constants used to set sumbsumed flag on CostCentres */
 
 #define CC_IS_CAF      'c'            /* 'c'  => *is* a CAF cc           */
-#define CC_IS_DICT     'd'            /* 'd'  => *is* a dictionary cc    */
 #define CC_IS_SUBSUMED 's'            /* 's'  => *is* a subsumed cc      */
-#define CC_IS_BORING   'B'            /* 'B'  => *not* a CAF/dict/sub cc */
+#define CC_IS_BORING   'B'            /* 'B'  => *not* a CAF/sub cc      */
 
 /* Constants used for abreviated output of data in binary format.  The order
  * is important and corresponds to the "item" elementType in the XML log 
@@ -81,7 +80,6 @@ typedef struct _CostCentreStack {
   unsigned long scc_count;
   unsigned long sub_scc_count;
   unsigned long sub_cafcc_count;
-  unsigned long sub_dictcc_count;
     
   unsigned long time_ticks;
   unsigned long mem_alloc;
@@ -160,6 +158,7 @@ extern hash_t max_type_no;                      /* Hash on type description */
  * ---------------------------------------------------------------------------*/
 
 CostCentreStack *PushCostCentre ( CostCentreStack *, CostCentre * );
+CostCentreStack *AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
 CostCentreStack *ActualPush ( CostCentreStack *, CostCentre * );
 CostCentreStack *RemoveCC ( CostCentreStack *, CostCentre * );
 
diff --git a/ghc/includes/StgProf.h b/ghc/includes/StgProf.h
index 6cebb331ae1d45a096c6abf2a788426ee97aefcc..b221ba76aedfccd3fbaeb69116652931be7986a4 100644
--- a/ghc/includes/StgProf.h
+++ b/ghc/includes/StgProf.h
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: StgProf.h,v 1.3 1999/03/18 17:57:19 simonm Exp $
+ * $Id: StgProf.h,v 1.4 1999/03/25 13:14:04 simonm Exp $
  *
  * (c) The GHC Team, 1998
  *
@@ -102,7 +102,6 @@ extern CostCentreStack *CCS_LIST;         /* registered CCS list */
 	    scc_count 		: 0,				\
 	    sub_scc_count 	: 0,				\
 	    sub_cafcc_count 	: 0,				\
-	    sub_dictcc_count 	: 0,				\
 	    time_ticks 		: 0,				\
 	    mem_alloc 		: 0,				\
 	    is_subsumed 	: subsumed,			\
@@ -132,12 +131,11 @@ extern CostCentreStack *CCS_LIST;         /* registered CCS list */
  * Pushing a new cost centre (i.e. for scc annotations)
  * -------------------------------------------------------------------------- */
 
-# define SET_CCC_X(cc,do_subcc_count,do_subdict_count,do_scc_count)	\
-	do {								\
-	if (do_subcc_count)   { CCCS->sub_scc_count++; }       		\
-	if (do_subdict_count) { CCCS->sub_dictcc_count++; }		\
-	CCCS = PushCostCentre(CCCS,cc);					\
-	if (do_scc_count)     { CCCS->scc_count++; }   			\
+# define SET_CCC_X(cc,do_subcc_count,do_scc_count)		\
+	do {							\
+	if (do_subcc_count)   { CCCS->sub_scc_count++; }	\
+	CCCS = PushCostCentre(CCCS,cc);				\
+	if (do_scc_count)     { CCCS->scc_count++; }		\
 	} while(0)
 
 /* We sometimes don't increment the scc_count field, for example when
@@ -146,13 +144,10 @@ extern CostCentreStack *CCS_LIST;         /* registered CCS list */
  */
 
 # define SET_CCC(cc_ident,do_scc_count) \
-	 SET_CCC_X(cc_ident,do_scc_count,0,do_scc_count)
-
-# define SET_DICT_CCC(cc_ident,do_scc_count) \
-	 SET_CCC_X(cc_ident,0,do_scc_count,do_scc_count)
+	 SET_CCC_X(cc_ident,do_scc_count,do_scc_count)
 
 # define SET_CCS_TOP(cc_ident) \
-	 SET_CCC_X(cc_ident,0,0,1)
+	 SET_CCC_X(cc_ident,0,1)
 
 /* -----------------------------------------------------------------------------
  * Allocating new cost centres / cost centre stacks.
@@ -181,7 +176,6 @@ extern CostCentreStack *CCS_LIST;         /* registered CCS list */
         (stack)->scc_count = 0;           \
         (stack)->time_ticks = 0;          \
         (stack)->sub_cafcc_count = 0;     \
-        (stack)->sub_dictcc_count = 0;    \
         (stack)->mem_alloc = 0;           \
         } while(0)
 
@@ -195,8 +189,8 @@ extern CostCentreStack *CCS_LIST;         /* registered CCS list */
 #define CCCS_DETAIL_COUNT(inc_this) /*nothing*/
 #endif
 
-#define IS_CAF_OR_DICT_OR_SUB_CCS(ccs)         \
-        /* tests for lower case character */   \
+#define IS_CAF_OR_SUB_CCS(ccs)			\
+        /* tests for lower case character */	\
         ((ccs)->is_subsumed & ' ')
 	
 
@@ -234,19 +228,20 @@ extern CostCentreStack *CCS_LIST;         /* registered CCS list */
  *
  * Here is our special "hybrid" case when we do *not* set the CCCS.
  *  (a) The closure is a function, not a thunk;
- *  (b) The CCS is CAF/DICT-ish.
+ *  (b) The CCS is CAF-ish.
  * -------------------------------------------------------------------------- */
 
-#define ENTER_CCS_F(stack)                                  \
-        do {                                                \
-        CostCentreStack *ccs = (CostCentreStack *) (stack); \
-        if ( ! IS_CAF_OR_DICT_OR_SUB_CCS(ccs) ) {           \
-           CCCS = ccs;                                      \
-        } else {                                            \
-           CCCS_DETAIL_COUNT(ccs->caffun_subsumed);         \
-           CCCS_DETAIL_COUNT(CCCS->subsumed_caf_count);     \
-        }                                                   \
-        CCCS_DETAIL_COUNT(CCCS->function_count);            \
+#define ENTER_CCS_F(stack)					\
+        do {							\
+        CostCentreStack *ccs = (CostCentreStack *) (stack);	\
+        if ( ! IS_CAF_OR_SUB_CCS(ccs) ) {			\
+           CCCS = ccs;						\
+        } else {						\
+          CCCS = AppendCCS(CCCS,ccs);				\
+          CCCS_DETAIL_COUNT(ccs->caffun_subsumed);		\
+          CCCS_DETAIL_COUNT(CCCS->subsumed_caf_count);		\
+        }							\
+        CCCS_DETAIL_COUNT(CCCS->function_count);		\
         } while(0)
  
 #define ENTER_CCS_FCL(closure)  ENTER_CCS_F(CCS_HDR(closure))
@@ -275,16 +270,17 @@ extern CostCentreStack *CCS_LIST;         /* registered CCS list */
  
 /* These ENTER_CC_PAP things are only used in the RTS */
  
-#define ENTER_CCS_PAP(stack)                                \
-        do {                                                \
-        CostCentreStack *ccs = (CostCentreStack *) (stack); \
-        if ( ! IS_CAF_OR_DICT_OR_SUB_CCS(ccs) ) {           \
-            CCCS = ccs;                                     \
-        } else {                                            \
-            CCCS_DETAIL_COUNT(ccs->caffun_subsumed);        \
-            CCCS_DETAIL_COUNT(CCCS->subsumed_caf_count);    \
-        }                                                   \
-        CCCS_DETAIL_COUNT(CCCS->pap_count);                 \
+#define ENTER_CCS_PAP(stack)					\
+        do {							\
+        CostCentreStack *ccs = (CostCentreStack *) (stack);	\
+        if ( ! IS_CAF_OR_SUB_CCS(ccs) ) {			\
+            CCCS = ccs;						\
+        } else {						\
+          CCCS = AppendCCS(CCCS,ccs);				\
+          CCCS_DETAIL_COUNT(ccs->caffun_subsumed);		\
+          CCCS_DETAIL_COUNT(CCCS->subsumed_caf_count);		\
+        }							\
+        CCCS_DETAIL_COUNT(CCCS->pap_count);			\
         } while(0)                      
 
 #define ENTER_CCS_PAP_CL(closure)  \
diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c
index f44b4fc89fc1e1d360493f356b7eadd7046af280..b56f995411d7c07f062040b2565719dbb03dfaa1 100644
--- a/ghc/rts/GC.c
+++ b/ghc/rts/GC.c
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: GC.c,v 1.55 1999/03/18 17:57:21 simonm Exp $
+ * $Id: GC.c,v 1.56 1999/03/25 13:14:05 simonm Exp $
  *
  * (c) The GHC Team 1998-1999
  *
@@ -2348,8 +2348,8 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
 	
       /* Dynamic bitmap: the mask is stored on the stack */
     case RET_DYN:
-      bitmap = stgCast(StgRetDyn*,p)->liveness;
-      p      = &payloadWord(stgCast(StgRetDyn*,p),0);
+      bitmap = ((StgRetDyn *)p)->liveness;
+      p      = (P_)((StgRetDyn *)p)->payload[0];
       goto small_bitmap;
 
       /* probably a slow-entry point return address: */
diff --git a/ghc/rts/Profiling.c b/ghc/rts/Profiling.c
index 45747ce772ac58391341fcf212d66e8e93a3fe5a..eec5a71ae37624155f259e9a448ec15d2d6c2c7f 100644
--- a/ghc/rts/Profiling.c
+++ b/ghc/rts/Profiling.c
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Profiling.c,v 1.3 1999/02/05 16:02:48 simonm Exp $
+ * $Id: Profiling.c,v 1.4 1999/03/25 13:14:06 simonm Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -115,6 +115,10 @@ static rtsBool ccs_to_ignore       ( CostCentreStack *ccs );
 static    void count_ticks         ( CostCentreStack *ccs );
 static    void reportCCS           ( CostCentreStack *ccs, nat indent );
 static    void DecCCS              ( CostCentreStack *ccs );
+static    CostCentreStack *pruneCCSTree ( CostCentreStack *ccs );
+#ifdef DEBUG
+static    void printCCS            ( CostCentreStack *ccs );
+#endif
 
 /* -----------------------------------------------------------------------------
    Initialise the profiling environment
@@ -233,6 +237,20 @@ registerCostCentres ( void )
    Cost-centre stack manipulation
    -------------------------------------------------------------------------- */
 
+#ifdef DEBUG
+CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
+CostCentreStack *
+PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
+#define PushCostCentre _PushCostCentre
+{
+  IF_DEBUG(prof, 
+	   fprintf(stderr,"Pushing %s on ", cc->label);
+	   printCCS(ccs);
+	   fprintf(stderr,"\n"));
+  return PushCostCentre(ccs,cc);
+}
+#endif
+
 CostCentreStack *
 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
 {
@@ -263,6 +281,48 @@ PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
   }
 }
 
+/* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
+
+#ifdef DEBUG
+CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
+CostCentreStack *
+AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
+#define AppendCCS _AppendCCS
+{
+  CostCentreStack *ccs;
+  IF_DEBUG(prof, 
+	   fprintf(stderr,"Appending ");
+	   printCCS(ccs1);
+	   fprintf(stderr," to ");
+	   printCCS(ccs2);
+	   fprintf(stderr,"\n"));
+  return AppendCCS(ccs1,ccs2);
+}
+#endif
+
+CostCentreStack *
+AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
+{
+  CostCentreStack *ccs;
+
+  /* Optimisation: if we attempt to append a CCS to itself, we're
+   * going to end up with the same ccs after a great deal of pushing
+   * and removing of cost centres.  Furthermore, we'll generate a lot
+   * of intermediate CCSs which would not otherwise be generated.  So:
+   * let's cope with this common case first.
+   */
+  if (ccs1 == ccs2) {
+    return ccs1;
+  }
+
+  if (ccs2->cc->is_subsumed != CC_IS_BORING) {
+    return ccs1;
+  }
+  
+  ASSERT(ccs2->prevStack != NULL);
+  ccs = AppendCCS(ccs1, ccs2->prevStack);
+  return PushCostCentre(ccs,ccs2->cc);
+}
 
 CostCentreStack *
 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
@@ -291,7 +351,6 @@ ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
   new_ccs->scc_count        = 0;
   new_ccs->sub_scc_count    = 0;
   new_ccs->sub_cafcc_count  = 0;
-  new_ccs->sub_dictcc_count = 0;
   
   /* Initialize all other stats here.  There should be a quick way
    * that's easily used elsewhere too 
@@ -299,14 +358,21 @@ ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
   new_ccs->time_ticks = 0;
   new_ccs->mem_alloc = 0;
   
-  /* stacks are subsumed only if their top CostCentres are subsumed */
-  new_ccs->is_subsumed = cc->is_subsumed;
+  /* stacks are subsumed if either:
+       - the top cost centre is boring, and the rest of the CCS is subsumed
+       - the top cost centre is subsumed.
+  */
+  if (cc->is_subsumed == CC_IS_BORING) {
+    new_ccs->is_subsumed = ccs->is_subsumed;
+  } else {
+    new_ccs->is_subsumed = cc->is_subsumed;
+  }
   
   /* update the memoization table for the parent stack */
   if (ccs != EMPTY_STACK)
     ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc);
   
-  /* make sure this CC is decalred at the next heap/time sample */
+  /* make sure this CC is declared at the next heap/time sample */
   DecCCS(new_ccs);
   
   /* return a pointer to the new stack */
@@ -466,7 +532,7 @@ report_ccs_profiling( void )
     if (do_groups) fprintf(prof_file, " %-11.11s", "GROUP");
 #endif
 
-    fprintf(prof_file, "%8s %5s %5s %8s %5s %5s", "scc", "%time", "%alloc", "inner", "cafs", "dicts");
+    fprintf(prof_file, "%8s %5s %5s %8s %5s", "scc", "%time", "%alloc", "inner", "cafs");
 
     if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
 	fprintf(prof_file, "  %5s %9s", "ticks", "bytes");
@@ -477,7 +543,7 @@ report_ccs_profiling( void )
     }
     fprintf(prof_file, "\n\n");
 
-    reportCCS(CCS_MAIN, 0);
+    reportCCS(pruneCCSTree(CCS_MAIN), 0);
 
     fclose(prof_file);
 }
@@ -493,19 +559,11 @@ reportCCS(CostCentreStack *ccs, nat indent)
   
   /* Only print cost centres with non 0 data ! */
   
-  if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
-	/* force printing of *all* cost centres if -P -P */ )
-       
-       || ( ccs->indexTable != 0 )
-       || ( ! ccs_to_ignore(ccs)
-	    && (ccs->scc_count || ccs->sub_scc_count || 
-		ccs->time_ticks || ccs->mem_alloc
-	    || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
-		&& (ccs->sub_cafcc_count || ccs->sub_dictcc_count
-#if defined(PROFILING_DETAIL_COUNTS)
-		|| cc->thunk_count || cc->function_count || cc->pap_count
-#endif
-		    ))))) {
+  if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
+       ! ccs_to_ignore(ccs))
+	/* force printing of *all* cost centres if -P -P */ 
+    {
+
     fprintf(prof_file, "%-*s%-*s %-10s", 
 	    indent, "", 24-indent, cc->label, cc->module);
 
@@ -513,11 +571,11 @@ reportCCS(CostCentreStack *ccs, nat indent)
     if (do_groups) fprintf(prof_file, " %-11.11s",cc->group);
 #endif
 
-    fprintf(prof_file, "%8ld  %4.1f  %4.1f %8ld %5ld %5ld",
+    fprintf(prof_file, "%8ld  %4.1f  %4.1f %8ld %5ld",
 	    ccs->scc_count, 
 	    total_ticks == 0 ? 0.0 : (ccs->time_ticks / (StgFloat) total_ticks * 100),
 	    total_alloc == 0 ? 0.0 : (ccs->mem_alloc / (StgFloat) total_alloc * 100),
-	    ccs->sub_scc_count, ccs->sub_cafcc_count, ccs->sub_dictcc_count);
+	    ccs->sub_scc_count, ccs->sub_cafcc_count);
     
     if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
       fprintf(prof_file, "  %5ld %9ld", ccs->time_ticks, ccs->mem_alloc*sizeof(W_));
@@ -570,4 +628,53 @@ ccs_to_ignore (CostCentreStack *ccs)
     }
 }
 
+static CostCentreStack *
+pruneCCSTree( CostCentreStack *ccs )
+{
+  CostCentreStack *ccs1;
+  IndexTable *i, **prev;
+  
+  prev = &ccs->indexTable;
+  for (i = ccs->indexTable; i != 0; i = i->next) {
+    ccs1 = pruneCCSTree(i->ccs);
+    if (ccs1 == NULL) {
+      *prev = i->next;
+    } else {
+      prev = &(i->next);
+    }
+  }
+
+  if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
+	/* force printing of *all* cost centres if -P -P */ )
+       
+       || ( ccs->indexTable != 0 )
+       || ( (ccs->scc_count || ccs->sub_scc_count || 
+	     ccs->time_ticks || ccs->mem_alloc
+	     || (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE
+		 && (ccs->sub_cafcc_count
+#if defined(PROFILING_DETAIL_COUNTS)
+		     || cc->thunk_count || cc->function_count || cc->pap_count
+#endif
+		     ))))) {
+    return ccs;
+  } else {
+    return NULL;
+  }
+}
+
+#ifdef DEBUG
+static void
+printCCS ( CostCentreStack *ccs )
+{
+  fprintf(stderr,"<");
+  for (; ccs; ccs = ccs->prevStack ) {
+    fprintf(stderr,ccs->cc->label);
+    if (ccs->prevStack) {
+      fprintf(stderr,",");
+    }
+  }
+  fprintf(stderr,">");
+}
+#endif
+
 #endif /* PROFILING */
diff --git a/ghc/rts/RtsFlags.c b/ghc/rts/RtsFlags.c
index d30fa2e2240e10369b94e304c0a4564a6ac0a4ba..8f494cd86db5bce0857c6272ff0779c7767d6e55 100644
--- a/ghc/rts/RtsFlags.c
+++ b/ghc/rts/RtsFlags.c
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: RtsFlags.c,v 1.11 1999/02/18 13:00:27 sewardj Exp $
+ * $Id: RtsFlags.c,v 1.12 1999/03/25 13:14:07 simonm Exp $
  *
  * (c) The AQUA Project, Glasgow University, 1994-1997
  * (c) The GHC Team, 1998-1999
@@ -455,6 +455,7 @@ error = rtsTrue;
                    if ((n>>6)&1) RtsFlags.DebugFlags.block_alloc = rtsTrue;
                    if ((n>>7)&1) RtsFlags.DebugFlags.sanity      = rtsTrue;
                    if ((n>>8)&1) RtsFlags.DebugFlags.stable      = rtsTrue;
+                   if ((n>>9)&1) RtsFlags.DebugFlags.prof        = rtsTrue;
                 }
 		break;
 #endif
diff --git a/ghc/rts/RtsFlags.h b/ghc/rts/RtsFlags.h
index 9c0de8fdd248f8ded13bbc07dcb701ae54f38240..e89289c12870df42d6a6f98638d1f4289b7432ae 100644
--- a/ghc/rts/RtsFlags.h
+++ b/ghc/rts/RtsFlags.h
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: RtsFlags.h,v 1.11 1999/03/03 19:20:41 sof Exp $
+ * $Id: RtsFlags.h,v 1.12 1999/03/25 13:14:08 simonm Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -57,6 +57,7 @@ struct DEBUG_FLAGS {
   rtsBool sanity      : 1; /* 128 */
 
   rtsBool stable      : 1; /* 256 */
+  rtsBool prof        : 1; /* 512 */
 };
 
 #if defined(PROFILING) || defined(PAR)
diff --git a/ghc/rts/Updates.hc b/ghc/rts/Updates.hc
index e40dce93847d5f8fb8e97426f71c91137130d150..e0dd5c28a0ecb7f2552a1ad68d3f8157647f1157 100644
--- a/ghc/rts/Updates.hc
+++ b/ghc/rts/Updates.hc
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Updates.hc,v 1.11 1999/03/22 11:26:03 simonm Exp $
+ * $Id: Updates.hc,v 1.12 1999/03/25 13:14:08 simonm Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -112,9 +112,6 @@ INFO_TABLE(PAP_info,PAP_entry,/*special layout*/0,0,PAP,const,EF_,0,0);
 STGFUN(PAP_entry)
 {
   nat Words;
-#ifdef PROFILING
-  CostCentreStack *CCS_pap;
-#endif
   P_ p;
   nat i;
   StgPAP *pap;
@@ -142,10 +139,8 @@ STGFUN(PAP_entry)
        * CAF/DICT.
        */
       
-      CCS_pap = pap->header.prof.ccs;
-      CCCS = (IS_CAF_OR_DICT_OR_SUB_CCS(CCS_pap)) 
-		? Su->header.prof.ccs 
-		: CCS_pap;
+      CCCS = Su->header.prof.ccs;
+      ENTER_CCS_PAP(pap->header.prof.ccs);
 #endif /* PROFILING */
       
       Su = Su->link;
@@ -215,8 +210,8 @@ EXTFUN(stg_update_PAP)
   FB_
 
     /* Save the pointer to the function closure that just failed the
-       argument satisfaction check
-       */
+     * argument satisfaction check
+     */
     Fun = R1.cl;
 
 #if defined(GRAN_COUNT)
@@ -225,12 +220,8 @@ EXTFUN(stg_update_PAP)
 #endif
 
     /* Just copy the whole block of stack between the stack pointer
-     * and the update frame pointer for now.  This might include some
-     * tagging, which the garbage collector will have to pay attention
-     * to, but it's much easier than sorting the words into pointers
-     * and non-pointers.
+     * and the update frame pointer.
      */
-
     Words    = (P_)Su - (P_)Sp;
     ASSERT((int)Words >= 0);
 
@@ -238,7 +229,7 @@ EXTFUN(stg_update_PAP)
     /* set "CC_pap" to go in the updatee (see Sansom thesis, p 183) */
 
     CCS_pap = (CostCentreStack *) Fun->header.prof.ccs;
-    if (IS_CAF_OR_DICT_OR_SUB_CCS(CCS_pap)) {
+    if (IS_CAF_OR_SUB_CCS(CCS_pap)) {
 	CCS_pap = CCCS;
     }
 #endif
@@ -361,9 +352,8 @@ EXTFUN(stg_update_PAP)
        * Restore the Cost Centre too (if required); again see Sansom
        * thesis p 183.  Take the CC out of the update frame if a CAF/DICT.
        */
-      CCCS = IS_CAF_OR_DICT_OR_SUB_CCS(CCS_pap)
-		? Su->header.prof.ccs 
-		: CCS_pap;
+      CCCS = Su->header.prof.ccs;
+      ENTER_CCS_PAP(CCS_pap);
 #endif /* PROFILING */
       
       /* Restore Su */
@@ -640,7 +630,7 @@ FN_(raisezh_fast)
 	break;
 
       case STOP_FRAME:
-	barf("uncaught exception");
+	barf("raisezh_fast: STOP_FRAME");
 
       default:
 	barf("raisezh_fast: weird activation record");