Commit e3655f81 authored by Sebastian Graf's avatar Sebastian Graf Committed by Marge Bot
Browse files

Don't attach CPR signatures to NOINLINE data structures (#18154)

Because the generated `KindRep`s don't have an unfolding, !3230 did not
actually stop to compute, attach and serialise unnecessary CPR
signatures for them. As already said in
`Note [CPR for data structures]`, that leads to bloated interface
files which is ultimately quadratic for Nested CPR.

So we don't attach any CPR signature to bindings that

  * Are not thunks (because thunks are not in WHNF)
  * Have arity 0 (which means the top-level constructor is not a lambda)

If the data structure has an unfolding, we continue to look through it.
If not (as is the case for `KindRep`s), we look at the unchanged CPR
signature and see `topCprType`, as expected.
parent 93d5de16
...@@ -118,9 +118,9 @@ cprAnalTopBind :: AnalEnv ...@@ -118,9 +118,9 @@ cprAnalTopBind :: AnalEnv
-> CoreBind -> CoreBind
-> (AnalEnv, CoreBind) -> (AnalEnv, CoreBind)
cprAnalTopBind env (NonRec id rhs) cprAnalTopBind env (NonRec id rhs)
= (extendAnalEnv env id' (idCprInfo id'), NonRec id' rhs') = (env', NonRec id' rhs')
where where
(id', rhs') = cprAnalBind TopLevel env id rhs (id', rhs', env') = cprAnalBind TopLevel env id rhs
cprAnalTopBind env (Rec pairs) cprAnalTopBind env (Rec pairs)
= (env', Rec pairs') = (env', Rec pairs')
...@@ -178,7 +178,7 @@ cprAnal' env (Lam var body) ...@@ -178,7 +178,7 @@ cprAnal' env (Lam var body)
| otherwise | otherwise
= (lam_ty, Lam var body') = (lam_ty, Lam var body')
where where
env' = extendAnalEnvForDemand env var (idDemandInfo var) env' = extendSigEnvForDemand env var (idDemandInfo var)
(body_ty, body') = cprAnal env' body (body_ty, body') = cprAnal env' body
lam_ty = abstractCprTy body_ty lam_ty = abstractCprTy body_ty
...@@ -194,9 +194,8 @@ cprAnal' env (Case scrut case_bndr ty alts) ...@@ -194,9 +194,8 @@ cprAnal' env (Case scrut case_bndr ty alts)
cprAnal' env (Let (NonRec id rhs) body) cprAnal' env (Let (NonRec id rhs) body)
= (body_ty, Let (NonRec id' rhs') body') = (body_ty, Let (NonRec id' rhs') body')
where where
(id', rhs') = cprAnalBind NotTopLevel env id rhs (id', rhs', env') = cprAnalBind NotTopLevel env id rhs
env' = extendAnalEnv env id' (idCprInfo id') (body_ty, body') = cprAnal env' body
(body_ty, body') = cprAnal env' body
cprAnal' env (Let (Rec pairs) body) cprAnal' env (Let (Rec pairs) body)
= body_ty `seq` (body_ty, Let (Rec pairs') body') = body_ty `seq` (body_ty, Let (Rec pairs') body')
...@@ -233,15 +232,15 @@ cprTransform env id ...@@ -233,15 +232,15 @@ cprTransform env id
sig sig
where where
sig sig
-- See Note [CPR for expandable unfoldings] -- Top-level binding, local let-binding or case binder
| Just rhs <- cprExpandUnfolding_maybe id | Just sig <- lookupSigEnv env id
= getCprSig sig
-- See Note [CPR for data structures]
| Just rhs <- cprDataStructureUnfolding_maybe id
= fst $ cprAnal env rhs = fst $ cprAnal env rhs
-- Imported function or data con worker -- Imported function or data con worker
| isGlobalId id | isGlobalId id
= getCprSig (idCprInfo id) = getCprSig (idCprInfo id)
-- Local let-bound
| Just sig <- lookupSigEnv env id
= getCprSig sig
| otherwise | otherwise
= topCprType = topCprType
...@@ -251,46 +250,43 @@ cprTransform env id ...@@ -251,46 +250,43 @@ cprTransform env id
-- Recursive bindings -- Recursive bindings
cprFix :: TopLevelFlag cprFix :: TopLevelFlag
-> AnalEnv -- Does not include bindings for this binding -> AnalEnv -- Does not include bindings for this binding
-> [(Id,CoreExpr)] -> [(Id,CoreExpr)]
-> (AnalEnv, [(Id,CoreExpr)]) -- Binders annotated with stricness info -> (AnalEnv, [(Id,CoreExpr)]) -- Binders annotated with CPR info
cprFix top_lvl orig_env orig_pairs
cprFix top_lvl env orig_pairs = loop 1 init_env init_pairs
= loop 1 initial_pairs
where where
bot_sig = mkCprSig 0 botCpr init_sig id rhs
-- See Note [CPR for data structures]
| isDataStructure id rhs = topCprSig
| otherwise = mkCprSig 0 botCpr
-- See Note [Initialising strictness] in GHC.Core.Opt.DmdAnal -- See Note [Initialising strictness] in GHC.Core.Opt.DmdAnal
initial_pairs | ae_virgin env = [(setIdCprInfo id bot_sig, rhs) | (id, rhs) <- orig_pairs ] orig_virgin = ae_virgin orig_env
| otherwise = orig_pairs init_pairs | orig_virgin = [(setIdCprInfo id (init_sig id rhs), rhs) | (id, rhs) <- orig_pairs ]
| otherwise = orig_pairs
-- The fixed-point varies the idCprInfo field of the binders, and terminates if that init_env = extendSigEnvList orig_env (map fst init_pairs)
-- annotation does not change any more.
loop :: Int -> [(Id,CoreExpr)] -> (AnalEnv, [(Id,CoreExpr)]) -- The fixed-point varies the idCprInfo field of the binders and and their
loop n pairs -- entries in the AnalEnv, and terminates if that annotation does not change
| found_fixpoint = (final_anal_env, pairs') -- any more.
| otherwise = loop (n+1) pairs' loop :: Int -> AnalEnv -> [(Id,CoreExpr)] -> (AnalEnv, [(Id,CoreExpr)])
where loop n env pairs
found_fixpoint = map (idCprInfo . fst) pairs' == map (idCprInfo . fst) pairs | found_fixpoint = (reset_env', pairs')
first_round = n == 1 | otherwise = loop (n+1) env' pairs'
pairs' = step first_round pairs
final_anal_env = extendAnalEnvs env (map fst pairs')
step :: Bool -> [(Id, CoreExpr)] -> [(Id, CoreExpr)]
step first_round pairs = pairs'
where where
-- In all but the first iteration, delete the virgin flag -- In all but the first iteration, delete the virgin flag
start_env | first_round = env -- See Note [Initialising strictness] in GHC.Core.Opt.DmdAnal
| otherwise = nonVirgin env (env', pairs') = step (applyWhen (n/=1) nonVirgin env) pairs
-- Make sure we reset the virgin flag to what it was when we are stable
start = extendAnalEnvs start_env (map fst pairs) reset_env' = env'{ ae_virgin = orig_virgin }
found_fixpoint = map (idCprInfo . fst) pairs' == map (idCprInfo . fst) pairs
(_, pairs') = mapAccumL my_downRhs start pairs
step :: AnalEnv -> [(Id, CoreExpr)] -> (AnalEnv, [(Id, CoreExpr)])
my_downRhs env (id,rhs) step env pairs = mapAccumL go env pairs
= (env', (id', rhs')) where
go env (id, rhs) = (env', (id', rhs'))
where where
(id', rhs') = cprAnalBind top_lvl env id rhs (id', rhs', env') = cprAnalBind top_lvl env id rhs
env' = extendAnalEnv env id (idCprInfo id')
-- | Process the RHS of the binding for a sensible arity, add the CPR signature -- | Process the RHS of the binding for a sensible arity, add the CPR signature
-- to the Id, and augment the environment with the signature as well. -- to the Id, and augment the environment with the signature as well.
...@@ -299,9 +295,13 @@ cprAnalBind ...@@ -299,9 +295,13 @@ cprAnalBind
-> AnalEnv -> AnalEnv
-> Id -> Id
-> CoreExpr -> CoreExpr
-> (Id, CoreExpr) -> (Id, CoreExpr, AnalEnv)
cprAnalBind top_lvl env id rhs cprAnalBind top_lvl env id rhs
= (id', rhs') -- See Note [CPR for data structures]
| isDataStructure id rhs
= (id, rhs, env) -- Data structure => no code => need to analyse rhs
| otherwise
= (id', rhs', env')
where where
(rhs_ty, rhs') = cprAnal env rhs (rhs_ty, rhs') = cprAnal env rhs
-- possibly trim thunk CPR info -- possibly trim thunk CPR info
...@@ -310,12 +310,11 @@ cprAnalBind top_lvl env id rhs ...@@ -310,12 +310,11 @@ cprAnalBind top_lvl env id rhs
| stays_thunk = trimCprTy rhs_ty | stays_thunk = trimCprTy rhs_ty
-- See Note [CPR for sum types] -- See Note [CPR for sum types]
| returns_sum = trimCprTy rhs_ty | returns_sum = trimCprTy rhs_ty
-- See Note [CPR for expandable unfoldings]
| will_expand = topCprType
| otherwise = rhs_ty | otherwise = rhs_ty
-- See Note [Arity trimming for CPR signatures] -- See Note [Arity trimming for CPR signatures]
sig = mkCprSigForArity (idArity id) rhs_ty' sig = mkCprSigForArity (idArity id) rhs_ty'
id' = setIdCprInfo id sig id' = setIdCprInfo id sig
env' = extendSigEnv env id sig
-- See Note [CPR for thunks] -- See Note [CPR for thunks]
stays_thunk = is_thunk && not_strict stays_thunk = is_thunk && not_strict
...@@ -325,15 +324,22 @@ cprAnalBind top_lvl env id rhs ...@@ -325,15 +324,22 @@ cprAnalBind top_lvl env id rhs
(_, ret_ty) = splitPiTys (idType id) (_, ret_ty) = splitPiTys (idType id)
not_a_prod = isNothing (deepSplitProductType_maybe (ae_fam_envs env) ret_ty) not_a_prod = isNothing (deepSplitProductType_maybe (ae_fam_envs env) ret_ty)
returns_sum = not (isTopLevel top_lvl) && not_a_prod returns_sum = not (isTopLevel top_lvl) && not_a_prod
-- See Note [CPR for expandable unfoldings]
will_expand = isJust (cprExpandUnfolding_maybe id)
cprExpandUnfolding_maybe :: Id -> Maybe CoreExpr isDataStructure :: Id -> CoreExpr -> Bool
cprExpandUnfolding_maybe id = do -- See Note [CPR for data structures]
guard (idArity id == 0) isDataStructure id rhs =
idArity id == 0 && exprIsHNF rhs
-- | Returns an expandable unfolding
-- (See Note [exprIsExpandable] in "GHC.Core.Utils") that has
-- So effectively is a constructor application.
cprDataStructureUnfolding_maybe :: Id -> Maybe CoreExpr
cprDataStructureUnfolding_maybe id = do
-- There are only FinalPhase Simplifier runs after CPR analysis -- There are only FinalPhase Simplifier runs after CPR analysis
guard (activeInFinalPhase (idInlineActivation id)) guard (activeInFinalPhase (idInlineActivation id))
expandUnfolding_maybe (idUnfolding id) unf <- expandUnfolding_maybe (idUnfolding id)
guard (isDataStructure id unf)
return unf
{- Note [Arity trimming for CPR signatures] {- Note [Arity trimming for CPR signatures]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...@@ -394,15 +400,15 @@ emptyAnalEnv fam_envs ...@@ -394,15 +400,15 @@ emptyAnalEnv fam_envs
, ae_fam_envs = fam_envs , ae_fam_envs = fam_envs
} }
-- | Extend an environment with the strictness IDs attached to the id -- | Extend an environment with the CPR sigs attached to the id
extendAnalEnvs :: AnalEnv -> [Id] -> AnalEnv extendSigEnvList :: AnalEnv -> [Id] -> AnalEnv
extendAnalEnvs env ids extendSigEnvList env ids
= env { ae_sigs = sigs' } = env { ae_sigs = sigs' }
where where
sigs' = extendVarEnvList (ae_sigs env) [ (id, idCprInfo id) | id <- ids ] sigs' = extendVarEnvList (ae_sigs env) [ (id, idCprInfo id) | id <- ids ]
extendAnalEnv :: AnalEnv -> Id -> CprSig -> AnalEnv extendSigEnv :: AnalEnv -> Id -> CprSig -> AnalEnv
extendAnalEnv env id sig extendSigEnv env id sig
= env { ae_sigs = extendVarEnv (ae_sigs env) id sig } = env { ae_sigs = extendVarEnv (ae_sigs env) id sig }
lookupSigEnv :: AnalEnv -> Id -> Maybe CprSig lookupSigEnv :: AnalEnv -> Id -> Maybe CprSig
...@@ -411,17 +417,17 @@ lookupSigEnv env id = lookupVarEnv (ae_sigs env) id ...@@ -411,17 +417,17 @@ lookupSigEnv env id = lookupVarEnv (ae_sigs env) id
nonVirgin :: AnalEnv -> AnalEnv nonVirgin :: AnalEnv -> AnalEnv
nonVirgin env = env { ae_virgin = False } nonVirgin env = env { ae_virgin = False }
-- | A version of 'extendAnalEnv' for a binder of which we don't see the RHS -- | A version of 'extendSigEnv' for a binder of which we don't see the RHS
-- needed to compute a 'CprSig' (e.g. lambdas and DataAlt field binders). -- needed to compute a 'CprSig' (e.g. lambdas and DataAlt field binders).
-- In this case, we can still look at their demand to attach CPR signatures -- In this case, we can still look at their demand to attach CPR signatures
-- anticipating the unboxing done by worker/wrapper. -- anticipating the unboxing done by worker/wrapper.
-- See Note [CPR for binders that will be unboxed]. -- See Note [CPR for binders that will be unboxed].
extendAnalEnvForDemand :: AnalEnv -> Id -> Demand -> AnalEnv extendSigEnvForDemand :: AnalEnv -> Id -> Demand -> AnalEnv
extendAnalEnvForDemand env id dmd extendSigEnvForDemand env id dmd
| isId id | isId id
, Just (_, DataConAppContext { dcac_dc = dc }) , Just (_, DataConAppContext { dcac_dc = dc })
<- wantToUnbox (ae_fam_envs env) has_inlineable_prag (idType id) dmd <- wantToUnbox (ae_fam_envs env) has_inlineable_prag (idType id) dmd
= extendAnalEnv env id (CprSig (conCprType (dataConTag dc))) = extendSigEnv env id (CprSig (conCprType (dataConTag dc)))
| otherwise | otherwise
= env = env
where where
...@@ -436,7 +442,7 @@ extendEnvForDataAlt :: AnalEnv -> CoreExpr -> Id -> DataCon -> [Var] -> AnalEnv ...@@ -436,7 +442,7 @@ extendEnvForDataAlt :: AnalEnv -> CoreExpr -> Id -> DataCon -> [Var] -> AnalEnv
extendEnvForDataAlt env scrut case_bndr dc bndrs extendEnvForDataAlt env scrut case_bndr dc bndrs
= foldl' do_con_arg env' ids_w_strs = foldl' do_con_arg env' ids_w_strs
where where
env' = extendAnalEnv env case_bndr (CprSig case_bndr_ty) env' = extendSigEnv env case_bndr (CprSig case_bndr_ty)
ids_w_strs = filter isId bndrs `zip` dataConRepStrictness dc ids_w_strs = filter isId bndrs `zip` dataConRepStrictness dc
...@@ -460,7 +466,7 @@ extendEnvForDataAlt env scrut case_bndr dc bndrs ...@@ -460,7 +466,7 @@ extendEnvForDataAlt env scrut case_bndr dc bndrs
| is_var scrut | is_var scrut
-- See Note [Add demands for strict constructors] in GHC.Core.Opt.WorkWrap.Utils -- See Note [Add demands for strict constructors] in GHC.Core.Opt.WorkWrap.Utils
, let dmd = applyWhen (isMarkedStrict str) strictifyDmd (idDemandInfo id) , let dmd = applyWhen (isMarkedStrict str) strictifyDmd (idDemandInfo id)
= extendAnalEnvForDemand env id dmd = extendSigEnvForDemand env id dmd
| otherwise | otherwise
= env = env
...@@ -645,46 +651,72 @@ assumption is that error cases are rarely entered and we are diverging anyway, ...@@ -645,46 +651,72 @@ assumption is that error cases are rarely entered and we are diverging anyway,
so WW doesn't hurt. so WW doesn't hurt.
Should we also trim CPR on DataCon application bindings? Should we also trim CPR on DataCon application bindings?
See Note [CPR for expandable unfoldings]! See Note [CPR for data structures]!
Note [CPR for expandable unfoldings] Note [CPR for data structures]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Long static data structures (whether top-level or not) like Long static data structures (whether top-level or not) like
xs = x1 : xs1 xs = x1 : xs1
xs1 = x2 : xs2 xs1 = x2 : xs2
xs2 = x3 : xs3 xs2 = x3 : xs3
should not get CPR signatures, because they should not get CPR signatures (#18154), because they
* Never get WW'd, so their CPR signature should be irrelevant after analysis * Never get WW'd, so their CPR signature should be irrelevant after analysis
(in fact the signature might even be harmful for that reason) (in fact the signature might even be harmful for that reason)
* Would need to be inlined/expanded to see their constructed product * Would need to be inlined/expanded to see their constructed product
* Recording CPR on them blows up interface file sizes and is redundant with * Recording CPR on them blows up interface file sizes and is redundant with
their unfolding. In case of Nested CPR, this blow-up can be quadratic! their unfolding. In case of Nested CPR, this blow-up can be quadratic!
Reason: the CPR info for xs1 contains the CPR info for xs; the CPR info
for xs2 contains that for xs1. And so on.
Hence we don't analyse or annotate data structures in 'cprAnalBind'. To
implement this, the isDataStructure guard is triggered for bindings that satisfy
But we can't just stop giving DataCon application bindings the CPR property, (1) idArity id == 0 (otherwise it's a function)
(2) exprIsHNF rhs (otherwise it's a thunk, Note [CPR for thunks] applies)
But we can't just stop giving DataCon application bindings the CPR *property*,
for example for example
fac 0 = 1 fac 0 = I# 1#
fac n = n * fac (n-1) fac n = n * fac (n-1)
fac certainly has the CPR property and should be WW'd! But FloatOut will fac certainly has the CPR property and should be WW'd! But FloatOut will
transform the first clause to transform the first clause to
lvl = 1 lvl = I# 1#
fac 0 = lvl fac 0 = lvl
If lvl doesn't have the CPR property, fac won't either. But lvl doesn't have a If lvl doesn't have the CPR property, fac won't either. But lvl is a data
CPR signature to extrapolate into a CPR transformer ('cprTransform'). So structure, and hence (see above) will not have a CPR signature. So instead, when
instead we keep on cprAnal'ing through *expandable* unfoldings for these arity 'cprAnal' meets a variable lacking a CPR signature to extrapolate into a CPR
0 bindings via 'cprExpandUnfolding_maybe'. transformer, 'cprTransform' instead tries to get its unfolding (via
'cprDataStructureUnfolding_maybe'), and analyses that instead.
In practice, GHC generates a lot of (nested) TyCon and KindRep bindings, one In practice, GHC generates a lot of (nested) TyCon and KindRep bindings, one
for each data declaration. It's wasteful to attach CPR signatures to each of for each data declaration. They should not have CPR signatures (blow up!).
them (and intractable in case of Nested CPR).
There is a perhaps surprising special case: KindRep bindings satisfy
Tracked by #18154. 'isDataStructure' (so no CPR signature), but are marked NOINLINE at the same
time (see the noinline wrinkle in Note [Grand plan for Typeable]). So there is
no unfolding for 'cprDataStructureUnfolding_maybe' to look through and we'll
return topCprType. And that is fine! We should refrain to look through NOINLINE
data structures in general, as a constructed product could never be exposed
after WW.
It's also worth pointing out how ad-hoc this is: If we instead had
f1 x = x:[]
f2 x = x : f1 x
f3 x = x : f2 x
...
we still give every function an every deepening CPR signature. But it's very
uncommon to find code like this, whereas the long static data structures from
the beginning of this Note are very common because of GHC's strategy of ANF'ing
data structure RHSs.
Note [CPR examples] Note [CPR examples]
~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~
......
...@@ -92,7 +92,7 @@ T7360.$trModule ...@@ -92,7 +92,7 @@ T7360.$trModule
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0} -- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
$krep :: GHC.Types.KindRep $krep :: GHC.Types.KindRep
[GblId, Cpr=m1, Unf=OtherCon []] [GblId, Unf=OtherCon []]
$krep $krep
= GHC.Types.KindRepTyConApp = GHC.Types.KindRepTyConApp
GHC.Types.$tcInt (GHC.Types.[] @GHC.Types.KindRep) GHC.Types.$tcInt (GHC.Types.[] @GHC.Types.KindRep)
...@@ -127,7 +127,7 @@ T7360.$tcFoo ...@@ -127,7 +127,7 @@ T7360.$tcFoo
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0} -- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
T7360.$tc'Foo4 [InlPrag=NOUSERINLINE[~]] :: GHC.Types.KindRep T7360.$tc'Foo4 [InlPrag=NOUSERINLINE[~]] :: GHC.Types.KindRep
[GblId, Cpr=m1, Unf=OtherCon []] [GblId, Unf=OtherCon []]
T7360.$tc'Foo4 T7360.$tc'Foo4
= GHC.Types.KindRepTyConApp = GHC.Types.KindRepTyConApp
T7360.$tcFoo (GHC.Types.[] @GHC.Types.KindRep) T7360.$tcFoo (GHC.Types.[] @GHC.Types.KindRep)
...@@ -190,7 +190,7 @@ T7360.$tc'Foo2 ...@@ -190,7 +190,7 @@ T7360.$tc'Foo2
-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0} -- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
T7360.$tc'Foo9 [InlPrag=NOUSERINLINE[~]] :: GHC.Types.KindRep T7360.$tc'Foo9 [InlPrag=NOUSERINLINE[~]] :: GHC.Types.KindRep
[GblId, Cpr=m4, Unf=OtherCon []] [GblId, Unf=OtherCon []]
T7360.$tc'Foo9 = GHC.Types.KindRepFun $krep T7360.$tc'Foo4 T7360.$tc'Foo9 = GHC.Types.KindRepFun $krep T7360.$tc'Foo4
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment