Commit 3d38e828 authored by Simon Peyton Jones's avatar Simon Peyton Jones

Do not unpack class dictionaries with INLINABLE

Matthew Pickering uncovered a bad performance hole in the way
that single-method dictionaries work, described in Trac #14955.

See Note [Do not unpack class dictionaries] in WwLib.

I tried to fix this 6 years ago, but got it slightly wrong.  This patch
fixes it, which makes a dramatic improvement in the test case.

Nofib highlights: not much happening:

  Program           Size    Allocs   Runtime   Elapsed  TotalMem
-----------------------------------------------------------------
      VSM          -0.3%     +2.7%     -7.4%     -7.4%      0.0%
cacheprof          -0.0%     +0.1%     +0.3%     +0.7%      0.0%
  integer          -0.0%     +1.1%     +7.5%     +7.5%      0.0%
      tak          -0.1%     -0.2%     0.024     0.024      0.0%
-----------------------------------------------------------------
      Min          -4.4%     -0.2%     -7.4%     -7.4%     -8.0%
      Max          +0.6%     +2.7%     +7.5%     +7.5%      0.0%
Geom Mean          -0.1%     +0.0%     +0.1%     +0.1%     -0.2%

I investigated VSM.  The patch unpacks class dictionaries a bit more
than before (i.e. does so if there is no INLINABLE pragma). And that
gives better code in VSM (less dictionary selection etc), but one closure
gets one word bigger.

I'll accept these changes in exchange for more robust performance.

Some ghci.debugger output wobbled around (order of bindings
being displayed). I have no idea why; but I accepted the changes.
parent 625eea93
...@@ -494,8 +494,7 @@ splitFun :: DynFlags -> FamInstEnvs -> Id -> IdInfo -> [Demand] -> DmdResult -> ...@@ -494,8 +494,7 @@ splitFun :: DynFlags -> FamInstEnvs -> Id -> IdInfo -> [Demand] -> DmdResult ->
splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs
= WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) ) do = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) ) do
-- The arity should match the signature -- The arity should match the signature
stuff <- mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty stuff <- mkWwBodies dflags fam_envs rhs_fvs fn_id wrap_dmds use_res_info
wrap_dmds use_res_info
case stuff of case stuff of
Just (work_demands, join_arity, wrap_fn, work_fn) -> do Just (work_demands, join_arity, wrap_fn, work_fn) -> do
work_uniq <- getUniqueM work_uniq <- getUniqueM
...@@ -527,7 +526,7 @@ splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs ...@@ -527,7 +526,7 @@ splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs
`setInlinePragma` work_prag `setInlinePragma` work_prag
`setIdUnfolding` mkWorkerUnfolding dflags work_fn (unfoldingInfo fn_info) `setIdUnfolding` mkWorkerUnfolding dflags work_fn fn_unfolding
-- See Note [Worker-wrapper for INLINABLE functions] -- See Note [Worker-wrapper for INLINABLE functions]
`setIdStrictness` mkClosedStrictSig work_demands work_res_info `setIdStrictness` mkClosedStrictSig work_demands work_res_info
...@@ -576,13 +575,12 @@ splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs ...@@ -576,13 +575,12 @@ splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs
Nothing -> return [(fn_id, rhs)] Nothing -> return [(fn_id, rhs)]
where where
mb_join_arity = isJoinId_maybe fn_id
rhs_fvs = exprFreeVars rhs rhs_fvs = exprFreeVars rhs
fun_ty = idType fn_id
fn_inl_prag = inlinePragInfo fn_info fn_inl_prag = inlinePragInfo fn_info
fn_inline_spec = inl_inline fn_inl_prag fn_inline_spec = inl_inline fn_inl_prag
fn_act = inl_act fn_inl_prag fn_act = inl_act fn_inl_prag
rule_match_info = inlinePragmaRuleMatchInfo fn_inl_prag rule_match_info = inlinePragmaRuleMatchInfo fn_inl_prag
fn_unfolding = unfoldingInfo fn_info
arity = arityInfo fn_info arity = arityInfo fn_info
-- The arity is set by the simplifier using exprEtaExpandArity -- The arity is set by the simplifier using exprEtaExpandArity
-- So it may be more than the number of top-level-visible lambdas -- So it may be more than the number of top-level-visible lambdas
...@@ -691,7 +689,7 @@ then the splitting will go deeper too. ...@@ -691,7 +689,7 @@ then the splitting will go deeper too.
splitThunk :: DynFlags -> FamInstEnvs -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)] splitThunk :: DynFlags -> FamInstEnvs -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)]
splitThunk dflags fam_envs is_rec fn_id rhs splitThunk dflags fam_envs is_rec fn_id rhs
= ASSERT(not (isJoinId fn_id)) = ASSERT(not (isJoinId fn_id))
do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs [fn_id] do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs False [fn_id]
; let res = [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ] ; let res = [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ]
; if useful then ASSERT2( isNonRec is_rec, ppr fn_id ) -- The thunk must be non-recursive ; if useful then ASSERT2( isNonRec is_rec, ppr fn_id ) -- The thunk must be non-recursive
return res return res
......
...@@ -123,8 +123,7 @@ mkWwBodies :: DynFlags ...@@ -123,8 +123,7 @@ mkWwBodies :: DynFlags
-> FamInstEnvs -> FamInstEnvs
-> VarSet -- Free vars of RHS -> VarSet -- Free vars of RHS
-- See Note [Freshen WW arguments] -- See Note [Freshen WW arguments]
-> Maybe JoinArity -- Just ar <=> is join point with join arity ar -> Id -- The original function
-> Type -- Type of original function
-> [Demand] -- Strictness of original function -> [Demand] -- Strictness of original function
-> DmdResult -- Info about function result -> DmdResult -- Info about function result
-> UniqSM (Maybe WwResult) -> UniqSM (Maybe WwResult)
...@@ -140,12 +139,14 @@ mkWwBodies :: DynFlags ...@@ -140,12 +139,14 @@ mkWwBodies :: DynFlags
-- let x = (a,b) in -- let x = (a,b) in
-- E -- E
mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info mkWwBodies dflags fam_envs rhs_fvs fun_id demands res_info
= do { let empty_subst = mkEmptyTCvSubst (mkInScopeSet rhs_fvs) = do { let empty_subst = mkEmptyTCvSubst (mkInScopeSet rhs_fvs)
-- See Note [Freshen WW arguments] -- See Note [Freshen WW arguments]
; (wrap_args, wrap_fn_args, work_fn_args, res_ty) <- mkWWargs empty_subst fun_ty demands ; (wrap_args, wrap_fn_args, work_fn_args, res_ty)
; (useful1, work_args, wrap_fn_str, work_fn_str) <- mkWWstr dflags fam_envs wrap_args <- mkWWargs empty_subst fun_ty demands
; (useful1, work_args, wrap_fn_str, work_fn_str)
<- mkWWstr dflags fam_envs has_inlineable_prag wrap_args
-- Do CPR w/w. See Note [Always do CPR w/w] -- Do CPR w/w. See Note [Always do CPR w/w]
; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty) ; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty)
...@@ -158,7 +159,7 @@ mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info ...@@ -158,7 +159,7 @@ mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info
; if isWorkerSmallEnough dflags work_args ; if isWorkerSmallEnough dflags work_args
&& not (too_many_args_for_join_point wrap_args) && not (too_many_args_for_join_point wrap_args)
&& (useful1 && not only_one_void_argument || useful2) && ((useful1 && not only_one_void_argument) || useful2)
then return (Just (worker_args_dmds, length work_call_args, then return (Just (worker_args_dmds, length work_call_args,
wrapper_body, worker_body)) wrapper_body, worker_body))
else return Nothing else return Nothing
...@@ -171,6 +172,11 @@ mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info ...@@ -171,6 +172,11 @@ mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info
-- f's RHS is now trivial (size 1) we still want the __inline__ to prevent -- f's RHS is now trivial (size 1) we still want the __inline__ to prevent
-- fw from being inlined into f's RHS -- fw from being inlined into f's RHS
where where
fun_ty = idType fun_id
mb_join_arity = isJoinId_maybe fun_id
has_inlineable_prag = isStableUnfolding (realIdUnfolding fun_id)
-- See Note [Do not unpack class dictionaries]
-- Note [Do not split void functions] -- Note [Do not split void functions]
only_one_void_argument only_one_void_argument
| [d] <- demands | [d] <- demands
...@@ -490,6 +496,8 @@ To avoid this: ...@@ -490,6 +496,8 @@ To avoid this:
mkWWstr :: DynFlags mkWWstr :: DynFlags
-> FamInstEnvs -> FamInstEnvs
-> Bool -- True <=> INLINEABLE pragama on this function defn
-- See Note [Do not unpack class dictionaries]
-> [Var] -- Wrapper args; have their demand info on them -> [Var] -- Wrapper args; have their demand info on them
-- *Includes type variables* -- *Includes type variables*
-> UniqSM (Bool, -- Is this useful -> UniqSM (Bool, -- Is this useful
...@@ -501,13 +509,18 @@ mkWWstr :: DynFlags ...@@ -501,13 +509,18 @@ mkWWstr :: DynFlags
CoreExpr -> CoreExpr) -- Worker body, lacking the original body of the function, CoreExpr -> CoreExpr) -- Worker body, lacking the original body of the function,
-- and lacking its lambdas. -- and lacking its lambdas.
-- This fn does the reboxing -- This fn does the reboxing
mkWWstr _ _ [] mkWWstr dflags fam_envs has_inlineable_prag args
= return (False, [], nop_fn, nop_fn) = go args
where
go_one arg = mkWWstr_one dflags fam_envs has_inlineable_prag arg
mkWWstr dflags fam_envs (arg : args) = do go [] = return (False, [], nop_fn, nop_fn)
(useful1, args1, wrap_fn1, work_fn1) <- mkWWstr_one dflags fam_envs arg go (arg : args) = do { (useful1, args1, wrap_fn1, work_fn1) <- go_one arg
(useful2, args2, wrap_fn2, work_fn2) <- mkWWstr dflags fam_envs args ; (useful2, args2, wrap_fn2, work_fn2) <- go args
return (useful1 || useful2, args1 ++ args2, wrap_fn1 . wrap_fn2, work_fn1 . work_fn2) ; return ( useful1 || useful2
, args1 ++ args2
, wrap_fn1 . wrap_fn2
, work_fn1 . work_fn2) }
{- {-
Note [Unpacking arguments with product and polymorphic demands] Note [Unpacking arguments with product and polymorphic demands]
...@@ -544,9 +557,12 @@ as-yet-un-filled-in pkgState files. ...@@ -544,9 +557,12 @@ as-yet-un-filled-in pkgState files.
-- brings into scope work_args (via cases) -- brings into scope work_args (via cases)
-- * work_fn assumes work_args are in scope, a -- * work_fn assumes work_args are in scope, a
-- brings into scope wrap_arg (via lets) -- brings into scope wrap_arg (via lets)
mkWWstr_one :: DynFlags -> FamInstEnvs -> Var mkWWstr_one :: DynFlags -> FamInstEnvs
-> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr) -> Bool -- True <=> INLINEABLE pragama on this function defn
mkWWstr_one dflags fam_envs arg -- See Note [Do not unpack class dictionaries]
-> Var
-> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr)
mkWWstr_one dflags fam_envs has_inlineable_prag arg
| isTyVar arg | isTyVar arg
= return (False, [arg], nop_fn, nop_fn) = return (False, [arg], nop_fn, nop_fn)
...@@ -581,8 +597,10 @@ mkWWstr_one dflags fam_envs arg ...@@ -581,8 +597,10 @@ mkWWstr_one dflags fam_envs arg
| isStrictDmd dmd | isStrictDmd dmd
, Just cs <- splitProdDmd_maybe dmd , Just cs <- splitProdDmd_maybe dmd
-- See Note [Unpacking arguments with product and polymorphic demands] -- See Note [Unpacking arguments with product and polymorphic demands]
, not (has_inlineable_prag && isClassPred arg_ty)
-- See Note [Do not unpack class dictionaries]
, Just (data_con, inst_tys, inst_con_arg_tys, co) , Just (data_con, inst_tys, inst_con_arg_tys, co)
<- deepSplitProductType_maybe fam_envs (idType arg) <- deepSplitProductType_maybe fam_envs arg_ty
, cs `equalLength` inst_con_arg_tys , cs `equalLength` inst_con_arg_tys
-- See Note [mkWWstr and unsafeCoerce] -- See Note [mkWWstr and unsafeCoerce]
= do { (uniq1:uniqs) <- getUniquesM = do { (uniq1:uniqs) <- getUniquesM
...@@ -594,7 +612,7 @@ mkWWstr_one dflags fam_envs arg ...@@ -594,7 +612,7 @@ mkWWstr_one dflags fam_envs arg
-- in Simplify.hs; and see Trac #13890 -- in Simplify.hs; and see Trac #13890
rebox_fn = Let (NonRec arg_no_unf con_app) rebox_fn = Let (NonRec arg_no_unf con_app)
con_app = mkConApp2 data_con inst_tys unpk_args `mkCast` mkSymCo co con_app = mkConApp2 data_con inst_tys unpk_args `mkCast` mkSymCo co
; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs unpk_args ; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs False unpk_args
; return (True, worker_args, unbox_fn . wrap_fn, work_fn . rebox_fn) } ; return (True, worker_args, unbox_fn . wrap_fn, work_fn . rebox_fn) }
-- Don't pass the arg, rebox instead -- Don't pass the arg, rebox instead
...@@ -602,7 +620,8 @@ mkWWstr_one dflags fam_envs arg ...@@ -602,7 +620,8 @@ mkWWstr_one dflags fam_envs arg
= return (False, [arg], nop_fn, nop_fn) = return (False, [arg], nop_fn, nop_fn)
where where
dmd = idDemandInfo arg arg_ty = idType arg
dmd = idDemandInfo arg
mk_ww_arg uniq ty sub_dmd = setIdDemandInfo (mk_ww_local uniq ty) sub_dmd mk_ww_arg uniq ty sub_dmd = setIdDemandInfo (mk_ww_local uniq ty) sub_dmd
---------------------- ----------------------
...@@ -680,10 +699,12 @@ BUT if f is strict in the Ord dictionary, we might unpack it, to get ...@@ -680,10 +699,12 @@ BUT if f is strict in the Ord dictionary, we might unpack it, to get
and the type-class specialiser can't specialise that. An example is and the type-class specialiser can't specialise that. An example is
Trac #6056. Trac #6056.
Moreover, dictionaries can have a lot of fields, so unpacking them can But in any other situation a dictionary is just an ordinary value,
increase closure sizes. and can be unpacked. So we track the INLINABLE pragma, and switch
off the unpacking in mkWWstr_one (see the isClassPred test).
Conclusion: don't unpack dictionaries. Historical note: Trac #14955 describes how I got this fix wrong
the first time.
-} -}
deepSplitProductType_maybe deepSplitProductType_maybe
...@@ -699,7 +720,6 @@ deepSplitProductType_maybe fam_envs ty ...@@ -699,7 +720,6 @@ deepSplitProductType_maybe fam_envs ty
`orElse` (mkRepReflCo ty, ty) `orElse` (mkRepReflCo ty, ty)
, Just (tc, tc_args) <- splitTyConApp_maybe ty1 , Just (tc, tc_args) <- splitTyConApp_maybe ty1
, Just con <- isDataProductTyCon_maybe tc , Just con <- isDataProductTyCon_maybe tc
, not (isClassTyCon tc) -- See Note [Do not unpack class dictionaries]
, let arg_tys = dataConInstArgTys con tc_args , let arg_tys = dataConInstArgTys con tc_args
strict_marks = dataConRepStrictness con strict_marks = dataConRepStrictness con
= Just (con, tc_args, zipEqual "dspt" arg_tys strict_marks, co) = Just (con, tc_args, zipEqual "dspt" arg_tys strict_marks, co)
......
...@@ -4,14 +4,14 @@ f :: Integer -> a = _ ...@@ -4,14 +4,14 @@ f :: Integer -> a = _
x :: Integer = 1 x :: Integer = 1
xs :: [Integer] = [2,3] xs :: [Integer] = [2,3]
xs :: [Integer] = [2,3] xs :: [Integer] = [2,3]
x :: Integer = 1
f :: Integer -> a = _ f :: Integer -> a = _
x :: Integer = 1
_result :: [a] = _ _result :: [a] = _
y = (_t1::a) y = (_t1::a)
y = 2 y = 2
xs :: [Integer] = [2,3] xs :: [Integer] = [2,3]
x :: Integer = 1
f :: Integer -> Integer = _ f :: Integer -> Integer = _
x :: Integer = 1
_result :: [Integer] = _ _result :: [Integer] = _
y :: Integer = 2 y :: Integer = 2
_t1 :: Integer = 2 _t1 :: Integer = 2
......
...@@ -20,8 +20,8 @@ _result :: a ...@@ -20,8 +20,8 @@ _result :: a
f :: Integer -> a f :: Integer -> a
x :: Integer x :: Integer
xs :: [t] = [] xs :: [t] = []
x :: Integer = 2
f :: Integer -> a = _ f :: Integer -> a = _
x :: Integer = 2
_result :: a = _ _result :: a = _
_result = 3 _result = 3
Logged breakpoint at Test3.hs:2:18-31 Logged breakpoint at Test3.hs:2:18-31
......
...@@ -20,8 +20,8 @@ _result :: a ...@@ -20,8 +20,8 @@ _result :: a
f :: Integer -> a f :: Integer -> a
x :: Integer x :: Integer
xs :: [t] = [] xs :: [t] = []
x :: Integer = 2
f :: Integer -> a = _ f :: Integer -> a = _
x :: Integer = 2
_result :: a = _ _result :: a = _
_result = 3 _result = 3
Logged breakpoint at Test3.hs:2:18-31 Logged breakpoint at Test3.hs:2:18-31
......
...@@ -2,3 +2,7 @@ Rule fired: Class op signum (BUILTIN) ...@@ -2,3 +2,7 @@ Rule fired: Class op signum (BUILTIN)
Rule fired: Class op abs (BUILTIN) Rule fired: Class op abs (BUILTIN)
Rule fired: Class op heq_sel (BUILTIN) Rule fired: Class op heq_sel (BUILTIN)
Rule fired: normalize/Double (T7837) Rule fired: normalize/Double (T7837)
Rule fired: Class op heq_sel (BUILTIN)
Rule fired: Class op $p1Norm (BUILTIN)
Rule fired: Class op / (BUILTIN)
Rule fired: Class op norm (BUILTIN)
module Main where
import T14955a
--test1 :: [Bool] -> Bool
--test1 = ors
--test2 :: [Bool] -> Bool
--test2 = dors boolDict
--test2a :: [Bool] -> Bool
--test2a xs = dors boolDict xs
test3 :: [Bool] -> Bool
test3 xs = pors xs
--test4 :: [Bool] -> Bool
--test4 xs = porsProxy xs
main = print (test3 (replicate 1000000 False))
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
module T14955a where
import Prelude (Bool(..), (||), (&&))
-- Implementation 1
class Prop r where
or :: r -> r -> r
and :: r -> r -> r
true :: r
false :: r
instance Prop Bool where
or = (||)
and = (&&)
true = True
false = False
-- Implementation 2
data PropDict r = PropDict {
dor :: r -> r -> r
, dand :: r -> r -> r
, dtrue :: r
, dfalse :: r
}
boolDict = PropDict {
dor = (||)
, dand = (&&)
, dtrue = True
, dfalse = False }
-- Implementation 3
class PropProxy r where
propDict :: PropDict r
instance PropProxy Bool where
propDict = boolDict
-- Implementation 4
class PropProxy2 r where
propDict2 :: PropDict r
dummy :: ()
instance PropProxy2 Bool where
propDict2 = boolDict
dummy = ()
ors :: Prop r => [r] -> r
ors [] = true
ors (o:os) = o `or` ors os
{-# INLINABLE ors #-}
dors :: PropDict r -> [r] -> r
dors pd [] = dtrue pd
dors pd (o:os) = dor pd o (dors pd os)
pors :: PropProxy r => [r] -> r
pors [] = dtrue propDict
pors (o:os) = dor propDict o (pors os)
{-# INLINABLE pors #-}
porsProxy :: PropProxy2 r => [r] -> r
porsProxy [] = dtrue propDict2
porsProxy (o:os) = dor propDict2 o (porsProxy os)
{-# INLINABLE porsProxy #-}
...@@ -31,6 +31,15 @@ test('T10359', ...@@ -31,6 +31,15 @@ test('T10359',
compile_and_run, compile_and_run,
['-O']) ['-O'])
test('T14955',
[stats_num_field('bytes allocated',
[(wordsize(64), 48050760, 5),
(wordsize(32), 351508, 5)]),
only_ways(['normal'])
],
multimod_compile_and_run,
['T14955', '-O'])
# fortunately the values here are mostly independent of the wordsize, # fortunately the values here are mostly independent of the wordsize,
# because the test allocates an unboxed array of doubles. # because the test allocates an unboxed array of doubles.
test('T3586', test('T3586',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment