Commit 3d38e828 authored by Simon Peyton Jones's avatar Simon Peyton Jones

Do not unpack class dictionaries with INLINABLE

Matthew Pickering uncovered a bad performance hole in the way
that single-method dictionaries work, described in Trac #14955.

See Note [Do not unpack class dictionaries] in WwLib.

I tried to fix this 6 years ago, but got it slightly wrong.  This patch
fixes it, which makes a dramatic improvement in the test case.

Nofib highlights: not much happening:

  Program           Size    Allocs   Runtime   Elapsed  TotalMem
-----------------------------------------------------------------
      VSM          -0.3%     +2.7%     -7.4%     -7.4%      0.0%
cacheprof          -0.0%     +0.1%     +0.3%     +0.7%      0.0%
  integer          -0.0%     +1.1%     +7.5%     +7.5%      0.0%
      tak          -0.1%     -0.2%     0.024     0.024      0.0%
-----------------------------------------------------------------
      Min          -4.4%     -0.2%     -7.4%     -7.4%     -8.0%
      Max          +0.6%     +2.7%     +7.5%     +7.5%      0.0%
Geom Mean          -0.1%     +0.0%     +0.1%     +0.1%     -0.2%

I investigated VSM.  The patch unpacks class dictionaries a bit more
than before (i.e. does so if there is no INLINABLE pragma). And that
gives better code in VSM (less dictionary selection etc), but one closure
gets one word bigger.

I'll accept these changes in exchange for more robust performance.

Some ghci.debugger output wobbled around (order of bindings
being displayed). I have no idea why; but I accepted the changes.
parent 625eea93
......@@ -494,8 +494,7 @@ splitFun :: DynFlags -> FamInstEnvs -> Id -> IdInfo -> [Demand] -> DmdResult ->
splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs
= WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) ) do
-- The arity should match the signature
stuff <- mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty
wrap_dmds use_res_info
stuff <- mkWwBodies dflags fam_envs rhs_fvs fn_id wrap_dmds use_res_info
case stuff of
Just (work_demands, join_arity, wrap_fn, work_fn) -> do
work_uniq <- getUniqueM
......@@ -527,7 +526,7 @@ splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs
`setInlinePragma` work_prag
`setIdUnfolding` mkWorkerUnfolding dflags work_fn (unfoldingInfo fn_info)
`setIdUnfolding` mkWorkerUnfolding dflags work_fn fn_unfolding
-- See Note [Worker-wrapper for INLINABLE functions]
`setIdStrictness` mkClosedStrictSig work_demands work_res_info
......@@ -576,13 +575,12 @@ splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs
Nothing -> return [(fn_id, rhs)]
where
mb_join_arity = isJoinId_maybe fn_id
rhs_fvs = exprFreeVars rhs
fun_ty = idType fn_id
fn_inl_prag = inlinePragInfo fn_info
fn_inline_spec = inl_inline fn_inl_prag
fn_act = inl_act fn_inl_prag
rule_match_info = inlinePragmaRuleMatchInfo fn_inl_prag
fn_unfolding = unfoldingInfo fn_info
arity = arityInfo fn_info
-- The arity is set by the simplifier using exprEtaExpandArity
-- So it may be more than the number of top-level-visible lambdas
......@@ -691,7 +689,7 @@ then the splitting will go deeper too.
splitThunk :: DynFlags -> FamInstEnvs -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)]
splitThunk dflags fam_envs is_rec fn_id rhs
= ASSERT(not (isJoinId fn_id))
do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs [fn_id]
do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs False [fn_id]
; let res = [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ]
; if useful then ASSERT2( isNonRec is_rec, ppr fn_id ) -- The thunk must be non-recursive
return res
......
......@@ -123,8 +123,7 @@ mkWwBodies :: DynFlags
-> FamInstEnvs
-> VarSet -- Free vars of RHS
-- See Note [Freshen WW arguments]
-> Maybe JoinArity -- Just ar <=> is join point with join arity ar
-> Type -- Type of original function
-> Id -- The original function
-> [Demand] -- Strictness of original function
-> DmdResult -- Info about function result
-> UniqSM (Maybe WwResult)
......@@ -140,12 +139,14 @@ mkWwBodies :: DynFlags
-- let x = (a,b) in
-- E
mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info
mkWwBodies dflags fam_envs rhs_fvs fun_id demands res_info
= do { let empty_subst = mkEmptyTCvSubst (mkInScopeSet rhs_fvs)
-- See Note [Freshen WW arguments]
; (wrap_args, wrap_fn_args, work_fn_args, res_ty) <- mkWWargs empty_subst fun_ty demands
; (useful1, work_args, wrap_fn_str, work_fn_str) <- mkWWstr dflags fam_envs wrap_args
; (wrap_args, wrap_fn_args, work_fn_args, res_ty)
<- mkWWargs empty_subst fun_ty demands
; (useful1, work_args, wrap_fn_str, work_fn_str)
<- mkWWstr dflags fam_envs has_inlineable_prag wrap_args
-- Do CPR w/w. See Note [Always do CPR w/w]
; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty)
......@@ -158,7 +159,7 @@ mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info
; if isWorkerSmallEnough dflags work_args
&& not (too_many_args_for_join_point wrap_args)
&& (useful1 && not only_one_void_argument || useful2)
&& ((useful1 && not only_one_void_argument) || useful2)
then return (Just (worker_args_dmds, length work_call_args,
wrapper_body, worker_body))
else return Nothing
......@@ -171,6 +172,11 @@ mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info
-- f's RHS is now trivial (size 1) we still want the __inline__ to prevent
-- fw from being inlined into f's RHS
where
fun_ty = idType fun_id
mb_join_arity = isJoinId_maybe fun_id
has_inlineable_prag = isStableUnfolding (realIdUnfolding fun_id)
-- See Note [Do not unpack class dictionaries]
-- Note [Do not split void functions]
only_one_void_argument
| [d] <- demands
......@@ -490,6 +496,8 @@ To avoid this:
mkWWstr :: DynFlags
-> FamInstEnvs
-> Bool -- True <=> INLINEABLE pragama on this function defn
-- See Note [Do not unpack class dictionaries]
-> [Var] -- Wrapper args; have their demand info on them
-- *Includes type variables*
-> UniqSM (Bool, -- Is this useful
......@@ -501,13 +509,18 @@ mkWWstr :: DynFlags
CoreExpr -> CoreExpr) -- Worker body, lacking the original body of the function,
-- and lacking its lambdas.
-- This fn does the reboxing
mkWWstr _ _ []
= return (False, [], nop_fn, nop_fn)
mkWWstr dflags fam_envs has_inlineable_prag args
= go args
where
go_one arg = mkWWstr_one dflags fam_envs has_inlineable_prag arg
mkWWstr dflags fam_envs (arg : args) = do
(useful1, args1, wrap_fn1, work_fn1) <- mkWWstr_one dflags fam_envs arg
(useful2, args2, wrap_fn2, work_fn2) <- mkWWstr dflags fam_envs args
return (useful1 || useful2, args1 ++ args2, wrap_fn1 . wrap_fn2, work_fn1 . work_fn2)
go [] = return (False, [], nop_fn, nop_fn)
go (arg : args) = do { (useful1, args1, wrap_fn1, work_fn1) <- go_one arg
; (useful2, args2, wrap_fn2, work_fn2) <- go args
; return ( useful1 || useful2
, args1 ++ args2
, wrap_fn1 . wrap_fn2
, work_fn1 . work_fn2) }
{-
Note [Unpacking arguments with product and polymorphic demands]
......@@ -544,9 +557,12 @@ as-yet-un-filled-in pkgState files.
-- brings into scope work_args (via cases)
-- * work_fn assumes work_args are in scope, a
-- brings into scope wrap_arg (via lets)
mkWWstr_one :: DynFlags -> FamInstEnvs -> Var
mkWWstr_one :: DynFlags -> FamInstEnvs
-> Bool -- True <=> INLINEABLE pragama on this function defn
-- See Note [Do not unpack class dictionaries]
-> Var
-> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr)
mkWWstr_one dflags fam_envs arg
mkWWstr_one dflags fam_envs has_inlineable_prag arg
| isTyVar arg
= return (False, [arg], nop_fn, nop_fn)
......@@ -581,8 +597,10 @@ mkWWstr_one dflags fam_envs arg
| isStrictDmd dmd
, Just cs <- splitProdDmd_maybe dmd
-- See Note [Unpacking arguments with product and polymorphic demands]
, not (has_inlineable_prag && isClassPred arg_ty)
-- See Note [Do not unpack class dictionaries]
, Just (data_con, inst_tys, inst_con_arg_tys, co)
<- deepSplitProductType_maybe fam_envs (idType arg)
<- deepSplitProductType_maybe fam_envs arg_ty
, cs `equalLength` inst_con_arg_tys
-- See Note [mkWWstr and unsafeCoerce]
= do { (uniq1:uniqs) <- getUniquesM
......@@ -594,7 +612,7 @@ mkWWstr_one dflags fam_envs arg
-- in Simplify.hs; and see Trac #13890
rebox_fn = Let (NonRec arg_no_unf con_app)
con_app = mkConApp2 data_con inst_tys unpk_args `mkCast` mkSymCo co
; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs unpk_args
; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs False unpk_args
; return (True, worker_args, unbox_fn . wrap_fn, work_fn . rebox_fn) }
-- Don't pass the arg, rebox instead
......@@ -602,6 +620,7 @@ mkWWstr_one dflags fam_envs arg
= return (False, [arg], nop_fn, nop_fn)
where
arg_ty = idType arg
dmd = idDemandInfo arg
mk_ww_arg uniq ty sub_dmd = setIdDemandInfo (mk_ww_local uniq ty) sub_dmd
......@@ -680,10 +699,12 @@ BUT if f is strict in the Ord dictionary, we might unpack it, to get
and the type-class specialiser can't specialise that. An example is
Trac #6056.
Moreover, dictionaries can have a lot of fields, so unpacking them can
increase closure sizes.
But in any other situation a dictionary is just an ordinary value,
and can be unpacked. So we track the INLINABLE pragma, and switch
off the unpacking in mkWWstr_one (see the isClassPred test).
Conclusion: don't unpack dictionaries.
Historical note: Trac #14955 describes how I got this fix wrong
the first time.
-}
deepSplitProductType_maybe
......@@ -699,7 +720,6 @@ deepSplitProductType_maybe fam_envs ty
`orElse` (mkRepReflCo ty, ty)
, Just (tc, tc_args) <- splitTyConApp_maybe ty1
, Just con <- isDataProductTyCon_maybe tc
, not (isClassTyCon tc) -- See Note [Do not unpack class dictionaries]
, let arg_tys = dataConInstArgTys con tc_args
strict_marks = dataConRepStrictness con
= Just (con, tc_args, zipEqual "dspt" arg_tys strict_marks, co)
......
......@@ -4,14 +4,14 @@ f :: Integer -> a = _
x :: Integer = 1
xs :: [Integer] = [2,3]
xs :: [Integer] = [2,3]
x :: Integer = 1
f :: Integer -> a = _
x :: Integer = 1
_result :: [a] = _
y = (_t1::a)
y = 2
xs :: [Integer] = [2,3]
x :: Integer = 1
f :: Integer -> Integer = _
x :: Integer = 1
_result :: [Integer] = _
y :: Integer = 2
_t1 :: Integer = 2
......
......@@ -20,8 +20,8 @@ _result :: a
f :: Integer -> a
x :: Integer
xs :: [t] = []
x :: Integer = 2
f :: Integer -> a = _
x :: Integer = 2
_result :: a = _
_result = 3
Logged breakpoint at Test3.hs:2:18-31
......
......@@ -20,8 +20,8 @@ _result :: a
f :: Integer -> a
x :: Integer
xs :: [t] = []
x :: Integer = 2
f :: Integer -> a = _
x :: Integer = 2
_result :: a = _
_result = 3
Logged breakpoint at Test3.hs:2:18-31
......
......@@ -2,3 +2,7 @@ Rule fired: Class op signum (BUILTIN)
Rule fired: Class op abs (BUILTIN)
Rule fired: Class op heq_sel (BUILTIN)
Rule fired: normalize/Double (T7837)
Rule fired: Class op heq_sel (BUILTIN)
Rule fired: Class op $p1Norm (BUILTIN)
Rule fired: Class op / (BUILTIN)
Rule fired: Class op norm (BUILTIN)
module Main where
import T14955a
--test1 :: [Bool] -> Bool
--test1 = ors
--test2 :: [Bool] -> Bool
--test2 = dors boolDict
--test2a :: [Bool] -> Bool
--test2a xs = dors boolDict xs
test3 :: [Bool] -> Bool
test3 xs = pors xs
--test4 :: [Bool] -> Bool
--test4 xs = porsProxy xs
main = print (test3 (replicate 1000000 False))
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
module T14955a where
import Prelude (Bool(..), (||), (&&))
-- Implementation 1
class Prop r where
or :: r -> r -> r
and :: r -> r -> r
true :: r
false :: r
instance Prop Bool where
or = (||)
and = (&&)
true = True
false = False
-- Implementation 2
data PropDict r = PropDict {
dor :: r -> r -> r
, dand :: r -> r -> r
, dtrue :: r
, dfalse :: r
}
boolDict = PropDict {
dor = (||)
, dand = (&&)
, dtrue = True
, dfalse = False }
-- Implementation 3
class PropProxy r where
propDict :: PropDict r
instance PropProxy Bool where
propDict = boolDict
-- Implementation 4
class PropProxy2 r where
propDict2 :: PropDict r
dummy :: ()
instance PropProxy2 Bool where
propDict2 = boolDict
dummy = ()
ors :: Prop r => [r] -> r
ors [] = true
ors (o:os) = o `or` ors os
{-# INLINABLE ors #-}
dors :: PropDict r -> [r] -> r
dors pd [] = dtrue pd
dors pd (o:os) = dor pd o (dors pd os)
pors :: PropProxy r => [r] -> r
pors [] = dtrue propDict
pors (o:os) = dor propDict o (pors os)
{-# INLINABLE pors #-}
porsProxy :: PropProxy2 r => [r] -> r
porsProxy [] = dtrue propDict2
porsProxy (o:os) = dor propDict2 o (porsProxy os)
{-# INLINABLE porsProxy #-}
......@@ -31,6 +31,15 @@ test('T10359',
compile_and_run,
['-O'])
test('T14955',
[stats_num_field('bytes allocated',
[(wordsize(64), 48050760, 5),
(wordsize(32), 351508, 5)]),
only_ways(['normal'])
],
multimod_compile_and_run,
['T14955', '-O'])
# fortunately the values here are mostly independent of the wordsize,
# because the test allocates an unboxed array of doubles.
test('T3586',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment