Commit dbdf77d9 authored by niteria's avatar niteria

Lift constructor tag allocation out of a loop

Before this change, for each constructor that we want
to allocate a tag for we would traverse a list of all
the constructors in a datatype to determine which tag
a constructor should get.

This is obviously quadratic and for datatypes with 10k
constructors it actually makes a big difference.

This change implements the plan outlined by @simonpj in
https://mail.haskell.org/pipermail/ghc-devs/2017-October/014974.html
which is basically about using a map and constructing it outside the
loop.

One place where things got a bit awkward was TysWiredIn.hs,
it would have been possible to just assign the tags by hand, but
that seemed error-prone to me, so I decided to go through a map
there as well.

Test Plan:
./validate
On a file with 10k constructors
Before:
   8,130,522,344 bytes allocated in the heap
  Total   time    3.682s  (  3.920s elapsed)
After:
   4,133,478,744 bytes allocated in the heap
  Total   time    2.509s  (  2.750s elapsed)

Reviewers: simonpj, bgamari

Reviewed By: simonpj

Subscribers: goldfire, rwbarton, thomie, simonmar, carter, simonpj

GHC Trac Issues: #14657

Differential Revision: https://phabricator.haskell.org/D4289
parent 1577908f
......@@ -75,7 +75,6 @@ import Name
import PrelNames
import Var
import Outputable
import ListSetOps
import Util
import BasicTypes
import FastString
......@@ -862,6 +861,7 @@ mkDataCon :: Name
-> Type -- ^ Original result type
-> RuntimeRepInfo -- ^ See comments on 'TyCon.RuntimeRepInfo'
-> TyCon -- ^ Representation type constructor
-> ConTag -- ^ Constructor tag
-> ThetaType -- ^ The "stupid theta", context of the data
-- declaration e.g. @data Eq a => T a ...@
-> Id -- ^ Worker Id
......@@ -874,7 +874,7 @@ mkDataCon name declared_infix prom_info
fields
univ_tvs ex_tvs user_tvbs
eq_spec theta
orig_arg_tys orig_res_ty rep_info rep_tycon
orig_arg_tys orig_res_ty rep_info rep_tycon tag
stupid_theta work_id rep
-- Warning: mkDataCon is not a good place to check certain invariants.
-- If the programmer writes the wrong result type in the decl, thus:
......@@ -918,7 +918,6 @@ mkDataCon name declared_infix prom_info
-- source-language arguments. We add extra ones for the
-- dictionary arguments right here.
tag = assoc "mkDataCon" (tyConDataCons rep_tycon `zip` [fIRST_TAG..]) con
rep_arg_tys = dataConRepArgTys con
rep_ty =
......
......@@ -27,6 +27,7 @@ import Var
import VarSet
import BasicTypes
import Name
import NameEnv
import MkId
import Class
import TyCon
......@@ -107,13 +108,16 @@ buildDataCon :: FamInstEnvs
-- or the GADT equalities
-> [Type] -> Type -- Argument and result types
-> TyCon -- Rep tycon
-> NameEnv ConTag -- Maps the Name of each DataCon to its
-- ConTag
-> TcRnIf m n DataCon
-- A wrapper for DataCon.mkDataCon that
-- a) makes the worker Id
-- b) makes the wrapper Id if necessary, including
-- allocating its unique (hence monadic)
buildDataCon fam_envs src_name declared_infix prom_info src_bangs impl_bangs field_lbls
univ_tvs ex_tvs user_tvbs eq_spec ctxt arg_tys res_ty rep_tycon
buildDataCon fam_envs src_name declared_infix prom_info src_bangs impl_bangs
field_lbls univ_tvs ex_tvs user_tvbs eq_spec ctxt arg_tys res_ty
rep_tycon tag_map
= do { wrap_name <- newImplicitBinder src_name mkDataConWrapperOcc
; work_name <- newImplicitBinder src_name mkDataConWorkerOcc
-- This last one takes the name of the data constructor in the source
......@@ -124,10 +128,12 @@ buildDataCon fam_envs src_name declared_infix prom_info src_bangs impl_bangs fie
; us <- newUniqueSupply
; dflags <- getDynFlags
; let stupid_ctxt = mkDataConStupidTheta rep_tycon arg_tys univ_tvs
tag = lookupNameEnv_NF tag_map src_name
-- See Note [Constructor tag allocation], fixes #14657
data_con = mkDataCon src_name declared_infix prom_info
src_bangs field_lbls
univ_tvs ex_tvs user_tvbs eq_spec ctxt
arg_tys res_ty NoRRI rep_tycon
arg_tys res_ty NoRRI rep_tycon tag
stupid_ctxt dc_wrk dc_rep
dc_wrk = mkDataConWorkId work_name data_con
dc_rep = initUs_ us (mkDataConRep dflags fam_envs wrap_name
......@@ -307,6 +313,7 @@ buildClass tycon_name binders roles fds
arg_tys
(mkTyConApp rec_tycon (mkTyVarTys univ_tvs))
rec_tycon
(mkTyConTagMap rec_tycon)
; rhs <- case () of
_ | use_newtype
......
......@@ -897,6 +897,9 @@ tcIfaceDataCons tycon_name tycon tc_tybinders if_cons
univ_tvs :: [TyVar]
univ_tvs = binderVars (tyConTyVarBinders tc_tybinders)
tag_map :: NameEnv ConTag
tag_map = mkTyConTagMap tycon
tc_con_decl (IfCon { ifConInfix = is_infix,
ifConExTvs = ex_bndrs,
ifConUserTvBinders = user_bndrs,
......@@ -960,7 +963,7 @@ tcIfaceDataCons tycon_name tycon tc_tybinders if_cons
lbl_names
univ_tvs ex_tvs user_tv_bndrs
eq_spec theta
arg_tys orig_res_ty tycon
arg_tys orig_res_ty tycon tag_map
; traceIf (text "Done interface-file tc_con_decl" <+> ppr dc_name)
; return con }
mk_doc con_name = text "Constructor" <+> ppr con_name
......
......@@ -150,7 +150,7 @@ import TyCon
import Class ( Class, mkClass )
import RdrName
import Name
import NameEnv ( NameEnv, mkNameEnv, lookupNameEnv )
import NameEnv ( NameEnv, mkNameEnv, lookupNameEnv, lookupNameEnv_NF )
import NameSet ( NameSet, mkNameSet, elemNameSet )
import BasicTypes ( Arity, Boxity(..), TupleSort(..), ConTagZ,
SourceText(..) )
......@@ -517,6 +517,13 @@ pcDataConWithFixity' declared_infix dc_name wrk_key rri
tyvars ex_tyvars user_tyvars arg_tys tycon
= data_con
where
tag_map = mkTyConTagMap tycon
-- This constructs the constructor Name to ConTag map once per
-- constructor, which is quadratic. It's OK here, because it's
-- only called for wired in data types that don't have a lot of
-- constructors. It's also likely that GHC will lift tag_map, since
-- we call pcDataConWithFixity' with static TyCons in the same module.
-- See Note [Constructor tag allocation] and #14657
data_con = mkDataCon dc_name declared_infix prom_info
(map (const no_bang) arg_tys)
[] -- No labelled fields
......@@ -527,6 +534,7 @@ pcDataConWithFixity' declared_infix dc_name wrk_key rri
arg_tys (mkTyConApp tycon (mkTyVarTys tyvars))
rri
tycon
(lookupNameEnv_NF tag_map dc_name)
[] -- No stupid theta
(mkDataConWorkId wrk_name data_con)
NoDataConRep -- Wired-in types are too simple to need wrappers
......
......@@ -1691,16 +1691,19 @@ tcConDecls :: TyCon -> ([TyConBinder], Type)
-- have all the names and the binders have the visibilities.
tcConDecls rep_tycon (tmpl_bndrs, res_tmpl)
= concatMapM $ addLocM $
tcConDecl rep_tycon tmpl_bndrs res_tmpl
tcConDecl rep_tycon (mkTyConTagMap rep_tycon) tmpl_bndrs res_tmpl
-- It's important that we pay for tag allocation here, once per TyCon,
-- See Note [Constructor tag allocation], fixes #14657
tcConDecl :: TyCon -- Representation tycon. Knot-tied!
-> NameEnv ConTag
-> [TyConBinder] -> Type
-- Return type template (with its template tyvars)
-- (tvs, T tys), where T is the family TyCon
-> ConDecl GhcRn
-> TcM [DataCon]
tcConDecl rep_tycon tmpl_bndrs res_tmpl
tcConDecl rep_tycon tag_map tmpl_bndrs res_tmpl
(ConDeclH98 { con_name = name
, con_ex_tvs = explicit_tkv_nms
, con_mb_cxt = hs_ctxt
......@@ -1771,7 +1774,7 @@ tcConDecl rep_tycon tmpl_bndrs res_tmpl
stricts Nothing field_lbls
univ_tvs ex_tvs user_tvbs
[{- no eq_preds -}] ctxt arg_tys
res_tmpl rep_tycon
res_tmpl rep_tycon tag_map
-- NB: we put data_tc, the type constructor gotten from the
-- constructor type signature into the data constructor;
-- that way checkValidDataCon can complain if it's wrong.
......@@ -1780,7 +1783,7 @@ tcConDecl rep_tycon tmpl_bndrs res_tmpl
; mapM buildOneDataCon [name]
}
tcConDecl rep_tycon tmpl_bndrs res_tmpl
tcConDecl rep_tycon tag_map tmpl_bndrs res_tmpl
(ConDeclGADT { con_names = names
, con_qvars = qtvs
, con_mb_cxt = cxt, con_args = hs_args
......@@ -1851,7 +1854,7 @@ tcConDecl rep_tycon tmpl_bndrs res_tmpl
rep_nm
stricts Nothing field_lbls
univ_tvs ex_tvs all_user_bndrs eq_preds
ctxt' arg_tys' res_ty' rep_tycon
ctxt' arg_tys' res_ty' rep_tycon tag_map
-- NB: we put data_tc, the type constructor gotten from the
-- constructor type signature into the data constructor;
-- that way checkValidDataCon can complain if it's wrong.
......
......@@ -97,6 +97,7 @@ module TyCon(
tyConRuntimeRepInfo,
tyConBinders, tyConResKind, tyConTyVarBinders,
tcTyConScopedTyVars,
mkTyConTagMap,
-- ** Manipulating TyCons
expandSynTyCon_maybe,
......@@ -840,7 +841,7 @@ data AlgTyConRhs
-- user declares the type to have no constructors
--
-- INVARIANT: Kept in order of increasing 'DataCon'
-- tag (see the tag assignment in DataCon.mkDataCon)
-- tag (see the tag assignment in mkTyConTagMap)
data_cons_size :: Int,
-- ^ Cached value: length data_cons
is_enum :: Bool -- ^ Cached value: is this an enumeration type?
......@@ -2329,6 +2330,28 @@ tyConRuntimeRepInfo (PromotedDataCon { promDcRepInfo = rri }) = rri
tyConRuntimeRepInfo _ = NoRRI
-- could panic in that second case. But Douglas Adams told me not to.
{-
Note [Constructor tag allocation]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When typechecking we need to allocate constructor tags to constructors.
They are allocated based on the position in the data_cons field of TyCon,
with the first constructor getting fIRST_TAG.
We used to pay linear cost per constructor, with each constructor looking up
its relative index in the constructor list. That was quadratic and prohibitive
for large data types with more than 10k constructors.
The current strategy is to build a NameEnv with a mapping from costructor's
Name to ConTag and pass it down to buildDataCon for efficient lookup.
Relevant ticket: #14657
-}
mkTyConTagMap :: TyCon -> NameEnv ConTag
mkTyConTagMap tycon =
mkNameEnv $ map getName (tyConDataCons tycon) `zip` [fIRST_TAG..]
-- See Note [Constructor tag allocation]
{-
************************************************************************
* *
......
......@@ -79,6 +79,7 @@ buildPDataDataCon orig_name vect_tc repr_tc repr
fam_envs <- readGEnv global_fam_inst_env
rep_nm <- liftDs $ newTyConRepName dc_name
let univ_tvbs = mkTyVarBinders Specified tvs
tag_map = mkTyConTagMap repr_tc
liftDs $ buildDataCon fam_envs dc_name
False -- not infix
rep_nm
......@@ -93,6 +94,7 @@ buildPDataDataCon orig_name vect_tc repr_tc repr
comp_tys
(mkFamilyTyConApp repr_tc (mkTyVarTys tvs))
repr_tc
tag_map
where
no_bang = HsSrcBang NoSourceText NoSrcUnpack NoSrcStrict
......@@ -125,6 +127,7 @@ buildPDatasDataCon orig_name vect_tc repr_tc repr
fam_envs <- readGEnv global_fam_inst_env
rep_nm <- liftDs $ newTyConRepName dc_name
let univ_tvbs = mkTyVarBinders Specified tvs
tag_map = mkTyConTagMap repr_tc
liftDs $ buildDataCon fam_envs dc_name
False -- not infix
rep_nm
......@@ -139,6 +142,7 @@ buildPDatasDataCon orig_name vect_tc repr_tc repr
comp_tys
(mkFamilyTyConApp repr_tc (mkTyVarTys tvs))
repr_tc
tag_map
where
no_bang = HsSrcBang NoSourceText NoSrcUnpack NoSrcStrict
......
......@@ -197,6 +197,7 @@ vectDataCon dc
; let ret_ty = mkFamilyTyConApp tycon' (mkTyVarTys univ_tvs)
; fam_envs <- readGEnv global_fam_inst_env
; rep_nm <- liftDs $ newTyConRepName name'
; let tag_map = mkTyConTagMap tycon'
; liftDs $ buildDataCon fam_envs
name'
(dataConIsInfix dc) -- infix if the original is
......@@ -212,6 +213,7 @@ vectDataCon dc
arg_tys -- argument types
ret_ty -- return type
tycon' -- representation tycon
tag_map
}
where
name = dataConName dc
......
......@@ -1154,6 +1154,18 @@ test('MultiLayerModules',
multimod_compile,
['MultiLayerModules', '-v0'])
test('ManyConstructors',
[ compiler_stats_num_field('bytes allocated',
[(wordsize(64), 4246959352, 10),
# initial: 8130527160
# 2018-01-05: 4246959352 Lift constructor tag allocation out of a loop
]),
pre_cmd('./genManyConstructors'),
extra_files(['genManyConstructors']),
],
multimod_compile,
['ManyConstructors', '-v0'])
test('T13701',
[ compiler_stats_num_field('bytes allocated',
[(platform('x86_64-apple-darwin'), 2217187888, 10),
......
SIZE=10000
MODULE=ManyConstructors
# Generates a module with a large number of constructors that looks
# like this:
#
# module ManyConstructors where
#
# data A10000 = A0
# | A00001
# | A00002
# ...
# | A10000
#
# The point of this test is to check if we don't regress on #14657 reintroducing
# some code that's quadratic in the number of constructors in a data type.
# NB. This is not that artificial, I've seen data types of this size
# in the wild.
echo "module $MODULE where" > $MODULE.hs
echo >> $MODULE.hs
echo "data A$SIZE = A0" >> $MODULE.hs
for i in $(seq -w 1 $SIZE); do
echo " | A$i" >> $MODULE.hs
done
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment