Skip to content
Snippets Groups Projects
Commit a933aff3 authored by Zubin's avatar Zubin Committed by Marge Bot
Browse files

driver: Make `checkHomeUnitsClosed` faster

The implementation of `checkHomeUnitsClosed` was traversing every single path
in the unit dependency graph - this grows exponentially and quickly grows to be
infeasible on larger unit dependency graphs.

Instead we replace this with a faster implementation which follows from the
specificiation of the closure property - there is a closure error if there are
units which are both are both (transitively) depended upon by home units and
(transitively) depend on home units, but are not themselves home units.

To compute the set of units required for closure, we first compute the closure
of the unit dependency graph, then the transpose of this closure, and find all
units that are reachable from the home units in the transpose of the closure.
parent 6e18ce2b
No related branches found
No related tags found
No related merge requests found
...@@ -1565,8 +1565,8 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots ...@@ -1565,8 +1565,8 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots
(root_errs, rootSummariesOk) <- partitionWithM getRootSummary roots -- #17549 (root_errs, rootSummariesOk) <- partitionWithM getRootSummary roots -- #17549
let root_map = mkRootMap rootSummariesOk let root_map = mkRootMap rootSummariesOk
checkDuplicates root_map checkDuplicates root_map
(deps, pkg_deps, map0) <- loopSummaries rootSummariesOk (M.empty, Set.empty, root_map) (deps, map0) <- loopSummaries rootSummariesOk (M.empty, root_map)
let closure_errs = checkHomeUnitsClosed (hsc_unit_env hsc_env) (hsc_all_home_unit_ids hsc_env) (Set.toList pkg_deps) let closure_errs = checkHomeUnitsClosed (hsc_unit_env hsc_env)
let unit_env = hsc_unit_env hsc_env let unit_env = hsc_unit_env hsc_env
let tmpfs = hsc_tmpfs hsc_env let tmpfs = hsc_tmpfs hsc_env
...@@ -1660,19 +1660,19 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots ...@@ -1660,19 +1660,19 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots
-- This loops over all the mod summaries in the dependency graph, accumulates the actual dependencies for each module/unit -- This loops over all the mod summaries in the dependency graph, accumulates the actual dependencies for each module/unit
loopSummaries :: [ModSummary] loopSummaries :: [ModSummary]
-> (M.Map NodeKey ModuleGraphNode, Set.Set (UnitId, UnitId), -> (M.Map NodeKey ModuleGraphNode,
DownsweepCache) DownsweepCache)
-> IO ((M.Map NodeKey ModuleGraphNode), Set.Set (UnitId, UnitId), DownsweepCache) -> IO ((M.Map NodeKey ModuleGraphNode), DownsweepCache)
loopSummaries [] done = return done loopSummaries [] done = return done
loopSummaries (ms:next) (done, pkgs, summarised) loopSummaries (ms:next) (done, summarised)
| Just {} <- M.lookup k done | Just {} <- M.lookup k done
= loopSummaries next (done, pkgs, summarised) = loopSummaries next (done, summarised)
-- Didn't work out what the imports mean yet, now do that. -- Didn't work out what the imports mean yet, now do that.
| otherwise = do | otherwise = do
(final_deps, pkgs1, done', summarised') <- loopImports (calcDeps ms) done summarised (final_deps, done', summarised') <- loopImports (calcDeps ms) done summarised
-- This has the effect of finding a .hs file if we are looking at the .hs-boot file. -- This has the effect of finding a .hs file if we are looking at the .hs-boot file.
(_, _, done'', summarised'') <- loopImports (maybeToList hs_file_for_boot) done' summarised' (_, done'', summarised'') <- loopImports (maybeToList hs_file_for_boot) done' summarised'
loopSummaries next (M.insert k (ModuleNode final_deps ms) done'', pkgs1 `Set.union` pkgs, summarised'') loopSummaries next (M.insert k (ModuleNode final_deps ms) done'', summarised'')
where where
k = NodeKey_Module (msKey ms) k = NodeKey_Module (msKey ms)
...@@ -1692,18 +1692,17 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots ...@@ -1692,18 +1692,17 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots
-- Visited set; the range is a list because -- Visited set; the range is a list because
-- the roots can have the same module names -- the roots can have the same module names
-- if allow_dup_roots is True -- if allow_dup_roots is True
-> IO ([NodeKey], Set.Set (UnitId, UnitId), -> IO ([NodeKey],
M.Map NodeKey ModuleGraphNode, DownsweepCache) M.Map NodeKey ModuleGraphNode, DownsweepCache)
-- The result is the completed NodeMap -- The result is the completed NodeMap
loopImports [] done summarised = return ([], Set.empty, done, summarised) loopImports [] done summarised = return ([], done, summarised)
loopImports ((home_uid,mb_pkg, gwib) : ss) done summarised loopImports ((home_uid,mb_pkg, gwib) : ss) done summarised
| Just summs <- M.lookup cache_key summarised | Just summs <- M.lookup cache_key summarised
= case summs of = case summs of
[Right ms] -> do [Right ms] -> do
let nk = NodeKey_Module (msKey ms) let nk = NodeKey_Module (msKey ms)
(rest, pkgs, summarised', done') <- loopImports ss done summarised (rest, summarised', done') <- loopImports ss done summarised
return (nk: rest, pkgs, summarised', done') return (nk: rest, summarised', done')
[Left _err] -> [Left _err] ->
loopImports ss done summarised loopImports ss done summarised
_errs -> do _errs -> do
...@@ -1715,69 +1714,77 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots ...@@ -1715,69 +1714,77 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots
Nothing excl_mods Nothing excl_mods
case mb_s of case mb_s of
NotThere -> loopImports ss done summarised NotThere -> loopImports ss done summarised
External uid -> do External _ -> do
(other_deps, pkgs, done', summarised') <- loopImports ss done summarised (other_deps, done', summarised') <- loopImports ss done summarised
return (other_deps, Set.insert (homeUnitId home_unit, uid) pkgs, done', summarised') return (other_deps, done', summarised')
FoundInstantiation iud -> do FoundInstantiation iud -> do
(other_deps, pkgs, done', summarised') <- loopImports ss done summarised (other_deps, done', summarised') <- loopImports ss done summarised
return (NodeKey_Unit iud : other_deps, pkgs, done', summarised') return (NodeKey_Unit iud : other_deps, done', summarised')
FoundHomeWithError (_uid, e) -> loopImports ss done (Map.insert cache_key [(Left e)] summarised) FoundHomeWithError (_uid, e) -> loopImports ss done (Map.insert cache_key [(Left e)] summarised)
FoundHome s -> do FoundHome s -> do
(done', pkgs1, summarised') <- (done', summarised') <-
loopSummaries [s] (done, Set.empty, Map.insert cache_key [Right s] summarised) loopSummaries [s] (done, Map.insert cache_key [Right s] summarised)
(other_deps, pkgs2, final_done, final_summarised) <- loopImports ss done' summarised' (other_deps, final_done, final_summarised) <- loopImports ss done' summarised'
-- MP: This assumes that we can only instantiate non home units, which is probably fair enough for now. -- MP: This assumes that we can only instantiate non home units, which is probably fair enough for now.
return (NodeKey_Module (msKey s) : other_deps, pkgs1 `Set.union` pkgs2, final_done, final_summarised) return (NodeKey_Module (msKey s) : other_deps, final_done, final_summarised)
where where
cache_key = (home_uid, mb_pkg, unLoc <$> gwib) cache_key = (home_uid, mb_pkg, unLoc <$> gwib)
home_unit = ue_unitHomeUnit home_uid (hsc_unit_env hsc_env) home_unit = ue_unitHomeUnit home_uid (hsc_unit_env hsc_env)
GWIB { gwib_mod = L loc mod, gwib_isBoot = is_boot } = gwib GWIB { gwib_mod = L loc mod, gwib_isBoot = is_boot } = gwib
wanted_mod = L loc mod wanted_mod = L loc mod
-- This function checks then important property that if both p and q are home units -- | This function checks then important property that if both p and q are home units
-- then any dependency of p, which transitively depends on q is also a home unit. -- then any dependency of p, which transitively depends on q is also a home unit.
checkHomeUnitsClosed :: UnitEnv -> Set.Set UnitId -> [(UnitId, UnitId)] -> [DriverMessages] --
-- Fast path, trivially closed. -- See Note [Multiple Home Units], section 'Closure Property'.
checkHomeUnitsClosed ue home_id_set home_imp_ids checkHomeUnitsClosed :: UnitEnv -> [DriverMessages]
| Set.size home_id_set == 1 = [] checkHomeUnitsClosed ue
| otherwise = | Set.null bad_unit_ids = []
let res = foldMap loop home_imp_ids | otherwise = [singleMessage $ mkPlainErrorMsgEnvelope rootLoc $ DriverHomePackagesNotClosed (Set.toList bad_unit_ids)]
-- Now check whether everything which transitively depends on a home_unit is actually a home_unit
-- These units are the ones which we need to load as home packages but failed to do for some reason,
-- it's a bug in the tool invoking GHC.
bad_unit_ids = Set.difference res home_id_set
in if Set.null bad_unit_ids
then []
else [singleMessage $ mkPlainErrorMsgEnvelope rootLoc $ DriverHomePackagesNotClosed (Set.toList bad_unit_ids)]
where where
home_id_set = unitEnv_keys $ ue_home_unit_graph ue
bad_unit_ids = upwards_closure Set.\\ home_id_set
rootLoc = mkGeneralSrcSpan (fsLit "<command line>") rootLoc = mkGeneralSrcSpan (fsLit "<command line>")
-- TODO: This could repeat quite a bit of work but I struggled to write this function.
-- Which units transitively depend on a home unit graph :: Graph (Node UnitId UnitId)
loop :: (UnitId, UnitId) -> Set.Set UnitId -- The units which transitively depend on a home unit graph = graphFromEdgedVerticesUniq graphNodes
loop (from_uid, uid) =
let us = ue_findHomeUnitEnv from_uid ue in -- downwards closure of graph
let um = unitInfoMap (homeUnitEnv_units us) in downwards_closure
case lookupUniqMap um uid of = graphFromEdgedVerticesUniq [ DigraphNode uid uid (Set.toList deps)
Nothing -> pprPanic "uid not found" (ppr uid) | (uid, deps) <- M.toList (allReachable graph node_key)]
Just ui ->
let depends = unitDepends ui inverse_closure = transposeG downwards_closure
home_depends = Set.fromList depends `Set.intersection` home_id_set
other_depends = Set.fromList depends `Set.difference` home_id_set upwards_closure = Set.fromList $ map node_key $ reachablesG inverse_closure [DigraphNode uid uid [] | uid <- Set.toList home_id_set]
in
-- Case 1: The unit directly depends on a home_id all_unit_direct_deps :: UniqMap UnitId (Set.Set UnitId)
if not (null home_depends) all_unit_direct_deps
then = unitEnv_foldWithKey go emptyUniqMap $ ue_home_unit_graph ue
let res = foldMap (loop . (from_uid,)) other_depends where
in Set.insert uid res go rest this this_uis =
-- Case 2: Check the rest of the dependencies, and then see if any of them depended on plusUniqMap_C Set.union
else (addToUniqMap_C Set.union external_depends this (Set.fromList $ this_deps))
let res = foldMap (loop . (from_uid,)) other_depends rest
in where
if not (Set.null res) external_depends = mapUniqMap (Set.fromList . unitDepends) (unitInfoMap this_units)
then Set.insert uid res this_units = homeUnitEnv_units this_uis
else res this_deps = [ toUnitId unit | (unit,Just _) <- explicitUnits this_units]
graphNodes :: [Node UnitId UnitId]
graphNodes = go Set.empty home_id_set
where
go done todo
= case Set.minView todo of
Nothing -> []
Just (uid, todo')
| Set.member uid done -> go done todo'
| otherwise -> case lookupUniqMap all_unit_direct_deps uid of
Nothing -> pprPanic "uid not found" (ppr (uid, all_unit_direct_deps))
Just depends ->
let todo'' = (depends Set.\\ done) `Set.union` todo'
in DigraphNode uid uid (Set.toList depends) : go (Set.insert uid done) todo''
-- | Update the every ModSummary that is depended on -- | Update the every ModSummary that is depended on
-- by a module that needs template haskell. We enable codegen to -- by a module that needs template haskell. We enable codegen to
......
TOP=../../../..
include $(TOP)/mk/boilerplate.mk
include $(TOP)/mk/test.mk
SETUP=../Setup -v0
mhu-perf: clean
$(MAKE) -s --no-print-directory clean
./genLargeHMU
'$(GHC_PKG)' init tmp.d
'$(TEST_HC)' $(TEST_HC_OPTS) -v0 --make Setup
for dir in unit-p*; do \
cd $$dir && $(SETUP) clean && $(SETUP) configure $(CABAL_MINIMAL_BUILD) --ipid=$$dir-0.1.0.0 --with-ghc='$(TEST_HC)' --with-hc-pkg='$(GHC_PKG)' --ghc-options='$(TEST_HC_OPTS)' --package-db=../tmp.d && $(SETUP) build && $(SETUP) register --inplace && cd ..; \
done;
ifeq "$(CLEANUP)" "1"
$(MAKE) -s --no-print-directory clean
endif
clean :
$(RM) -r unitTop* unit-p* top*/ tmp*.d inst-* *.o *.hi */*.o */*.hi */Setup$(exeext) */dist Setup$(exeext)
import Distribution.Simple
main = defaultMain
test('mhu-perf',
[ collect_compiler_stats('bytes allocated',2),
extra_files(['genLargeHMU','Setup.hs']),
pre_cmd('$MAKE -s --no-print-directory mhu-perf'),
js_broken(22349),
when(arch('wasm32'), skip), # wasm32 doesn't like running Setup/Makefile tests
compile_timeout_multiplier(5)
],
multiunit_compile,
[['unitTop1', 'unitTop2'], '-fhide-source-paths'])
#!/usr/bin/env bash
# Generate $DEPTH layers of packages with $WIDTH modules on each layer
# Every package on layer N depends on all the packages on layer N-1
# unitTop imports all the units from the last layer
DEPTH=8
WIDTH=8
for i in $(seq -w 1 $WIDTH); do
mkdir unit-p0M$i
echo "module DummyLevel0M$i where" > unit-p0M$i/DummyLevel0M$i.hs;
cat > unit-p0M$i/unit-p0M$i.cabal <<EOF
name: unit-p0M$i
version: 0.1.0.0
build-type: Simple
cabal-version: >=1.10
library
default-language: Haskell2010
exposed-modules: DummyLevel0M$i
build-depends: base
EOF
done
for l in $(seq 1 $DEPTH); do
for i in $(seq -w 1 $WIDTH); do
mkdir unit-p${l}M$i
cat > unit-p${l}M$i/unit-p${l}M$i.cabal <<EOF
name: unit-p${l}M$i
version: 0.1.0.0
build-type: Simple
cabal-version: >=1.10
library
default-language: Haskell2010
exposed-modules: DummyLevel${l}M$i
build-depends: base
EOF
echo "module DummyLevel${l}M$i where" > unit-p${l}M$i/DummyLevel${l}M$i.hs;
for j in $(seq -w 1 $WIDTH); do
echo " , unit-p$((l-1))M$j" >> unit-p${l}M$i/unit-p${l}M$i.cabal
echo "import DummyLevel$((l-1))M$j" >> unit-p${l}M$i/DummyLevel${l}M$i.hs;
done
done
done
mkdir top1
echo "module Top1 where" > top1/Top1.hs
echo "-package-db ./tmp.d -i -itop1 Top1 -this-unit-id unit-top1 -package base" > unitTop1;
for j in $(seq -w 1 $WIDTH); do
echo "-package unit-p${DEPTH}M$j" >> unitTop1;
echo "import DummyLevel${DEPTH}M$j" >> top1/Top1.hs;
done
mkdir top2
echo "module Top2 where" > top2/Top2.hs
echo "-package-db ./tmp.d -i -itop2 Top2 -this-unit-id unit-top2 -package base" > unitTop2;
for j in $(seq -w 2 $WIDTH); do
echo "-package unit-p${DEPTH}M$j" >> unitTop2;
echo "import DummyLevel${DEPTH}M$j" >> top2/Top2.hs;
done
[1 of 2] Compiling Top1[unit-top1]
[2 of 2] Compiling Top2[unit-top2]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment