From 532993c8160d960f848e7abd401774b6879e3ee8 Mon Sep 17 00:00:00 2001 From: Zubin Duggal <zubin.duggal@gmail.com> Date: Wed, 20 Dec 2023 17:52:35 +0530 Subject: [PATCH] driver: Really don't lose track of nodes when we fail to resolve cycles This fixes a bug in 8db8d2fd1c881032b1b360c032b6d9d072c11723, where we could lose track of acyclic components at the start of an unresolved cycle. We now ensure we never loose track of any of these components. As T24275 demonstrates, a "cyclic" SCC might not really be a true SCC: When viewed without boot files, we have a single SCC ``` [REC main:T24275B [main:T24275B {-# SOURCE #-}, main:T24275A {-# SOURCE #-}] main:T24275A [main:T24275A {-# SOURCE #-}]] ``` But with boot files this turns into ``` [NONREC main:T24275B {-# SOURCE #-} [], REC main:T24275B [main:T24275B {-# SOURCE #-}, main:T24275A {-# SOURCE #-}] main:T24275A {-# SOURCE #-} [main:T24275B], NONREC main:T24275A [main:T24275A {-# SOURCE #-}]] ``` Note that this is truly not an SCC, as no nodes are reachable from T24275B.hs-boot. However, we treat this entire group as a single "SCC" because it seems so when we analyse the graph without taking boot files into account. Indeed, we must return a single ResolvedCycle element in the BuildPlan for this as described in Note [Upsweep]. However, since after resolving this is not a true SCC anymore, `findCycle` fails to find a cycle and we have a sub-optimal error message as a result. To handle this, I extended `findCycle` to not assume its input is an SCC, and to try harder to find cycles in its input. Fixes #24275 --- compiler/GHC/Data/Graph/Directed.hs | 67 ++++++++++--------- compiler/GHC/Driver/Make.hs | 2 +- testsuite/tests/driver/T24275/T24275.stderr | 4 ++ testsuite/tests/driver/T24275/T24275A.hs | 1 + testsuite/tests/driver/T24275/T24275A.hs-boot | 3 + testsuite/tests/driver/T24275/T24275B.hs | 3 + testsuite/tests/driver/T24275/T24275B.hs-boot | 1 + testsuite/tests/driver/T24275/all.T | 1 + 8 files changed, 50 insertions(+), 32 deletions(-) create mode 100644 testsuite/tests/driver/T24275/T24275.stderr create mode 100644 testsuite/tests/driver/T24275/T24275A.hs create mode 100644 testsuite/tests/driver/T24275/T24275A.hs-boot create mode 100644 testsuite/tests/driver/T24275/T24275B.hs create mode 100644 testsuite/tests/driver/T24275/T24275B.hs-boot create mode 100644 testsuite/tests/driver/T24275/all.T diff --git a/compiler/GHC/Data/Graph/Directed.hs b/compiler/GHC/Data/Graph/Directed.hs index 915180b9e9bc..712152b6d947 100644 --- a/compiler/GHC/Data/Graph/Directed.hs +++ b/compiler/GHC/Data/Graph/Directed.hs @@ -46,7 +46,7 @@ module GHC.Data.Graph.Directed ( import GHC.Prelude -import GHC.Utils.Misc ( minWith, count ) +import GHC.Utils.Misc ( sortWith, count ) import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Data.Maybe ( expectJust ) @@ -219,47 +219,52 @@ type WorkItem key payload [payload]) -- Rest of the path; -- [a,b,c] means c depends on b, b depends on a --- | Find a reasonably short cycle a->b->c->a, in a strongly --- connected component. The input nodes are presumed to be --- a SCC, so you can start anywhere. +-- | Find a reasonably short cycle a->b->c->a, in a graph +-- The graph might not necessarily be strongly connected. findCycle :: forall payload key. Ord key => [Node key payload] -- The nodes. The dependencies can -- contain extra keys, which are ignored -> Maybe [payload] -- A cycle, starting with node -- so each depends on the next findCycle graph - = go Set.empty (new_work root_deps []) [] + = goRoots plausible_roots where env :: Map.Map key (Node key payload) env = Map.fromList [ (node_key node, node) | node <- graph ] - -- Find the node with fewest dependencies among the SCC modules + goRoots [] = Nothing + goRoots (root:xs) = + case go Set.empty (new_work root_deps []) [] of + Nothing -> goRoots xs + Just res -> Just res + where + DigraphNode root_payload root_key root_deps = root + -- 'go' implements Dijkstra's algorithm, more or less + go :: Set.Set key -- Visited + -> [WorkItem key payload] -- Work list, items length n + -> [WorkItem key payload] -- Work list, items length n+1 + -> Maybe [payload] -- Returned cycle + -- Invariant: in a call (go visited ps qs), + -- visited = union (map tail (ps ++ qs)) + + go _ [] [] = Nothing -- No cycles + go visited [] qs = go visited qs [] + go visited (((DigraphNode payload key deps), path) : ps) qs + | key == root_key = Just (root_payload : reverse path) + | key `Set.member` visited = go visited ps qs + | key `Map.notMember` env = go visited ps qs + | otherwise = go (Set.insert key visited) + ps (new_qs ++ qs) + where + new_qs = new_work deps (payload : path) + + + -- Find the nodes with fewest dependencies among the SCC modules -- This is just a heuristic to find some plausible root module - root :: Node key payload - root = fst (minWith snd [ (node, count (`Map.member` env) - (node_dependencies node)) - | node <- graph ]) - DigraphNode root_payload root_key root_deps = root - - - -- 'go' implements Dijkstra's algorithm, more or less - go :: Set.Set key -- Visited - -> [WorkItem key payload] -- Work list, items length n - -> [WorkItem key payload] -- Work list, items length n+1 - -> Maybe [payload] -- Returned cycle - -- Invariant: in a call (go visited ps qs), - -- visited = union (map tail (ps ++ qs)) - - go _ [] [] = Nothing -- No cycles - go visited [] qs = go visited qs [] - go visited (((DigraphNode payload key deps), path) : ps) qs - | key == root_key = Just (root_payload : reverse path) - | key `Set.member` visited = go visited ps qs - | key `Map.notMember` env = go visited ps qs - | otherwise = go (Set.insert key visited) - ps (new_qs ++ qs) - where - new_qs = new_work deps (payload : path) + plausible_roots :: [Node key payload] + plausible_roots = map fst (sortWith snd [ (node, count (`Map.member` env) (node_dependencies node)) + | node <- graph ]) + new_work :: [key] -> [payload] -> [WorkItem key payload] new_work deps path = [ (n, path) | Just n <- map (`Map.lookup` env) deps ] diff --git a/compiler/GHC/Driver/Make.hs b/compiler/GHC/Driver/Make.hs index 2a8ec8623a9a..d5b0e304dca0 100644 --- a/compiler/GHC/Driver/Make.hs +++ b/compiler/GHC/Driver/Make.hs @@ -631,7 +631,7 @@ createBuildPlan mod_graph maybe_top_mod = collapseSCC :: [SCC ModuleGraphNode] -> Either [ModuleGraphNode] [(Either ModuleGraphNode ModuleGraphNodeWithBootFile)] -- Must be at least two nodes, as we were in a cycle collapseSCC [AcyclicSCC node1, AcyclicSCC node2] = Right [toNodeWithBoot node1, toNodeWithBoot node2] - collapseSCC (AcyclicSCC node : nodes) = (toNodeWithBoot node :) <$> collapseSCC nodes + collapseSCC (AcyclicSCC node : nodes) = either (Left . (node :)) (Right . (toNodeWithBoot node :)) (collapseSCC nodes) -- Cyclic collapseSCC nodes = Left (flattenSCCs nodes) diff --git a/testsuite/tests/driver/T24275/T24275.stderr b/testsuite/tests/driver/T24275/T24275.stderr new file mode 100644 index 000000000000..605671e191ad --- /dev/null +++ b/testsuite/tests/driver/T24275/T24275.stderr @@ -0,0 +1,4 @@ +Module graph contains a cycle: + module ‘T24275A’ (./T24275A.hs-boot) + imports module ‘T24275B’ (T24275B.hs) + which imports module ‘T24275A’ (./T24275A.hs-boot) diff --git a/testsuite/tests/driver/T24275/T24275A.hs b/testsuite/tests/driver/T24275/T24275A.hs new file mode 100644 index 000000000000..8c7ef372bcb9 --- /dev/null +++ b/testsuite/tests/driver/T24275/T24275A.hs @@ -0,0 +1 @@ +module T24275A where diff --git a/testsuite/tests/driver/T24275/T24275A.hs-boot b/testsuite/tests/driver/T24275/T24275A.hs-boot new file mode 100644 index 000000000000..795450d2315c --- /dev/null +++ b/testsuite/tests/driver/T24275/T24275A.hs-boot @@ -0,0 +1,3 @@ +module T24275A where + +import T24275B diff --git a/testsuite/tests/driver/T24275/T24275B.hs b/testsuite/tests/driver/T24275/T24275B.hs new file mode 100644 index 000000000000..ddff8fff9545 --- /dev/null +++ b/testsuite/tests/driver/T24275/T24275B.hs @@ -0,0 +1,3 @@ +module T24275B where + +import {-# SOURCE #-} T24275A diff --git a/testsuite/tests/driver/T24275/T24275B.hs-boot b/testsuite/tests/driver/T24275/T24275B.hs-boot new file mode 100644 index 000000000000..ec0367607361 --- /dev/null +++ b/testsuite/tests/driver/T24275/T24275B.hs-boot @@ -0,0 +1 @@ +module T24275B where diff --git a/testsuite/tests/driver/T24275/all.T b/testsuite/tests/driver/T24275/all.T new file mode 100644 index 000000000000..7dfdfbe207c0 --- /dev/null +++ b/testsuite/tests/driver/T24275/all.T @@ -0,0 +1 @@ +test('T24275', extra_files(['T24275A.hs','T24275A.hs-boot','T24275B.hs-boot','T24275B.hs']), multimod_compile_fail, ['T24275B','']) -- GitLab