From 532993c8160d960f848e7abd401774b6879e3ee8 Mon Sep 17 00:00:00 2001
From: Zubin Duggal <zubin.duggal@gmail.com>
Date: Wed, 20 Dec 2023 17:52:35 +0530
Subject: [PATCH] driver: Really don't lose track of nodes when we fail to
 resolve cycles

This fixes a bug in 8db8d2fd1c881032b1b360c032b6d9d072c11723, where we could lose
track of acyclic components at the start of an unresolved cycle. We now ensure we
never loose track of any of these components.

As T24275 demonstrates, a "cyclic" SCC might not really be a true SCC:

When viewed without boot files, we have a single SCC

```
[REC main:T24275B [main:T24275B {-# SOURCE #-},
                   main:T24275A {-# SOURCE #-}]
     main:T24275A [main:T24275A {-# SOURCE #-}]]
```

But with boot files this turns into

```
[NONREC main:T24275B {-# SOURCE #-} [],
 REC main:T24275B [main:T24275B {-# SOURCE #-},
                   main:T24275A {-# SOURCE #-}]
    main:T24275A {-# SOURCE #-} [main:T24275B],
 NONREC main:T24275A [main:T24275A {-# SOURCE #-}]]
```

Note that this is truly not an SCC, as no nodes are reachable from T24275B.hs-boot.
However, we treat this entire group as a single "SCC" because it seems so when we
analyse the graph without taking boot files into account.

Indeed, we must return a single ResolvedCycle element in the BuildPlan for this
as described in Note [Upsweep].

However, since after resolving this is not a true SCC anymore, `findCycle` fails
to find a cycle and we have a sub-optimal error message as a result.

To handle this, I extended `findCycle` to not assume its input is an SCC, and to
try harder to find cycles in its input.

Fixes #24275
---
 compiler/GHC/Data/Graph/Directed.hs           | 67 ++++++++++---------
 compiler/GHC/Driver/Make.hs                   |  2 +-
 testsuite/tests/driver/T24275/T24275.stderr   |  4 ++
 testsuite/tests/driver/T24275/T24275A.hs      |  1 +
 testsuite/tests/driver/T24275/T24275A.hs-boot |  3 +
 testsuite/tests/driver/T24275/T24275B.hs      |  3 +
 testsuite/tests/driver/T24275/T24275B.hs-boot |  1 +
 testsuite/tests/driver/T24275/all.T           |  1 +
 8 files changed, 50 insertions(+), 32 deletions(-)
 create mode 100644 testsuite/tests/driver/T24275/T24275.stderr
 create mode 100644 testsuite/tests/driver/T24275/T24275A.hs
 create mode 100644 testsuite/tests/driver/T24275/T24275A.hs-boot
 create mode 100644 testsuite/tests/driver/T24275/T24275B.hs
 create mode 100644 testsuite/tests/driver/T24275/T24275B.hs-boot
 create mode 100644 testsuite/tests/driver/T24275/all.T

diff --git a/compiler/GHC/Data/Graph/Directed.hs b/compiler/GHC/Data/Graph/Directed.hs
index 915180b9e9bc..712152b6d947 100644
--- a/compiler/GHC/Data/Graph/Directed.hs
+++ b/compiler/GHC/Data/Graph/Directed.hs
@@ -46,7 +46,7 @@ module GHC.Data.Graph.Directed (
 
 import GHC.Prelude
 
-import GHC.Utils.Misc ( minWith, count )
+import GHC.Utils.Misc ( sortWith, count )
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
 import GHC.Data.Maybe ( expectJust )
@@ -219,47 +219,52 @@ type WorkItem key payload
      [payload])         -- Rest of the path;
                         --  [a,b,c] means c depends on b, b depends on a
 
--- | Find a reasonably short cycle a->b->c->a, in a strongly
--- connected component.  The input nodes are presumed to be
--- a SCC, so you can start anywhere.
+-- | Find a reasonably short cycle a->b->c->a, in a graph
+-- The graph might not necessarily be strongly connected.
 findCycle :: forall payload key. Ord key
           => [Node key payload]     -- The nodes.  The dependencies can
                                     -- contain extra keys, which are ignored
           -> Maybe [payload]        -- A cycle, starting with node
                                     -- so each depends on the next
 findCycle graph
-  = go Set.empty (new_work root_deps []) []
+  = goRoots plausible_roots
   where
     env :: Map.Map key (Node key payload)
     env = Map.fromList [ (node_key node, node) | node <- graph ]
 
-    -- Find the node with fewest dependencies among the SCC modules
+    goRoots [] = Nothing
+    goRoots (root:xs) =
+        case go Set.empty (new_work root_deps []) [] of
+          Nothing -> goRoots xs
+          Just res -> Just res
+      where
+        DigraphNode root_payload root_key root_deps = root
+        -- 'go' implements Dijkstra's algorithm, more or less
+        go :: Set.Set key   -- Visited
+           -> [WorkItem key payload]        -- Work list, items length n
+           -> [WorkItem key payload]        -- Work list, items length n+1
+           -> Maybe [payload]               -- Returned cycle
+           -- Invariant: in a call (go visited ps qs),
+           --            visited = union (map tail (ps ++ qs))
+
+        go _       [] [] = Nothing  -- No cycles
+        go visited [] qs = go visited qs []
+        go visited (((DigraphNode payload key deps), path) : ps) qs
+           | key == root_key           = Just (root_payload : reverse path)
+           | key `Set.member` visited  = go visited ps qs
+           | key `Map.notMember` env   = go visited ps qs
+           | otherwise                 = go (Set.insert key visited)
+                                            ps (new_qs ++ qs)
+           where
+             new_qs = new_work deps (payload : path)
+
+
+    -- Find the nodes with fewest dependencies among the SCC modules
     -- This is just a heuristic to find some plausible root module
-    root :: Node key payload
-    root = fst (minWith snd [ (node, count (`Map.member` env)
-                                           (node_dependencies node))
-                            | node <- graph ])
-    DigraphNode root_payload root_key root_deps = root
-
-
-    -- 'go' implements Dijkstra's algorithm, more or less
-    go :: Set.Set key   -- Visited
-       -> [WorkItem key payload]        -- Work list, items length n
-       -> [WorkItem key payload]        -- Work list, items length n+1
-       -> Maybe [payload]               -- Returned cycle
-       -- Invariant: in a call (go visited ps qs),
-       --            visited = union (map tail (ps ++ qs))
-
-    go _       [] [] = Nothing  -- No cycles
-    go visited [] qs = go visited qs []
-    go visited (((DigraphNode payload key deps), path) : ps) qs
-       | key == root_key           = Just (root_payload : reverse path)
-       | key `Set.member` visited  = go visited ps qs
-       | key `Map.notMember` env   = go visited ps qs
-       | otherwise                 = go (Set.insert key visited)
-                                        ps (new_qs ++ qs)
-       where
-         new_qs = new_work deps (payload : path)
+    plausible_roots :: [Node key payload]
+    plausible_roots = map fst (sortWith snd [ (node, count (`Map.member` env) (node_dependencies node))
+                                            | node <- graph ])
+
 
     new_work :: [key] -> [payload] -> [WorkItem key payload]
     new_work deps path = [ (n, path) | Just n <- map (`Map.lookup` env) deps ]
diff --git a/compiler/GHC/Driver/Make.hs b/compiler/GHC/Driver/Make.hs
index 2a8ec8623a9a..d5b0e304dca0 100644
--- a/compiler/GHC/Driver/Make.hs
+++ b/compiler/GHC/Driver/Make.hs
@@ -631,7 +631,7 @@ createBuildPlan mod_graph maybe_top_mod =
         collapseSCC :: [SCC ModuleGraphNode] -> Either [ModuleGraphNode] [(Either ModuleGraphNode ModuleGraphNodeWithBootFile)]
         -- Must be at least two nodes, as we were in a cycle
         collapseSCC [AcyclicSCC node1, AcyclicSCC node2] = Right [toNodeWithBoot node1, toNodeWithBoot node2]
-        collapseSCC (AcyclicSCC node : nodes) = (toNodeWithBoot node :) <$> collapseSCC nodes
+        collapseSCC (AcyclicSCC node : nodes) = either (Left . (node :)) (Right . (toNodeWithBoot node :)) (collapseSCC nodes)
         -- Cyclic
         collapseSCC nodes = Left (flattenSCCs nodes)
 
diff --git a/testsuite/tests/driver/T24275/T24275.stderr b/testsuite/tests/driver/T24275/T24275.stderr
new file mode 100644
index 000000000000..605671e191ad
--- /dev/null
+++ b/testsuite/tests/driver/T24275/T24275.stderr
@@ -0,0 +1,4 @@
+Module graph contains a cycle:
+        module ‘T24275A’ (./T24275A.hs-boot)
+        imports module ‘T24275B’ (T24275B.hs)
+  which imports module ‘T24275A’ (./T24275A.hs-boot)
diff --git a/testsuite/tests/driver/T24275/T24275A.hs b/testsuite/tests/driver/T24275/T24275A.hs
new file mode 100644
index 000000000000..8c7ef372bcb9
--- /dev/null
+++ b/testsuite/tests/driver/T24275/T24275A.hs
@@ -0,0 +1 @@
+module T24275A where
diff --git a/testsuite/tests/driver/T24275/T24275A.hs-boot b/testsuite/tests/driver/T24275/T24275A.hs-boot
new file mode 100644
index 000000000000..795450d2315c
--- /dev/null
+++ b/testsuite/tests/driver/T24275/T24275A.hs-boot
@@ -0,0 +1,3 @@
+module T24275A where
+
+import T24275B
diff --git a/testsuite/tests/driver/T24275/T24275B.hs b/testsuite/tests/driver/T24275/T24275B.hs
new file mode 100644
index 000000000000..ddff8fff9545
--- /dev/null
+++ b/testsuite/tests/driver/T24275/T24275B.hs
@@ -0,0 +1,3 @@
+module T24275B where
+
+import {-# SOURCE #-} T24275A
diff --git a/testsuite/tests/driver/T24275/T24275B.hs-boot b/testsuite/tests/driver/T24275/T24275B.hs-boot
new file mode 100644
index 000000000000..ec0367607361
--- /dev/null
+++ b/testsuite/tests/driver/T24275/T24275B.hs-boot
@@ -0,0 +1 @@
+module T24275B where
diff --git a/testsuite/tests/driver/T24275/all.T b/testsuite/tests/driver/T24275/all.T
new file mode 100644
index 000000000000..7dfdfbe207c0
--- /dev/null
+++ b/testsuite/tests/driver/T24275/all.T
@@ -0,0 +1 @@
+test('T24275', extra_files(['T24275A.hs','T24275A.hs-boot','T24275B.hs-boot','T24275B.hs']), multimod_compile_fail, ['T24275B',''])
-- 
GitLab