From a525a92a37cc202b6db09a827ba9df9eb8c9fa79 Mon Sep 17 00:00:00 2001
From: sheaf <sam.derbyshire@gmail.com>
Date: Thu, 3 Aug 2023 14:32:39 +0200
Subject: [PATCH] Adjust reporting of unused tyvars in data FamInsts

This commit adjusts the validity checking of data family
instances to improve the reporting of unused type variables.

See Note [Out of scope tvs in data family instances] in GHC.Tc.Validity.

The problem was that, in a situation such as

  data family D :: Type
  data instance forall (d :: Type). D = MkD

the RHS passed to 'checkFamPatBinders' would be the TyCon app

  R:D d

which mentions the type variable 'd' quantified in the user-written
forall. Thus, when computing the set of unused type variables in
the RHS of the data family instance, we would find that 'd' is used,
and report a strange error message that would say that 'd' is not
bound on the LHS.

To fix this, we special-case the data-family instance case,
manually extracting all the type variables that appear in the
arguments of all the data constructores of the data family instance.

Fixes #23778
---
 compiler/GHC/Tc/Validity.hs                   | 75 ++++++++++++++++++-
 .../should_fail/ExplicitForAllFams4b.stderr   | 18 ++---
 .../tests/typecheck/should_fail/T23734.hs     | 17 +++++
 .../tests/typecheck/should_fail/T23778.hs     |  7 ++
 .../tests/typecheck/should_fail/T23778.stderr |  5 ++
 testsuite/tests/typecheck/should_fail/all.T   |  1 +
 6 files changed, 111 insertions(+), 12 deletions(-)
 create mode 100644 testsuite/tests/typecheck/should_fail/T23734.hs
 create mode 100644 testsuite/tests/typecheck/should_fail/T23778.hs
 create mode 100644 testsuite/tests/typecheck/should_fail/T23778.stderr

diff --git a/compiler/GHC/Tc/Validity.hs b/compiler/GHC/Tc/Validity.hs
index 394de6c8e0af..ece2c26c5192 100644
--- a/compiler/GHC/Tc/Validity.hs
+++ b/compiler/GHC/Tc/Validity.hs
@@ -45,6 +45,7 @@ import GHC.Core.Unify ( typesAreApart, tcMatchTyX_BM, BindFlag(..) )
 import GHC.Core.Coercion
 import GHC.Core.Coercion.Axiom
 import GHC.Core.Class
+import GHC.Core.DataCon
 import GHC.Core.TyCon
 import GHC.Core.Predicate
 import GHC.Core.TyCo.FVs
@@ -2293,10 +2294,13 @@ checkFamPatBinders fam_tc qtvs pats rhs
   = do { traceTc "checkFamPatBinders" $
          vcat [ debugPprType (mkTyConApp fam_tc pats)
               , ppr (mkTyConApp fam_tc pats)
+              , text "rhs:" <+> ppr rhs
               , text "qtvs:" <+> ppr qtvs
-              , text "rhs_tvs:" <+> ppr (fvVarSet rhs_fvs)
+              , text "rhs_fvs:" <+> ppr (fvVarSet rhs_fvs)
               , text "cpt_tvs:" <+> ppr cpt_tvs
-              , text "inj_cpt_tvs:" <+> ppr inj_cpt_tvs ]
+              , text "inj_cpt_tvs:" <+> ppr inj_cpt_tvs
+              , text "bad_rhs_tvs:" <+> ppr bad_rhs_tvs
+              , text "bad_qtvs:" <+> ppr bad_qtvs ]
 
          -- Check for implicitly-bound tyvars, mentioned on the
          -- RHS but not bound on the LHS
@@ -2315,6 +2319,26 @@ checkFamPatBinders fam_tc qtvs pats rhs
        ; check_tvs FamInstLHSUnusedBoundTyVars bad_qtvs
        }
   where
+    rhs_fvs
+      -- Special treatment for data family instances.
+      --
+      -- See Note [Out of scope tvs in data family instances].
+      | Just (tc_rep, _rep_args) <- splitTyConApp_maybe rhs
+      , Just (fam_tc', _fam_args, _) <- tyConFamInstSig_maybe tc_rep
+      , fam_tc' == fam_tc
+      = if isGadtSyntaxTyCon tc_rep
+        then emptyFV
+        else
+          let datacons_fvs =
+                unionsFV
+                  [ delFVs user_tvbs (tyCoFVsOfTypes arg_tys)
+                  | dc <- tyConDataCons tc_rep
+                  , let arg_tys = map scaledThing $ dataConOrigArgTys dc
+                        user_tvbs = mkVarSet $ dataConUserTyVars dc ]
+          in datacons_fvs
+      | otherwise
+      = tyCoFVsOfType rhs
+
     cpt_tvs     = tyCoVarsOfTypes pats
     inj_cpt_tvs = fvVarSet $ injectiveVarsOfTypes False pats
       -- The type variables that are in injective positions.
@@ -2325,7 +2349,7 @@ checkFamPatBinders fam_tc qtvs pats rhs
       -- NB: It's OK to use the nondeterministic `fvVarSet` function here,
       -- since the order of `inj_cpt_tvs` is never revealed in an error
       -- message.
-    rhs_fvs     = tyCoFVsOfType rhs
+
     used_tvs    = cpt_tvs `unionVarSet` fvVarSet rhs_fvs
     bad_qtvs    = filterOut (`elemVarSet` used_tvs) qtvs
                   -- Bound but not used at all
@@ -2338,6 +2362,51 @@ checkFamPatBinders fam_tc qtvs pats rhs
         TcRnIllegalInstance $ IllegalFamilyInstance $
         mk_err (map tyVarName tvs)
 
+{- Note [Out of scope tvs in data family instances]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider the following data family instance declaration, which we want to reject:
+
+  type D :: Type
+  data family D
+  data instance forall (a :: Type). D = MkD
+
+The problem is that the user quantified the type variable `a` in the instance,
+but didn't use it anywhere.
+
+However, from the perspective of GHC, the RHS of the instance declaration
+is "R:D a", where "R:D" is the representation TyCon that
+GHC.Tc.TyCl.Instance.tcDataFamInstDecl builds; it always includes ALL the
+quantified variables. This means that naively computing the free tyvars of the
+RHS would cause us to report `a` as used on the RHS, which is rather confusing
+for the user (as reported in #23778):
+
+    * Type variable `a' is mentioned in the RHS,
+      but not bound on the LHS of the family instance
+
+To avoid this, we manually compute which type variables are used on the RHS
+of the data family instance in terms of the types of the data family instance
+constructors:
+
+  Vanilla data instance declarations
+
+    For a vanilla data declaration, we collect the free type variables of
+    the data constructors. For example
+
+      data instance D1 x a = K1 a | forall b. K2 a b c
+
+    will give rise to RHS FVs {a,c}.
+
+  GADT data instance declarations
+
+    A GADT data declarations brings its own type variables into scope, e.g.
+
+      data instance D3 y b where
+        C1 :: ...
+        C2 :: ...
+
+    So in this case we use emptyFVs for the RHS tyvars.
+-}
+
 -- | Checks that a list of type patterns is valid in a matching (LHS)
 -- position of a class instances or type/data family instance.
 --
diff --git a/testsuite/tests/indexed-types/should_fail/ExplicitForAllFams4b.stderr b/testsuite/tests/indexed-types/should_fail/ExplicitForAllFams4b.stderr
index 46ef1d94a4ec..508938616a51 100644
--- a/testsuite/tests/indexed-types/should_fail/ExplicitForAllFams4b.stderr
+++ b/testsuite/tests/indexed-types/should_fail/ExplicitForAllFams4b.stderr
@@ -24,9 +24,9 @@ ExplicitForAllFams4b.hs:12:27: error: [GHC-34447]
       K (a, Bool) -- Defined at ExplicitForAllFams4b.hs:12:27
       K _ -- Defined at ExplicitForAllFams4b.hs:13:27
 
-ExplicitForAllFams4b.hs:13:22: error: [GHC-53634]
-    • Out of scope type variable ‘b’ in the RHS of a family instance.
-        All such variables must be bound on the LHS.
+ExplicitForAllFams4b.hs:13:22: error: [GHC-30337]
+    • Type variable ‘b’ is bound by a forall,
+        but not used in the family instance.
     • In the data instance declaration for ‘K’
 
 ExplicitForAllFams4b.hs:16:27: error: [GHC-53634]
@@ -39,9 +39,9 @@ ExplicitForAllFams4b.hs:16:30: error: [GHC-34447]
       L (a, Bool) -- Defined at ExplicitForAllFams4b.hs:16:30
       L _ -- Defined at ExplicitForAllFams4b.hs:17:30
 
-ExplicitForAllFams4b.hs:17:25: error: [GHC-53634]
-    • Out of scope type variable ‘b’ in the RHS of a family instance.
-        All such variables must be bound on the LHS.
+ExplicitForAllFams4b.hs:17:25: error: [GHC-30337]
+    • Type variable ‘b’ is bound by a forall,
+        but not used in the family instance.
     • In the newtype instance declaration for ‘L’
 
 ExplicitForAllFams4b.hs:24:3: error: [GHC-95424]
@@ -76,9 +76,9 @@ ExplicitForAllFams4b.hs:28:15: error: [GHC-53634]
     • In the type instance declaration for ‘CT’
       In the instance declaration for ‘C Bool’
 
-ExplicitForAllFams4b.hs:29:15: error: [GHC-53634]
-    • Out of scope type variable ‘b’ in the RHS of a family instance.
-        All such variables must be bound on the LHS.
+ExplicitForAllFams4b.hs:29:15: error: [GHC-30337]
+    • Type variable ‘b’ is bound by a forall,
+        but not used in the family instance.
     • In the data instance declaration for ‘CD’
       In the instance declaration for ‘C Bool’
 
diff --git a/testsuite/tests/typecheck/should_fail/T23734.hs b/testsuite/tests/typecheck/should_fail/T23734.hs
new file mode 100644
index 000000000000..f00c16a673e3
--- /dev/null
+++ b/testsuite/tests/typecheck/should_fail/T23734.hs
@@ -0,0 +1,17 @@
+{-# LANGUAGE TypeFamilies, ExplicitForAll, PolyKinds #-}
+module T23734 where
+
+import Data.Kind
+
+type family F
+type instance forall a. F = ()
+
+type family G where
+  forall b. G = ()
+
+class C where
+  type family H
+  type forall c. H = ()
+
+data family D :: Type
+data instance forall d. D = MkD
diff --git a/testsuite/tests/typecheck/should_fail/T23778.hs b/testsuite/tests/typecheck/should_fail/T23778.hs
new file mode 100644
index 000000000000..40780f5f67c3
--- /dev/null
+++ b/testsuite/tests/typecheck/should_fail/T23778.hs
@@ -0,0 +1,7 @@
+{-# LANGUAGE TypeFamilies, NoPolyKinds #-}
+module T23778 where
+
+import Data.Kind
+
+data family D :: Type -> Type
+data instance forall d u v. D u = MkD1 | MkD2 u
diff --git a/testsuite/tests/typecheck/should_fail/T23778.stderr b/testsuite/tests/typecheck/should_fail/T23778.stderr
new file mode 100644
index 000000000000..1faf580c1896
--- /dev/null
+++ b/testsuite/tests/typecheck/should_fail/T23778.stderr
@@ -0,0 +1,5 @@
+
+T23778.hs:7:22: error: [GHC-30337]
+    • Type variables ‘d’, ‘v’ are bound by a forall,
+        but not used in the family instance.
+    • In the data instance declaration for ‘D’
diff --git a/testsuite/tests/typecheck/should_fail/all.T b/testsuite/tests/typecheck/should_fail/all.T
index d5d40040db81..0eff733f82fa 100644
--- a/testsuite/tests/typecheck/should_fail/all.T
+++ b/testsuite/tests/typecheck/should_fail/all.T
@@ -684,6 +684,7 @@ test('CommonFieldResultTypeMismatch', normal, compile_fail, [''])
 test('CommonFieldTypeMismatch', normal, compile_fail, [''])
 test('T17284', normal, compile_fail, [''])
 test('T23427', normal, compile_fail, [''])
+test('T23778', normal, compile_fail, [''])
 test('T13981A', [extra_files(['T13981A.hs-boot', 'T13981B.hs', 'T13981C.hs', 'T13981F.hs'])], multimod_compile_fail, ['T13981A', '-v0'])
 test('T22560_fail_a', normal, compile_fail, [''])
 test('T22560_fail_b', normal, compile_fail, [''])
-- 
GitLab