From cd339ef0e8ce940902df79ed1d93b3af50ea6f77 Mon Sep 17 00:00:00 2001
From: Joshua Price <2855417+ElderEphemera@users.noreply.github.com>
Date: Sat, 23 May 2020 20:28:13 -0400
Subject: [PATCH] Make Unicode brackets opening/closing tokens (#18225)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The tokens `[|`, `|]`, `(|`, and `|)` are opening/closing tokens as
described in GHC Proposal #229. This commit makes the unicode
variants (`⟦`, `⟧`, `⦇`, and `⦈`) act the same as their ASCII
counterparts.
---
 compiler/GHC/Parser/Lexer.x                   | 12 ++++++++----
 testsuite/tests/parser/unicode/T18225A.hs     | 13 +++++++++++++
 testsuite/tests/parser/unicode/T18225B.hs     | 11 +++++++++++
 testsuite/tests/parser/unicode/T18225B.stderr |  1 +
 testsuite/tests/parser/unicode/all.T          |  2 ++
 5 files changed, 35 insertions(+), 4 deletions(-)
 create mode 100644 testsuite/tests/parser/unicode/T18225A.hs
 create mode 100644 testsuite/tests/parser/unicode/T18225B.hs
 create mode 100644 testsuite/tests/parser/unicode/T18225B.stderr

diff --git a/compiler/GHC/Parser/Lexer.x b/compiler/GHC/Parser/Lexer.x
index 5bdf4c41f36e..7606dd3f9ee9 100644
--- a/compiler/GHC/Parser/Lexer.x
+++ b/compiler/GHC/Parser/Lexer.x
@@ -564,11 +564,11 @@ $tab          { warnTab }
 --
 -- The precise rules are as follows:
 --
---  * Identifiers, literals, and opening brackets (, (#, [, [|, [||, [p|, [e|,
---    [t|, {, are considered "opening tokens". The function followedByOpeningToken
---    tests whether the next token is an opening token.
+--  * Identifiers, literals, and opening brackets (, (#, (|, [, [|, [||, [p|,
+--    [e|, [t|, {, ⟦, ⦇, are considered "opening tokens". The function
+--    followedByOpeningToken tests whether the next token is an opening token.
 --
---  * Identifiers, literals, and closing brackets ), #), ], |], },
+--  * Identifiers, literals, and closing brackets ), #), |), ], |], }, ⟧, ⦈,
 --    are considered "closing tokens". The function precededByClosingToken tests
 --    whether the previous token is a closing token.
 --
@@ -1068,6 +1068,8 @@ followedByOpeningToken _ _ _ (AI _ buf)
         ('\"', _) -> True
         ('\'', _) -> True
         ('_', _) -> True
+        ('⟦', _) -> True
+        ('⦇', _) -> True
         (c, _) -> isAlphaNum c
 
 -- See Note [Whitespace-sensitive operator parsing]
@@ -1080,6 +1082,8 @@ precededByClosingToken _ (AI _ buf) _ _ =
     '\"' -> True
     '\'' -> True
     '_' -> True
+    '⟧' -> True
+    '⦈' -> True
     c -> isAlphaNum c
 
 {-# INLINE nextCharIs #-}
diff --git a/testsuite/tests/parser/unicode/T18225A.hs b/testsuite/tests/parser/unicode/T18225A.hs
new file mode 100644
index 000000000000..5e340a3fe6b6
--- /dev/null
+++ b/testsuite/tests/parser/unicode/T18225A.hs
@@ -0,0 +1,13 @@
+{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE UnicodeSyntax #-}
+
+module T18225A where
+
+(!) :: IO a -> b -> b
+(!) _ = id
+
+test1 :: Int
+test1 = $⟦1⟧
+
+test2 :: Int
+test2 = ⟦2⟧!2
diff --git a/testsuite/tests/parser/unicode/T18225B.hs b/testsuite/tests/parser/unicode/T18225B.hs
new file mode 100644
index 000000000000..5bd15f1e1388
--- /dev/null
+++ b/testsuite/tests/parser/unicode/T18225B.hs
@@ -0,0 +1,11 @@
+{-# LANGUAGE Arrows #-}
+{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE UnicodeSyntax #-}
+
+module T18225B where
+
+f :: (a, (b, c)) -> b
+f (_, (x, _)) = x
+
+test :: a -> a
+test = proc x -> ⦇f⦈$([|x|])
diff --git a/testsuite/tests/parser/unicode/T18225B.stderr b/testsuite/tests/parser/unicode/T18225B.stderr
new file mode 100644
index 000000000000..67cff08f214f
--- /dev/null
+++ b/testsuite/tests/parser/unicode/T18225B.stderr
@@ -0,0 +1 @@
+T18225B.hs:11:23: Parse error in command: [| x |]
diff --git a/testsuite/tests/parser/unicode/all.T b/testsuite/tests/parser/unicode/all.T
index 55f7fd09be98..54a3b7cb1fdc 100644
--- a/testsuite/tests/parser/unicode/all.T
+++ b/testsuite/tests/parser/unicode/all.T
@@ -28,3 +28,5 @@ test('T10907', normal, compile, [''])
 test('T7650', normal, compile, [''])
 
 test('brackets', normal, compile, [''])
+test('T18225A', normal, compile, [''])
+test('T18225B', normal, compile_fail, [''])
-- 
GitLab