From cd339ef0e8ce940902df79ed1d93b3af50ea6f77 Mon Sep 17 00:00:00 2001 From: Joshua Price <2855417+ElderEphemera@users.noreply.github.com> Date: Sat, 23 May 2020 20:28:13 -0400 Subject: [PATCH] Make Unicode brackets opening/closing tokens (#18225) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tokens `[|`, `|]`, `(|`, and `|)` are opening/closing tokens as described in GHC Proposal #229. This commit makes the unicode variants (`⟦`, `⟧`, `⦇`, and `⦈`) act the same as their ASCII counterparts. --- compiler/GHC/Parser/Lexer.x | 12 ++++++++---- testsuite/tests/parser/unicode/T18225A.hs | 13 +++++++++++++ testsuite/tests/parser/unicode/T18225B.hs | 11 +++++++++++ testsuite/tests/parser/unicode/T18225B.stderr | 1 + testsuite/tests/parser/unicode/all.T | 2 ++ 5 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 testsuite/tests/parser/unicode/T18225A.hs create mode 100644 testsuite/tests/parser/unicode/T18225B.hs create mode 100644 testsuite/tests/parser/unicode/T18225B.stderr diff --git a/compiler/GHC/Parser/Lexer.x b/compiler/GHC/Parser/Lexer.x index 5bdf4c41f36e..7606dd3f9ee9 100644 --- a/compiler/GHC/Parser/Lexer.x +++ b/compiler/GHC/Parser/Lexer.x @@ -564,11 +564,11 @@ $tab { warnTab } -- -- The precise rules are as follows: -- --- * Identifiers, literals, and opening brackets (, (#, [, [|, [||, [p|, [e|, --- [t|, {, are considered "opening tokens". The function followedByOpeningToken --- tests whether the next token is an opening token. +-- * Identifiers, literals, and opening brackets (, (#, (|, [, [|, [||, [p|, +-- [e|, [t|, {, ⟦, ⦇, are considered "opening tokens". The function +-- followedByOpeningToken tests whether the next token is an opening token. -- --- * Identifiers, literals, and closing brackets ), #), ], |], }, +-- * Identifiers, literals, and closing brackets ), #), |), ], |], }, ⟧, ⦈, -- are considered "closing tokens". The function precededByClosingToken tests -- whether the previous token is a closing token. -- @@ -1068,6 +1068,8 @@ followedByOpeningToken _ _ _ (AI _ buf) ('\"', _) -> True ('\'', _) -> True ('_', _) -> True + ('⟦', _) -> True + ('⦇', _) -> True (c, _) -> isAlphaNum c -- See Note [Whitespace-sensitive operator parsing] @@ -1080,6 +1082,8 @@ precededByClosingToken _ (AI _ buf) _ _ = '\"' -> True '\'' -> True '_' -> True + '⟧' -> True + '⦈' -> True c -> isAlphaNum c {-# INLINE nextCharIs #-} diff --git a/testsuite/tests/parser/unicode/T18225A.hs b/testsuite/tests/parser/unicode/T18225A.hs new file mode 100644 index 000000000000..5e340a3fe6b6 --- /dev/null +++ b/testsuite/tests/parser/unicode/T18225A.hs @@ -0,0 +1,13 @@ +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE UnicodeSyntax #-} + +module T18225A where + +(!) :: IO a -> b -> b +(!) _ = id + +test1 :: Int +test1 = $⟦1⟧ + +test2 :: Int +test2 = ⟦2⟧!2 diff --git a/testsuite/tests/parser/unicode/T18225B.hs b/testsuite/tests/parser/unicode/T18225B.hs new file mode 100644 index 000000000000..5bd15f1e1388 --- /dev/null +++ b/testsuite/tests/parser/unicode/T18225B.hs @@ -0,0 +1,11 @@ +{-# LANGUAGE Arrows #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE UnicodeSyntax #-} + +module T18225B where + +f :: (a, (b, c)) -> b +f (_, (x, _)) = x + +test :: a -> a +test = proc x -> ⦇f⦈$([|x|]) diff --git a/testsuite/tests/parser/unicode/T18225B.stderr b/testsuite/tests/parser/unicode/T18225B.stderr new file mode 100644 index 000000000000..67cff08f214f --- /dev/null +++ b/testsuite/tests/parser/unicode/T18225B.stderr @@ -0,0 +1 @@ +T18225B.hs:11:23: Parse error in command: [| x |] diff --git a/testsuite/tests/parser/unicode/all.T b/testsuite/tests/parser/unicode/all.T index 55f7fd09be98..54a3b7cb1fdc 100644 --- a/testsuite/tests/parser/unicode/all.T +++ b/testsuite/tests/parser/unicode/all.T @@ -28,3 +28,5 @@ test('T10907', normal, compile, ['']) test('T7650', normal, compile, ['']) test('brackets', normal, compile, ['']) +test('T18225A', normal, compile, ['']) +test('T18225B', normal, compile_fail, ['']) -- GitLab