From 358e0a8d4cb49baa29cf6b001eaa9d4ac428bb2d Mon Sep 17 00:00:00 2001 From: Thomas Miedema <thomasmiedema@gmail.com> Date: Fri, 3 Jul 2015 22:37:18 +0200 Subject: [PATCH] parser: Allow Lm (MODIFIER LETTER) category in identifiers Easy fix in the parser to stop regressions, due to Unicode 7.0 changing the classification of some prior code points. Signed-off-by: Austin Seipp <austin@well-typed.com> Test Plan: `tests/parser/should_compile/T10196.hs` Reviewers: hvr, austin, bgamari Reviewed By: austin, bgamari Subscribers: thomie, bgamari Differential Revision: https://phabricator.haskell.org/D969 GHC Trac Issues: #10196 --- compiler/basicTypes/Lexeme.hs | 11 ++++++++++- compiler/parser/Lexer.x | 8 ++++++-- compiler/utils/Util.hs | 14 ++++++++++++++ testsuite/tests/parser/should_compile/T10196.hs | 13 +++++++++++++ testsuite/tests/parser/should_compile/all.T | 1 + testsuite/tests/parser/should_fail/T10196Fail1.hs | 4 ++++ .../tests/parser/should_fail/T10196Fail1.stderr | 2 ++ testsuite/tests/parser/should_fail/T10196Fail2.hs | 4 ++++ .../tests/parser/should_fail/T10196Fail2.stderr | 2 ++ testsuite/tests/parser/should_fail/T10196Fail3.hs | 6 ++++++ .../tests/parser/should_fail/T10196Fail3.stderr | 2 ++ testsuite/tests/parser/should_fail/all.T | 3 +++ 12 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 testsuite/tests/parser/should_compile/T10196.hs create mode 100644 testsuite/tests/parser/should_fail/T10196Fail1.hs create mode 100644 testsuite/tests/parser/should_fail/T10196Fail1.stderr create mode 100644 testsuite/tests/parser/should_fail/T10196Fail2.hs create mode 100644 testsuite/tests/parser/should_fail/T10196Fail2.stderr create mode 100644 testsuite/tests/parser/should_fail/T10196Fail3.hs create mode 100644 testsuite/tests/parser/should_fail/T10196Fail3.stderr diff --git a/compiler/basicTypes/Lexeme.hs b/compiler/basicTypes/Lexeme.hs index c5bda4dbdd36..a2409614d1f4 100644 --- a/compiler/basicTypes/Lexeme.hs +++ b/compiler/basicTypes/Lexeme.hs @@ -28,6 +28,7 @@ module Lexeme ( ) where import FastString +import Util ((<||>)) import Data.Char import qualified Data.Set as Set @@ -194,7 +195,8 @@ okConSymOcc str = all okSymChar str && -- but not worrying about case or clashing with reserved words? okIdOcc :: String -> Bool okIdOcc str - = let hashes = dropWhile okIdChar str in + -- TODO. #10196. Only allow modifier letters in the suffix of an identifier. + = let hashes = dropWhile (okIdChar <||> okIdSuffixChar) str in all (== '#') hashes -- -XMagicHash allows a suffix of hashes -- of course, `all` says "True" to an empty list @@ -210,6 +212,13 @@ okIdChar c = case generalCategory c of OtherNumber -> True _ -> c == '\'' || c == '_' +-- | Is this character acceptable in the suffix of an identifier. +-- See alexGetByte in Lexer.x +okIdSuffixChar :: Char -> Bool +okIdSuffixChar c = case generalCategory c of + ModifierLetter -> True -- See #10196 + _ -> False + -- | Is this character acceptable in a symbol (after the first char)? -- See alexGetByte in Lexer.x okSymChar :: Char -> Bool diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index 432686e5af32..7fbbd132fc25 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -156,7 +156,10 @@ $graphic = [$small $large $symbol $digit $special $unigraphic \"\'] $binit = 0-1 $octit = 0-7 $hexit = [$decdigit A-F a-f] -$idchar = [$small $large $digit \'] + +$suffix = \x07 -- Trick Alex into handling Unicode. See alexGetByte. +-- TODO #10196. Only allow modifier letters in the suffix of an identifier. +$idchar = [$small $large $digit $suffix \'] $pragmachar = [$small $large $digit] @@ -1835,6 +1838,7 @@ alexGetByte (AI loc s) symbol = '\x04' space = '\x05' other_graphic = '\x06' + suffix = '\x07' adj_c | c <= '\x06' = non_graphic @@ -1851,7 +1855,7 @@ alexGetByte (AI loc s) UppercaseLetter -> upper LowercaseLetter -> lower TitlecaseLetter -> upper - ModifierLetter -> other_graphic + ModifierLetter -> suffix -- see #10196 OtherLetter -> lower -- see #1103 NonSpacingMark -> other_graphic SpacingCombiningMark -> other_graphic diff --git a/compiler/utils/Util.hs b/compiler/utils/Util.hs index aa3a19b64c27..b2492f169fd5 100644 --- a/compiler/utils/Util.hs +++ b/compiler/utils/Util.hs @@ -55,6 +55,7 @@ module Util ( isEqual, eqListBy, eqMaybeBy, thenCmp, cmpList, removeSpaces, + (<&&>), (<||>), -- * Edit distance fuzzyMatch, fuzzyLookup, @@ -115,6 +116,10 @@ import Data.List hiding (group) import FastTypes #endif +#if __GLASGOW_HASKELL__ < 709 +import Control.Applicative (Applicative) +#endif +import Control.Applicative ( liftA2 ) import Control.Monad ( liftM ) import System.IO.Error as IO ( isDoesNotExistError ) import System.Directory ( doesDirectoryExist, getModificationTime ) @@ -653,6 +658,15 @@ cmpList cmp (a:as) (b:bs) removeSpaces :: String -> String removeSpaces = dropWhileEndLE isSpace . dropWhile isSpace +-- Boolean operators lifted to Applicative +(<&&>) :: Applicative f => f Bool -> f Bool -> f Bool +(<&&>) = liftA2 (&&) +infixr 3 <&&> -- same as (&&) + +(<||>) :: Applicative f => f Bool -> f Bool -> f Bool +(<||>) = liftA2 (||) +infixr 2 <||> -- same as (||) + {- ************************************************************************ * * diff --git a/testsuite/tests/parser/should_compile/T10196.hs b/testsuite/tests/parser/should_compile/T10196.hs new file mode 100644 index 000000000000..f80911829b33 --- /dev/null +++ b/testsuite/tests/parser/should_compile/T10196.hs @@ -0,0 +1,13 @@ +module T10196 where + +data X = Xᵦ | Xᵤ | Xᵩ | Xᵢ | Xᵪ | Xᵣ + +f :: Int +f = + let xᵦ = 1 + xᵤ = xᵦ + xᵩ = xᵤ + xᵢ = xᵩ + xᵪ = xᵢ + xᵣ = xᵪ + in xᵣ diff --git a/testsuite/tests/parser/should_compile/all.T b/testsuite/tests/parser/should_compile/all.T index 13acedf01413..0bff4085c7fc 100644 --- a/testsuite/tests/parser/should_compile/all.T +++ b/testsuite/tests/parser/should_compile/all.T @@ -98,3 +98,4 @@ test('T7118', normal, compile, ['']) test('T7776', normal, compile, ['']) test('RdrNoStaticPointers01', when(compiler_lt('ghc', '7.9'), skip), compile, ['']) test('T5682', normal, compile, ['']) +test('T10196', normal, compile, ['']) diff --git a/testsuite/tests/parser/should_fail/T10196Fail1.hs b/testsuite/tests/parser/should_fail/T10196Fail1.hs new file mode 100644 index 000000000000..2f1c8f39ea3a --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail1.hs @@ -0,0 +1,4 @@ +module T10196Fail1 where + +-- Constructors are not allowed to start with a modifier letter. +data Foo = ᵦfoo diff --git a/testsuite/tests/parser/should_fail/T10196Fail1.stderr b/testsuite/tests/parser/should_fail/T10196Fail1.stderr new file mode 100644 index 000000000000..3c4a173eefd4 --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail1.stderr @@ -0,0 +1,2 @@ + +T10196Fail1.hs:4:12: error: lexical error at character '\7526' diff --git a/testsuite/tests/parser/should_fail/T10196Fail2.hs b/testsuite/tests/parser/should_fail/T10196Fail2.hs new file mode 100644 index 000000000000..64b3cacd6210 --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail2.hs @@ -0,0 +1,4 @@ +module T10196Fail2 where + +-- Variables are not allowed to start with a modifier letter. +ᵦ = 1 diff --git a/testsuite/tests/parser/should_fail/T10196Fail2.stderr b/testsuite/tests/parser/should_fail/T10196Fail2.stderr new file mode 100644 index 000000000000..abba8aa04c01 --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail2.stderr @@ -0,0 +1,2 @@ + +T10196Fail2.hs:4:1: error: lexical error at character '\7526' diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.hs b/testsuite/tests/parser/should_fail/T10196Fail3.hs new file mode 100644 index 000000000000..09b80ddeff01 --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail3.hs @@ -0,0 +1,6 @@ +module T10196Fail3 where + +-- Modifier letters are not allowed in the middle of an identifier. +-- And this should not be lexed as 2 separate identifiers either. +xᵦx :: Int +xᵦx = 1 diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.stderr b/testsuite/tests/parser/should_fail/T10196Fail3.stderr new file mode 100644 index 000000000000..64037440e23a --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail3.stderr @@ -0,0 +1,2 @@ + +T10196Fail3.hs:5:2: error: lexical error at character '/7526' diff --git a/testsuite/tests/parser/should_fail/all.T b/testsuite/tests/parser/should_fail/all.T index 7e286cf3f23b..26d576757a5a 100644 --- a/testsuite/tests/parser/should_fail/all.T +++ b/testsuite/tests/parser/should_fail/all.T @@ -86,3 +86,6 @@ test('ExportCommaComma', normal, compile_fail, ['']) test('T8430', literate, compile_fail, ['']) test('T8431', [timeout_multiplier(0.05)], compile_fail, ['-XAlternativeLayoutRule']) test('T8506', normal, compile_fail, ['']) +test('T10196Fail1', normal, compile_fail, ['']) +test('T10196Fail2', normal, compile_fail, ['']) +test('T10196Fail3', expect_broken(10196), compile_fail, ['']) -- GitLab