diff --git a/compiler/basicTypes/Lexeme.hs b/compiler/basicTypes/Lexeme.hs index c5bda4dbdd36c63f81edac9666eb5e02d6f2f89d..a2409614d1f4cccc3e98a1340e0591d056f60b9a 100644 --- a/compiler/basicTypes/Lexeme.hs +++ b/compiler/basicTypes/Lexeme.hs @@ -28,6 +28,7 @@ module Lexeme ( ) where import FastString +import Util ((<||>)) import Data.Char import qualified Data.Set as Set @@ -194,7 +195,8 @@ okConSymOcc str = all okSymChar str && -- but not worrying about case or clashing with reserved words? okIdOcc :: String -> Bool okIdOcc str - = let hashes = dropWhile okIdChar str in + -- TODO. #10196. Only allow modifier letters in the suffix of an identifier. + = let hashes = dropWhile (okIdChar <||> okIdSuffixChar) str in all (== '#') hashes -- -XMagicHash allows a suffix of hashes -- of course, `all` says "True" to an empty list @@ -210,6 +212,13 @@ okIdChar c = case generalCategory c of OtherNumber -> True _ -> c == '\'' || c == '_' +-- | Is this character acceptable in the suffix of an identifier. +-- See alexGetByte in Lexer.x +okIdSuffixChar :: Char -> Bool +okIdSuffixChar c = case generalCategory c of + ModifierLetter -> True -- See #10196 + _ -> False + -- | Is this character acceptable in a symbol (after the first char)? -- See alexGetByte in Lexer.x okSymChar :: Char -> Bool diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index 432686e5af321dc5bd87107fd8088b45a634d2d1..7fbbd132fc25614177bb0e9a3176e2185ad50399 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -156,7 +156,10 @@ $graphic = [$small $large $symbol $digit $special $unigraphic \"\'] $binit = 0-1 $octit = 0-7 $hexit = [$decdigit A-F a-f] -$idchar = [$small $large $digit \'] + +$suffix = \x07 -- Trick Alex into handling Unicode. See alexGetByte. +-- TODO #10196. Only allow modifier letters in the suffix of an identifier. +$idchar = [$small $large $digit $suffix \'] $pragmachar = [$small $large $digit] @@ -1835,6 +1838,7 @@ alexGetByte (AI loc s) symbol = '\x04' space = '\x05' other_graphic = '\x06' + suffix = '\x07' adj_c | c <= '\x06' = non_graphic @@ -1851,7 +1855,7 @@ alexGetByte (AI loc s) UppercaseLetter -> upper LowercaseLetter -> lower TitlecaseLetter -> upper - ModifierLetter -> other_graphic + ModifierLetter -> suffix -- see #10196 OtherLetter -> lower -- see #1103 NonSpacingMark -> other_graphic SpacingCombiningMark -> other_graphic diff --git a/compiler/utils/Util.hs b/compiler/utils/Util.hs index aa3a19b64c2758a5d86b7e367a4b8390f20c6346..b2492f169fd553838e90b6b33cb0c4207b16146d 100644 --- a/compiler/utils/Util.hs +++ b/compiler/utils/Util.hs @@ -55,6 +55,7 @@ module Util ( isEqual, eqListBy, eqMaybeBy, thenCmp, cmpList, removeSpaces, + (<&&>), (<||>), -- * Edit distance fuzzyMatch, fuzzyLookup, @@ -115,6 +116,10 @@ import Data.List hiding (group) import FastTypes #endif +#if __GLASGOW_HASKELL__ < 709 +import Control.Applicative (Applicative) +#endif +import Control.Applicative ( liftA2 ) import Control.Monad ( liftM ) import System.IO.Error as IO ( isDoesNotExistError ) import System.Directory ( doesDirectoryExist, getModificationTime ) @@ -653,6 +658,15 @@ cmpList cmp (a:as) (b:bs) removeSpaces :: String -> String removeSpaces = dropWhileEndLE isSpace . dropWhile isSpace +-- Boolean operators lifted to Applicative +(<&&>) :: Applicative f => f Bool -> f Bool -> f Bool +(<&&>) = liftA2 (&&) +infixr 3 <&&> -- same as (&&) + +(<||>) :: Applicative f => f Bool -> f Bool -> f Bool +(<||>) = liftA2 (||) +infixr 2 <||> -- same as (||) + {- ************************************************************************ * * diff --git a/testsuite/tests/parser/should_compile/T10196.hs b/testsuite/tests/parser/should_compile/T10196.hs new file mode 100644 index 0000000000000000000000000000000000000000..f80911829b33ece1b476abbaa67d4487a3477cd3 --- /dev/null +++ b/testsuite/tests/parser/should_compile/T10196.hs @@ -0,0 +1,13 @@ +module T10196 where + +data X = Xᵦ | Xᵤ | Xᵩ | Xᵢ | Xᵪ | Xᵣ + +f :: Int +f = + let xᵦ = 1 + xᵤ = xᵦ + xᵩ = xᵤ + xᵢ = xᵩ + xᵪ = xᵢ + xᵣ = xᵪ + in xᵣ diff --git a/testsuite/tests/parser/should_compile/all.T b/testsuite/tests/parser/should_compile/all.T index 13acedf0141306c0320d9325fc71d1bed3a26224..0bff4085c7fc28e5c49426614cd319fe0a2755c1 100644 --- a/testsuite/tests/parser/should_compile/all.T +++ b/testsuite/tests/parser/should_compile/all.T @@ -98,3 +98,4 @@ test('T7118', normal, compile, ['']) test('T7776', normal, compile, ['']) test('RdrNoStaticPointers01', when(compiler_lt('ghc', '7.9'), skip), compile, ['']) test('T5682', normal, compile, ['']) +test('T10196', normal, compile, ['']) diff --git a/testsuite/tests/parser/should_fail/T10196Fail1.hs b/testsuite/tests/parser/should_fail/T10196Fail1.hs new file mode 100644 index 0000000000000000000000000000000000000000..2f1c8f39ea3a1d90af448ece061de2689a6d198e --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail1.hs @@ -0,0 +1,4 @@ +module T10196Fail1 where + +-- Constructors are not allowed to start with a modifier letter. +data Foo = ᵦfoo diff --git a/testsuite/tests/parser/should_fail/T10196Fail1.stderr b/testsuite/tests/parser/should_fail/T10196Fail1.stderr new file mode 100644 index 0000000000000000000000000000000000000000..3c4a173eefd4668d90543f9ef59838854e9a0c7b --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail1.stderr @@ -0,0 +1,2 @@ + +T10196Fail1.hs:4:12: error: lexical error at character '\7526' diff --git a/testsuite/tests/parser/should_fail/T10196Fail2.hs b/testsuite/tests/parser/should_fail/T10196Fail2.hs new file mode 100644 index 0000000000000000000000000000000000000000..64b3cacd6210dccd2cb64e6989344a8aaadba850 --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail2.hs @@ -0,0 +1,4 @@ +module T10196Fail2 where + +-- Variables are not allowed to start with a modifier letter. +ᵦ = 1 diff --git a/testsuite/tests/parser/should_fail/T10196Fail2.stderr b/testsuite/tests/parser/should_fail/T10196Fail2.stderr new file mode 100644 index 0000000000000000000000000000000000000000..abba8aa04c014a12ecd413c2dddcf68f2bd05b04 --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail2.stderr @@ -0,0 +1,2 @@ + +T10196Fail2.hs:4:1: error: lexical error at character '\7526' diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.hs b/testsuite/tests/parser/should_fail/T10196Fail3.hs new file mode 100644 index 0000000000000000000000000000000000000000..09b80ddeff01ace2f2a76f78aaee8653917c67fb --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail3.hs @@ -0,0 +1,6 @@ +module T10196Fail3 where + +-- Modifier letters are not allowed in the middle of an identifier. +-- And this should not be lexed as 2 separate identifiers either. +xᵦx :: Int +xᵦx = 1 diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.stderr b/testsuite/tests/parser/should_fail/T10196Fail3.stderr new file mode 100644 index 0000000000000000000000000000000000000000..64037440e23ad00a7a49e219b15dba36cb453091 --- /dev/null +++ b/testsuite/tests/parser/should_fail/T10196Fail3.stderr @@ -0,0 +1,2 @@ + +T10196Fail3.hs:5:2: error: lexical error at character '/7526' diff --git a/testsuite/tests/parser/should_fail/all.T b/testsuite/tests/parser/should_fail/all.T index 7e286cf3f23b7d387fda723865a65d38a43f0fa9..26d576757a5a44ec9a66e43159141146a092eba6 100644 --- a/testsuite/tests/parser/should_fail/all.T +++ b/testsuite/tests/parser/should_fail/all.T @@ -86,3 +86,6 @@ test('ExportCommaComma', normal, compile_fail, ['']) test('T8430', literate, compile_fail, ['']) test('T8431', [timeout_multiplier(0.05)], compile_fail, ['-XAlternativeLayoutRule']) test('T8506', normal, compile_fail, ['']) +test('T10196Fail1', normal, compile_fail, ['']) +test('T10196Fail2', normal, compile_fail, ['']) +test('T10196Fail3', expect_broken(10196), compile_fail, [''])