Commit 358e0a8d authored by thomie, committed by Ben Gamari
Browse files

parser: Allow Lm (MODIFIER LETTER) category in identifiers



Easy fix in the parser to stop regressions, due to Unicode 7.0 changing
the classification of some prior code points.
Signed-off-by: Austin Seipp <austin@well-typed.com>

Test Plan: `tests/parser/should_compile/T10196.hs`

Reviewers: hvr, austin, bgamari

Reviewed By: austin, bgamari

Subscribers: thomie, bgamari

Differential Revision: https://phabricator.haskell.org/D969

GHC Trac Issues: #10196
parent 51de9342
......@@ -28,6 +28,7 @@ module Lexeme (
) where
import FastString
import Util ((<||>))
import Data.Char
import qualified Data.Set as Set
......@@ -194,7 +195,8 @@ okConSymOcc str = all okSymChar str &&
-- but not worrying about case or clashing with reserved words?
-- Accepts a string made of identifier characters followed only by '#'
-- characters (possibly none).
okIdOcc :: String -> Bool
okIdOcc str
-- NOTE(review): this hunk shows both sides of the diff. The line directly
-- below is the pre-patch right-hand side (identifier characters only); the
-- `= let hashes` line after the TODO is its replacement, which additionally
-- skips characters accepted by okIdSuffixChar.
= let hashes = dropWhile okIdChar str in
-- TODO. #10196. Only allow modifier letters in the suffix of an identifier.
-- As written, suffix characters are skipped anywhere in the string, not just
-- at the end, which is what the TODO above refers to.
= let hashes = dropWhile (okIdChar <||> okIdSuffixChar) str in
all (== '#') hashes -- -XMagicHash allows a suffix of hashes
-- of course, `all` says "True" to an empty list
......@@ -210,6 +212,13 @@ okIdChar c = case generalCategory c of
OtherNumber -> True
_ -> c == '\'' || c == '_'
-- | Report whether a character may appear in the suffix of an identifier.
-- Only characters whose Unicode general category is 'ModifierLetter'
-- qualify (see #10196); everything else is rejected.
-- See alexGetByte in Lexer.x
okIdSuffixChar :: Char -> Bool
okIdSuffixChar = (== ModifierLetter) . generalCategory
-- | Is this character acceptable in a symbol (after the first char)?
-- See alexGetByte in Lexer.x
okSymChar :: Char -> Bool
......
......@@ -156,7 +156,10 @@ $graphic = [$small $large $symbol $digit $special $unigraphic \"\']
$binit = 0-1
$octit = 0-7
$hexit = [$decdigit A-F a-f]
-- NOTE(review): the next line is the pre-patch definition of $idchar shown by
-- the diff; the definition further down (which adds $suffix) replaces it.
$idchar = [$small $large $digit \']
-- \x07 is the stand-in byte that alexGetByte yields for characters in the
-- ModifierLetter category (its local `suffix` constant).
$suffix = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
-- TODO #10196. Only allow modifier letters in the suffix of an identifier.
-- Because $suffix is a member of $idchar, a modifier letter is accepted at any
-- non-initial position of an identifier, not only at the end.
$idchar = [$small $large $digit $suffix \']
$pragmachar = [$small $large $digit]
......@@ -1835,6 +1838,7 @@ alexGetByte (AI loc s)
symbol = '\x04'
space = '\x05'
other_graphic = '\x06'
suffix = '\x07'
adj_c
| c <= '\x06' = non_graphic
......@@ -1851,7 +1855,7 @@ alexGetByte (AI loc s)
UppercaseLetter -> upper
LowercaseLetter -> lower
TitlecaseLetter -> upper
ModifierLetter -> other_graphic
ModifierLetter -> suffix -- see #10196
OtherLetter -> lower -- see #1103
NonSpacingMark -> other_graphic
SpacingCombiningMark -> other_graphic
......
......@@ -55,6 +55,7 @@ module Util (
isEqual, eqListBy, eqMaybeBy,
thenCmp, cmpList,
removeSpaces,
(<&&>), (<||>),
-- * Edit distance
fuzzyMatch, fuzzyLookup,
......@@ -115,6 +116,10 @@ import Data.List hiding (group)
import FastTypes
#endif
#if __GLASGOW_HASKELL__ < 709
import Control.Applicative (Applicative)
#endif
import Control.Applicative ( liftA2 )
import Control.Monad ( liftM )
import System.IO.Error as IO ( isDoesNotExistError )
import System.Directory ( doesDirectoryExist, getModificationTime )
......@@ -653,6 +658,15 @@ cmpList cmp (a:as) (b:bs)
-- | Strip leading and trailing whitespace (as classified by 'isSpace')
-- from a string; interior whitespace is left untouched.
removeSpaces :: String -> String
removeSpaces str = dropWhileEndLE isSpace (dropWhile isSpace str)
-- Boolean operators lifted to Applicative

-- | Conjunction of two 'Bool'-valued applicative computations: the results
-- are combined with '&&' via 'liftA2'.
infixr 3 <&&> -- same as (&&)
(<&&>) :: Applicative f => f Bool -> f Bool -> f Bool
lhs <&&> rhs = liftA2 (&&) lhs rhs

-- | Disjunction of two 'Bool'-valued applicative computations: the results
-- are combined with '||' via 'liftA2'.
infixr 2 <||> -- same as (||)
(<||>) :: Applicative f => f Bool -> f Bool -> f Bool
lhs <||> rhs = liftA2 (||) lhs rhs
{-
************************************************************************
* *
......
module T10196 where

-- Regression test for #10196: identifiers may end in a character from the
-- Unicode ModifierLetter (Lm) category.
-- NOTE(review): the page this was recovered from had stripped the modifier
-- letters, leaving duplicate constructors and an `x = x` chain. The suffixes
-- below are a reconstruction; any distinct Lm characters serve the test.
data X = Xᵦ | Xᵤ | Xᵩ | Xᵢ | Xᵪ | Xᵣ

-- Each binding differs only in its modifier-letter suffix, so the chain
-- evaluates to 1 only if the lexer keeps the suffixes distinct.
f :: Int
f =
  let xᵦ = 1
      xᵤ = xᵦ
      xᵩ = xᵤ
      xᵢ = xᵩ
      xᵪ = xᵢ
      xᵣ = xᵪ
  in xᵣ
......@@ -98,3 +98,4 @@ test('T7118', normal, compile, [''])
test('T7776', normal, compile, [''])
test('RdrNoStaticPointers01', when(compiler_lt('ghc', '7.9'), skip), compile, [''])
test('T5682', normal, compile, [''])
# Trac #10196: identifiers containing MODIFIER LETTER (Lm) characters must compile.
test('T10196', normal, compile, [''])
module T10196Fail1 where
-- Constructors are not allowed to start with a modifier letter.
data Foo = ᵦfoo
T10196Fail1.hs:4:12: error: lexical error at character '\7526'
module T10196Fail2 where
-- Variables are not allowed to start with a modifier letter.
ᵦ = 1
T10196Fail2.hs:4:1: error: lexical error at character '\7526'
module T10196Fail3 where
-- Modifier letters are not allowed in the middle of an identifier.
-- And this should not be lexed as 2 separate identifiers either.
xᵦx :: Int
xᵦx = 1
T10196Fail3.hs:5:2: error: lexical error at character '\7526'
......@@ -86,3 +86,6 @@ test('ExportCommaComma', normal, compile_fail, [''])
test('T8430', literate, compile_fail, [''])
test('T8431', [timeout_multiplier(0.05)], compile_fail, ['-XAlternativeLayoutRule'])
test('T8506', normal, compile_fail, [''])
# Trac #10196: a modifier letter must not start a constructor (Fail1) or a
# variable (Fail2).
test('T10196Fail1', normal, compile_fail, [''])
test('T10196Fail2', normal, compile_fail, [''])
# Fail3 covers a modifier letter in the middle of an identifier. It is
# registered as expect_broken against ticket 10196 -- presumably because the
# lexer still accepts that form (see the TODO in the lexer changes); confirm.
test('T10196Fail3', expect_broken(10196), compile_fail, [''])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment