Commit 6b01d3ce authored by thomie, committed by Ben Gamari

parser: Allow Lm (MODIFIER LETTER) category in identifiers

Easy fix in the parser to stop regressions, due to Unicode 7.0 changing
the classification of some prior code points.
Signed-off-by: Austin Seipp <austin@well-typed.com>

Test Plan: `tests/parser/should_compile/T10196.hs`

Reviewers: hvr, austin, bgamari

Reviewed By: austin, bgamari

Subscribers: thomie, bgamari

Differential Revision: https://phabricator.haskell.org/D969

GHC Trac Issues: #10196
parent 39d83f23
......@@ -28,6 +28,7 @@ module Lexeme (
) where
import FastString
import Util ((<||>))
import Data.Char
import qualified Data.Set as Set
......@@ -194,7 +195,8 @@ okConSymOcc str = all okSymChar str &&
-- but not worrying about case or clashing with reserved words?
okIdOcc :: String -> Bool
okIdOcc str
  -- Drop the longest prefix made of ordinary identifier characters or
  -- identifier-suffix characters (modifier letters); whatever is left over
  -- must consist solely of '#' characters.
  -- TODO. #10196. Only allow modifier letters in the suffix of an identifier.
  = let hashes = dropWhile (okIdChar <||> okIdSuffixChar) str in
    all (== '#') hashes   -- -XMagicHash allows a suffix of hashes
                          -- of course, `all` says "True" to an empty list
......@@ -210,6 +212,13 @@ okIdChar c = case generalCategory c of
OtherNumber -> True
_ -> c == '\'' || c == '_'
-- | Can this character appear in the suffix of an identifier?
-- Exactly the characters whose Unicode general category is
-- 'ModifierLetter' qualify (see #10196).
-- See alexGetByte in Lexer.x
okIdSuffixChar :: Char -> Bool
okIdSuffixChar c = generalCategory c == ModifierLetter
-- | Is this character acceptable in a symbol (after the first char)?
-- See alexGetByte in Lexer.x
okSymChar :: Char -> Bool
......
......@@ -156,7 +156,10 @@ $graphic = [$small $large $symbol $digit $special $unigraphic \"\']
$binit = 0-1
$octit = 0-7
$hexit = [$decdigit A-F a-f]
-- $suffix is the placeholder byte that alexGetByte substitutes for every
-- character of Unicode general category ModifierLetter.
$suffix = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
-- TODO #10196. Only allow modifier letters in the suffix of an identifier.
$idchar = [$small $large $digit $suffix \']
$pragmachar = [$small $large $digit]
......@@ -1842,6 +1845,7 @@ alexGetByte (AI loc s)
symbol = '\x04'
space = '\x05'
other_graphic = '\x06'
suffix = '\x07'
adj_c
| c <= '\x06' = non_graphic
......@@ -1858,7 +1862,7 @@ alexGetByte (AI loc s)
UppercaseLetter -> upper
LowercaseLetter -> lower
TitlecaseLetter -> upper
ModifierLetter -> other_graphic
ModifierLetter -> suffix -- see #10196
OtherLetter -> lower -- see #1103
NonSpacingMark -> other_graphic
SpacingCombiningMark -> other_graphic
......
......@@ -55,6 +55,7 @@ module Util (
isEqual, eqListBy, eqMaybeBy,
thenCmp, cmpList,
removeSpaces,
(<&&>), (<||>),
-- * Edit distance
fuzzyMatch, fuzzyLookup,
......@@ -665,6 +666,10 @@ removeSpaces = dropWhileEndLE isSpace . dropWhile isSpace
(<&&>) = liftA2 (&&)
infixr 3 <&&> -- same as (&&)
-- | Disjunction lifted into an 'Applicative': run both actions and combine
-- their 'Bool' results with '(||)'. For functions this builds the
-- pointwise "either predicate holds" predicate.
(<||>) :: Applicative f => f Bool -> f Bool -> f Bool
p <||> q = liftA2 (||) p q
infixr 2 <||> -- same as (||)
{-
************************************************************************
* *
......
module T10196 where

-- Regression test for #10196: characters of Unicode general category Lm
-- (MODIFIER LETTER) must be accepted in the suffix of constructor and
-- variable names. Each name below differs only in its modifier-letter
-- suffix.
data X = Xᵦ | Xᵤ | Xᵩ | Xᵢ | Xᵪ | Xᵣ

f :: Int
f =
  let xᵦ = 1
      xᵤ = xᵦ
      xᵩ = xᵤ
      xᵢ = xᵩ
      xᵪ = xᵢ
      xᵣ = xᵪ
  in xᵣ
......@@ -101,4 +101,5 @@ test('T5682', normal, compile, [''])
test('T9723a', normal, compile, [''])
test('T9723b', normal, compile, [''])
test('T10188', normal, compile, [''])
# Modifier letters (Unicode category Lm) in identifiers; see #10196.
test('T10196', normal, compile, [''])
test('T10582', expect_broken(10582), compile, [''])
module T10196Fail1 where
-- Constructors are not allowed to start with a modifier letter.
data Foo = ᵦfoo
T10196Fail1.hs:4:12: error: lexical error at character '\7526'
module T10196Fail2 where
-- Variables are not allowed to start with a modifier letter.
ᵦ = 1
T10196Fail2.hs:4:1: error: lexical error at character '\7526'
module T10196Fail3 where
-- Modifier letters are not allowed in the middle of an identifier.
-- And this should not be lexed as 2 separate identifiers either.
xᵦx :: Int
xᵦx = 1
T10196Fail3.hs:5:2: error: lexical error at character '\7526'
......@@ -88,3 +88,6 @@ test('T8431', compile_timeout_multiplier(0.05),
compile_fail, ['-XAlternativeLayoutRule'])
test('T8506', normal, compile_fail, [''])
test('T9225', normal, compile_fail, [''])
# #10196: an identifier must not start with a modifier letter.
test('T10196Fail1', normal, compile_fail, [''])
test('T10196Fail2', normal, compile_fail, [''])
# Marked expect_broken under ticket #10196: this case puts a modifier letter
# in the middle of an identifier, and restricting modifier letters to the
# identifier suffix is still a TODO in the lexer.
test('T10196Fail3', expect_broken(10196), compile_fail, [''])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment