Commit d738e664 authored by thomie

Modifier letter in middle of identifier is ok

Refactoring only. Clean up some loose ends from #10196.

Initially the idea was to only allow modifier letters at the end of
identifiers. Since we later decided to allow modifier letters also in
the middle of identifiers (because not doing so would not fix the
regression completely), the names `suffix` and `okIdSuffixChar` don't
seem appropriate anymore.

Remove TODO. Move test from should_fail to should_compile.
parent 2f733b3a
......@@ -28,7 +28,6 @@ module Lexeme (
) where
import FastString
import Util ((<||>))
import Data.Char
import qualified Data.Set as Set
......@@ -183,8 +182,7 @@ okConSymOcc str = all okSymChar str &&
-- but not worrying about case or clashing with reserved words?
okIdOcc :: String -> Bool
okIdOcc str
-- TODO. #10196. Only allow modifier letters in the suffix of an identifier.
= let hashes = dropWhile (okIdChar <||> okIdSuffixChar) str in
= let hashes = dropWhile okIdChar str in
all (== '#') hashes -- -XMagicHash allows a suffix of hashes
-- of course, `all` says "True" to an empty list
......@@ -194,19 +192,13 @@ okIdChar :: Char -> Bool
okIdChar c = case generalCategory c of
UppercaseLetter -> True
LowercaseLetter -> True
OtherLetter -> True
TitlecaseLetter -> True
ModifierLetter -> True -- See #10196
OtherLetter -> True
DecimalNumber -> True
OtherNumber -> True
_ -> c == '\'' || c == '_'
-- | Is this character acceptable in the suffix of an identifier.
-- See alexGetByte in Lexer.x
okIdSuffixChar :: Char -> Bool
okIdSuffixChar c = case generalCategory c of
ModifierLetter -> True -- See #10196
_ -> False
-- | Is this character acceptable in a symbol (after the first char)?
-- See alexGetByte in Lexer.x
okSymChar :: Char -> Bool
......
......@@ -155,9 +155,8 @@ $binit = 0-1
$octit = 0-7
$hexit = [$decdigit A-F a-f]
$suffix = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
-- TODO #10196. Only allow modifier letters in the suffix of an identifier.
$idchar = [$small $large $digit $suffix \']
$modifier = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
$idchar = [$small $large $digit $modifier \']
$pragmachar = [$small $large $digit]
......@@ -1875,7 +1874,7 @@ alexGetByte (AI loc s)
symbol = '\x04'
space = '\x05'
other_graphic = '\x06'
suffix = '\x07'
modifier = '\x07'
adj_c
| c <= '\x06' = non_graphic
......@@ -1892,7 +1891,7 @@ alexGetByte (AI loc s)
UppercaseLetter -> upper
LowercaseLetter -> lower
TitlecaseLetter -> upper
ModifierLetter -> suffix -- see #10196
ModifierLetter -> modifier -- see #10196
OtherLetter -> lower -- see #1103
NonSpacingMark -> other_graphic
SpacingCombiningMark -> other_graphic
......
......@@ -11,3 +11,8 @@ f =
x = x
x = x
in x
-- Modifier letters are also allowed in the middle of an identifier.
-- This should not be lexed as 2 separate identifiers.
xx :: Int
xx = 1
module T10196Fail3 where
-- Modifier letters are not allowed in the middle of an identifier.
-- And this should not be lexed as 2 separate identifiers either.
xx :: Int
xx = 1
T10196Fail3.hs:5:2: error: lexical error at character '/7526'
......@@ -91,6 +91,5 @@ test('T8506', normal, compile_fail, [''])
test('T9225', normal, compile_fail, [''])
test('T10196Fail1', normal, compile_fail, [''])
test('T10196Fail2', normal, compile_fail, [''])
test('T10196Fail3', expect_broken(10196), compile_fail, [''])
test('T10498a', normal, compile_fail, [''])
test('T10498b', normal, compile_fail, [''])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment