Commit 2aee4196 authored by thomie

Allow combining characters in identifiers (#7650)

Reviewed by: austin, rwbarton

Differential Revision: https://phabricator.haskell.org/D1938
parent 661aa07e
...@@ -194,9 +194,10 @@ okIdChar c = case generalCategory c of ...@@ -194,9 +194,10 @@ okIdChar c = case generalCategory c of
LowercaseLetter -> True LowercaseLetter -> True
TitlecaseLetter -> True TitlecaseLetter -> True
ModifierLetter -> True -- See #10196 ModifierLetter -> True -- See #10196
OtherLetter -> True OtherLetter -> True -- See #1103
NonSpacingMark -> True -- See #7650
DecimalNumber -> True DecimalNumber -> True
OtherNumber -> True OtherNumber -> True -- See #4373
_ -> c == '\'' || c == '_' _ -> c == '\'' || c == '_'
-- | Is this character acceptable in a symbol (after the first char)? -- | Is this character acceptable in a symbol (after the first char)?
......
...@@ -155,8 +155,8 @@ $binit = 0-1 ...@@ -155,8 +155,8 @@ $binit = 0-1
$octit = 0-7 $octit = 0-7
$hexit = [$decdigit A-F a-f] $hexit = [$decdigit A-F a-f]
$modifier = \x07 -- Trick Alex into handling Unicode. See alexGetByte. $uniidchar = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
$idchar = [$small $large $digit $modifier \'] $idchar = [$small $large $digit $uniidchar \']
$pragmachar = [$small $large $digit] $pragmachar = [$small $large $digit]
...@@ -1874,10 +1874,10 @@ alexGetByte (AI loc s) ...@@ -1874,10 +1874,10 @@ alexGetByte (AI loc s)
symbol = '\x04' symbol = '\x04'
space = '\x05' space = '\x05'
other_graphic = '\x06' other_graphic = '\x06'
modifier = '\x07' uniidchar = '\x07'
adj_c adj_c
| c <= '\x06' = non_graphic | c <= '\x07' = non_graphic
| c <= '\x7f' = c | c <= '\x7f' = c
-- Alex doesn't handle Unicode, so when Unicode -- Alex doesn't handle Unicode, so when Unicode
-- character is encountered we output these values -- character is encountered we output these values
...@@ -1891,9 +1891,9 @@ alexGetByte (AI loc s) ...@@ -1891,9 +1891,9 @@ alexGetByte (AI loc s)
UppercaseLetter -> upper UppercaseLetter -> upper
LowercaseLetter -> lower LowercaseLetter -> lower
TitlecaseLetter -> upper TitlecaseLetter -> upper
ModifierLetter -> modifier -- see #10196 ModifierLetter -> uniidchar -- see #10196
OtherLetter -> lower -- see #1103 OtherLetter -> lower -- see #1103
NonSpacingMark -> other_graphic NonSpacingMark -> uniidchar -- see #7650
SpacingCombiningMark -> other_graphic SpacingCombiningMark -> other_graphic
EnclosingMark -> other_graphic EnclosingMark -> other_graphic
DecimalNumber -> digit DecimalNumber -> digit
......
-- Exercises an identifier that contains a combining character sequence
-- (#7650): the name below is spelled with 'n' followed by a NonSpacingMark.
main = print spın̈alTap
  where spın̈alTap = 11  -- indented so the layout rule attaches it to main
-- n̈ is a combining character sequence. We now allow it to be used in
-- identifiers (#7650).
--
-- > map generalCategory "n̈"
-- [LowercaseLetter,NonSpacingMark]
--
-- > map show "n̈"
-- ["'n'","'\776'"]
--
-- '\776' is the decimal escape for code point U+0308.
...@@ -25,3 +25,4 @@ test('T7671', normal, compile, ['']) ...@@ -25,3 +25,4 @@ test('T7671', normal, compile, [''])
# TODO: This test ought to be run in a non-UTF8 locale, but this is not yet # TODO: This test ought to be run in a non-UTF8 locale, but this is not yet
# supported by the test suite (see 10907) # supported by the test suite (see 10907)
test('T10907', normal, compile, ['']) test('T10907', normal, compile, [''])
test('T7650', normal, compile, [''])
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment