Commit da2fca9e authored by eir@cis.upenn.edu's avatar eir@cis.upenn.edu

Fix #7484, checking for good binder names in Convert.

This commit also refactors a bunch of lexeme-oriented code into
a new module Lexeme, and includes a submodule update for haddock.
parent dbf360a5
-- (c) The GHC Team
--
-- Functions to evaluate whether or not a string is a valid identifier.
-- There is considerable overlap between the logic here and the logic
-- in Lexer.x, but sadly there seems to be way to merge them.
module Lexeme (
-- * Lexical characteristics of Haskell names
-- | Use these functions to figure what kind of name a 'FastString'
-- represents; these functions do /not/ check that the identifier
-- is valid.
isLexCon, isLexVar, isLexId, isLexSym,
isLexConId, isLexConSym, isLexVarId, isLexVarSym,
startsVarSym, startsVarId, startsConSym, startsConId,
-- * Validating identifiers
-- | These functions (working over plain old 'String's) check
-- to make sure that the identifier is valid.
okVarOcc, okConOcc, okTcOcc,
okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc
-- Some of the exports above are not used within GHC, but may
-- be of value to GHC API users.
) where
import FastString
import Data.Char
import qualified Data.Set as Set
{-
************************************************************************
* *
Lexical categories
* *
************************************************************************
These functions test strings to see if they fit the lexical categories
defined in the Haskell report.
Note [Classification of generated names]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Some names generated for internal use can show up in debugging output,
e.g. when using -ddump-simpl. These generated names start with a $
but should still be pretty-printed using prefix notation. We make sure
this is the case in isLexVarSym by only classifying a name as a symbol
if all its characters are symbols, not just its first one.
-}
isLexCon, isLexVar, isLexId, isLexSym :: FastString -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
isLexCon cs = isLexConId cs || isLexConSym cs
isLexVar cs = isLexVarId cs || isLexVarSym cs
isLexId cs = isLexConId cs || isLexVarId cs
isLexSym cs = isLexConSym cs || isLexVarSym cs
-------------
isLexConId cs -- Prefix type or data constructors
| nullFS cs = False -- e.g. "Foo", "[]", "(,)"
| cs == (fsLit "[]") = True
| otherwise = startsConId (headFS cs)
isLexVarId cs -- Ordinary prefix identifiers
| nullFS cs = False -- e.g. "x", "_x"
| otherwise = startsVarId (headFS cs)
isLexConSym cs -- Infix type or data constructors
| nullFS cs = False -- e.g. ":-:", ":", "->"
| cs == (fsLit "->") = True
| otherwise = startsConSym (headFS cs)
isLexVarSym fs -- Infix identifiers e.g. "+"
| fs == (fsLit "~R#") = True
| otherwise
= case (if nullFS fs then [] else unpackFS fs) of
[] -> False
(c:cs) -> startsVarSym c && all isVarSymChar cs
-- See Note [Classification of generated names]
-------------
startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
startsVarSym c = startsVarSymASCII c || (ord c > 0x7f && isSymbol c) -- Infix Ids
startsConSym c = c == ':' -- Infix data constructors
startsVarId c = c == '_' || case generalCategory c of -- Ordinary Ids
LowercaseLetter -> True
OtherLetter -> True -- See #1103
_ -> False
startsConId c = isUpper c || c == '(' -- Ordinary type constructors and data constructors
startsVarSymASCII :: Char -> Bool
startsVarSymASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
isVarSymChar :: Char -> Bool
isVarSymChar c = c == ':' || startsVarSym c
{-
************************************************************************
* *
Detecting valid names for Template Haskell
* *
************************************************************************
-}
----------------------
-- External interface
----------------------
-- | Is this an acceptable variable name?
okVarOcc :: String -> Bool
okVarOcc str@(c:_)
| startsVarId c
= okVarIdOcc str
| startsVarSym c
= okVarSymOcc str
okVarOcc _ = False
-- | Is this an acceptable constructor name?
okConOcc :: String -> Bool
okConOcc str@(c:_)
| startsConId c
= okConIdOcc str
| startsConSym c
= okConSymOcc str
| str == "[]"
= True
okConOcc _ = False
-- | Is this an acceptable type name?
okTcOcc :: String -> Bool
okTcOcc "[]" = True
okTcOcc "->" = True
okTcOcc "~" = True
okTcOcc str@(c:_)
| startsConId c
= okConIdOcc str
| startsConSym c
= okConSymOcc str
| startsVarSym c
= okVarSymOcc str
okTcOcc _ = False
-- | Is this an acceptable alphanumeric variable name, assuming it starts
-- with an acceptable letter?
okVarIdOcc :: String -> Bool
okVarIdOcc str = okIdOcc str &&
not (str `Set.member` reservedIds)
-- | Is this an acceptable symbolic variable name, assuming it starts
-- with an acceptable character?
okVarSymOcc :: String -> Bool
okVarSymOcc str = all okSymChar str &&
not (str `Set.member` reservedOps) &&
not (isDashes str)
-- | Is this an acceptable alphanumeric constructor name, assuming it
-- starts with an acceptable letter?
okConIdOcc :: String -> Bool
okConIdOcc str = okIdOcc str ||
is_tuple_name1 str
where
-- check for tuple name, starting at the beginning
is_tuple_name1 ('(' : rest) = is_tuple_name2 rest
is_tuple_name1 _ = False
-- check for tuple tail
is_tuple_name2 ")" = True
is_tuple_name2 (',' : rest) = is_tuple_name2 rest
is_tuple_name2 (ws : rest)
| isSpace ws = is_tuple_name2 rest
is_tuple_name2 _ = False
-- | Is this an acceptable symbolic constructor name, assuming it
-- starts with an acceptable character?
okConSymOcc :: String -> Bool
okConSymOcc ":" = True
okConSymOcc str = all okSymChar str &&
not (str `Set.member` reservedOps)
----------------------
-- Internal functions
----------------------
-- | Is this string an acceptable id, possibly with a suffix of hashes,
-- but not worrying about case or clashing with reserved words?
okIdOcc :: String -> Bool
okIdOcc str
= let hashes = dropWhile okIdChar str in
all (== '#') hashes -- -XMagicHash allows a suffix of hashes
-- of course, `all` says "True" to an empty list
-- | Is this character acceptable in an identifier (after the first letter)?
-- See alexGetByte in Lexer.x
okIdChar :: Char -> Bool
okIdChar c = case generalCategory c of
UppercaseLetter -> True
LowercaseLetter -> True
OtherLetter -> True
TitlecaseLetter -> True
DecimalNumber -> True
OtherNumber -> True
_ -> c == '\'' || c == '_'
-- | Is this character acceptable in a symbol (after the first char)?
-- See alexGetByte in Lexer.x
okSymChar :: Char -> Bool
okSymChar c
| c `elem` specialSymbols
= False
| c `elem` "_\"'"
= False
| otherwise
= case generalCategory c of
ConnectorPunctuation -> True
DashPunctuation -> True
OtherPunctuation -> True
MathSymbol -> True
CurrencySymbol -> True
ModifierSymbol -> True
OtherSymbol -> True
_ -> False
-- | All reserved identifiers. Taken from section 2.4 of the 2010 Report.
reservedIds :: Set.Set String
reservedIds = Set.fromList [ "case", "class", "data", "default", "deriving"
, "do", "else", "foreign", "if", "import", "in"
, "infix", "infixl", "infixr", "instance", "let"
, "module", "newtype", "of", "then", "type", "where"
, "_" ]
-- | All punctuation that cannot appear in symbols. See $special in Lexer.x.
specialSymbols :: [Char]
specialSymbols = "(),;[]`{}"
-- | All reserved operators. Taken from section 2.4 of the 2010 Report.
reservedOps :: Set.Set String
reservedOps = Set.fromList [ "..", ":", "::", "=", "\\", "|", "<-", "->"
, "@", "~", "=>" ]
-- | Does this string contain only dashes and has at least 2 of them?
isDashes :: String -> Bool
isDashes ('-' : '-' : rest) = all (== '-') rest
isDashes _ = False
......@@ -94,11 +94,6 @@ module OccName (
-- * Tidying up
TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
-- * Lexical characteristics of Haskell names
isLexCon, isLexVar, isLexId, isLexSym,
isLexConId, isLexConSym, isLexVarId, isLexVarSym,
startsVarSym, startsVarId, startsConSym, startsConId,
-- FsEnv
FastStringEnv, emptyFsEnv, lookupFsEnv, extendFsEnv, mkFsEnv
) where
......@@ -110,6 +105,7 @@ import UniqFM
import UniqSet
import FastString
import Outputable
import Lexeme
import Binary
import Data.Char
import Data.Data
......@@ -849,72 +845,6 @@ tidyOccName env occ@(OccName occ_sp fs)
new_fs = mkFastString (base ++ show n)
\end{code}
%************************************************************************
%* *
\subsection{Lexical categories}
%* *
%************************************************************************
These functions test strings to see if they fit the lexical categories
defined in the Haskell report.
Note [Classification of generated names]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Some names generated for internal use can show up in debugging output,
e.g. when using -ddump-simpl. These generated names start with a $
but should still be pretty-printed using prefix notation. We make sure
this is the case in isLexVarSym by only classifying a name as a symbol
if all its characters are symbols, not just its first one.
\begin{code}
isLexCon, isLexVar, isLexId, isLexSym :: FastString -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
isLexCon cs = isLexConId cs || isLexConSym cs
isLexVar cs = isLexVarId cs || isLexVarSym cs
isLexId cs = isLexConId cs || isLexVarId cs
isLexSym cs = isLexConSym cs || isLexVarSym cs
-------------
isLexConId cs -- Prefix type or data constructors
| nullFS cs = False -- e.g. "Foo", "[]", "(,)"
| cs == (fsLit "[]") = True
| otherwise = startsConId (headFS cs)
isLexVarId cs -- Ordinary prefix identifiers
| nullFS cs = False -- e.g. "x", "_x"
| otherwise = startsVarId (headFS cs)
isLexConSym cs -- Infix type or data constructors
| nullFS cs = False -- e.g. ":-:", ":", "->"
| cs == (fsLit "->") = True
| otherwise = startsConSym (headFS cs)
isLexVarSym fs -- Infix identifiers e.g. "+"
| fs == (fsLit "~R#") = True
| otherwise
= case (if nullFS fs then [] else unpackFS fs) of
[] -> False
(c:cs) -> startsVarSym c && all isVarSymChar cs
-- See Note [Classification of generated names]
-------------
startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
startsVarSym c = isSymbolASCII c || (ord c > 0x7f && isSymbol c) -- Infix Ids
startsConSym c = c == ':' -- Infix data constructors
startsVarId c = isLower c || c == '_' -- Ordinary Ids
startsConId c = isUpper c || c == '(' -- Ordinary type constructors and data constructors
isSymbolASCII :: Char -> Bool
isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
isVarSymChar :: Char -> Bool
isVarSymChar c = c == ':' || startsVarSym c
\end{code}
%************************************************************************
%* *
Binary instance
......
......@@ -161,6 +161,7 @@ Library
Hooks
Id
IdInfo
Lexeme
Literal
Llvm
Llvm.AbsSyn
......
......@@ -536,6 +536,7 @@ compiler_stage2_dll0_MODULES = \
IfaceType \
InstEnv \
Kind \
Lexeme \
ListSetOps \
Literal \
LoadIface \
......
......@@ -30,6 +30,7 @@ import ForeignCall
import Unique
import ErrUtils
import Bag
import Lexeme
import Util
import FastString
import Outputable
......@@ -1122,14 +1123,11 @@ cvtName ctxt_ns (TH.Name occ flavour)
occ_str = TH.occString occ
okOcc :: OccName.NameSpace -> String -> Bool
okOcc _ [] = False
okOcc ns str@(c:_)
| OccName.isVarNameSpace ns = startsVarId c || startsVarSym c
| OccName.isDataConNameSpace ns = startsConId c || startsConSym c || str == "[]"
| otherwise = startsConId c || startsConSym c ||
startsVarSym c || str == "[]" || str == "->"
-- allow type operators like "+"
okOcc ns str
| OccName.isVarNameSpace ns = okVarOcc str
| OccName.isDataConNameSpace ns = okConOcc str
| otherwise = okTcOcc str
-- Determine the name space of a name in a type
--
isVarName :: TH.Name -> Bool
......
......@@ -115,6 +115,8 @@ import Ctype
-- -----------------------------------------------------------------------------
-- Alex "Character set macros"
-- NB: The logic behind these definitions is also reflected in basicTypes/Lexeme.hs
-- Any changes here should likely be reflected there.
$unispace = \x05 -- Trick Alex into handling Unicode. See alexGetByte.
$nl = [\n\r\f]
$whitechar = [$nl\v\ $unispace]
......@@ -1802,6 +1804,10 @@ alexGetByte (AI loc s)
-- character is encountered we output these values
-- with the actual character value hidden in the state.
| otherwise =
-- NB: The logic behind these definitions is also reflected
-- in basicTypes/Lexeme.hs
-- Any changes here should likely be reflected there.
case generalCategory c of
UppercaseLetter -> upper
LowercaseLetter -> lower
......
......@@ -60,6 +60,7 @@ import Util
import Var
import MonadUtils
import Outputable
import Lexeme
import FastString
import Pair
import Bag
......
......@@ -91,6 +91,7 @@ import BasicTypes hiding( SuccessFlag(..) )
import Maybes( MaybeErr(..) )
import DynFlags
import Panic
import Lexeme
import FastString
import Outputable
import Control.Monad ( when )
......
......@@ -343,4 +343,4 @@ test('T9066', normal, compile, ['-v0'])
test('T8100', normal, compile, ['-v0'])
test('T9064', normal, compile, ['-v0'])
test('T9209', normal, compile_fail, ['-v0'])
test('T7484', expect_broken(7484), compile_fail, ['-v0'])
test('T7484', normal, compile_fail, ['-v0'])
Subproject commit 19409126be62383bc64d79698b265ffaf96269a5
Subproject commit 2b3712d701c1df626abbc60525c35e735272e45d
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment