Commit 5622c6b6 authored by Simon Marlow's avatar Simon Marlow
Browse files

Integrate FixityResolution

parent fcb9281f
......@@ -33,7 +33,7 @@ topdecl -> @type@ simpletype @=@ type
decls -> @{@ decl_1 @;@ ... @;@ decl_n @}@ & (n>=0)
decl -> gendecl
| (funlhs | pat^0) rhs
| (funlhs | \hprime{pat}) rhs
cdecls -> @{@ cdecl_1 @;@ ... @;@ cdecl_n @}@ & (n>=0)
cdecl -> gendecl
......@@ -1521,12 +1521,10 @@ fixity declaration.)
\index{function binding}\index{pattern binding}
@@@
decl -> (funlhs | pat^0) rhs
decl -> (funlhs | \hprime{pat}) rhs
funlhs -> var apat \{ apat \}
| pat^{i+1} varop^{(a,i)} pat^{i+1}
| lpat^i varop^{({\rm{}l},i)} pat^{i+1}
| pat^{i+1} varop^{({\rm{}r},i)} rpat^i
| \hprime{pat varop pat}
| @(@ funlhs @)@ apat \{ apat \}
rhs -> @=@ exp [@where@ decls]
......@@ -1538,11 +1536,12 @@ gdrhs -> \hprime{guards @=@ exp [gdrhs]}
@@@
\indexsyn{decl}%
\indexsyn{pat}%
\indexsyn{rhs}%
\indexsyn{gdrhs}%
\indexsyn{qs}%
We distinguish two cases within this syntax: a {\em pattern binding}
occurs when the left hand side is a "pat^0";
occurs when the left hand side is a \hprime{"pat"};
otherwise, the binding is called a {\em function
binding}. Either binding may appear at the top-level of a module or
within a @where@ or @let@ construct.
......@@ -1592,6 +1591,26 @@ definitions are all equivalent:
@
\eprogNoSkip
\begin{haskellprime}
Note that fixity resolution applies to the infix variants of the
function binding in the same way as for expressions
(Section~\ref{fixity-resolution}). Applying fixity resolution to the
left side of the equals in a function binding must leave the "varop"
being defined at the top level. For example, if we are defining a new
operator @##@ with precedence 6, then this definition would be
illegal:
\bprog
@
a ## b : xs = exp
@
\eprogNoSkip
because @:@ has precedence 5, so the left hand side resolves to
@(a ## b) : xs@, and this cannot be a pattern binding because @(a ## b)@
is not a valid pattern.
\end{haskellprime}
\outline{
\paragraph*{Translation:}
The general binding form for functions is semantically
......@@ -1649,29 +1668,6 @@ simple pattern binding:
\ea\]
}
\paragraph*{A note about syntax.} It is usually straightforward
to tell whether a binding is a pattern binding or a function binding,
but the existence of @n+k@ patterns\index{n+k pattern@@"n@+@k" pattern}
sometimes confuses the issue.
Here are four examples:
\bprog
@
x + 1 = ... -- Function binding, defines (+)
-- Equivalent to (+) x 1 = ...
(x + 1) = ... -- Pattern binding, defines x
(x + 1) * y = ... -- Function binding, defines (*)
-- Equivalent to (*) (x+1) y = ...
(x + 1) y = ... -- Function binding, defines (+)
-- Equivalent to (+) x 1 y = ...
@
\eprog
The first two can be distinguished because a pattern binding
has a "pat^0" on the left hand side, not a "pat" --- the former cannot
be an unparenthesised @n+k@ pattern.
\subsection{Static Semantics of Function and Pattern Bindings}
\label{dependencyanalysis}
......
......@@ -19,29 +19,15 @@ the @Prelude@, regardless of whether or not the identifier ``@concatMap@'' is in
scope where the list comprehension is used, and (if it is in scope)
what it is bound to.
In the syntax that follows, there are some families of nonterminals
indexed by precedence levels (written as a superscript). Similarly, the
nonterminals "op", "varop", and "conop" may have a double index:
a letter "l", "r", or "n" for left-, right- or non-associativity and
a precedence level. A precedence-level variable "i" ranges from 0 to 9;
an associativity variable "a" varies over "\{l, r, n\}".
For example
@@@
aexp -> @(@ exp^{i+1} qop^{(a,i)} @)@
@@@
actually stands for 30 productions, with 10 substitutions for "i"
and 3 for "a".
exp -> \hprime{infixexp @::@ [context @=>@] type} & (\tr{expression type signature})
| \hprime{infixexp}
@@@
exp -> exp^0 @::@ [context @=>@] type & (\tr{expression type signature})
| exp^0
exp^i -> exp^{i+1} [qop^{({\rm{n}},i)} exp^{i+1}]
| lexp^i
| rexp^i
lexp^i -> (lexp^i | exp^{i+1}) qop^{({\rm{l}},i)} exp^{i+1}
lexp^6 -> @-@ exp^7
rexp^i -> exp^{i+1} qop^{({\rm{r}},i)} (rexp^i | exp^{i+1})
exp^{10} -> @\@ apat_1 ... apat_n @->@ exp & (\tr{lambda abstraction}, n>=1)
\hprime{infixexp} -> \hprime{lexp qop infixexp} & (\tr{infix operator application})
| \hprime{@-@ infixexp} & (\tr{prefix negation})
| \hprime{lexp}
\hprime{lexp} -> @\@ apat_1 ... apat_n @->@ exp & (\tr{lambda abstraction}, n>=1)
| @let@ decls @in@ exp & ({\tr{let expression}})
| @if@ exp \hprime{[@;@]} @then@ exp \hprime{[@;@]} @else@ exp & (\tr{conditional})
| @case@ exp @of@ @{@ alts @}@ & (\tr{case expression})
......@@ -57,18 +43,15 @@ aexp -> qvar & (\tr{variable})
| @[@ exp_1 @,@ ... @,@ exp_k @]@ & (\tr{list}, k>=1)
| @[@ exp_1 [@,@ exp_2] @..@ [exp_3] @]@ & (\tr{arithmetic sequence})
| @[@ exp @|@ qual_1 @,@ ... @,@ qual_n @]@ & (\tr{list comprehension}, n>=1)
| @(@ exp^{i+1} qop^{(a,i)} @)@ & (\tr{left section})
| @(@ lexp^{i} qop^{(l,i)} @)@ & (\tr{left section})
| @(@ qop^{(a,i)}_{\langle@-@\rangle} exp^{i+1} @)@ & (\tr{right section})
| @(@ qop^{(r,i)}_{\langle@-@\rangle} rexp^{i} @)@ & (\tr{right section})
| @(@ \hprime{infixexp qop} @)@ & (\tr{left section})
| @(@ \hprime{qop_{\langle@-@\rangle} infixexp} @)@ & (\tr{right section})
| qcon @{@ fbind_1 @,@ ... @,@ fbind_n @}@ & (\tr{labeled construction}, n>=0)
| aexp_{\langle{}qcon\rangle{}} @{@ fbind_1 @,@ ... @,@ fbind_n @}@ & (\tr{labeled update}, n >= 1)
@@@
\indexsyn{exp}%
\index{exp@@"exp^i"}%
\index{lexp@@"lexp^i"}%
\index{rexp@@"rexp^i"}%
\indexsyn{infixexp}%
\indexsyn{lexp}%
\indexsyn{aexp}%
\indexsyn{fexp}%
......@@ -120,9 +103,14 @@ Expressions involving infix operators are disambiguated by the
operator's fixity (see Section~\ref{fixity}). Consecutive
unparenthesized operators with the same precedence must both be either
left or right associative to avoid a syntax error.
Given an unparenthesized expression ``"x qop^{(a,i)} y qop^{(b,j)} z"'', parentheses
must be added around either ``"x qop^{(a,i)} y"'' or ``"y qop^{(b,j)}
z"'' when "i=j" unless "a=b={\rm l}" or "a=b={\rm r}".
Given an unparenthesized expression ``"x qop^{(a,i)} y qop^{(b,j)} z"''
\hprime{(where "qop^{(a,i)}" means an operator with associativity "a" and
precedence "i")}, parentheses must be added around either ``"x
qop^{(a,i)} y"'' or ``"y qop^{(b,j)} z"'' when "i=j" unless "a=b={\rm
l}" or "a=b={\rm r}".
An example algorithm for resolving expressions involving infix
operators is given in Section~\ref{fixity-resolution}.
Negation\index{negation} is the only prefix operator in
\Haskell{}; it has the same precedence as the infix @-@ operator
......@@ -153,33 +141,9 @@ This & Parses as \\
@\ x -> a+b :: Int@ & @\ x -> ((a+b) :: Int)@ \\
\hline\et\]
{\em A note about parsing.} Expressions that involve the interaction
of fixities with the let/lambda meta-rule
may be hard to parse. For example, the expression
\bprog
@
let x = True in x == x == True
@
\eprog
cannot possibly mean
\bprog
@
let x = True in (x == x == True)
@
\eprog
because @(==)@ is a non-associative operator; so the expression must parse thus:
\bprog
@
(let x = True in (x == x)) == True
@
\eprog
However, implementations may well use a post-parsing pass to deal with fixities,
so they may well incorrectly deliver the former parse. Programmers are advised
to avoid constructs whose parsing involves an interaction of (lack of) associativity
with the let/lambda meta-rule.
For the sake of clarity, the rest of this section shows the syntax of
expressions without their precedences.
For the sake of clarity, the rest of this section will assume that
expressions involving infix operators have been resolved according to
the fixities of the operators.
\subsection{Errors}
\label{basic-errors}\index{error}
......@@ -418,12 +382,11 @@ The following identities hold:
\label{sections}
%
@@@
aexp -> @(@ exp^{i+1} qop^{(a,i)} @)@ & (\tr{left section})
| @(@ lexp^{i} qop^{(l,i)} @)@ & (\tr{left section})
| @(@ qop^{(a,i)}_{\langle@-@\rangle} exp^{i+1} @)@ & (\tr{right section})
| @(@ qop^{(r,i)}_{\langle@-@\rangle} rexp^{i} @)@ & (\tr{right section})
| @(@ \hprime{infixexp qop} @)@ & (\tr{left section})
| @(@ \hprime{qop_{\langle@-@\rangle} infixexp} @)@ & (\tr{right section})
@@@
\indexsyn{aexp}%
\indexsyn{infixexp}%
\indexsyn{qop}%
\noindent
{\em Sections} are written as "@(@ op e @)@" or "@(@ e op @)@", where
......@@ -1207,15 +1170,12 @@ defining the semantics of pattern matching for case expressions is sufficient.
Patterns\index{pattern} have this syntax:
@@@
pat -> var @+@ integer & (\tr{successor pattern})
| pat^0
pat^i -> pat^{i+1} [qconop^{({\rm{n}},i)} pat^{i+1}]
| lpat^i
| rpat^i
lpat^i -> (lpat^i | pat^{i+1}) qconop^{({\rm{l}},i)} pat^{i+1}
lpat^6 -> @-@ (integer | float) & (\tr{negative literal})
rpat^i -> pat^{i+1} qconop^{({\rm{r}},i)} (rpat^i | pat^{i+1})
pat^{10} -> apat
pat -> \hprime{lpat qconop pat} & (\tr{infix constructor})
| \hprime{@-@ (integer | float)} & (\tr{negative literal})
| \hprime{lpat}
\hprime{lpat} -> apat
| \hprime{@-@ (integer | float)} & (\tr{negative literal})
| gcon apat_1 ... apat_k & (\tr{arity} gcon = k, k>=1)
apat -> var [{\tt @@} apat] & (\tr{as pattern})
......@@ -1231,9 +1191,7 @@ apat -> var [{\tt @@} apat] & (\tr{as pattern})
fpat -> qvar @=@ pat
@@@
\indexsyn{pat}%
\index{pat@@"pat^i"}%
\index{lpat@@"lpat^i"}%
\index{rpat@@"rpat^i"}%
\indexsyn{lpat}%
\indexsyn{apat}%
\indexsyn{fpats}%
\indexsyn{fpat}%
......
%**<title>The Haskell 2010 Report: Fixity Resolution</title>
%**~header
\subsection{Fixity Resolution}
\label{fixity-resolution}
\index{fixity@@resolution}
\begin{haskellprime}
The following is an example implementation of fixity resolution for
Haskell expressions. Fixity resolution also applies to Haskell
patterns, but patterns are a subset of expressions so in what follows
we consider only expressions for simplicity.
The function @resolve@ takes a list consisting of alternating
expressions and operators; i.e. an instance of the "infixexp"
non-terminal in the context-free grammar, and returns either @Just e@
where @e@ is the resolved expression, or @Nothing@ if the input does
not represent a valid expression. In a compiler, of course, it would
be better to return more information about the operators involved for
the purposes of producing a useful error message, but the @Maybe@ type
will suffice to illustrate the algorithm here.
\bprog
@
import Control.Monad (guard)

type Prec = Int
type Var = String

-- An operator: its name, precedence and associativity.
data Op = Op String Prec Fixity deriving Eq
data Fixity = Leftfix | Rightfix | Nonfix deriving Eq
-- Resolved expressions: variables, infix applications and prefix negation.
data Exp = Var Var | OpApp Exp Op Exp | Neg Exp deriving Eq
-- Input tokens: an operand, an infix operator, or a prefix minus sign.
data Tok = TExp Exp | TOp Op | TNeg deriving Eq

-- Resolve an alternating list of operands and operators into a single
-- expression.  The result is Nothing if the fixities of the operators make
-- the input illegal, or if the token list is not well formed (it is empty,
-- an operand is missing, or two operands are adjacent).
resolve :: [Tok] -> Maybe Exp
resolve tokens = fmap fst $ parseNeg (Op "" (-1) Nonfix) tokens
  where
    -- Read the operand to the right of op1, dealing with a leading prefix
    -- minus, and then carry on with parse.
    parseNeg :: Op -> [Tok] -> Maybe (Exp,[Tok])
    parseNeg op1 (TExp e1 : rest)
       = parse op1 e1 rest
    parseNeg op1 (TNeg : rest)
       = do guard (prec1 < 6)
            (r, rest') <- parseNeg (Op "-" 6 Leftfix) rest
            parse op1 (Neg r) rest'
       where
          Op _ prec1 _ = op1
    -- An operand was expected: the input is empty, ends with an operator,
    -- or has two operators in a row.
    parseNeg _ _
       = Nothing

    -- Build the expression to the right of op1, starting from e1, and
    -- return it together with the unconsumed tokens.
    parse :: Op -> Exp -> [Tok] -> Maybe (Exp, [Tok])
    parse _   e1 [] = Just (e1, [])
    parse op1 e1 (TOp op2 : rest)
       -- case (1): check for illegal expressions
       | prec1 == prec2 && (fix1 /= fix2 || fix1 == Nonfix)
       = Nothing

       -- case (2): op1 and op2 should associate to the left
       | prec1 > prec2 || (prec1 == prec2 && fix1 == Leftfix)
       = Just (e1, TOp op2 : rest)

       -- case (3): op1 and op2 should associate to the right
       | otherwise
       = do (r,rest') <- parseNeg op2 rest
            parse op1 (OpApp e1 op2 r) rest'
       where
          Op _ prec1 fix1 = op1
          Op _ prec2 fix2 = op2
    -- An operator was expected after an operand, but another operand or a
    -- prefix minus was found.
    parse _ _ _
       = Nothing
@
\eprog
The algorithm works as follows. At each stage we have a call
@
parse op1 E1 (op2 : tokens)
@
which means that we are looking at an expression like
@
E0 `op1` E1 `op2` ... (1)
@
(the caller holds E0). The job of @parse@ is to build the expression
to the right of @op1@, returning the expression and any remaining
input.
There are three cases to consider:
\begin{enumerate}
\item If @op1@ and @op2@ have the same precedence, but they do not
have the same associativity, or they are both declared to be nonfix, then
the expression is illegal.
\item If @op1@ has a higher precedence than @op2@, or @op1@ and @op2@
should left-associate, then we know that the expression to the right
of @op1@ is @E1@, so we return this to the caller.
\item Otherwise, we know we want to build an expression of the form
@E1 `op2` R@. To find @R@, we call @parseNeg op2 tokens@ to compute
the expression to the right of @op2@, namely @R@ (more about
@parseNeg@ below, but essentially if @tokens@ is of the form @(E2 : rest)@,
then this is equivalent to @parse op2 E2 rest@). Now, we
have
\[
@E0 `op1` (E1 `op2` R) `op3` ...@
\]
where @op3@ is the next operator in the input. This is an instance of
(1) above, so to continue we call @parse@, with the new @E1 == (E1 `op2` R)@.
\end{enumerate}
To initialise the algorithm, we set @op1@ to be an imaginary operator
with precedence lower than anything else. Hence @parse@ will consume
the whole input, and return the resulting expression.
The handling of the prefix negation operator, @-@, complicates matters
only slightly. Recall that prefix negation has the same fixity as
the infix @-@ operator (subtraction): left-associative with precedence 6. The operator to
the left of @-@, if there is one, must have precedence lower than 6
for the expression to be legal. The negation operator itself may
left-associate with operators of the same fixity (e.g. @+@). So for
example @-a + b@ is legal and resolves as @(-a) + b@, but @a + -b@ is
illegal.
The function @parseNeg@ handles prefix negation. If we encounter a
negation operator, and it is legal in this position (the operator to
the left has precedence lower than 6), then we proceed in a similar
way to case (3) above: compute the argument to @-@ by recursively
calling @parseNeg@, and then continue by calling @parse@.
Note that this algorithm is insensitive to the range and resolution of
precedences. There is no reason in principle that Haskell should be
limited to integral precedences in the range 0 to 9; a larger range,
or fractional values, would present no additional difficulties.
\end{haskellprime}
%**~footer
......@@ -411,6 +411,7 @@ that it does not claim to be a definition of the language Haskell 98.
% \appendix
\input{standard-prelude}\startnewsection
\input{syntax-iso}\startnewsection
\input{fixity}\startnewsection
\input{derived}\startnewsection
\input{pragmas}
%%
......
......@@ -11,7 +11,7 @@ files= basic.verb
files= io-13.verb
files=standard-prelude.verb
files=syntax-iso.verb
files=literate.verb
files=fixity.verb
files=derived.verb
files=pragmas.verb
......
......@@ -26,19 +26,6 @@ BNF-like syntax is used throughout, with productions having the form:
nonterm -> alt_1 | alt_2 | ... | alt_n
@@@
There are some families of nonterminals indexed by
precedence levels (written as a superscript). Similarly, the
nonterminals "op", "varop", and "conop" may have a double index:
a letter "l", "r", or "n" for left-, right- or nonassociativity and
a precedence level. A precedence-level variable "i" ranges from 0 to 9;
an associativity variable "a" varies over "\{l, r, n\}".
Thus, for example
@@@
aexp -> @(@ exp^{i+1} qop^{(a,i)} @)@
@@@
actually stands for 30 productions, with 10 substitutions for "i"
and 3 for "a".
In both the lexical and the context-free syntax, there are some
ambiguities that are to be resolved by making grammatical phrases as
long as possible, proceeding from left to right (in shift-reduce
......@@ -318,23 +305,6 @@ is valid, because it translates to
@
\eprog
The close brace is inserted due to the parse error rule above.
The parse-error rule is hard to implement in its full generality, because
doing so involves fixities. For example, the expression
\bprog
@
do a == b == c
@
\eprog
has a single unambiguous (albeit probably type-incorrect) parse, namely
\bprog
@
(do { a == b }) == c
@
\eprog
because @(==)@ is non-associative. Programmers are therefore advised to avoid
writing code that requires the parser to insert a closing brace in such
situations.
\startnewsection
\subsection{Literate comments}
......@@ -480,7 +450,7 @@ topdecl -> @type@ simpletype @=@ type
@@@
decls -> @{@ decl_1 @;@ ... @;@ decl_n @}@ & (n>=0)
decl -> gendecl
| (funlhs | pat^0) rhs
| (funlhs | \hprime{pat}) rhs
cdecls -> @{@ cdecl_1 @;@ ... @;@ cdecl_n @}@ & (n>=0)
cdecl -> gendecl
......@@ -576,9 +546,7 @@ inst -> gtycon
@@@
funlhs -> var apat \{ apat \}
| pat^{i+1} varop^{(a,i)} pat^{i+1}
| lpat^i varop^{({\rm{}l},i)} pat^{i+1}
| pat^{i+1} varop^{({\rm{}r},i)} rpat^i
| \hprime{pat varop pat}
| @(@ funlhs @)@ apat \{ apat \}
rhs -> @=@ exp [@where@ decls]
......@@ -589,20 +557,20 @@ gdrhs -> gd @=@ exp [gdrhs]
gd -> @|@ exp^0
@@@
\indexsyn{funlhs}%
\indexsyn{pat}%
\indexsyn{rhs}%
\indexsyn{gdrhs}%
\indexsyn{gd}%
@@@
exp -> exp^0 @::@ [context @=>@] type & (\tr{expression type signature})
| exp^0
exp^i -> exp^{i+1} [qop^{({\rm{}n},i)} exp^{i+1}]
| lexp^i
| rexp^i
lexp^i -> (lexp^i | exp^{i+1}) qop^{({\rm{}l},i)} exp^{i+1}
lexp^6 -> @-@ exp^7
rexp^i -> exp^{i+1} qop^{({\rm{}r},i)} (rexp^i | exp^{i+1})
exp^{10} -> @\@ apat_1 ... apat_n @->@ exp & (\tr{lambda abstraction}, n>=1)
exp -> \hprime{infixexp @::@ [context @=>@] type} & (\tr{expression type signature})
| \hprime{infixexp}
\hprime{infixexp} -> \hprime{lexp qop infixexp} & (\tr{infix operator application})
| \hprime{@-@ infixexp} & (\tr{prefix negation})
| \hprime{lexp}
\hprime{lexp} -> @\@ apat_1 ... apat_n @->@ exp & (\tr{lambda abstraction}, n>=1)
| @let@ decls @in@ exp & ({\tr{let expression}})
| @if@ exp \hprime{[@;@]} @then@ exp \hprime{[@;@]} @else@ exp & (\tr{conditional})
| @case@ exp @of@ @{@ alts @}@ & (\tr{case expression})
......@@ -611,9 +579,9 @@ exp^{10} -> @\@ apat_1 ... apat_n @->@ exp & (\tr{lambda abstraction}, n>=1)
fexp -> [fexp] aexp & (\tr{function application})
@@@
\indexsyn{exp}%
\index{exp@@"exp^i"}%
\index{lexp@@"lexp^i"}%
\index{rexp@@"rexp^i"}%
\indexsyn{infixexp}%
\indexsyn{lexp}%
\indexsyn{aexp}%
\indexsyn{fexp}%
@@@
......@@ -625,10 +593,8 @@ aexp -> qvar & (\tr{variable})
| @[@ exp_1 @,@ ... @,@ exp_k @]@ & (\tr{list}, k>=1)
| @[@ exp_1 [@,@ exp_2] @..@ [exp_3] @]@ & (\tr{arithmetic sequence})
| @[@ exp @|@ qual_1 @,@ ... @,@ qual_n @]@ & (\tr{list comprehension}, n>=1)
| @(@ exp^{i+1} qop^{(a,i)} @)@ & (\tr{left section})
| @(@ lexp^{i} qop^{(l,i)} @)@ & (\tr{left section})
| @(@ qop^{(a,i)}_{\langle@-@\rangle} exp^{i+1} @)@ & (\tr{right section})
| @(@ qop^{(r,i)}_{\langle@-@\rangle} rexp^{i} @)@ & (\tr{right section})
| @(@ \hprime{infixexp qop} @)@ & (\tr{left section})
| @(@ \hprime{qop_{\langle@-@\rangle} infixexp} @)@ & (\tr{right section})
| qcon @{@ fbind_1 @,@ ... @,@ fbind_n @}@ & (\tr{labeled construction}, n>=0)
| aexp_{\langle{}qcon\rangle{}} @{@ fbind_1 @,@ ... @,@ fbind_n @}@ & (\tr{labeled update}, n >= 1)
@@@
......@@ -664,23 +630,19 @@ fbind -> qvar @=@ exp
\indexsyn{fbind}%
@@@
pat -> var @+@ integer & (\tr{successor pattern})
| pat^0
pat^i -> pat^{i+1} [qconop^{({\rm{}n},i)} pat^{i+1}]
| lpat^i
| rpat^i
lpat^i -> (lpat^i | pat^{i+1}) qconop^{({\rm{}l},i)} pat^{i+1}
lpat^6 -> @-@ (integer | float) & (\tr{negative literal})
rpat^i -> pat^{i+1} qconop^{({\rm{}r},i)} (rpat^i | pat^{i+1})
pat^{10} -> apat
pat -> \hprime{lpat qconop pat} & (\tr{infix constructor})
| \hprime{@-@ (integer | float)} & (\tr{negative literal})
| \hprime{lpat}
\hprime{lpat} -> apat
| \hprime{@-@ (integer | float)} & (\tr{negative literal})
| gcon apat_1 ... apat_k & (\tr{arity} gcon = k, k>=1)
@@@
\indexsyn{pat}%
\index{pat@@"pat^i"}%
\index{lpat@@"lpat^i"}%
\index{rpat@@"rpat^i"}%
\indexsyn{fpat}%
\indexsyn{lpat}%
\indexsyn{apat}%
\indexsyn{fpats}%
\indexsyn{fpat}%
@@@
apat -> var [{\tt @@} apat] & (\tr{as pattern})
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment