diff --git a/.gitignore b/.gitignore index b20c3fb6ea32549bd5961734bcee1131a3014541..da3524f17dcf4d60dffb44d010c23dfa02a0d9f5 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,9 @@ *.fls *.log *.out +*.bbl +*.blg +*.bib.bak haskell.tex haskell.pdf diff --git a/grammar.ott b/grammar.ott index 035d4922ed35ca6c97ca8b06eb4c2e9af8f51758..ac64182898159ada1b839a84a954675257866cb1 100644 --- a/grammar.ott +++ b/grammar.ott @@ -8,9 +8,9 @@ metavar x, y, z, f, g, h::= {{ com Term variables }} metavar a, b ::= {{ com Type varaibles (internal) }} metavar c ::= {{ com Type variables (user-specified) }} -metavar K {{ tex \mathrm K }} ::= {{ com Data constructors }} -metavar P {{ tex \mathrm P }} ::= {{ com Pattern synonyms }} -metavar T {{ tex \mathrm T }} ::= {{ com Type constructors }} +metavar K ::= {{ com Data constructors }} +metavar P ::= {{ com Pattern synonyms }} +metavar T ::= {{ com Type constructors }} indexvar i, j, k, l, n ::= {{ com Index variables }} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -23,8 +23,15 @@ funlhs :: 'FunLhs_' ::= {{ com Function left-hand sides }} | f ps :: :: Plain {{ com Standard }} | f ps '::' s :: :: Signature {{ com Result signature }} -fundecl :: 'FunDecl_ ::= {{ com Function declarations }} -| funlhs = e :: :: Decl +fundecl :: 'FunDecl_' ::= {{ com Function declarations }} +| funeqs :: :: Decl + +funeqs {{ tex \overline{\ottnt{funeq} } }} :: 'FunEqs_' ::= {{ com List of function equations }} +| funeq :: :: One +| funeqs1 ; .... ; funeqsi :: :: Many + +funeq :: 'FunEq_' ::= {{ com Function equations }} +| funlhs = e :: :: Eqn decl :: 'Decl_' ::= {{ com Declarations }} | fundecl :: :: FunBind {{ com Function binding }} @@ -177,8 +184,7 @@ terminals :: 'terminals_' ::= | ~ :: :: Eq {{ tex \sim }} | |- :: :: VDash1 {{ tex \vdash }} | ||- :: :: VDash2 {{ tex \Vdash }} -| |-p :: :: VDashp {{ tex \ent{P} }} -| |-ps :: :: VDashpstar {{ tex \ent{PS} }} +| |-PS :: :: VDashpstar {{ tex \ent{PS} }} | |-sb* :: :: VDashsbstar {{ tex \ent{sb}^{\!\ast} }} | := :: :: Def {{ tex \coloneqq }} | in :: :: In {{ tex \in }} diff --git a/haskell.bib b/haskell.bib new file mode 100644 index 0000000000000000000000000000000000000000..620580316e44b4d06b578430ad6cee39630a0dfd --- /dev/null +++ b/haskell.bib @@ -0,0 +1,21 @@ +% Encoding: UTF-8 + +@InProceedings{type-vars-in-pats, + author = {Eisenberg, Richard A. and Breitner, Joachim and Peyton Jones, Simon}, + title = {Type Variables in Patterns}, + booktitle = {Proceedings of the 11th ACM SIGPLAN International Symposium on Haskell}, + year = {2018}, + series = {Haskell 2018}, + pages = {94--105}, + address = {New York, NY, USA}, + publisher = {ACM}, + acmid = {3242753}, + doi = {10.1145/3242744.3242753}, + isbn = {978-1-4503-5835-4}, + keywords = {Haskell, Patterns, polymorphism, type variables}, + location = {St. Louis, MO, USA}, + numpages = {12}, + url = {http://doi.acm.org/10.1145/3242744.3242753}, +} + +@Comment{jabref-meta: databaseType:bibtex;} diff --git a/haskell.mng b/haskell.mng index 96048fe4b19ac2d3052e4fb8ec53113f9f06141a..26036b7084c5bdf1d18d93b9a31be3cf895cf987 100644 --- a/haskell.mng +++ b/haskell.mng @@ -1,10 +1,13 @@ \documentclass{article} +\usepackage{hyperref} \usepackage[supertabular,implicitLineBreakHack]{ottalt} \usepackage[dvipsnames]{xcolor} \usepackage{amsmath} \usepackage{amssymb} \usepackage{fullpage} +\usepackage{mdframed} +\usepackage{natbib} \inputott{ott.tex} @@ -14,8 +17,8 @@ \newcommand{\id}[1]{\textsf{\textsl{#1}}} \newcommand{\varid}[1]{\textcolor{Sepia}{\id{#1}}} -\newcommand{\ent}[1]{\vdash_{\!\!\!\raisebox{-1pt}{\scriptsize\textsf{#1}}}} -\newcommand{\eent}[1]{\Vdash_{\!\!\mathsf{#1}}} +\newcommand{\ent}[1]{\ensuremath{\vdash_{\!\!\!\raisebox{-1pt}{\scriptsize\textsf{#1}}}}} +\newcommand{\Ent}[1]{\ensuremath{\Vdash_{\!\!\mathsf{#1}}}} \renewcommand\ottaltinferrule[4]{ \inferrule*[narrower=0.3,right=#1,#2] @@ -23,33 +26,100 @@ {#4} } +\newmdenv[leftmargin=3ex,rightmargin=3ex]{asidebox} +\newenvironment{aside}{\begin{asidebox}\textbf{Aside:}}{\end{asidebox}} \begin{document} \section{Metavariables} +The metavariables below stand for lexical tokens. The type variables $[[a]]$ and $[[b]]$ +are for variables invented by GHC. User-written type variables are always denoted with +$[[c]]$. + \ottmetavars \section{Grammar} -\nonterms{decl,fundecl,funlhs,CL,v,e,p,s,Q,tv,theta,t,G} +\subsection{Haskell} + +The definitions below describe familiar parts of the Haskell grammar. +Constraints use a slightly different syntax to aid in building the theory. + +Limitations: +\begin{itemize} +\item This formalism is meant to study \emph{types}, not \emph{syntax}. The syntactic +forms here might be subtly different than those in surface Haskell. +\item The formalism currently does not support higher-rank types and bidirectional type-checking. +\end{itemize} + +\nonterms{decl,fundecl,funeq,funlhs,CL,v,e,p,s,Q,tv,t} + +\subsection{Theoretical constructs} + +We will need a few more definitions to power our theory, described here. + +\nonterms{theta,G} \section{Typing rules} We assume the Barendregt convention where all variables in contexts $[[G]]$ are always -distinct; this convention is maintained by renaming variables as necessary. +distinct; this convention is maintained by renaming variables as necessary. Additionally, +abstract judgments $[[G ||- Q]]$ denotes entailment and $[[G ||- s1 <= s2]]$ denotes +subtyping ($[[s1]]$ is more general than $[[s2]]$). -\drules[D]{$[[G |- decls -| G']]$}{Declarations}{Fun,AFun} -\drules[FD]{$[[G |- fundecl -| f ; s]]$}{Function declarations}{Fun} -\drules[FL]{$[[G |- funlhs : ts -| G'; f; s]]$}{Function left-hand sides}{Std,ResSig} +\drules[D]{$[[G |-D decls -| G']]$}{Declarations}{Fun,AFun} -\drules[E]{$[[G |- e : t]]$}{Expressions}{VarCon,Eq,App,Abs,Case,Let} +Checking a declaration produces an output context $[[G']]$ that contains the new binding. +When we look at a function declaration, we add the assumptions (type variables and the +constraint $[[Q]]$) to the context. In \rref{d-afun}, we do a subtype check to make sure +the inferred type of the function is at least as general as the declared type. Note that +these rules require a function type signature to occur directly before the function itself. +This requirement is not in Haskell, but we can imagine a preprocessor that shuffles the +definitions around to make this true. -Abstract judgments $[[G ||- Q]]$ denotes entailment and $[[G ||- s1 <= s2]]$ denotes -subtyping ($[[s1]]$ is more general than $[[s2]]$). +\drules[FD]{$[[G |-FD fundecl -| f ; s]]$}{Function declarations}{One,Many} + +Checking a function declaration is different depending on whether there is one equation or +many. \Rref{FD-One} allows us to infer a polytype for the result type of a function with +only one equation, if it contains a result signature. On the other hand, \rref{FD-Many} +requires that the function result has a monotype $[[t]]$. (Note that the function may still +be polymorphic and/or constrained: it is just the \emph{result} that must be a monotype +here.) This distinction matters only in the presence of result signatures (see \rref{FL-ResSig} +below) and echoes GHC's existing behavior in requiring all multi-branch constructs to +have monotypes. + +\begin{aside} +An alternative here is to eliminate both above rules and replace them with this: + +\drules[FD]{$[[G |-FD fundecl -| f ; s]]$}{Function declarations}{XXPolyXX} + +This rule suggests finding the least upper bound of all the inferred types. We don't +wish to do this, though, so the rule is not included. Note the convention in this +document of rendering rejected rules with a ``XX'' prefix and suffix. +\end{aside} + +\drules[FL]{$[[G |-FL funlhs : ts -| G'; f; s]]$}{Function left-hand sides}{Std,ResSig} + +The \ent{FL} judgment checks a function left-hand side. In the standard case (\rref{FL-Std}), it checks +the patterns, producing an extended environment $[[G']]$. The judgment also extracts the function +name $[[f]]$ and the right-hand side type $[[u]]$; this last return value is just a guess for \rref{FL-Std}. +On the other hand, \rref{FL-ResSig} can return the (poly)type that the user specifies in a result +signature, if any. Note that \rref{FL-ResSig} brings variables in a result signature into scope +in the right-hand side. + +\drules[E]{$[[G |-E e : t]]$}{Expressions}{VarCon,Eq,App,Abs,Case,Let} + +Expressions are standard. + +\drules[P]{$[[G |-P p : s -| G']]$}{Patterns}{Var,Con,Syn,Sig} +\drules[Ps]{$[[G |-PS </ pi : si // i /> -| G']]$}{Pattern sequences}{PatSeq} + +Please refer to \citet{type-vars-in-pats} for the details on the pattern judgments. They +are not as bad as they appear. -\drules[P]{$[[G |-p p : s -| G']]$}{Patterns}{Var,Con,Syn,Sig} -\drules[Ps]{$[[G |-ps </ pi : si // i /> -| G']]$}{Pattern sequences}{PatSeq} +\bibliographystyle{plainnat} +\bibliography{haskell} \end{document} diff --git a/rules.ott b/rules.ott index a4a66559b153a6d45c58023c778ed599f36255c7..8a970fa330766098d66d0462c4abb34b02d6f83b 100644 --- a/rules.ott +++ b/rules.ott @@ -2,91 +2,120 @@ defns Ty :: '' ::= defn -G |- fundecl -| f ; s :: :: FunDecl :: 'FD_' {{ com Function declaration typing }} +G |-FD fundecl -| f ; s :: :: FunDecl :: 'FD_' {{ com Function declaration typing }} {{ tex [[G]] \ent{FD} [[fundecl]] [[-|]] [[f]]; [[s]] }} by -G |- funlhs : ts -| G'; f; forall cs. Q => u -G', <c : type>, Q |- e : u -------------------- :: Fun -G |- funlhs = e -| f ; forall cs. Q => ts -> u +G |-FE funeq -| f; s +-------------------- :: One +G |-FD funeq -| f; s -defn -G |- decls -| G' :: :: Decl :: 'D_' {{ com Declaration typing }} - {{ tex [[G]] \ent{D} [[decls]] [[-|]] [[G']] }} -by +G |-FE funeq0 -| f; s0 +G ||- s0 <= t +</ G |-FE funeqi -| f; si // i /> +</ G ||- si <= t // i /> +------------------------------- :: Many +G |-FD funeq0 ; funeqs -| f; t ---------------- :: Empty -G |- empty -| G +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% ALTERNATE RULES: +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -G, <a : type>, Q |- fundecl -| f; s -G, f : forall as. Q => s |- decls -| G' --------------------- :: Fun -G |- fundecl; decls -| G' +</ G |-FE funeqi -| f; si // i /> +</ G ||- si <= s' // i /> +------------------- :: XXPolyXX +G |-FD funeqs -| f; s' -s = forall cs. Q => ts -> u -ftv(s) (= dom(G) -G, <c : type>, Q |- fundecl -| f; s' -G ||- (forall cs. Q => s') <= s -G, f : s |- decls -| G' ------------------------- :: AFun -G |- f :: s; fundecl; decls -| G' +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% END ALTERNATE RULES +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +defn +G |-FE funeq -| f ; s :: :: FunEq :: 'FE_' {{ com Function equation typing }} + {{ tex [[G]] \ent{FE} [[funeq]] [[-|]] [[f]]; [[s]] }} +by + +G |-FL funlhs : ts -| G'; f; forall cs. Q => u +G', <c : type>, Q |- e : u +--------------------------------------------- :: Fun +G |-FE funlhs = e -| f; forall cs. Q => ts -> u defn -G |- funlhs : ts -| G' ; f ; s :: :: FunLhs :: 'FL_' {{ com Function left-hand side typing }} +G |-FL funlhs : ts -| G' ; f ; s :: :: FunLhs :: 'FL_' {{ com Function left-hand side typing }} {{ tex [[G]] \ent{FL} [[funlhs]] : [[ts]] [[-|]] ([[G']]; [[f]]; [[s]]) }} by -G |-ps </ pi : ti // i /> -| G' +G |-PS </ pi : ti // i /> -| G' -------------- :: Std -G |- f ps : ts -| G'; f; u +G |-FL f ps : ts -| G'; f; u -G |-ps </ pi : ti // i /> -| G' +G |-PS </ pi : ti // i /> -| G' cs = ftv(s) \ dom(G') G'' = G', <c : type>, <c ~ u> ------------------ :: ResSig -G |- f ps :: s : ts -| G''; f; s +G |-FL f ps :: s : ts -| G''; f; s + +defn +G |-D decls -| G' :: :: Decl :: 'D_' {{ com Declaration typing }} + {{ tex [[G]] \ent{D} [[decls]] [[-|]] [[G']] }} +by + +--------------- :: Empty +G |-D empty -| G + +G, <a : type>, Q |-FD fundecl -| f; s +G, f : forall as. Q => s |-D decls -| G' +-------------------- :: Fun +G |-D fundecl; decls -| G' + +s = forall cs. Q => ts -> u +ftv(s) (= dom(G) +G, <c : type>, Q |-FD fundecl -| f; s' +G ||- (forall cs. Q => s') <= s +G, f : s |-D decls -| G' +------------------------ :: AFun +G |-D f :: s; fundecl; decls -| G' defn -G |- e : t :: :: Ty :: 'E_' {{ com Expression typing }} +G |-E e : t :: :: Ty :: 'E_' {{ com Expression typing }} {{ tex [[G]] \ent{E} [[e]] : [[t]] }} by (v : forall as . Q => u) in G G ||- [< a |-> t >]Q -------------------------- :: VarCon -G |- v : [< a |-> t >] u +G |-E v : [< a |-> t >] u % (P : forall as. Qr => forall bs. Qp => u) in G % theta = [< a |-> t >][< b |-> t' >] % G ||- theta(Qr /\ Qp) % ------------------------ :: Syn -% G |- P : theta(u) +% G |-E P : theta(u) -G |- e : t1 +G |-E e : t1 G ||- t1 ~ t2 --------------------------------- :: Eq -G |- e : t2 +G |-E e : t2 -G |- e1 : t1 -> t2 -G |- e2 : t2 +G |-E e1 : t1 -> t2 +G |-E e2 : t2 --------------------------------- :: App -G |- e1 e2 : t2 +G |-E e1 e2 : t2 -G, (x : t1) |- e : t2 +G, (x : t1) |-E e : t2 --------------------------------- :: Abs -G |- \x.e : t1 -> t2 +G |-E \x.e : t1 -> t2 -</ G |-p pi : u -| Gi' // Gi' |- ei : t // i /> -G |- e : u +</ G |-P pi : u -| Gi' // Gi' |-E ei : t // i /> +G |-E e : u ftv(t) (= dom(G) --------------------------------- :: Case -G |- case e of {</ pi -> ei // i />} : t +G |-E case e of {</ pi -> ei // i />} : t -G |- decls -| G' -G' |- e : t +G |-D decls -| G' +G' |-E e : t ----------------------- :: Let -G |- let decls in e : t +G |-E let decls in e : t defn G ||- Q' :: :: ConImpl :: 'ConImpl_' {{ com Constraint implication }} @@ -94,16 +123,17 @@ by defn G ||- s <= s' :: :: Subtype :: 'SubType_' {{ com Subtyping relation}} - {{ tex [[G]] \eent{sub} [[s]] \le [[s']] }} + {{ tex [[G]] \Ent{sub} [[s]] \le [[s']] }} by defn -G |-p p : s -| G' :: :: Pat :: 'P_' {{ com Pattern typing }} +G |-P p : s -| G' :: :: Pat :: 'P_' {{ com Pattern typing }} + {{ tex [[G]] \Ent{P} [[p]] : [[s]] [[-|]] [[G']] }} by -------------------------------- :: Var -G |-p x : s -| G, (x : s) +G |-P x : s -| G, (x : s) (P : forall </ ai // i />. Qr => forall </ bj // j />. Qp => </ sk // k /> -> u) in G G' = G, (</ ai : type // i />), (</ ai ~ u'i // i />) @@ -114,9 +144,9 @@ G' ||- (u ~ t) /\ Qr G'' = G',(</ bj : type // j />),(</ cl : type // l />),(</ cl ~ b'l // l />),Qp </ G'' ||- t'i ~ ai // i /> </ G'' ||- t''j ~ bj // j /> -G'' |-ps </ pk : theta(sk) // k /> -| G''' +G'' |-PS </ pk : theta(sk) // k /> -| G''' -------------------------------- :: Syn -G |-p P </ @ t'i // i /> </ @ t''j // j /> </ pk // k /> : t -| G''' +G |-P P </ @ t'i // i /> </ @ t''j // j /> </ pk // k /> : t -| G''' @@ -125,27 +155,27 @@ G |-p P </ @ t'i // i /> </ @ t''j // j /> </ pk // k /> : t -| G''' </ aj // j /> # dom(G) G' = G,(</ aj : type // j />),(</ cl : type // l />),(</ cl ~ t''l // l />),(</ ui ~ ti // i />),Q </ G' ||- t'j ~ aj // j /> -G' |-ps </ pk : sk // k /> -| G'' +G' |-PS </ pk : sk // k /> -| G'' -------------------------------- :: Con -G |-p K </ @ t'j // j /> </ pk // k /> : T </ ti // i /> -| G'' +G |-P K </ @ t'j // j /> </ pk // k /> : T </ ti // i /> -| G'' cs = ftv(s') \ dom(G) G' = G, <c : type>, < c ~ t > G' ||- s <= s' -G' |-p p : s' -| G'' +G' |-P p : s' -| G'' -------------------------------- :: Sig -G |-p (p :: s') : s -| G'' +G |-P (p :: s') : s -| G'' defn -G |-ps </ pi : si // i /> -| G' :: :: PatSeq :: 'Ps_' {{ com Pattern squence typing }} +G |-PS </ pi : si // i /> -| G' :: :: PatSeq :: 'Ps_' {{ com Pattern squence typing }} by % help, how do I typeset that with G_{i-1} -</ Giminus_one |-p pi : si -| Gi // i IN 1 .. n /> +</ Giminus_one |-P pi : si -| Gi // i IN 1 .. n /> ----------------------------- :: PatSeq -G0 |-ps </ pi : si // i IN 1 .. n /> -| Gn +G0 |-PS </ pi : si // i IN 1 .. n /> -| Gn % defn