Commit 6e93da5e authored by chevalier@alum.wellesley.edu's avatar chevalier@alum.wellesley.edu
Browse files

Revive External Core parser

Huzzah, the External Core parser will now parse External Core generated by
the HEAD.

Most notably, I rewrote the parser in Parsec, but the old Happy version
remains in the repository.

I checked all the nofib benchmarks and most of the ghc-prim, base and integer
libraries to make sure they parsed; one known bug:
  - Strings like "\x0aE", in which a hex escape code is followed by a
    letter that could be a hex digit, aren't handled properly. I'm 
    investigating whether this is a bug in Parsec or expected behavior.

The checker and interpreter still don't work, but should compile.

Please mess around with the parser, report bugs, improve my code, etc.,
if you're so inclined.
parent 5d1ba397
......@@ -81,7 +81,8 @@ checkTdef0 (tcenv,tsenv) tdef = ch tdef
tcenv' <- extendM tcenv (c,k)
return (tcenv',tsenv)
where k = foldr Karrow Klifted (map snd tbs)
ch (Newtype (m,c) tbs rhs) =
-- TODO
ch (Newtype (m,c) tbs axiom rhs) =
do mn <- getMname
requireModulesEq m mn "newtype declaration" tdef False
tcenv' <- extendM tcenv (c,k)
......@@ -112,12 +113,13 @@ checkTdef tcenv cenv = ch
(foldr tArrow
(foldl Tapp (Tcon (Just mn,c))
(map (Tvar . fst) utbs)) ts) tbs
ch (tdef@(Newtype c tbs (Just t))) =
-- TODO
ch (tdef@(Newtype c tbs axiom (Just t))) =
do tvenv <- foldM extendM eempty tbs
k <- checkTy (tcenv,tvenv) t
require (k==Klifted) ("bad kind:\n" ++ show tdef)
require (k `eqKind` Klifted) ("bad kind:\n" ++ show tdef)
return cenv
ch (tdef@(Newtype c tbs Nothing)) =
ch (tdef@(Newtype c tbs axiom Nothing)) =
{- should only occur for recursive Newtypes -}
return cenv
......@@ -147,7 +149,7 @@ checkVdefg top_level (tcenv,tsenv,tvenv,cenv) (e_venv,l_venv) vdefg =
do mn <- getMname
requireModulesEq m mn "value definition" vdef True
k <- checkTy (tcenv,tvenv) t
require (k==Klifted) ("unlifted kind in:\n" ++ show vdef)
require (k `eqKind` Klifted) ("unlifted kind in:\n" ++ show vdef)
t' <- checkExp env' e
requireM (equalTy tsenv t t')
("declared type doesn't match expression type in:\n" ++ show vdef ++ "\n" ++
......@@ -160,8 +162,8 @@ checkVdefg top_level (tcenv,tsenv,tvenv,cenv) (e_venv,l_venv) vdefg =
do mn <- getMname
requireModulesEq m mn "value definition" vdef True
k <- checkTy (tcenv,tvenv) t
require (k /= Kopen) ("open kind in:\n" ++ show vdef)
require ((not top_level) || (k /= Kunlifted)) ("top-level unlifted kind in:\n" ++ show vdef)
require (not (k `eqKind` Kopen)) ("open kind in:\n" ++ show vdef)
require ((not top_level) || (not (k `eqKind` Kunlifted))) ("top-level unlifted kind in:\n" ++ show vdef)
t' <- checkExp (tcenv,tsenv,tvenv,cenv,e_venv,l_venv) e
requireM (equalTy tsenv t t')
("declared type doesn't match expression type in:\n" ++ show vdef ++ "\n" ++
......@@ -199,7 +201,7 @@ checkExp (tcenv,tsenv,tvenv,cenv,e_venv,l_venv) = ch
k' <- checkTy (tcenv,tvenv) t
case t' of
Tforall (tv,k) t0 ->
do require (k' <= k)
do require (k' `subKindOf` k)
("kind doesn't match at type application in:\n" ++ show e0 ++ "\n" ++
"operator kind: " ++ show k ++ "\n" ++
"operand kind: " ++ show k')
......@@ -301,7 +303,8 @@ checkAlt (env@(tcenv,tsenv,tvenv,cenv,e_venv,l_venv)) t0 = ch
{- check existentials -}
let (etvs,eks) = unzip etbs
let (etvs',eks') = unzip etbs'
require (eks == eks')
require (all (uncurry eqKind)
(zip eks eks'))
("existential kinds don't match in:\n" ++ show a0 ++ "\n" ++
"kinds declared in data constructor: " ++ show eks ++
"kinds declared in case alternative: " ++ show eks')
......@@ -350,7 +353,7 @@ checkTy (tcenv,tvenv) = ch
k2 <- ch t2
case k1 of
Karrow k11 k12 ->
do require (k2 <= k11)
do require (k2 `subKindOf` k11)
("kinds don't match in type application: " ++ show t ++ "\n" ++
"operator kind: " ++ show k11 ++ "\n" ++
"operand kind: " ++ show k2)
......@@ -408,21 +411,22 @@ qlookupM selector external_env local_env (m,k) =
checkLit :: Lit -> CheckResult Ty
checkLit lit =
checkLit (Literal lit t) =
case lit of
Lint _ t ->
-- TODO: restore commented-out stuff.
Lint _ ->
do {- require (elem t [tIntzh, {- tInt32zh,tInt64zh, -} tWordzh, {- tWord32zh,tWord64zh, -} tAddrzh, tCharzh])
("invalid int literal: " ++ show lit ++ "\n" ++ "type: " ++ show t) -}
return t
Lrational _ t ->
Lrational _ ->
do {- require (elem t [tFloatzh,tDoublezh])
("invalid rational literal: " ++ show lit ++ "\n" ++ "type: " ++ show t) -}
return t
Lchar _ t ->
Lchar _ ->
do {- require (t == tCharzh)
("invalid char literal: " ++ show lit ++ "\n" ++ "type: " ++ show t) -}
return t
Lstring _ t ->
Lstring _ ->
do {- require (t == tAddrzh)
("invalid string literal: " ++ show lit ++ "\n" ++ "type: " ++ show t) -}
return t
......
......@@ -7,11 +7,14 @@ data Module
data Tdef
= Data (Qual Tcon) [Tbind] [Cdef]
| Newtype (Qual Tcon) [Tbind] (Maybe Ty)
| Newtype (Qual Tcon) [Tbind] Axiom (Maybe Ty)
data Cdef
= Constr (Qual Dcon) [Tbind] [Ty]
-- Newtype coercion
type Axiom = (Qual Tcon, Kind)
data Vdefg
= Rec [Vdef]
| Nonrec Vdef
......@@ -22,15 +25,11 @@ data Exp
= Var (Qual Var)
| Dcon (Qual Dcon)
| Lit Lit
-- Why were type apps and value apps distinguished,
-- but not type lambdas and value lambdas?
| App Exp Exp
| Appt Exp Ty
| Lam Bind Exp
| Let Vdefg Exp
-- Ty is new
| Case Exp Vbind Ty [Alt] {- non-empty list -}
-- Renamed to Cast; switched order
| Cast Exp Ty
| Note String Exp
| External String Ty
......@@ -58,24 +57,25 @@ data Kind
| Kunlifted
| Kopen
| Karrow Kind Kind
deriving (Eq)
data Lit
= Lint Integer Ty
| Lrational Rational Ty
| Lchar Char Ty
| Lstring String Ty
deriving (Eq) -- with nearlyEqualTy
-- new: Pnames
-- this requires at least one module name,
-- and possibly other hierarchical names
-- an alternative would be to flatten the
| Keq Ty Ty
data Lit = Literal CoreLit Ty
deriving Eq -- with nearlyEqualTy
data CoreLit = Lint Integer
| Lrational Rational
| Lchar Char
| Lstring String
deriving Eq
-- Right now we represent module names as triples:
-- (package name, hierarchical names, leaf name)
-- An alternative to this would be to flatten the
-- module namespace, either when printing out
-- Core or (probably preferably) in a
-- preprocessor.
-- Maybe because the empty module name is a module name (represented as
-- Nothing.)
-- The empty module name (as in an unqualified name)
-- is represented as Nothing.
type Mname = Maybe AnMname
type AnMname = (Pname, [Id], Id)
......@@ -95,6 +95,15 @@ unqual = (,) Nothing
type Id = String
eqKind :: Kind -> Kind -> Bool
eqKind Klifted Klifted = True
eqKind Kunlifted Kunlifted = True
eqKind Kopen Kopen = True
eqKind (Karrow k1 k2) (Karrow l1 l2) = k1 `eqKind` l1
&& k2 `eqKind` l2
eqKind _ _ = False -- no Keq kind is ever equal to any other...
-- maybe ok for now?
--- tjc: I haven't looked at the rest of this file. ---
{- Doesn't expand out fully applied newtype synonyms
......@@ -109,16 +118,14 @@ nearlyEqualTy t1 t2 = eqTy [] [] t1 t2
eqTy e1 e2 (Tapp t1a t1b) (Tapp t2a t2b) =
eqTy e1 e2 t1a t2a && eqTy e1 e2 t1b t2b
eqTy e1 e2 (Tforall (tv1,tk1) t1) (Tforall (tv2,tk2) t2) =
tk1 == tk2 && eqTy (tv1:e1) (tv2:e2) t1 t2
tk1 `eqKind` tk2 && eqTy (tv1:e1) (tv2:e2) t1 t2
eqTy _ _ _ _ = False
instance Eq Ty where (==) = nearlyEqualTy
subKindOf :: Kind -> Kind -> Bool
_ `subKindOf` Kopen = True
k1 `subKindOf` k2 = k1 == k2 -- doesn't worry about higher kinds
instance Ord Kind where (<=) = subKindOf
k1 `subKindOf` k2 = k1 `eqKind` k2 -- doesn't worry about higher kinds
baseKind :: Kind -> Bool
baseKind (Karrow _ _ ) = False
......
......@@ -6,24 +6,44 @@
import Monad
import System.Environment
import System.Cmd
import System.Exit
import Core
import Printer
import Parser
import Lex
import ParseGlue
import ParsecParser
import Env
import Prims
import Check
import Prep
import Interp
process (senv,modules) f =
-- You may need to change this.
baseDir = "../../libraries/"
--------
-- Code for checking that the external and GHC printers print the same results
testFlag = "-t"
validateResults :: FilePath -> FilePath -> IO ()
validateResults origFile genFile = do
resultCode <- system $ "diff -u " ++ origFile ++ " " ++ genFile
putStrLn $ case resultCode of
ExitSuccess -> "Parse validated for " ++ origFile
ExitFailure 1 -> "Parse failed to validate for " ++ origFile
_ -> "Error diffing files: " ++ origFile ++ " " ++ genFile
------------------------------------------------------------------------------
process :: Bool -> (Check.Menv,[Module]) -> String -> IO (Check.Menv,[Module])
process doTest (senv,modules) f =
do putStrLn ("Processing " ++ f)
s <- readFile f
case parse s 1 of
OkP m -> do putStrLn "Parse succeeded"
{- writeFile (f ++ ".parsed") (show m) -}
resultOrErr <- parseCore f
case resultOrErr of
Right m -> do
putStrLn "Parse succeeded"
let outF = f ++ ".parsed"
writeFile outF (show m)
when doTest $ (validateResults f outF)
case checkModule senv m of
OkC senv' ->
do putStrLn "Check succeeded"
......@@ -39,60 +59,81 @@ process (senv,modules) f =
FailC s ->
do putStrLn ("Check failed: " ++ s)
error "quit"
FailP s -> do putStrLn ("Parse failed: " ++ s)
error "quit"
Left err -> do putStrLn ("Parse failed: " ++ show err)
error "quit"
main = do fname <- getSingleArg
(_,modules) <- foldM process (initialEnv,[]) [fname] -- flist
main = do args <- getArgs
let (doTest, fname) =
case args of
(f:fn:_) | f == testFlag -> (True,fn)
(fn:_) -> (False,fn)
_ -> error $
"usage: ./Driver [filename]"
mapM_ (process doTest (initialEnv,[])) (libs ++ [fname])
(_,modules) <- foldM (process doTest) (initialEnv,[]) (libs ++ [fname])
let result = evalProgram modules
putStrLn ("Result = " ++ show result)
putStrLn "All done"
-- TODO
-- see what breaks
where flist = ["Main.hcr"]
getSingleArg = getArgs >>= (\ a ->
case a of
(f:_) -> return f
_ -> error $ "usage: ./Driver [filename]")
{-
["PrelBase.hcr",
"PrelMaybe.hcr",
"PrelTup.hcr",
"PrelList.hcr",
"PrelShow.hcr",
"PrelEnum.hcr",
"PrelNum.hcr",
"PrelST.hcr",
"PrelArr.hcr",
"PrelDynamic.hcr",
"PrelReal.hcr",
"PrelFloat.hcr",
"PrelRead.hcr",
"PrelIOBase.hcr",
"PrelException.hcr",
"PrelErr.hcr",
"PrelConc.hcr",
"PrelPtr.hcr",
"PrelByteArr.hcr",
"PrelPack.hcr",
"PrelBits.hcr",
"PrelWord.hcr",
"PrelInt.hcr",
"PrelCTypes.hcr",
"PrelStable.hcr",
"PrelCTypesISO.hcr",
"Monad.hcr",
"PrelStorable.hcr",
"PrelMarshalAlloc.hcr",
"PrelMarshalUtils.hcr",
"PrelMarshalArray.hcr",
"PrelCString.hcr",
"PrelMarshalError.hcr",
"PrelCError.hcr",
"PrelPosix.hcr",
"PrelHandle.hcr",
"PrelIO.hcr",
"Prelude.hcr",
"Main.hcr" ]
-}
\ No newline at end of file
where libs = map (baseDir ++) ["./ghc-prim/GHC/Generics.hcr",
"./ghc-prim/GHC/PrimopWrappers.hcr",
"./ghc-prim/GHC/Bool.hcr",
"./ghc-prim/GHC/IntWord64.hcr",
"./base/GHC/Base.hcr",
"./base/GHC/List.hcr",
"./base/GHC/Enum.hcr",
"./base/GHC/Show.hcr",
"./base/GHC/Num.hcr",
"./base/GHC/ST.hcr",
"./base/GHC/Real.hcr",
"./base/GHC/STRef.hcr",
"./base/GHC/Arr.hcr",
"./base/GHC/Float.hcr",
"./base/GHC/Read.hcr",
"./base/GHC/Ptr.hcr",
"./base/GHC/Word.hcr",
"./base/GHC/Int.hcr",
"./base/GHC/Unicode.hcr",
"./base/GHC/IOBase.hcr",
"./base/GHC/Err.hcr",
"./base/GHC/Exception.hcr",
"./base/GHC/Stable.hcr",
"./base/GHC/Storable.hcr",
"./base/GHC/Pack.hcr",
"./base/GHC/Weak.hcr",
"./base/GHC/Handle.hcr",
"./base/GHC/IO.hcr",
"./base/GHC/Dotnet.hcr",
"./base/GHC/Environment.hcr",
"./base/GHC/Exts.hcr",
"./base/GHC/PArr.hcr",
"./base/GHC/TopHandler.hcr",
"./base/GHC/Desugar.hcr",
"./base/Data/Ord.hcr",
"./base/Data/Maybe.hcr",
"./base/Data/Bits.hcr",
"./base/Data/STRef/Lazy.hcr",
"./base/Data/Generics/Basics.hcr",
"./base/Data/Generics/Aliases.hcr",
"./base/Data/Generics/Twins.hcr",
"./base/Data/Generics/Instances.hcr",
"./base/Data/Generics/Text.hcr",
"./base/Data/Generics/Schemes.hcr",
"./base/Data/Tuple.hcr",
"./base/Data/String.hcr",
"./base/Data/Either.hcr",
"./base/Data/Char.hcr",
"./base/Data/List.hcr",
"./base/Data/HashTable.hcr",
"./base/Data/Typeable.hcr",
"./base/Data/Dynamic.hcr",
"./base/Data/Function.hcr",
"./base/Data/IORef.hcr",
"./base/Data/Fixed.hcr",
"./base/Data/Monoid.hcr",
"./base/Data/Ratio.hcr",
"./base/Data/STRef.hcr",
"./base/Data/Version.hcr",
"./base/Data/Complex.hcr",
"./base/Data/Unique.hcr",
"./base/Data/Foldable.hcr",
"./base/Data/Traversable.hcr"]
\ No newline at end of file
{-# OPTIONS -XPatternGuards #-}
{-
Interprets the subset of well-typed Core programs for which
(a) All constructor and primop applications are saturated
......@@ -400,16 +401,16 @@ evalExternal :: String -> [Value] -> Eval Value
evalExternal s vs = error "evalExternal undefined for now" -- etc.,etc.
evalLit :: Lit -> PrimValue
evalLit l =
evalLit (Literal l t) =
case l of
Lint i (Tcon(_,"Intzh")) -> PIntzh i
Lint i (Tcon(_,"Wordzh")) -> PWordzh i
Lint i (Tcon(_,"Addrzh")) -> PAddrzh i
Lint i (Tcon(_,"Charzh")) -> PCharzh i
Lrational r (Tcon(_,"Floatzh")) -> PFloatzh r
Lrational r (Tcon(_,"Doublezh")) -> PDoublezh r
Lchar c (Tcon(_,"Charzh")) -> PCharzh (toEnum (ord c))
Lstring s (Tcon(_,"Addrzh")) -> PAddrzh 0 -- should really be address of non-heap copy of C-format string s
Lint i | (Tcon(_,"Intzh")) <- t -> PIntzh i
Lint i | (Tcon(_,"Wordzh")) <- t -> PWordzh i
Lint i | (Tcon(_,"Addrzh")) <- t -> PAddrzh i
Lint i | (Tcon(_,"Charzh"))<- t -> PCharzh i
Lrational r | (Tcon(_,"Floatzh")) <- t -> PFloatzh r
Lrational r | (Tcon(_,"Doublezh")) <- t -> PDoublezh r
Lchar c | (Tcon(_,"Charzh")) <- t -> PCharzh (toEnum (ord c))
Lstring s | (Tcon(_,"Addrzh")) <- t -> PAddrzh 0 -- should really be address of non-heap copy of C-format string s
{- Utilities -}
......
all: Check.hs Core.hs Driver.hs Env.hs Interp.hs Lex.hs ParseGlue.hs Parser.hs Prep.hs Prims.hs Printer.hs
all: Check.hs Core.hs Driver.hs Env.hs Interp.hs ParsecParser.hs ParseGlue.hs Prep.hs Prims.hs Printer.hs
ghc --make -fglasgow-exts -o Driver Driver.hs
Parser.hs: Parser.y
happy -o Parser.hs Parser.y
\ No newline at end of file
#Parser.hs: Parser.y
# happy -ad -ihappy.debug -o Parser.hs Parser.y
\ No newline at end of file
......@@ -64,7 +64,14 @@ data Token =
-- ugh
splitModuleName mn =
let decoded = zDecodeString mn
parts = filter (notElem '.') $ groupBy
-- Triple ugh.
-- We re-encode the individual parts so that:
-- main:Foo_Bar.Quux.baz
-- prints as:
-- main:FoozuBarziQuux.baz
-- and not:
-- main:Foo_BarziQuux.baz
parts = map zEncodeString $ filter (notElem '.') $ groupBy
(\ c1 c2 -> c1 /= '.' && c2 /= '.')
decoded in
(take (length parts - 1) parts, last parts)
......
module ParsecParser where
import Core
import ParseGlue
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import qualified Text.ParserCombinators.Parsec.Token as P
import Text.ParserCombinators.Parsec.Language
import Data.Ratio
parseCore :: FilePath -> IO (Either ParseError Module)
parseCore = parseFromFile coreModule
coreModule :: Parser Module
coreModule = do
whiteSpace
reserved "module"
mName <- coreModuleName
whiteSpace
tdefs <- option [] coreTdefs
vdefGroups <- coreVdefGroups
eof
return $ Module mName tdefs vdefGroups
coreModuleName :: Parser AnMname
coreModuleName = do
pkgName <- corePackageName
char ':'
(modHierarchy,baseName) <- coreHierModuleNames
return (pkgName, modHierarchy, baseName)
corePackageName :: Parser Pname
corePackageName = identifier
coreHierModuleNames :: Parser ([Id], Id)
coreHierModuleNames = do
parentName <- upperName
return $ splitModuleName parentName
upperName :: Parser Id
upperName = do
firstChar <- upper
rest <- many (identLetter extCoreDef)
return $ firstChar:rest
coreTdefs :: Parser [Tdef]
coreTdefs = many coreTdef
coreTdef :: Parser Tdef
coreTdef = withSemi (try (try coreDataDecl <|> try coreNewtypeDecl))
withSemi p = try p `withTerminator` ";"
withTerminator p term = do
x <- try p
try $ symbol term
return x
coreDataDecl :: Parser Tdef
coreDataDecl = do
reserved "data"
tyCon <- coreQualifiedCon
whiteSpace -- important
tBinds <- coreTbinds
whiteSpace
symbol "="
whiteSpace
cDefs <- braces coreCdefs
return $ Data tyCon tBinds cDefs
coreNewtypeDecl :: Parser Tdef
coreNewtypeDecl = do
reserved "newtype"
tyCon <- coreQualifiedCon
whiteSpace
tBinds <- coreTbinds
symbol "^"
axiom <- coreAxiom
tyRep <- try coreTRep
return $ Newtype tyCon tBinds axiom tyRep
coreQualifiedCon :: Parser (Mname, Id)
coreQualifiedCon = coreQualifiedGen upperName
coreQualifiedName = coreQualifiedGen identifier
coreQualifiedGen p = do
maybeMname <- coreMaybeMname
theId <- p
return (maybeMname, theId)
coreMaybeMname = optionMaybe coreMname
coreRequiredQualifiedName = do
mname <- coreMname
theId <- identifier
return (Just mname, theId)
coreMname = do
-- Notice the '^' goes here:
-- it's part of a variable *occurrence*, not a module name.
char '^'
nm <- try coreModuleName
symbol "."
return nm
coreAxiom :: Parser Axiom
coreAxiom = parens (do
coercionName <- coreQualifiedCon
whiteSpace
symbol "::"
whiteSpace
coercionKind <- coreKind
return (coercionName, coercionKind))
coreTbinds :: Parser [Tbind]
coreTbinds = many coreTbind
coreTbindsGen :: CharParser () String -> Parser [Tbind]
-- The "try" here is important. Otherwise, when parsing:
-- "Node (^base:DataziTuple.Z3T)" (a cdef), we commit to
-- parsing (^base...) as a tbind rather than a type.
coreTbindsGen separator = many (try $ coreTbindGen separator)
coreTbind :: Parser Tbind
coreTbind = coreTbindGen whiteSpace
coreTbindGen :: CharParser () a -> Parser Tbind
coreTbindGen sep = (parens (do
sep
tyVar <- identifier
kind <- symbol "::" >> coreKind
return (tyVar, kind))) <|>
(sep >> identifier >>= (return . (\ tv -> (tv,Klifted))))
coreCdefs :: Parser [Cdef]
coreCdefs = sepBy1 coreCdef (symbol ";")
coreCdef :: Parser Cdef
coreCdef = do
dataConName <- coreQualifiedCon
whiteSpace -- important!
tBinds <- try $ coreTbindsGen (symbol "@")
-- This should be equivalent to (many coreAty)
-- But it isn't. WHY??
tys <- sepBy coreAty whiteSpace
return $ Constr dataConName tBinds tys
coreTRep :: Parser (Maybe Ty)
-- note that the "=" is inside here since if there's
-- no rhs for the newtype, there's no "="
coreTRep = optionMaybe (do
symbol "="
try coreType)
coreType :: Parser Ty
coreType = coreForallTy <|> (do
hd <- coreBty